I have this code below that iterate through some tracks. And then for each track I want to use the musixmatch api to get and print the lyrics of the track based on the artist name and track name.
code that iterete trough some tracks and print the lyrics:
for i, v in tracks.items():
artist = tracks[i]['artist'].replace(" ", "+")
title = tracks[i]['title'].replace(" ", "+")
print(tracks)
print(song_lyric(title, artist))
The print(tracks) returns in this format:
{12: {'trackID': 12, 'title': 'Achtung Baby', 'number': '1', 'artist': 'U2', 'album': 'Achtung Baby', 'albumID': 2, 'duration': '291'}
When the code exuted the lyrics for the firsts tracks are printed, but then it appears an error:
Traceback (most recent call last):
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 239, in <module>
print(song_lyric(title, artist))
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 72, in song_lyric
lyrics_tracking(tracking_url)
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 79, in lyrics_tracking
request = urllib.request.Request(querystring)
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 329, in __init__
self.full_url = url
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 355, in full_url
self._parse()
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 384, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''
Do you know why this error is appearing?
The methods to get the lyrics from musixmatch are public available:
def song_lyric(song_name, artist_name):
while True:
querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
song_name) + "&q_artist=" + urllib.parse.quote(
artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
# matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
while True:
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
json_obj = json.loads(raw.decode('utf-8'))
body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
#print(tracking_url)
lyrics_tracking(tracking_url)
return (body + "\n\n" + copyright)
def lyrics_tracking(tracking_url):
while True:
querystring = tracking_url
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
print(raw)
Full working exemple that reproduces the error:
import requests
import json
import urllib.request, urllib.error, urllib.parse
import socket
apikey_musixmatch = '0b4a363bbd71974c2634837d5b5d1d9a' #generated for the example
apiurl_musixmatch = 'http://api.musixmatch.com/ws/1.1/'
api_key = "b088cbedecd40b35dd89e90f55227ac2" #generated for the example
def song_lyric(song_name, artist_name):
while True:
querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
song_name) + "&q_artist=" + urllib.parse.quote(
artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
# matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
while True:
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
json_obj = json.loads(raw.decode('utf-8'))
body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
print("Tracking_url====================" +tracking_url + "==================================")
lyrics_tracking(tracking_url)
return (body + "\n\n" + copyright)
def lyrics_tracking(tracking_url):
while True:
querystring = tracking_url
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
print(raw)
ID = 0
#get top artists from country
artists = {}
for i in range(2, 3):
artists_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=geo.gettopartists&country=spain&format=json&page=' + str(i) + '&api_key=' + api_key)
artists_data = artists_response.json()
for artist in artists_data["topartists"]["artist"]:
name = artist["name"]
url = artist["url"]
if ID > 1: continue
artists[ID] = {}
artists[ID]['ID'] = ID
artists[ID]['name'] = name
ID += 1
for i, v in artists.items():
chosen = artists[i]['name'].replace(" ", "+")
artist_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&format=json&artist=' + chosen + '&api_key=' + api_key)
artist_data = artist_response.json()
# get top albums of the artists
albums = {}
for i, v in artists.items():
chosen = artists[i]['name'].replace(" ", "+")
topalbums_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=artist.gettopalbums&format=json&artist=' + chosen + '&api_key=' + api_key + '&limit=5')
albums_data = topalbums_response.json()
for album in albums_data['topalbums']['album']:
name = album["name"]
url = album["url"]
albums[ID] = {}
albums[ID]['ID'] = ID
albums[ID]['artist'] = artists[i]['name']
albums[ID]['artistID'] = artists[i]['ID']
albums[ID]['name'] = name
ID += 1
# Get tracks of the album
tracks = {}
for i, v in albums.items():
artist = albums[i]['artist'].replace(" ", "+")
name = albums[i]['name'].replace(" ", "+")
album_response_data = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=album.getinfo&format=json&api_key=' + api_key + '&artist=' + artist + '&album=' + name)
album_response = album_response_data.json()
for album in album_response['album']['tracks']['track']:
title = album['name']
tracks[ID] = {}
tracks[ID]['trackID'] = ID
tracks[ID]['title'] = title
tracks[ID]['artist'] = albums[i]['artist']
tracks[ID]['album'] = albums[i]['name']
tracks[ID]['albumID'] = albums[i]['ID']
ID += 1
for i, v in tracks.items():
artist = tracks[i]['artist'].replace(" ", "+")
title = tracks[i]['title'].replace(" ", "+")
# print the lyric of each track
print(song_lyric(title, artist))
It seems like url is not correct. It happens here:
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
If you have ability to run that API locally and see what is returned into tracking_url
, you can find out what is still wrong with it.
I reproduced it, so the urllib.request
cannot process empty string URL: ""
, that is why you need to check if the tracking_url != ""
and only if its not empty string or None you need to request for the song.