pythonpython-requestsmusixmatch

Getting ValueError: unknown url type: ' '


I have this code below that iterate through some tracks. And then for each track I want to use the musixmatch api to get and print the lyrics of the track based on the artist name and track name.

code that iterete trough some tracks and print the lyrics:

for i, v in tracks.items():
    artist = tracks[i]['artist'].replace(" ", "+")
    title = tracks[i]['title'].replace(" ", "+")

    print(tracks)

    print(song_lyric(title, artist))

The print(tracks) returns in this format:

{12: {'trackID': 12, 'title': 'Achtung Baby', 'number': '1', 'artist': 'U2', 'album': 'Achtung Baby', 'albumID': 2, 'duration': '291'}

When the code exuted the lyrics for the firsts tracks are printed, but then it appears an error:

Traceback (most recent call last):
  File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 239, in <module>
    print(song_lyric(title, artist))
  File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 72, in song_lyric
    lyrics_tracking(tracking_url)
  File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 79, in lyrics_tracking
    request = urllib.request.Request(querystring)
  File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 329, in __init__
    self.full_url = url
  File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 355, in full_url
    self._parse()
  File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 384, in _parse
    raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''

Do you know why this error is appearing?

The methods to get the lyrics from musixmatch are public available:

def song_lyric(song_name, artist_name):
    while True:
        querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
            song_name) + "&q_artist=" + urllib.parse.quote(
            artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
        # matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        while True:
            try:
                response = urllib.request.urlopen(request,
                                                  timeout=4)  # timeout set to 4 seconds; automatically retries if times out
                raw = response.read()
            except socket.timeout:
                print("Timeout raised and caught")
                continue
            break
        json_obj = json.loads(raw.decode('utf-8'))
        body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
        copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
        tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
        #print(tracking_url)
        lyrics_tracking(tracking_url)
        return (body + "\n\n" + copyright)


def lyrics_tracking(tracking_url):
    while True:
        querystring = tracking_url
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        try:
            response = urllib.request.urlopen(request,
                                              timeout=4)  # timeout set to 4 seconds; automatically retries if times out
            raw = response.read()
        except socket.timeout:
            print("Timeout raised and caught")
            continue
        break
        print(raw)

Full working exemple that reproduces the error:

import requests
import json
import urllib.request, urllib.error, urllib.parse
import socket

apikey_musixmatch = '0b4a363bbd71974c2634837d5b5d1d9a' #generated for the example
apiurl_musixmatch = 'http://api.musixmatch.com/ws/1.1/'

api_key = "b088cbedecd40b35dd89e90f55227ac2" #generated for the example
def song_lyric(song_name, artist_name):
    while True:
        querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
            song_name) + "&q_artist=" + urllib.parse.quote(
            artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
        # matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        while True:
            try:
                response = urllib.request.urlopen(request,
                                                  timeout=4)  # timeout set to 4 seconds; automatically retries if times out
                raw = response.read()
            except socket.timeout:
                print("Timeout raised and caught")
                continue
            break
        json_obj = json.loads(raw.decode('utf-8'))
        body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
        copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
        tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
        print("Tracking_url====================" +tracking_url + "==================================")
        lyrics_tracking(tracking_url)
        return (body + "\n\n" + copyright)


def lyrics_tracking(tracking_url):
    while True:
        querystring = tracking_url
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        try:
            response = urllib.request.urlopen(request,
                                              timeout=4)  # timeout set to 4 seconds; automatically retries if times out
            raw = response.read()
        except socket.timeout:
            print("Timeout raised and caught")
            continue
        break
        print(raw)

ID = 0

#get top artists from country
artists = {}
for i in range(2, 3):
    artists_response = requests.get(
        'http://ws.audioscrobbler.com/2.0/?method=geo.gettopartists&country=spain&format=json&page=' + str(i) + '&api_key=' + api_key)
    artists_data = artists_response.json()
    for artist in artists_data["topartists"]["artist"]:


        name = artist["name"]
        url = artist["url"]
        if ID > 1: continue
        artists[ID] = {}
        artists[ID]['ID'] = ID
        artists[ID]['name'] = name
        ID += 1

    for i, v in artists.items():
        chosen = artists[i]['name'].replace(" ", "+")
        artist_response = requests.get(
            'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&format=json&artist=' + chosen + '&api_key=' + api_key)
        artist_data = artist_response.json()


# get top albums of the artists
albums = {}
for i, v in artists.items():
    chosen = artists[i]['name'].replace(" ", "+")
    topalbums_response = requests.get(
        'http://ws.audioscrobbler.com/2.0/?method=artist.gettopalbums&format=json&artist=' + chosen + '&api_key=' + api_key + '&limit=5')
    albums_data = topalbums_response.json()

    for album in albums_data['topalbums']['album']:
        name = album["name"]
        url = album["url"]

        albums[ID] = {}
        albums[ID]['ID'] = ID
        albums[ID]['artist'] = artists[i]['name']
        albums[ID]['artistID'] = artists[i]['ID']
        albums[ID]['name'] = name

        ID += 1

# Get tracks of the album

tracks = {}
for i, v in albums.items():
    artist = albums[i]['artist'].replace(" ", "+")
    name = albums[i]['name'].replace(" ", "+")
    album_response_data = requests.get(
        'http://ws.audioscrobbler.com/2.0/?method=album.getinfo&format=json&api_key=' + api_key + '&artist=' + artist + '&album=' + name)
    album_response = album_response_data.json()



    for album in album_response['album']['tracks']['track']:
        title = album['name']
        tracks[ID] = {}
        tracks[ID]['trackID'] = ID
        tracks[ID]['title'] = title
        tracks[ID]['artist'] = albums[i]['artist']
        tracks[ID]['album'] = albums[i]['name']
        tracks[ID]['albumID'] = albums[i]['ID']

        ID += 1

for i, v in tracks.items():
    artist = tracks[i]['artist'].replace(" ", "+")
    title = tracks[i]['title'].replace(" ", "+")

    # print the lyric of each track
    print(song_lyric(title, artist))

Solution

  • It seems like url is not correct. It happens here: tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"] If you have ability to run that API locally and see what is returned into tracking_url, you can find out what is still wrong with it.

    UPDATE:

    I reproduced it, so the urllib.request cannot process empty string URL: "", that is why you need to check if the tracking_url != "" and only if its not empty string or None you need to request for the song.