python-3.x python-requests

I am trying to create a driver scraper for Lenovo drivers, but I am stumped at this one spot.


import csv
import time
from urllib.parse import urlencode

import requests
from tabulate import tabulate

# Query Lenovo's support downloads API for one product and build table rows
# from the JSON response.

# URL slugs identifying the product; both are interpolated into the referer
# header and the productId query parameter below.
product_series = "thinkpad-x-series-laptops"
product_code = "thinkpad-x1-carbon-11th-gen-type-21hm-21hn"
# Endpoint base; ends with "?" so the url-encoded payload can be appended directly.
lenovo_api = "https://pcsupport.lenovo.com/us/en/api/v4/downloads/drivers?"

# Request headers sent with the API call.
# NOTE(review): presumably these mimic the browser's XHR call from the
# driver-list page — confirm which of them the API actually requires.
headers = {
    "referer": f"https://pcsupport.lenovo.com/us/en/products/laptops-and-netbooks/{product_series}/{product_code}/downloads/driver-list/",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    "x-requested-with": "XMLHttpRequest",
}
# Query-string parameters; urlencode() turns this dict into "productId=...".
payload = {
    "productId": f"laptops-and-netbooks/{product_series}/{product_code}",
}

# Perform the GET request and parse the response body as JSON.
updates_data = requests.get(f"{lenovo_api}{urlencode(payload)}", headers=headers).json()

# Column titles, listed in the same order as the fields of each row below.
row_headers = [
    "Name", "Release Date", "Version",
    "Update status", "File Type", "URL",
    "Description",
]

# Build one row per entry of updates_data["DownloadData"].
# NOTE(review): as posted, this lookup fails with KeyError: 'DownloadData' —
# the parsed response has no such top-level key, so the key names used here
# need to be checked against the actual response structure.
rows = [
    [
        driver['NAME'],
        driver['DATE'],
        driver['VERSION'],
        driver['PRIORITY'],
        driver['TypeString'],
        driver['URL'],
        driver['SUMARY'],
    ] for driver in updates_data["DownloadData"]
]

I do not understand where the "DownloadData" part comes from in this line:

] for driver in updates_data["DownloadData"]

I keep getting the following error:

Traceback (most recent call last):
  File "C:/Users/EDMTC7J4/Desktop/Lenovo Driver Scraper/rryh1.py", line 38, in <module>
    ] for driver in updates_data["DownloadData"]
KeyError: 'DownloadData'

I have worked on this based upon the post here: "I am trying to use Python and Selenium to create a driver scraper, I need to grab particular data from the web page and put it in CSV row and column".

I just need help understanding what to put where "DownloadData" is and I am having difficulty figuring it out.


Solution

  • It's giving a KeyError because DownloadData does not exist in updates_data. There is a different key called DownloadItems (nested under body), and I'm assuming that's what you need.

    # List the keys under 'body' to see which one holds the driver entries.
    print(updates_data['body'].keys())
    
    >>> dict_keys(['AllCategories', 'AllOperatingSystems', 'AllPriorities', 'DownloadItems', 'ProductId', 'DCGFixIDNamingGuideImageUrl', 'RestrictCountryList'])
    

    You'll need to explore the dictionary further to access the specific fields you're looking for, i.e. the Files list inside each entry of updates_data['body']['DownloadItems']. In other words, you need a nested loop that goes through each file in Files for each item in DownloadItems. Then you can organize the row items however you want.

    # One row per downloadable file; the summary text is taken from the
    # parent DownloadItems entry that the file belongs to.
    rows = [
        [
            file_info['Name'],
            file_info['Date'],
            file_info['Version'],
            file_info['Priority'],
            file_info['TypeString'],
            file_info['URL'],
            item['Summary'],
        ]
        for item in updates_data['body']['DownloadItems']
        for file_info in item['Files']
    ]
    

    Output

    from pprint import pprint
    
    # Print only the first two rows to keep the sample output short.
    pprint(rows[:2])
    
    >>> [['Lenovo System Update',
      {'Unix': 1722348780000},
      '5.08.03.59',
      'Recommended',
      'EXE',
      'https://download.lenovo.com/pccbbs/thinkvantage_en/system_update_5.08.03.59.exe',
      'System Update enables IT administrators to distribute updates for software, '
      'drivers, and BIOS in a managed environment from a local server.'],
     ['README',
      {'Unix': 1722348840000},
      '5.08.03.59',
      'Recommended',
      'TXT README',
      'https://download.lenovo.com/pccbbs/thinkvantage_en/system_update_5.08.03.59.txt',
      'System Update enables IT administrators to distribute updates for software, '
      'drivers, and BIOS in a managed environment from a local server.']]
    

    Example of an item from DownloadItems.

    # Inspect the first DownloadItems entry to see every field it exposes.
    pprint(updates_data['body']['DownloadItems'][0])
    
    >>> {'AlertDataLoss': False,
     'Audiences': [{'Id': 'ANONYMOUS', 'Name': 'Anonymous'}],
     'Brocade': False,
     'Category': {'Classify': 'dl-category-thinkvantage',
                  'ID': '8FA7E883-9036-4E57-A106-3C9FA89F1490',
                  'Name': 'ThinkVantage Technology'},
     'Countries': [],
     'Date': {'Unix': 1722348720000},
     'DocId': 'DS012808',
     'Eods': '0',
     'Files': [{'Date': {'Unix': 1722348780000},
                'FileName': '',
                'HasReadme': False,
                'MD5': 'd54563ebb7080b18959146c102d52f26',
                'Name': 'Lenovo System Update',
                'OperatingSystemKeys': [],
                'Priority': 'Recommended',
                'PriorityWeight': 2,
                'ReadmeUrl': '',
                'Released': None,
                'SHA1': '11cddb61f89b850e053a6f476a4eea855c9133a8',
                'SHA256': 'e66794dc561a3e58e3dc68556eb053ee32b674ac9d99638e473ef7322f353e0d',
                'Size': '10.1 MB',
                'TypeEnString': '',
                'TypeString': 'EXE',
                'URL': 'https://download.lenovo.com/pccbbs/thinkvantage_en/system_update_5.08.03.59.exe',
                'Version': '5.08.03.59'},
               {'Date': {'Unix': 1722348840000},
                'FileName': '',
                'HasReadme': False,
                'MD5': '1ad005136b60036a9420211698db5935',
                'Name': 'README',
                'OperatingSystemKeys': [],
                'Priority': 'Recommended',
                'PriorityWeight': 2,
                'ReadmeUrl': '',
                'Released': None,
                'SHA1': '45cf2bad1e60565a0ecf0851d3d3caea7bf9ea3c',
                'SHA256': '1f0c6e404aa47d7ed24673c908ab1c7a81b26e7debe741ad1cf0a2e8d0e9f152',
                'Size': '16.3 KB',
                'TypeEnString': '',
                'TypeString': 'TXT README',
                'URL': 'https://download.lenovo.com/pccbbs/thinkvantage_en/system_update_5.08.03.59.txt',
                'Version': '5.08.03.59'}],
     'FixID': '',
     'Highlight': False,
     'Hit': '6580855',
     'ID': ['DS012808-LENOVO-SYSTEM-UPDATE-FOR-WINDOWS-10-7-32-BIT-64-BIT-DESKTOP-NOTEBOOK-WORKSTATION',
            'DS012808',
            'D295217D-1898-40AB-94CD-A729A310EBB0'],
     'InWarranty': False,
     'LanguageCode': 'en',
     'MetaData': 'System Update, TVSU, Updater, Software Installer, SWI, think '
                 'vantage system update,lenovo system update service cpu,DS012808',
     'OEMOnly': False,
     'OperatingSystemKeys': ['Windows 10 (32-bit)',
                             'Windows 10 (64-bit)',
                             'Windows 11 (32-bit)',
                             'Windows 11 (64-bit)',
                             'Windows 7 (32-bit)',
                             'Windows 7 (64-bit)'],
     'PNCheck': False,
     'RebootRequired': 0,
     'RedirectCode': 0,
     'RedirectTo': '',
     'RequireLogin': False,
     'RestrictedCountries': [],
     'SEOH1Content': 'Lenovo System Update for Windows 11, 10 & 7 (32-bit, 64-bit) '
                     '- Desktop, Notebook, Workstation',
     'Summary': 'System Update enables IT administrators to distribute updates for '
                'software, drivers, and BIOS in a managed environment from a local '
                'server.',
     'SummaryInfo': {'Priority': 'Recommended', 'Version': '5.08.03.59'},
     'Title': 'Lenovo System Update for Windows 11, 10 & 7 (32-bit, 64-bit) - '
              'Desktop, Notebook, Workstation',
     'Updated': {'Unix': 1722348888000},
     'rolePass': True}