python ijson

How can I use ijson to extract sets of corresponding data from a JSON file?


I have a JSON file like this:

    {
  "CVE_data_type" : "CVE",
  "CVE_Items" : [ {
    "cve" : {

      "CVE_data_meta" : {
        "ID" : "CVE-2020-0001",
        "ASSIGNER" : "security@android.com"
      },
      ...
      
    "configurations" : {
      "CVE_data_version" : "4.0",
      "nodes" : [ {
        "operator" : "OR",
        "children" : [ ],
        "cpe_match" : [ {
          "vulnerable" : true,
          "cpe23Uri" : "cpe:2.3:o:google:android:8.0:*:*:*:*:*:*:*",
          "cpe_name" : [ ]
        }, {
          "vulnerable" : true,
          "cpe23Uri" : "cpe:2.3:o:google:android:8.1:*:*:*:*:*:*:*",
          "cpe_name" : [ ]
        }]
      } ]
    },
   ...
    "publishedDate" : "2020-01-08T19:15Z",
    "lastModifiedDate" : "2020-01-14T21:52Z"
  }]
}

I want to extract each CVE ID together with its corresponding CPEs, so that I can locate a CVE ID from a CPE. Here is my code:

import ijson
import datetime


def parse_json(filename):
    """Print every CVE id found in *filename*, then every CPE URI.

    The file is streamed twice with ijson: the first pass yields the
    ``ID`` value of each entry under ``CVE_Items``, the second pass yields
    each ``cpe_match`` entry and prints its ``cpe23Uri``. The two listings
    are produced independently, so the output does not show which CPE
    belongs to which CVE id.
    """
    id_prefix = 'CVE_Items.item.cve.CVE_data_meta.ID'
    match_prefix = 'CVE_Items.item.configurations.nodes.item.cpe_match.item'

    # First pass: CVE identifiers only.
    with open(filename, 'rb') as stream:
        for cve_id in ijson.items(stream, id_prefix):
            print("CVE id: %s" % cve_id)

    # Second pass: reopen the file and walk the cpe_match entries.
    with open(filename, 'rb') as stream:
        for match in ijson.items(stream, match_prefix):
            print("cpe: %s" % match['cpe23Uri'])


def main():
    """Parse ``cve.json`` (path relative to the working directory)."""
    parse_json("cve.json")
    

# Run the parser only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Results:

CVE id: CVE-2020-0633
CVE id: CVE-2020-0631
cpe: cpe:2.3:o:google:android:8.0:*:*:*:*:*:*:*
cpe: cpe:2.3:o:google:android:10.0:*:*:*:*:*:*:*
cpe: cpe:2.3:o:microsoft:windows_10:1607:*:*:*:*:*:*:*
cpe: cpe:2.3:o:microsoft:windows_server_2016:-:*:*:*:*:*:*:*

But the code above just extracts the two sets of data separately; there is no correspondence between each CVE ID and its CPEs.

Could anyone help? A little help would be appreciated.


Solution

  • I think if you need to keep track of CVE IDs and their corresponding CPEs you'll need to iterate over whole cve items and extract the bits of data you need (so you'll only do one pass through the file). Not as efficient memory-wise as your original iteration, but if each item in CVE_Items is not too big then it's not a problem:

    with open(filename, 'rb') as input_file:
        # Stream one complete CVE_Items entry at a time so that the ID and
        # its CPE matches are read from the same object.
        for cve in ijson.items(input_file, 'CVE_Items.item'):
            cve_id = cve['cve']['CVE_data_meta']['ID']
            # Flatten the cpe_match entries of every node of this entry.
            cpes = [match
                    for node in cve['configurations']['nodes']
                    for match in node['cpe_match']]
    

    If you know that nodes always contains a single element (and therefore a single cpe_match list), then you can replace the last list comprehension with cve['configurations']['nodes'][0]['cpe_match'].