pythonpandascsvelementtreexmltocsv

getting specific attribute name and it's value from xml to csv via python and pandas


i am converting nested xml file to csv via python and pandas library. there is many attribute in xml but i just want to export specific attribute name with its value in the csv file.

    <TestReport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" id="33357fcc-edf2-3ad4-a6f2-588a9492f804" source="OI Stand" converter="irs_xml.xsl" version="1.6">
      <FileType>Test Report</FileType>
      <CreationTime>2020-09-28T09:42:07.3875529+02:00</CreationTime>
      <TestExecution start="2265.595799" time="68.619646" id="009b96f0-9d26-4226-9c9a-7364dcab0467" ts="2020-09-28T09:40:58">                     
        <Attr name="ProductNumber" type="String" value="11266" />        
        <Attr name="SerialNumber" type="Number" value="324" />      
        <TestSteps>
          <TestStep measid="CNID_0210" measname="UART Read Node ID" status="Passed" datatype="Boolean" value="True" stepname="[CNID_0210] UART Read Node ID" steptype="PassFailTest" start="2277.5219649" time="0.0280892" stepid="ID#:Ua1/2g1/6hGrTAABBUH5EC" group="[CNID] Config Node ID">
            <Attr name="ReadNodeId Cmd" type="String" value="RAC" />
            <Attr name="NodeId" type="String" value=" 255" />
          </TestStep>
          <TestStep measid="MOCV_1510" measname="Check OCV" status="Passed" datatype="Number" limlo="49.000" value="49.429" limhi="51.400" unit="V" stepname="[MOCV_1510] Check OCV" steptype="Test" start="2309.5348324" time="5.72E-05" stepid="ID#:pIldp+t98hG/wTz4Yu5AjB" group="[MOCV] Measure OCV" />
          <TestStep>
          .
          .
          </TestSteps>
          <PartNr />
      </TestExecution>
   </TestReport>

my code(it shows all the attribute)

import xml.etree.ElementTree as ET
import pandas as pd
tree = ET.parse('324.irp')
root = tree.getroot()
rows = []
df_columns = ['CreationTime', 'FileType', 'SerialNumber']
CreationTime = root.find("CreationTime").text
FileType = root.find("FileType").text
for child in root.iter('Attr'):
        name = child.attrib.get('name')
        value = child.attrib.get('value')
        print (name, value)
for elem in root.iter('TestStep'):
        measname = elem.attrib.get('measname')
        value = elem.attrib.get('value')
        print (measname, value)
if CreationTime and FileType and value:
        rows.append({"CreationTime": CreationTime, "FileType": FileType, "value": value})
        print(CreationTime, FileType, value)
df = pd.DataFrame(rows, columns= df_columns)
df.to_csv(r'C:\Users\PycharmProjects\pythonProject\output.csv', index = False)

Expecting result:

CreationTime,FileType,SerialNumber,Check OCV
2020-09-28T09:42:07.3875529+02:00,Test Report,324,49.429V

could you please anyone help me out how can i print specific attribute name(SerialNumber,Check OCV) with its value(324,49.429V). out of many attribute?


Solution

  • Try the below

    import xml.etree.ElementTree as ET
    import pandas as pd
    
    xml = '''<?xml version="1.0" encoding="UTF-8"?>
    <TestReport xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="33357fcc-edf2-3ad4-a6f2-588a9492f804" source="OI Stand" converter="irs_xml.xsl" version="1.6">
       <FileType>Test Report</FileType>
       <CreationTime>2020-09-28T09:42:07.3875529+02:00</CreationTime>
       <TestExecution start="2265.595799" time="68.619646" id="009b96f0-9d26-4226-9c9a-7364dcab0467" ts="2020-09-28T09:40:58">
          <Attr name="ProductNumber" type="String" value="11266" />
          <Attr name="SerialNumber" type="Number" value="324" />
          <TestSteps>
             <TestStep measid="CNID_0210" measname="UART Read Node ID" status="Passed" datatype="Boolean" value="True" stepname="[CNID_0210] UART Read Node ID" steptype="PassFailTest" start="2277.5219649" time="0.0280892" stepid="ID#:Ua1/2g1/6hGrTAABBUH5EC" group="[CNID] Config Node ID">
                <Attr name="ReadNodeId Cmd" type="String" value="RAC" />
                <Attr name="NodeId" type="String" value=" 255" />
             </TestStep>
             <TestStep measid="MOCV_1510" measname="Check OCV" status="Passed" datatype="Number" limlo="49.000" value="49.429" limhi="51.400" unit="V" stepname="[MOCV_1510] Check OCV" steptype="Test" start="2309.5348324" time="5.72E-05" stepid="ID#:pIldp+t98hG/wTz4Yu5AjB" group="[MOCV] Measure OCV" />
             <TestStep />
          </TestSteps>
          <PartNr />
       </TestExecution>
    </TestReport>'''
    
    def get_ele(name):
      expr = f'.//{name}'
      return root.find(expr).text
    
    def get_attr(name):
      expr = f'.//Attr[@name="{name}"]'
      return root.find(expr).attrib['value']  
    
    df_columns = {'CreationTime':get_ele, 'FileType':get_ele, 'SerialNumber':get_attr}
    
    root = ET.fromstring(xml)
    data = [{name :func(name) for name,func in df_columns.items()}]
    
    df = pd.DataFrame(data)
    print(df)
    

    output

                          CreationTime     FileType SerialNumber
    0  2020-09-28T09:42:07.3875529+02:00  Test Report          324