pythonxmlinsertsequential

Insert Elements into XML code in the proper order/sequence? Python code


I am new to Python and I am attempting to create a script that will open an XML file and insert an XML element into the proper place within a sequence of XML elements.

So far, I have this code:

import os
import xml.etree.ElementTree as ET


def process_xml_files(source_dir, target_dir):
    # Create the target directory if it doesn't exist
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Iterate through all files in the source directory
    for filename in os.listdir(source_dir):
        if filename.endswith('.xml'):
            file_path = os.path.join(source_dir, filename)
            tree = ET.parse(file_path)
            root = tree.getroot()

            # Process each platformID group
            for platformID in ['0', '1', '3']:
                elements = root.findall(f".//namerecord[@platformID='{platformID}']")
                if elements:
                    nameID_elements = {int(el.get('nameID')): el for el in elements}
                    if 8 not in nameID_elements:
                        # Create a new element with nameID="8"
                        new_element = ET.Element('namerecord', attrib={
                            'nameID': '8',
                            'platformID': platformID,
                            'platEncID': elements[0].get('platEncID'),
                            'langID': elements[0].get('langID'),

                        })
                        new_element.text = "\r" "    " "New Entry" "\r" "    "

                        # Insert the new element in the correct position
                        sorted_nameIDs = sorted(list(nameID_elements.keys()) + [8])
                        insert_index = sorted_nameIDs.index(8)

                        # Find the parent element
                        parent = root.find(".//name")
                        parent.insert(insert_index, new_element)

            # Write the modified XML to the target directory
            target_file_path = os.path.join(target_dir, filename)
            tree.write(target_file_path, encoding='utf-8', xml_declaration=True)


source_directory = input("Enter the source directory: ")
target_directory = input("Enter the target directory: ")
process_xml_files(source_directory, target_directory)

Here is an example of the input file requiring the insertion. Note that the platformIDs are grouped together and in sequential order by nameID:

<?xml version="1.0" encoding="UTF-8"?>
<xmlalter xmlaVersion="Alpha">

  <name>
    <namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
        
    <namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico
    </namerecord>
        
    <namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
      Kitten Calico
    </namerecord>
    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
      Vanilla
    </namerecord>
    <namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
      KittenCalico
    </namerecord>
  </name>

</xmlalter>

This Python code has produced the following. Note that the new entries are not grouped in the platformIDs in sequential order by nameID:

<?xml version='1.0' encoding='utf-8'?>
<xmlalter xmlaVersion="Alpha">

  <name>
    <namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
      KittenCalico
    </namerecord>
        
    <namerecord nameID="8" platformID="3" platEncID="1" langID="0x409">
    New Entry
    </namerecord><namerecord nameID="8" platformID="1" platEncID="0" langID="0x0">
    New Entry
    </namerecord><namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
    New Entry
    </namerecord><namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Kitten
    </namerecord>
    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Calico
    </namerecord>
    <namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
      KittenCalico
    </namerecord>
        
    <namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
      Kitten Calico
    </namerecord>
    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
      Vanilla
    </namerecord>
    <namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
      KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
      KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
      1.00
    </namerecord>
    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
      KittenCalico
    </namerecord>
  </name>

</xmlalter>

I was attempting to get an XML output that looks like this:

<?xml version='1.0' encoding='utf-8'?>
<xmlalter xmlaVersion="Alpha">

  <name>
    <namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
     Safe
    </namerecord>
    <namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
     Kitten
    </namerecord>
    <namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
     Calico
    </namerecord>
    <namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
     KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
     KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
     1.00
    </namerecord>
    <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
    KittenCalico
    </namerecord>
    <namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
    New Entry
    </namerecord>
    
    <namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
     Safe
    </namerecord>
    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
     Kitten
    </namerecord>
    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
     Calico
    </namerecord>
    <namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
     KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
     KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
     1.00
    </namerecord>
    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
     KittenCalico
    </namerecord>
    <namerecord nameID="8" platformID="1" platEncID="0" langID="0x0" unicode="True">
    New Entry
    </namerecord>

    <namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
     Safe
    </namerecord>
    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
     Kitten Calico
    </namerecord>
    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
     Vanilla
    </namerecord>
    <namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
     KittenCalico:1135918450
    </namerecord>
    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
     KittenCalico
    </namerecord>
    <namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
     1.00
    </namerecord>
    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
     KittenCalico
    </namerecord>
    <namerecord nameID="8" platformID="3" platEncID="1" langID="0x409">
    New Entry
    </namerecord>
  </name>

</xmlalter>

Also some XML files will contain nameID="7" and nameIDs past 8, like nameID="9", nameID="10", etc., so the nameID=8 will not necessarily be at the end of each platformID, but in between the sequential nameID elements.

I am not sure how to write code to achieve this. Any help would be appreciated, thank you!


Updated 2024-11-20. Thank you @LMC!, I integrated your new code and it worked on the file example I gave you.
I was challenged when I tried it on another input file, which also might be a possible variation on the format. Apologies for not posting this example initially.

Here is the new example XML input file:

<?xml version="1.0" encoding="UTF-8"?>
<xmlalter xmlaVersion="Alpha">

   <name>
    <namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Puppy
    </namerecord>
    <namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Vanilla
    </namerecord>
    <namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Puppy
    </namerecord>
    <namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
      Kennel
    </namerecord>
    <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
      PuppyKennel
    </namerecord>
    <namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
      Safe
    </namerecord>
    <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
      Puppy
    </namerecord>
    <namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
      Vanilla
    </namerecord>
    <namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
      Kennel;Puppy
    </namerecord>
    <namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
      Puppy
    </namerecord>
    <namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
      Kennel
    </namerecord>
    <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
      PuppyKennel
    </namerecord>
    <namerecord nameID="7" platformID="3" platEncID="1" langID="0x409">
      Safe
    </namerecord>
    <namerecord nameID="9" platformID="3" platEncID="1" langID="0x409">
      Spot
    </namerecord>
    <namerecord nameID="10" platformID="3" platEncID="1" langID="0x409">
      
Puppies are cute

and huggable

    </namerecord>

  </name>

</xmlalter>

Solution

  • The document is already sorted so the insertion index should be the index of the last element with nameID < 8. Additionally, element.tail field was used to add proper indentation.

    import xml.etree.ElementTree as ET
    import copy
    
    doc = ET.parse("/home/lmc/tmp/tmp2.xml")
    
    elpar = doc.getroot().find(".//namerecord[1]/..")
    
    elements = doc.getroot().findall(f".//namerecord[@platformID]")
    
    platId_nameId = {}
    count = 0
    for platformID in ['0', '1', '3']:
        platId_nameId[platformID] = [el for el in elements if el.get('platformID') == platformID]
    
        if len(platId_nameId[platformID]) == 0:
             continue
    
        haystack = [x for x in platId_nameId[platformID] if int(x.get("nameID")) < 8]
        idx = len(haystack) + count
    
        new_element = copy.deepcopy(haystack[-1])
        new_element.set("nameID", "8")
        
        new_element.text = elements[0].tail + " some text" + elements[0].tail
        new_element.tail = elements[0].tail
    
        print(f"Insertion index:  {idx}")
        elpar.insert(idx, new_element)
        
        count += len(platId_nameId[platformID]) + 1
    
    print(ET.tostring(elpar).decode('utf-8'))
    

    showing the relevant fragment

    <name>
      <!-- more elements -->
        <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
          KittenCalico
        </namerecord>
        <namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
         some text
        </namerecord>
        <namerecord nameID="9" platformID="0" platEncID="0" langID="0x0">
          KittenCalico
        </namerecord>
    
    <!-- more elements -->
        <namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
          KittenCalico
        </namerecord>
    
        <namerecord nameID="8" platformID="1" platEncID="0" langID="0x0">
         some text
        </namerecord>
        <!-- more elements -->
        <namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
          KittenCalico
        </namerecord>
      <namerecord nameID="8" platformID="3" platEncID="0" langID="0x0">
         some text
        </namerecord>
    </name>
    

    The new element was inserted between 6 and 9

    Relevant source doc fragment:

        <namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
          KittenCalico
        </namerecord>
        <namerecord nameID="9" platformID="0" platEncID="0" langID="0x0">
          KittenCalico
        </namerecord>
    

    XML indexes start at 1 as can be seen inspecting the document with pyxml2xpath utility.

    pyxml2xpath tmp2.xml xpath '//namerecord[@platformID="0" and @nameID < 8]' False 100 True
    
    /xmlalter/name/namerecord[1]
    /xmlalter/name/namerecord[2]
    /xmlalter/name/namerecord[3]
    /xmlalter/name/namerecord[4]
    /xmlalter/name/namerecord[5]
    /xmlalter/name/namerecord[6]
    /xmlalter/name/namerecord[7]