I am new to Python and I am attempting to create a script that will open an XML file and insert an XML element into the proper place within a sequence of XML elements.
So far, I have this code:
import os
import xml.etree.ElementTree as ET
def process_xml_files(source_dir, target_dir):
# Create the target directory if it doesn't exist
if not os.path.exists(target_dir):
os.makedirs(target_dir)
# Iterate through all files in the source directory
for filename in os.listdir(source_dir):
if filename.endswith('.xml'):
file_path = os.path.join(source_dir, filename)
tree = ET.parse(file_path)
root = tree.getroot()
# Process each platformID group
for platformID in ['0', '1', '3']:
elements = root.findall(f".//namerecord[@platformID='{platformID}']")
if elements:
nameID_elements = {int(el.get('nameID')): el for el in elements}
if 8 not in nameID_elements:
# Create a new element with nameID="8"
new_element = ET.Element('namerecord', attrib={
'nameID': '8',
'platformID': platformID,
'platEncID': elements[0].get('platEncID'),
'langID': elements[0].get('langID'),
})
new_element.text = "\r" " " "New Entry" "\r" " "
# Insert the new element in the correct position
sorted_nameIDs = sorted(list(nameID_elements.keys()) + [8])
insert_index = sorted_nameIDs.index(8)
# Find the parent element
parent = root.find(".//name")
parent.insert(insert_index, new_element)
# Write the modified XML to the target directory
target_file_path = os.path.join(target_dir, filename)
tree.write(target_file_path, encoding='utf-8', xml_declaration=True)
source_directory = input("Enter the source directory: ")
target_directory = input("Enter the target directory: ")
process_xml_files(source_directory, target_directory)
Here is an example of the input file requiring the insertion. Note that the platformIDs are grouped together and in sequential order by nameID:
<?xml version="1.0" encoding="UTF-8"?>
<xmlalter xmlaVersion="Alpha">
<name>
<namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
Safe
</namerecord>
<namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
Kitten
</namerecord>
<namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
Calico
</namerecord>
<namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
1.00
</namerecord>
<namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
Safe
</namerecord>
<namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
Kitten
</namerecord>
<namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
Calico
</namerecord>
<namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
1.00
</namerecord>
<namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico
</namerecord>
<namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
Safe
</namerecord>
<namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
Kitten Calico
</namerecord>
<namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
Vanilla
</namerecord>
<namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
1.00
</namerecord>
<namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
KittenCalico
</namerecord>
</name>
</xmlalter>
This Python code has produced the following. Note that the new entries are not grouped in the platformIDs in sequential order by nameID:
<?xml version='1.0' encoding='utf-8'?>
<xmlalter xmlaVersion="Alpha">
<name>
<namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
Safe
</namerecord>
<namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
Kitten
</namerecord>
<namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
Calico
</namerecord>
<namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
1.00
</namerecord>
<namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<namerecord nameID="8" platformID="3" platEncID="1" langID="0x409">
New Entry
</namerecord><namerecord nameID="8" platformID="1" platEncID="0" langID="0x0">
New Entry
</namerecord><namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
New Entry
</namerecord><namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
Safe
</namerecord>
<namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
Kitten
</namerecord>
<namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
Calico
</namerecord>
<namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
1.00
</namerecord>
<namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico
</namerecord>
<namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
Safe
</namerecord>
<namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
Kitten Calico
</namerecord>
<namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
Vanilla
</namerecord>
<namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
1.00
</namerecord>
<namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
KittenCalico
</namerecord>
</name>
</xmlalter>
I was attempting to get an XML output that looks like this:
<?xml version='1.0' encoding='utf-8'?>
<xmlalter xmlaVersion="Alpha">
<name>
<namerecord nameID="0" platformID="0" platEncID="0" langID="0x0">
Safe
</namerecord>
<namerecord nameID="1" platformID="0" platEncID="0" langID="0x0">
Kitten
</namerecord>
<namerecord nameID="2" platformID="0" platEncID="0" langID="0x0">
Calico
</namerecord>
<namerecord nameID="3" platformID="0" platEncID="0" langID="0x0">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="0" platEncID="0" langID="0x0">
1.00
</namerecord>
<namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
New Entry
</namerecord>
<namerecord nameID="0" platformID="1" platEncID="0" langID="0x0" unicode="True">
Safe
</namerecord>
<namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
Kitten
</namerecord>
<namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
Calico
</namerecord>
<namerecord nameID="3" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
1.00
</namerecord>
<namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico
</namerecord>
<namerecord nameID="8" platformID="1" platEncID="0" langID="0x0" unicode="True">
New Entry
</namerecord>
<namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
Safe
</namerecord>
<namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
Kitten Calico
</namerecord>
<namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
Vanilla
</namerecord>
<namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
KittenCalico:1135918450
</namerecord>
<namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
KittenCalico
</namerecord>
<namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
1.00
</namerecord>
<namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
KittenCalico
</namerecord>
<namerecord nameID="8" platformID="3" platEncID="1" langID="0x409">
New Entry
</namerecord>
</name>
</xmlalter>
Also some XML files will contain nameID="7"
and nameIDs past 8, like nameID="9"
, nameID="10"
, etc., so the nameID=8
will not necessarily be at the end of each platformID, but in between the sequential nameID elements.
I am not sure how to write code to achieve this. Any help would be appreciated, thank you!
Updated 2024-11-20. Thank you @LMC!, I integrated your new code and it worked on the file example I gave you.
I was challenged when I tried it on another input file, which also might be a possible variation on the format. Apologies for not posting this example initially.
Here is the new example XML input file:
<?xml version="1.0" encoding="UTF-8"?>
<xmlalter xmlaVersion="Alpha">
<name>
<namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
Puppy
</namerecord>
<namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
Vanilla
</namerecord>
<namerecord nameID="4" platformID="1" platEncID="0" langID="0x0" unicode="True">
Puppy
</namerecord>
<namerecord nameID="5" platformID="1" platEncID="0" langID="0x0" unicode="True">
Kennel
</namerecord>
<namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
PuppyKennel
</namerecord>
<namerecord nameID="0" platformID="3" platEncID="1" langID="0x409">
Safe
</namerecord>
<namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
Puppy
</namerecord>
<namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
Vanilla
</namerecord>
<namerecord nameID="3" platformID="3" platEncID="1" langID="0x409">
Kennel;Puppy
</namerecord>
<namerecord nameID="4" platformID="3" platEncID="1" langID="0x409">
Puppy
</namerecord>
<namerecord nameID="5" platformID="3" platEncID="1" langID="0x409">
Kennel
</namerecord>
<namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
PuppyKennel
</namerecord>
<namerecord nameID="7" platformID="3" platEncID="1" langID="0x409">
Safe
</namerecord>
<namerecord nameID="9" platformID="3" platEncID="1" langID="0x409">
Spot
</namerecord>
<namerecord nameID="10" platformID="3" platEncID="1" langID="0x409">
Puppies are cute
and huggable
</namerecord>
</name>
</xmlalter>
The document is already sorted so the insertion index should be the index of the last element with nameID < 8. Additionally, element.tail
field was used to add proper indentation.
import xml.etree.ElementTree as ET
import copy
doc = ET.parse("/home/lmc/tmp/tmp2.xml")
elpar = doc.getroot().find(".//namerecord[1]/..")
elements = doc.getroot().findall(f".//namerecord[@platformID]")
platId_nameId = {}
count = 0
for platformID in ['0', '1', '3']:
platId_nameId[platformID] = [el for el in elements if el.get('platformID') == platformID]
if len(platId_nameId[platformID]) == 0:
continue
haystack = [x for x in platId_nameId[platformID] if int(x.get("nameID")) < 8]
idx = len(haystack) + count
new_element = copy.deepcopy(haystack[-1])
new_element.set("nameID", "8")
new_element.text = elements[0].tail + " some text" + elements[0].tail
new_element.tail = elements[0].tail
print(f"Insertion index: {idx}")
elpar.insert(idx, new_element)
count += len(platId_nameId[platformID]) + 1
print(ET.tostring(elpar).decode('utf-8'))
showing the relevant fragment
<name>
<!-- more elements -->
<namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<namerecord nameID="8" platformID="0" platEncID="0" langID="0x0">
some text
</namerecord>
<namerecord nameID="9" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<!-- more elements -->
<namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
KittenCalico
</namerecord>
<namerecord nameID="8" platformID="1" platEncID="0" langID="0x0">
some text
</namerecord>
<!-- more elements -->
<namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
KittenCalico
</namerecord>
<namerecord nameID="8" platformID="3" platEncID="0" langID="0x0">
some text
</namerecord>
</name>
The new element was inserted between 6 and 9
Relevant source doc fragment:
<namerecord nameID="6" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
<namerecord nameID="9" platformID="0" platEncID="0" langID="0x0">
KittenCalico
</namerecord>
XML indexes start at 1 as can be seen inspecting the document with pyxml2xpath
utility.
pyxml2xpath tmp2.xml xpath '//namerecord[@platformID="0" and @nameID < 8]' False 100 True
/xmlalter/name/namerecord[1]
/xmlalter/name/namerecord[2]
/xmlalter/name/namerecord[3]
/xmlalter/name/namerecord[4]
/xmlalter/name/namerecord[5]
/xmlalter/name/namerecord[6]
/xmlalter/name/namerecord[7]