python elementtree

ElementTree.find() seems to miss recently appended elements


I can't wrap my head around this one. Why is find() not finding the appended element when it can find the exact same element when it is loaded from an XML string?

The following function creates an element and adds subelements to it.

def create_element():
    """Build a "004" datafield element holding two subfields.

    The tags are given as the plain strings "marcx:datafield" and
    "marcx:subfield". Subfield "a" carries the text "e" and subfield "r"
    carries the text "n".

    Returns:
        The newly created datafield element with both subfields attached.
    """
    datafield = ET.Element(
        "marcx:datafield", {"ind1": "0", "ind2": "0", "tag": "004"}
    )
    # SubElement creates each child and attaches it to the parent in one step.
    for code, text in (("a", "e"), ("r", "n")):
        subfield = ET.SubElement(datafield, "marcx:subfield", {"code": code})
        subfield.text = text
    return datafield

When I call that function, append the result, and then try to find() it, the element is not found, so a second copy is appended:

def test_append_twice():
    """Append a datafield, then append a second one only if find() misses it.

    The element returned by create_element() is attached to a fresh
    "MyElement" root and then looked up with a namespace-prefixed path.
    A second element is appended only when the lookup returns nothing, and
    the final assertion counts every element in the tree (root included).
    """
    namespaces = {"marcx": "info:lc/xmlns/marcxchange-v1"}
    my_element = ET.Element("MyElement")
    # Build the element by calling create_element(); the name d_004 only
    # exists inside that function and is not defined in this scope.
    my_element.append(create_element())
    added_element = my_element.find('./marcx:datafield[@tag="004"]', namespaces)
    # find() returns None on a miss. Compare against None explicitly, because
    # an Element without children is falsy even when it was found.
    if added_element is None:
        my_element.append(create_element())
    assert len(list(my_element.iter())) == 4 # Returns 7 instead of 4

I wrote a second test (below) that does the same lookup, but on an element parsed from an XML string. In that case find() works as expected.

def test_find_from_string():
    """Parse the document from a string and look up the 004 datafield.

    The XML declares the "marcx" prefix for the marcxchange namespace, and
    the same URI is passed to find() through the namespaces mapping. A new
    element is appended only when the lookup returns nothing, and the final
    assertion counts every element in the tree (root included).
    """
    namespaces = {"marcx": "info:lc/xmlns/marcxchange-v1"}
    element_str= """<?xml version="1.0" encoding="UTF-8"?>
        <MyElement>
            <marcx:datafield xmlns:marcx="info:lc/xmlns/marcxchange-v1" ind1="0" ind2="0" tag="004">
                <marcx:subfield code="a">e</marcx:subfield>
                <marcx:subfield code="r">n</marcx:subfield>
            </marcx:datafield>
        </MyElement>"""
    # Keep the root Element itself rather than wrapping it in an ElementTree:
    # ElementTree objects have no append(), so the fallback below would fail.
    my_element = ET.fromstring(element_str)
    added_element = my_element.find('./marcx:datafield[@tag="004"]', namespaces)
    # find() returns None on a miss. Compare against None explicitly, because
    # an Element without children is falsy even when it was found.
    if added_element is None:
        my_element.append(create_element())
    assert len(list(my_element.iter())) == 4

What is going on? What am I missing? Does it have something to do with namespaces?


Solution

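  • You have to use QName when creating the elements. ElementTree stores a namespaced tag as "{uri}localname", and find() expands the "marcx:" prefix to that form using the namespaces mapping. An element created with the plain string "marcx:datafield" just has a literal tag containing a colon, so it never matches: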

    def create_element():
        """Build the "004" datafield with namespace-qualified tags.

        Every tag is an ET.QName made from the marcxchange namespace URI and
        the local name, and the "marcx" prefix is registered for that URI.
        Subfield "a" carries the text "e" and subfield "r" carries "n".

        Returns:
            The newly created datafield element with both subfields attached.
        """
        marcx_ns = "info:lc/xmlns/marcxchange-v1"
        ET.register_namespace("marcx", marcx_ns)
        datafield = ET.Element(
            ET.QName(marcx_ns, "datafield"),
            {"ind1": "0", "ind2": "0", "tag": "004"},
        )
        # SubElement creates each child and attaches it to the parent in one step.
        for code, text in (("a", "e"), ("r", "n")):
            subfield = ET.SubElement(
                datafield, ET.QName(marcx_ns, "subfield"), {"code": code}
            )
            subfield.text = text
        return datafield