XML parsing in python issue using elementTree

I need to parse a soap response and convert to a text file. I am trying to parse the values as detailed below. I am using ElementTree in python

I have the below xml response which I need to parse

<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:tmf854="tmf854.v1" xmlns:alu="alu.v1">
  <soapenv:Header>
    <tmf854:header>
      <tmf854:activityName>query</tmf854:activityName>
      <tmf854:msgName>queryResponse</tmf854:msgName>
      <tmf854:msgType>RESPONSE</tmf854:msgType>
      <tmf854:senderURI>https:/destinationhost:8443/tmf854/services</tmf854:senderURI>
      <tmf854:destinationURI>https://localhost:8443</tmf854:destinationURI>
      <tmf854:activityStatus>SUCCESS</tmf854:activityStatus>
      <tmf854:correlationId>1</tmf854:correlationId>
      <tmf854:communicationPattern>MultipleBatchResponse</tmf854:communicationPattern>
      <tmf854:communicationStyle>RPC</tmf854:communicationStyle>
      <tmf854:requestedBatchSize>1500</tmf854:requestedBatchSize>
      <tmf854:batchSequenceNumber>1</tmf854:batchSequenceNumber>
      <tmf854:batchSequenceEndOfReply>true</tmf854:batchSequenceEndOfReply>
      <tmf854:iteratorReferenceURI>http://9195985371165397084</tmf854:iteratorReferenceURI>
      <tmf854:timestamp>20220915222121.472+0530</tmf854:timestamp>
    </tmf854:header>
  </soapenv:Header>
  <soapenv:Body>
    <queryResponse xmlns="alu.v1">
      <queryObjectData>
        <queryObject>
          <name>
            <tmf854:mdNm>AMS</tmf854:mdNm>
            <tmf854:meNm>CHEERLAVANCHA_281743</tmf854:meNm>
            <tmf854:ptpNm>/type=NE/CHEERLAVANCHA_281743</tmf854:ptpNm>
          </name>
          <vendorExtensions>
            <package>
              <NameAndStringValue>
                <tmf854:name>hubSubtendedStatus</tmf854:name>
                <tmf854:value>NONE</tmf854:value>
              </NameAndStringValue>
              <NameAndStringValue>
                <tmf854:name>productAndRelease</tmf854:name>
                <tmf854:value>DF.6.1</tmf854:value>
              </NameAndStringValue>
              <NameAndStringValue>
                <tmf854:name>adminUserName</tmf854:name>
                <tmf854:value>isadmin</tmf854:value>
              </NameAndStringValue>
              <NameAndStringValue>
           </package>
          </vendorExtensions>
        </queryObject>
      </queryObjectData>
     </queryResponse>
 </soapenv:Body>
</soapenv:Envelope>

JavaScript
​x
 
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:tmf854="tmf854.v1" xmlns:alu="alu.v1">
  <soapenv:Header>
    <tmf854:header>
      <tmf854:activityName>query</tmf854:activityName>
      <tmf854:msgName>queryResponse</tmf854:msgName>
      <tmf854:msgType>RESPONSE</tmf854:msgType>
      <tmf854:senderURI>https:/destinationhost:8443/tmf854/services</tmf854:senderURI>
      <tmf854:destinationURI>https://localhost:8443</tmf854:destinationURI>
      <tmf854:activityStatus>SUCCESS</tmf854:activityStatus>
      <tmf854:correlationId>1</tmf854:correlationId>
      <tmf854:communicationPattern>MultipleBatchResponse</tmf854:communicationPattern>
      <tmf854:communicationStyle>RPC</tmf854:communicationStyle>
      <tmf854:requestedBatchSize>1500</tmf854:requestedBatchSize>
      <tmf854:batchSequenceNumber>1</tmf854:batchSequenceNumber>
      <tmf854:batchSequenceEndOfReply>true</tmf854:batchSequenceEndOfReply>
      <tmf854:iteratorReferenceURI>http://9195985371165397084</tmf854:iteratorReferenceURI>
      <tmf854:timestamp>20220915222121.472+0530</tmf854:timestamp>
    </tmf854:header>
  </soapenv:Header>
  <soapenv:Body>
    <queryResponse xmlns="alu.v1">
      <queryObjectData>
        <queryObject>
          <name>
            <tmf854:mdNm>AMS</tmf854:mdNm>
            <tmf854:meNm>CHEERLAVANCHA_281743</tmf854:meNm>
            <tmf854:ptpNm>/type=NE/CHEERLAVANCHA_281743</tmf854:ptpNm>
          </name>
          <vendorExtensions>
            <package>
              <NameAndStringValue>
                <tmf854:name>hubSubtendedStatus</tmf854:name>
                <tmf854:value>NONE</tmf854:value>
              </NameAndStringValue>
              <NameAndStringValue>
                <tmf854:name>productAndRelease</tmf854:name>
                <tmf854:value>DF.6.1</tmf854:value>
              </NameAndStringValue>
              <NameAndStringValue>
                <tmf854:name>adminUserName</tmf854:name>
                <tmf854:value>isadmin</tmf854:value>
              </NameAndStringValue>
              <NameAndStringValue>
           </package>
          </vendorExtensions>
        </queryObject>
      </queryObjectData>
     </queryResponse>
 </soapenv:Body>
</soapenv:Envelope>
​
​

I need to use the below code snippet.

parser = ElementTree.parse("response.txt")
            root = parser.getroot()
            inventoryObjectData = root.find(".//{alu.v1}queryObjectData")
            for inventoryObject in inventoryObjectData:
                for device in inventoryObject:
                    if (device.tag.split("}")[1]) == "me":
                        vendorExtensionsNames = []
                        vendorExtensionsValues = []
                        if device.find(".//{tmf854.v1}mdNm") is not None:
                            mdnm = device.find(".//{tmf854.v1}mdNm").text
                        if device.find(".//{tmf854.v1}meNm") is not None:
                            menm = device.find(".//{tmf854.v1}meNm").text
                        if device.find(".//{tmf854.v1}userLabel") is not None:
                            userlabel = device.find(".//{tmf854.v1}userLabel").text
                        if device.find(".//{tmf854.v1}resourceState") is not None:
                            resourcestate = device.find(".//{tmf854.v1}resourceState").text
                        if device.find(".//{tmf854.v1}location") is not None:
                            location = device.find(".//{tmf854.v1}location").text
                        if device.find(".//{tmf854.v1}manufacturer") is not None:
                            manufacturer = device.find(".//{tmf854.v1}manufacturer").text
                        if device.find(".//{tmf854.v1}productName") is not None:
                            productname = device.find(".//{tmf854.v1}productName").text
                        if device.find(".//{tmf854.v1}version") is not None:
                            version = device.find(".//{tmf854.v1}version").text
                        vendorExtensions = device.find("vendorExtensions")
                        vendorExtensionsNamesElements = vendorExtensions.findall(".//{tmf854.v1}name")
                        for i in vendorExtensionsNamesElements:
                            vendorExtensionsNames.append(i.text.strip())
                         vendorExtensionsValuesElements = vendorExtensions.findall(".//{tmf854.v1}value")
                        for i in vendorExtensionsValuesElements:
                            vendorExtensionsValues.append(str(i.text or "").strip())

                        alu = ""
                        for i in vendorExtensions:
                            if i.attrib:
                                if alu == "":
                                    alu = i.attrib.get("{alu.v1}name")
                                else:
                                    alu = alu + "|" + i.attrib.get("{alu.v1}name")

JavaScript
 
parser = ElementTree.parse("response.txt")
            root = parser.getroot()
            inventoryObjectData = root.find(".//{alu.v1}queryObjectData")
            for inventoryObject in inventoryObjectData:
                for device in inventoryObject:
                    if (device.tag.split("}")[1]) == "me":
                        vendorExtensionsNames = []
                        vendorExtensionsValues = []
                        if device.find(".//{tmf854.v1}mdNm") is not None:
                            mdnm = device.find(".//{tmf854.v1}mdNm").text
                        if device.find(".//{tmf854.v1}meNm") is not None:
                            menm = device.find(".//{tmf854.v1}meNm").text
                        if device.find(".//{tmf854.v1}userLabel") is not None:
                            userlabel = device.find(".//{tmf854.v1}userLabel").text
                        if device.find(".//{tmf854.v1}resourceState") is not None:
                            resourcestate = device.find(".//{tmf854.v1}resourceState").text
                        if device.find(".//{tmf854.v1}location") is not None:
                            location = device.find(".//{tmf854.v1}location").text
                        if device.find(".//{tmf854.v1}manufacturer") is not None:
                            manufacturer = device.find(".//{tmf854.v1}manufacturer").text
                        if device.find(".//{tmf854.v1}productName") is not None:
                            productname = device.find(".//{tmf854.v1}productName").text
                        if device.find(".//{tmf854.v1}version") is not None:
                            version = device.find(".//{tmf854.v1}version").text
                        vendorExtensions = device.find("vendorExtensions")
                        vendorExtensionsNamesElements = vendorExtensions.findall(".//{tmf854.v1}name")
                        for i in vendorExtensionsNamesElements:
                            vendorExtensionsNames.append(i.text.strip())
                         vendorExtensionsValuesElements = vendorExtensions.findall(".//{tmf854.v1}value")
                        for i in vendorExtensionsValuesElements:
                            vendorExtensionsValues.append(str(i.text or "").strip())
​
                        alu = ""
                        for i in vendorExtensions:
                            if i.attrib:
                                if alu == "":
                                    alu = i.attrib.get("{alu.v1}name")
                                else:
                                    alu = alu + "|" + i.attrib.get("{alu.v1}name")
​

The issue is that the code below is not able to find the "vendorExtensions" element. Please help here.

vendorExtensions = device.find("vendorExtensions")

JavaScript
 
vendorExtensions = device.find("vendorExtensions")
​

Have tried the below as well

vendorExtensions = device.find(".//queryObject/vendorExtensions")

JavaScript
 
vendorExtensions = device.find(".//queryObject/vendorExtensions")
​

Answer

Your document declares a default namespace of alu.v1:

<queryResponse xmlns="alu.v1">
...
</queryResponse>

JavaScript
 
<queryResponse xmlns="alu.v1">
...
</queryResponse>
​

Any element inside queryResponse that has no explicit namespace prefix is in the alu.v1 namespace (a default namespace applies to elements, not to attributes). You need to qualify your element name appropriately:

vendorExtensions = device.find("{alu.v1}vendorExtensions")

JavaScript
 
vendorExtensions = device.find("{alu.v1}vendorExtensions")
​

While the above is a real problem with your code that needs to be corrected (the Wikipedia entry on XML namespaces may be useful reading if you’re unfamiliar with how namespaces work), there are also some logic problems with your code.

Let’s drop the big list of conditionals from the code and see if it’s actually doing what we think it’s doing. If we run this:

from xml.etree import ElementTree

parser = ElementTree.parse("data.xml")
root = parser.getroot()
queryObjectData = root.find(".//{alu.v1}queryObjectData")
# find() returns None when no queryObjectData element exists; iterating None
# would raise TypeError, so fall back to an empty list.
if queryObjectData is None:
    queryObjectData = []
# Print the tag of every direct child of every queryObject, to show what the
# inner loop variable actually refers to.
for queryObject in queryObjectData:
    for device in queryObject:
        print(device.tag)

JavaScript
 
from xml.etree import ElementTree

parser = ElementTree.parse("data.xml")
root = parser.getroot()
queryObjectData = root.find(".//{alu.v1}queryObjectData")
# find() returns None when no queryObjectData element exists; iterating None
# would raise TypeError, so fall back to an empty list.
if queryObjectData is None:
    queryObjectData = []
# Print the tag of every direct child of every queryObject, to show what the
# inner loop variable actually refers to.
for queryObject in queryObjectData:
    for device in queryObject:
        print(device.tag)


Then using your sample data (once it has been corrected to be syntactically valid), we see as output:

{alu.v1}name
{alu.v1}vendorExtensions

JavaScript
 
{alu.v1}name
{alu.v1}vendorExtensions
​

Your search for the {alu.v1}vendorExtensions element will never succeed because the thing on which you’re trying to search (the device variable) is the thing you’re trying to find: find() only looks at the children and descendants of an element, never at the element itself.

Additionally, the conditional in your loop…

if (device.tag.split("}")[1]) == "me":

JavaScript
 
if (device.tag.split("}")[1]) == "me":
​

…will never match (there is no element in the entire document for which tag.split("}")[1] == "me" is True).

I’m not entirely clear what you’re trying to do, but here are some thoughts:

Given your example data, you probably don’t want that for device in inventoryObject: loop
We can drastically simplify your code by replacing that long block of conditionals with a list of attributes in which we are interested and then a for loop to extract them.
Rather than assigning a bunch of individual variables, we can build up a dictionary with the data from the queryObject

That might look like:

from xml.etree import ElementTree
import json

# Local names of the tmf854.v1 elements to copy out of each queryObject.
attributeNames = [
    "mdNm",
    "meNm",
    "userLabel",
    "resourceState",
    "location",
    "manufacturer",
    "productName",
    "version",
]

parser = ElementTree.parse("data.xml")
root = parser.getroot()
queryObjectData = root.find(".//{alu.v1}queryObjectData")
# find() returns None when the element is absent; iterating None would raise
# TypeError, so fall back to an empty list.
if queryObjectData is None:
    queryObjectData = []
for queryObject in queryObjectData:
    device = {}

    # Only attributes that are actually present end up in the dictionary.
    for name in attributeNames:
        if (value := queryObject.find(f".//{{tmf854.v1}}{name}")) is not None:
            device[name] = value.text

    extensionMap = {}
    # vendorExtensions sits in the document's default namespace (alu.v1), so
    # the tag has to be namespace-qualified. It may be missing altogether.
    vendorExtensions = queryObject.find("{alu.v1}vendorExtensions")
    if vendorExtensions is not None:
        for extension in vendorExtensions.findall(".//{alu.v1}NameAndStringValue"):
            nameElement = extension.find("{tmf854.v1}name")
            if nameElement is None:
                # An entry without a name cannot be keyed; skip it.
                continue
            valueElement = extension.find("{tmf854.v1}value")
            extensionMap[nameElement.text] = (
                None if valueElement is None else valueElement.text
            )

    device["vendorExtensions"] = extensionMap

    print(json.dumps(device, indent=2))

JavaScript
 
from xml.etree import ElementTree
import json

# Local names of the tmf854.v1 elements to copy out of each queryObject.
attributeNames = [
    "mdNm",
    "meNm",
    "userLabel",
    "resourceState",
    "location",
    "manufacturer",
    "productName",
    "version",
]

parser = ElementTree.parse("data.xml")
root = parser.getroot()
queryObjectData = root.find(".//{alu.v1}queryObjectData")
# find() returns None when the element is absent; iterating None would raise
# TypeError, so fall back to an empty list.
if queryObjectData is None:
    queryObjectData = []
for queryObject in queryObjectData:
    device = {}

    # Only attributes that are actually present end up in the dictionary.
    for name in attributeNames:
        if (value := queryObject.find(f".//{{tmf854.v1}}{name}")) is not None:
            device[name] = value.text

    extensionMap = {}
    # vendorExtensions sits in the document's default namespace (alu.v1), so
    # the tag has to be namespace-qualified. It may be missing altogether.
    vendorExtensions = queryObject.find("{alu.v1}vendorExtensions")
    if vendorExtensions is not None:
        for extension in vendorExtensions.findall(".//{alu.v1}NameAndStringValue"):
            nameElement = extension.find("{tmf854.v1}name")
            if nameElement is None:
                # An entry without a name cannot be keyed; skip it.
                continue
            valueElement = extension.find("{tmf854.v1}value")
            extensionMap[nameElement.text] = (
                None if valueElement is None else valueElement.text
            )

    device["vendorExtensions"] = extensionMap

    print(json.dumps(device, indent=2))


Given your example data, this outputs:

{
  "mdNm": "AMS",
  "meNm": "CHEERLAVANCHA_281743",
  "vendorExtensions": {
    "hubSubtendedStatus": "NONE",
    "productAndRelease": "DF.6.1",
    "adminUserName": "isadmin"
  }
}

JavaScript
 
{
  "mdNm": "AMS",
  "meNm": "CHEERLAVANCHA_281743",
  "vendorExtensions": {
    "hubSubtendedStatus": "NONE",
    "productAndRelease": "DF.6.1",
    "adminUserName": "isadmin"
  }
}
​

An alternate approach, in which we just transform each queryObject into a dictionary, might look like this:

from xml.etree import ElementTree
import json


def localName(ele):
    """Return the tag of *ele* with any ``{namespace}`` prefix removed.

    ElementTree spells a qualified tag as ``{uri}local``. A tag that carries
    no namespace contains no ``}`` and is returned unchanged.
    """
    return ele.tag.rpartition("}")[2]


def etree_to_dict(t):
    """Convert element *t* and its descendants into plain dicts and strings.

    A leaf element becomes its stripped text ("" when it has no text).
    An element with children becomes a dict keyed by each child's local name,
    except that a ``NameAndStringValue`` child is flattened: the text of its
    first sub-element becomes a key in the parent's dict and the text of its
    second sub-element becomes the value. When sibling names repeat, the last
    one wins.
    """
    children = list(t)
    if not children:
        return (t.text or "").strip()

    d = {}
    for child in children:
        if localName(child) == "NameAndStringValue":
            pair = [(x.text or "").strip() for x in child]
            # Entries that are not exactly a name/value pair (for example an
            # empty placeholder element) carry nothing to record.
            if len(pair) == 2:
                d[pair[0]] = pair[1]
        else:
            # Convert only this child; the loop itself visits the siblings.
            d[localName(child)] = etree_to_dict(child)
    return d


parser = ElementTree.parse("data.xml")
root = parser.getroot()
queryObjectData = root.find(".//{alu.v1}queryObjectData")
# Compare with None explicitly: truth-testing an Element is deprecated and
# reports False for an element that exists but has no children.
if queryObjectData is None:
    queryObjectData = []
# Emit one JSON document per queryObject.
for queryObject in queryObjectData:
    d = etree_to_dict(queryObject)
    print(json.dumps(d, indent=2))

JavaScript
 
from xml.etree import ElementTree
import json


def localName(ele):
    """Return the tag of *ele* with any ``{namespace}`` prefix removed.

    ElementTree spells a qualified tag as ``{uri}local``. A tag that carries
    no namespace contains no ``}`` and is returned unchanged.
    """
    return ele.tag.rpartition("}")[2]


def etree_to_dict(t):
    """Convert element *t* and its descendants into plain dicts and strings.

    A leaf element becomes its stripped text ("" when it has no text).
    An element with children becomes a dict keyed by each child's local name,
    except that a ``NameAndStringValue`` child is flattened: the text of its
    first sub-element becomes a key in the parent's dict and the text of its
    second sub-element becomes the value. When sibling names repeat, the last
    one wins.
    """
    children = list(t)
    if not children:
        return (t.text or "").strip()

    d = {}
    for child in children:
        if localName(child) == "NameAndStringValue":
            pair = [(x.text or "").strip() for x in child]
            # Entries that are not exactly a name/value pair (for example an
            # empty placeholder element) carry nothing to record.
            if len(pair) == 2:
                d[pair[0]] = pair[1]
        else:
            # Convert only this child; the loop itself visits the siblings.
            d[localName(child)] = etree_to_dict(child)
    return d


parser = ElementTree.parse("data.xml")
root = parser.getroot()
queryObjectData = root.find(".//{alu.v1}queryObjectData")
# Compare with None explicitly: truth-testing an Element is deprecated and
# reports False for an element that exists but has no children.
if queryObjectData is None:
    queryObjectData = []
# Emit one JSON document per queryObject.
for queryObject in queryObjectData:
    d = etree_to_dict(queryObject)
    print(json.dumps(d, indent=2))


This will output:

{
  "name": {
    "mdNm": "AMS",
    "meNm": "CHEERLAVANCHA_281743",
    "ptpNm": "/type=NE/CHEERLAVANCHA_281743"
  },
  "vendorExtensions": {
    "package": {
      "hubSubtendedStatus": "NONE",
      "productAndRelease": "DF.6.1",
      "adminUserName": "isadmin"
    }
  }
}

JavaScript
 
{
  "name": {
    "mdNm": "AMS",
    "meNm": "CHEERLAVANCHA_281743",
    "ptpNm": "/type=NE/CHEERLAVANCHA_281743"
  },
  "vendorExtensions": {
    "package": {
      "hubSubtendedStatus": "NONE",
      "productAndRelease": "DF.6.1",
      "adminUserName": "isadmin"
    }
  }
}
​

That may or may not be appropriate depending on the structure of your real data and exactly what you’re trying to accomplish.

Advertisement

Answer