# xml_minidom_parse2

 

from xml.dom.minidom import parse

 

def getText(nodelist):
    """Join the text of the non-blank text nodes found in *nodelist*.

    Only nodes whose ``nodeType`` equals ``TEXT_NODE`` are considered, and
    those whose data is empty or whitespace-only are skipped.  The data of
    each remaining node is used as-is (it is not stripped); the pieces are
    joined with a newline.  An empty *nodelist* yields ``''``.
    """
    pieces = [
        item.data
        for item in nodelist
        if item.nodeType == item.TEXT_NODE and item.data.strip()
    ]
    return '\n'.join(pieces)

 

def nodeName(node):
    """Return the tag name of a DOM element, or ``''`` for anything else.

    The name is read from the element itself rather than parsed out of
    ``repr(node)``: splitting the repr on ``':'`` returned garbage for text
    nodes whose data contains a colon and truncated namespaced tag names
    (``x:item``) to their prefix.

    Non-element nodes and objects that are not DOM nodes at all (for
    example ``None``) yield ``''``.
    """
    try:
        if node.nodeType == node.ELEMENT_NODE:
            # str() keeps the result a native string, as the repr-based
            # version did; a name it cannot convert counts as "no name".
            return str(node.tagName)
    except (AttributeError, UnicodeError):
        pass
    return ''

 

def getDataList(nodelist, **kargs):
    """Collect ``'<node> <attr>=<value>'`` strings for nodes matching *kargs*.

    Each keyword names a node (as reported by ``nodeName``) and maps to a
    dict of ``{attribute_name: accepted_values}``.  An attribute matches
    when ``accepted_values`` is empty/falsy (any value is accepted) or when
    the attribute's value is contained in it.  For every match a summary
    string is appended to the result and the node is dumped to stdout.

    A requested attribute that is missing from a node is reported on
    stdout and skipped.

    Returns the list of summary strings, in iteration order.

    The print calls take a single argument and the handler uses
    ``except ... as`` so the function compiles on both Python 2.6+ and 3.
    """
    resList = []
    for node in nodelist:
        node_name = nodeName(node)
        if node_name not in kargs:
            continue
        wanted = kargs[node_name]
        for attr_name in wanted:
            # Only the attribute lookup is expected to raise KeyError, so
            # the try block is kept to that single statement.
            try:
                value = str(node.attributes[attr_name].value)
            except KeyError as e:
                print('Invalid node atribute: %s' % (e,))
                continue
            accepted = wanted[attr_name]
            if accepted and value not in accepted:
                continue
            resList.append('%s %s=%s' % (node_name, attr_name, value))
            _printNode(node)
    return resList


def _printNode(node):
    """Print a matched node: an element's children, or a text node's text."""
    if node.nodeType == node.ELEMENT_NODE:
        print('DOM element = %s' % node.nodeName)
        lines = []
        for child in node.childNodes:
            child_name = nodeName(child)
            prefix = child_name + ': ' if child_name else ''
            lines.append('  %s%s' % (prefix, getText(child.childNodes)))
        # Children that contribute no visible text are left out of the dump.
        print('\n'.join([line for line in lines if line.strip()]))
    elif node.nodeType == node.TEXT_NODE:
        # getText iterates its argument, so a single node must be wrapped
        # in a list (the node itself was passed here before).
        print('Text Node Text = %s' % getText([node]))

 

'''

fn = r'H:\TEMP\temsys\sampleXML.txt'

 

dom1 = parse(fn)

 

process_elements = dom1.getElementsByTagName('process')

download_elements = dom1.getElementsByTagName('download')

 

elemDict = {'process': {'name': ["proc1", "proc2"]}, 'download': {'server': ['ftp', ]}}

x = getDataList(process_elements, **elemDict)

y = getDataList(download_elements, **elemDict)

 

print

print x

print y

'''

# Module-level script: runs whenever this file is executed or imported.
# The input path is hard-coded (a Windows drive path).
fn = r'H:\TEMP\temsys\sampleXML.txt'

 

# Parse the whole file into a minidom Document.
dom1 = parse(fn)

# Collect every <frameNumber4> element in the document.
# NOTE(review): `elements` is not used anywhere in the visible source.
elements = dom1.getElementsByTagName('frameNumber4')

 

 

'''

>>> DOM element = process

  mkdir: directory1

  mkdir: directory11

  mkdir: directory111

  copyfile: src1,dst1

DOM element = process

  copyfile: src2,dst2

DOM element = download

  destination: path

  unzip: *.jpg, *.doc, *.pdf

 

['process name=proc1', 'process name=proc2']

['download server=ftp']

>>>

'''