#
# Example of XML functions and methods in Python
#
# Matt Bishop, MHI 289I, Winter 2018
#
# Runs unchanged under Python 2 and Python 3: every print call passes one
# pre-formatted string, and the prompt reader is picked when the file loads.
#
import xml.etree.ElementTree as ET
import sys

# raw_input() exists only in Python 2; Python 3 renamed it to input()
try:
    read_line = raw_input
except NameError:
    read_line = input


#
# function to pause output
#
def pauseoutput():
    """Show a ">> " prompt and wait for the user to press Enter.

    Exits the program with status 0 if reading the prompt raises an
    exception (for example, end of file on standard input).
    """
    try:
        read_line(">> ")
    except Exception:
        # deliberate best effort: if stdin cannot be read, end the demo quietly
        sys.exit(0)


def print_all_nodes(tree):
    """Print the tag and attribute dictionary of every node in *tree*."""
    for node in tree.iter():
        print("%s %s" % (node.tag, node.attrib))


def main_headers(root):
    """Return the "text" attribute of each outline element directly under *root*.

    Outline children with no "text" attribute contribute None; children
    with any other tag are ignored.
    """
    return [child.attrib.get("text") for child in root if child.tag == "outline"]


def remove_outlines_named(root, name):
    """Remove every outline child of *root* whose "text" attribute equals *name*.

    Only direct children are examined.  Returns the number of elements
    removed.
    """
    removed = 0
    # iterate over a snapshot: removing from root while iterating root itself
    # skips the sibling that follows each removed element
    for child in list(root):
        if child.tag == "outline" and child.attrib.get("text") == name:
            root.remove(child)
            removed += 1
    return removed


def main():
    """Walk through the ElementTree demo, pausing after each step.

    Reads sample1.xml from the current directory and writes
    sample1-output1.xml and sample1-output2.xml.
    """
    #
    # load the XML into memory
    # (given a file name, ET.parse opens and closes the file itself)
    #
    xmltree = ET.parse("sample1.xml")
    xmltreeroot = xmltree.getroot()

    #
    # print the tree as an object
    #
    print(xmltree)
    pauseoutput()

    #
    # now print the whole tree
    #
    print("Here's the actual XML tree itself")
    print_all_nodes(xmltree)
    pauseoutput()

    #
    # Now let's see what the main headers are
    # these are the outline nodes under the root
    #
    print("Now the main headers:")
    for name in main_headers(xmltreeroot):
        print(name)
    pauseoutput()

    #
    # now we want to print the name and the URL only
    # they are the text and xmlUrl attributes of outline nodes
    # all outline nodes have the name attribute,
    # but the ones without URLs are headers so we print them
    # against the left margin, and the others indented
    #
    print("Now here are the names and URLs of the podcasts")
    for node in xmltree.iter("outline"):
        # get them
        name = node.attrib.get("text")
        url = node.attrib.get("xmlUrl")
        # now print them out
        if name and not url:
            print(name)
        else:
            print("\t%s -- %s" % (name, url))
    pauseoutput()

    #
    # okay, we just want the URLs and not the names
    # three ways to do this:
    #
    print("Now, just the URLs: method 1, look for outline nodes ")
    print("and print the xmlUrl attribute value ... './/outline' + if:")
    for node in xmltree.findall(".//outline"):
        url = node.attrib.get("xmlUrl")
        if url:
            print(url)
    pauseoutput()

    print("Method 2, we know they are all 2 outlines deep, so ")
    print("just grab the xmlURL attribute value; thi assumes all ")
    print("such nodes have that attribute ... './/outline/outline':")
    for node in xmltree.findall(".//outline/outline"):
        # no check here on purpose: a node without xmlUrl prints None
        print(node.attrib.get("xmlUrl"))
    pauseoutput()

    print("Method 3, we select all elements with attribute xmlUrl ... './/*[@xmlUrl]':")
    for node in xmltree.findall(".//*[@xmlUrl]"):
        print(node.attrib.get("xmlUrl"))
    pauseoutput()

    #
    # Now we want to edit the tree
    # first, we want to get rid of the "Books and Fiction" part
    #
    print("Now delete the Books and Fiction subtree and print the main headers:")
    remove_outlines_named(xmltreeroot, "Books and Fiction")
    for name in main_headers(xmltreeroot):
        print(name)
    pauseoutput()

    #
    # now write it out
    #
    xmltree.write("sample1-output1.xml")
    pauseoutput()

    #
    # now we want to change an attribute, the "MacBreak Weekly" to "MacWeek Breakly"
    #
    print("Now we change 'MacBreak Weekly' to 'MacWeek Breakly':")
    # find the element and change it
    for node in xmltreeroot.findall('.//outline'):
        if node.attrib.get("text") == "MacBreak Weekly":
            node.set("text", "MacWeek Breakly")
    print_all_nodes(xmltree)
    pauseoutput()

    #
    # Let's add back something on fiction, but make it about computer science
    #
    print("Adding Computer Science Fiction and 2 books:")
    newelem = ET.SubElement(xmltreeroot, "outline")
    # NOTE(review): "Scence" looks like a typo for "Science"; left unchanged
    # because this value is written to sample1-output2.xml
    newelem.set('text', "Computer Scence Fiction")
    x1 = ET.SubElement(newelem, "outline")
    x1.set('text', "Shockwave Rider")
    x1.set('author', "John Brunner")
    x2 = ET.SubElement(newelem, "outline")
    x2.set('text', "Snow Crash")
    x2.set('author', "Neal Stephenson")
    print_all_nodes(xmltree)
    pauseoutput()

    #
    # now write it out
    #
    xmltree.write("sample1-output2.xml")


if __name__ == "__main__":
    main()