#
# Example of XML functions and methods in Python
#
# Matt Bishop, MHI 289I, Fall 2019
#
import xml.etree.ElementTree as ET
import sys


#
# function to pause output
#
def pauseoutput():
    """Wait for the user to press Enter before the script continues.

    Prompts with ">> ". If the prompt cannot be read (for example,
    on end-of-file), the program exits with status 0.
    """
    try:
        input(">> ")
    except Exception:
        # deliberately broad: any failure to read the prompt simply
        # ends the demonstration
        sys.exit(0)


#
# load the XML into memory
#
# the file is opened in binary mode so the XML parser does the decoding
# itself and honors the encoding named in the XML declaration; the
# "with" statement closes the file as soon as parsing is done
#
with open("sample1.xml", "rb") as fptr:
    xmltree = ET.parse(fptr)
xmltreeroot = xmltree.getroot()

#
# print the tree as an object
#
print(xmltree)
pauseoutput()

#
# now print the whole tree
#
print("Here's the actual XML tree itself")
for node in xmltree.iter():
    print(node.tag, node.attrib)
pauseoutput()

#
# Now let's see what the main headers are
# these are the outline nodes under the root
#
print("Now the main headers:")
for child in xmltreeroot:
    if child.tag == "outline":
        name = child.attrib.get("text")
        print(name)
pauseoutput()

#
# now we want to print the name and the URL only
# they are the text and xmlUrl attributes of outline nodes
# all outline nodes have the text (name) attribute,
# but the ones without URLs are headers so we print them
# against the left margin, and the others indented
#
print("Now here are the names and URLs of the podcasts")
for node in xmltree.iter("outline"):
    # get them
    name = node.attrib.get("text")
    URL = node.attrib.get("xmlUrl")
    # now print them out
    if name and not URL:
        print(name)
    else:
        print("\t%s -- %s" % (name, URL))
pauseoutput()

#
# okay, we just want the URLs and not the names
# three ways to do this:
#
print("Now, just the URLs: method 1, look for outline nodes ")
print("and print the xmlUrl attribute value ... './/outline' + if:")
for node in xmltree.findall(".//outline"):
    URL = node.attrib.get("xmlUrl")
    if URL:
        print(URL)
pauseoutput()

print("Method 2, we know they are all 2 outlines deep, so ")
print("just grab the xmlUrl attribute value; this assumes all ")
print("such nodes have that attribute ... './/outline/outline':")
for node in xmltree.findall(".//outline/outline"):
    # no check here: a node without the attribute prints as None
    URL = node.attrib.get("xmlUrl")
    print(URL)
pauseoutput()

print("Method 3, we select all elements with attribute xmlUrl ... './/*[@xmlUrl]':")
for node in xmltree.findall(".//*[@xmlUrl]"):
    URL = node.attrib.get("xmlUrl")
    print(URL)
pauseoutput()

#
# Now we want to edit the tree
# first, we want to get rid of the "Books and Fiction" part
#
print("Now delete the Books and Fiction subtree and print the main headers:")
# findall() hands back a separate list of the matching children, so
# removing a child from the root inside the loop cannot make the loop
# skip the sibling that follows it
for child in xmltreeroot.findall("outline"):
    if child.attrib.get("text") == "Books and Fiction":
        xmltreeroot.remove(child)
for child in xmltreeroot:
    if child.tag == "outline":
        name = child.attrib.get("text")
        print(name)
pauseoutput()

#
# now write it out
#
xmltree.write("sample1-output1.xml")
pauseoutput()

#
# now we want to change an attribute, the "MacBreak Weekly" to "MacWeek Breakly"
#
print("Now we change 'MacBreak Weekly' to 'MacWeek Breakly':")
# find the element and change it
for node in xmltreeroot.findall('.//outline'):
    if node.attrib.get("text") == "MacBreak Weekly":
        node.set("text", "MacWeek Breakly")
for node in xmltree.iter():
    print(node.tag, node.attrib)
pauseoutput()

#
# Let's add back something on fiction, but make it about computer science
#
print("Adding Computer Science Fiction and 2 books:")
# SubElement() creates the new element and appends it to the given parent
newelem = ET.SubElement(xmltreeroot, "outline")
newelem.set('text', "Computer Science Fiction")
x1 = ET.SubElement(newelem, "outline")
x1.set('text', "Shockwave Rider")
x1.set('author', "John Brunner")
x2 = ET.SubElement(newelem, "outline")
x2.set('text', "Snow Crash")
x2.set('author', "Neal Stephenson")
for node in xmltree.iter():
    print(node.tag, node.attrib)
pauseoutput()

#
# now write it out
#
xmltree.write("sample1-output2.xml")