# reads in the text file and prints a list of words in the file # repeated words are only printed once, and the list is sorted # ECS 10, May 18, 2009 # Matt Bishop import string # these are what we care about # a "word" is a maximal string of letters and digits # whitespace separates words # everything else is treated like whitespace lettdig = string.letters + string.digits + string.whitespace # returns the line with all non-lettdigs replaced by white space # parameters: line, the line to be transformed # returns: res, the transformed line def letteronly(line): # initialize string to be returned res = "" # go through the line character by character for i in line: # if a lettdig, append it to result # if anything else, append a whitespace if i in lettdig: res += i else: res += " " # return the transformed string return res # adds words to a list of words, without duplication # note the adding is done in place # parameters: wordlist, list of words being build # wl, list of words to add def addwords(wordlist, wl): for i in wl: wordlist[i] = wordlist.get(i, 0) + 1 # comparison function to sort list of items # parameters: (k1, v1), first item # (k2, v2), second item # returns: -1 if v1 > v2, 0 if v1 == v2, 1 if v1 < v2 def cmpfun((k1, v1), (k2, v2)): if v1 > v2: return -1 elif v1 == v2: return 0 else: return 1 # this puts it all together def main(): # get the file name and open it try: fname = raw_input("File name: ") infile = open(fname, "r") except IOError: print "Could not open", fname return except EOFError: return # now initialize the dictionary wordlist = {} # loop through the file, one line at a time for l in infile: # break the line up into words (as defined above) bareline = letteronly(l) wordsinline = string.split(bareline) # add each new word to the word list addwords(wordlist, wordsinline) # all done! sort the word list # and print the words, one per line # this sorts them alphabetically itemlist = wordlist.items() itemlist.sort(cmpfun) for i in itemlist: print "%3d %s" % (i[1], i[0]) main()