# reads in a text file and prints every distinct word in it, one per line,
# preceded by the number of times that word occurs; the output is sorted
# by word
# ECS 10, May 18, 2009
# Matt Bishop
#
# runs unchanged under Python 2.6+ and Python 3
import string

# the interactive prompt is raw_input() in Python 2 and input() in Python 3
try:
    _read_line = raw_input
except NameError:
    _read_line = input

# these are what we care about
# a "word" is a maximal string of letters and digits
# whitespace separates words
# everything else is treated like whitespace
# (ascii_letters rather than the Python-2-only, locale-dependent letters)
lettdig = string.ascii_letters + string.digits + string.whitespace


def letteronly(line):
    """Return line with every character not in lettdig replaced by a space.

    Letters, digits and whitespace are copied through unchanged, so the
    result always has the same length as the input.

    parameters: line, the string to be transformed
    returns: the transformed string
    """
    # join the pieces in one pass instead of growing a string with +=
    return "".join(ch if ch in lettdig else " " for ch in line)


def addwords(wordlist, wl):
    """Count the words in wl into the dictionary wordlist.

    The update is done in place: a word seen for the first time is entered
    with a count of 1, and a word already present has its count increased
    by 1.

    parameters: wordlist, dictionary mapping each word to its count so far
                wl, sequence of words to count
    returns: nothing
    """
    for word in wl:
        wordlist[word] = wordlist.get(word, 0) + 1


def main():
    """Prompt for a file name and print the word counts for that file.

    Returns silently if the prompt hits end of input, and prints
    "Could not open <name>" and returns if the file cannot be opened.
    """
    # get the file name; end of input just means there is nothing to do
    try:
        fname = _read_line("File name: ")
    except EOFError:
        return

    # open it; this is a separate try so that the handler below can only
    # run once fname is known to be set
    try:
        infile = open(fname, "r")
    except IOError:
        print("Could not open %s" % fname)
        return

    # now initialize the dictionary
    wordlist = {}

    # loop through the file, one line at a time
    # the with statement closes the file even if reading fails part way
    with infile:
        for line in infile:
            # break the line up into words (as defined above)
            # and add each of them to the counts
            addwords(wordlist, letteronly(line).split())

    # all done! print the words alphabetically, one per line,
    # each preceded by its count
    for word in sorted(wordlist):
        print("%3d %s" % (wordlist[word], word))


# only run the program when executed as a script, not when imported
if __name__ == "__main__":
    main()