#
# Some bits about pattern matching
#
# Matt Bishop, ECS 36A, Winter 2019
#
import re


def pr(p, s):
    """Search string s with compiled pattern p and print the outcome.

    p -- compiled regular expression (any object with a search() method)
    s -- the string to search

    On a match, prints the tuple of captured groups and the (start, end)
    span of the whole match; otherwise prints that s does not match.
    """
    # do the matching
    x = p.search(s)
    # report success or failure
    if x:
        print("p.search('" + s + "').groups(): \n\t", x.groups(),
              "--- spanning", x.span())
    else:
        print("'" + s + "' does not match")


def pend(match):
    """Replacement callback for sub(): wrap the matched text in >>> <<<."""
    return ">>> " + str(match.group()) + " <<<"


def match_summary(pattern, text):
    """Return (span, matched_text) for re.match(pattern, text).

    Returns None when the pattern does not match at the start of text, so
    callers never dereference a missing match object.
    """
    found = re.match(pattern, text)
    if found is None:
        return None
    return found.span(), found.group()


def _pause():
    """Wait for the user to hit return so the output above can be read."""
    try:
        input("\nHit return to continue")
    except EOFError:
        # stdin is exhausted (e.g. input was piped); just keep going
        pass
    print("\n")


# strings used to exercise the "doubled word followed by abc*" pattern
_DOUBLED_WORD_SAMPLES = (
    "h h abcc",
    "h h h abcc",
    "h h h h abcc",
    "h h hh abcc",
    "h hh h ab",
    "hh h h ab",
)


def _show_doubled_word_matches(p):
    """Run pr() with pattern p over every sample string."""
    for sample in _DOUBLED_WORD_SAMPLES:
        pr(p, sample)


def _demo_groups():
    """Show how group(), group(n) and groups() relate to the parentheses."""
    # compile the pattern to make things easier
    print("the pattern is: 'ab(cd([ef]+)g)'")
    p = re.compile(r'ab(cd([ef]+)g)')
    # now show how grouping works
    print("the text is: 'abcdeeffefg'")
    m = p.match('abcdeeffefg')
    if m:
        print("It matches! Here are the groupings:")
        print("m.group(): '" + m.group() + "'")
        print("m.group(0): '" + m.group(0) + "'")
        print("m.group(1): '" + m.group(1) + "'")
        print("m.group(2): '" + m.group(2) + "'")
        print("m.group(2, 0, 2, 1):", m.group(2, 0, 2, 1))
        print("m.groups():", m.groups())
    else:
        print("No match")


def _demo_raw_pattern():
    """Back-reference pattern written as a raw string."""
    # the backslashes are doubled here only so that print shows single ones
    print("the pattern is: '\\b(\\w+)\\s+\\1\\b\\s+(abc*)'")
    print("it is written as r'\\b(\\w+)\\s+\\1\\b\\s+(abc*)' "
          "so we don't need to escape the \\")
    p = re.compile(r'\b(\w+)\s+\1\b\s+(abc*)')
    # show how the matching works
    _show_doubled_word_matches(p)


def _demo_escaped_pattern():
    """The same pattern written as an ordinary string, backslashes escaped."""
    print("the pattern is: '\\\\b(\\\\w+)\\\\s+\\\\1\\\\b\\\\s+(abc*)'")
    print("note we had to escape the \\")
    # deliberately NOT a raw string: that is the point of this demo
    p = re.compile('\\b(\\w+)\\s+\\1\\b\\s+(abc*)')
    # show how the matching works
    _show_doubled_word_matches(p)


def _demo_split():
    """Split strings on runs of non-letters."""
    # say where we are going
    print("Now here's how you split strings using patterns")
    # compile the pattern to make things easier
    print("the pattern is: r'[^A-Za-z]+'")
    p = re.compile(r'[^A-Za-z]+')
    # now do the splits
    x = p.split("Singing in the rain, just singing in the rain")
    print('p.split("Singing in the rain, just singing in the rain"):')
    print("\t", x)
    x = p.split("Singing in the rain, just singing in the rain", 4)
    print('p.split("Singing in the rain, just singing in the rain", 4):')
    print("\t", x)
    x = p.split("abcdef")
    print('p.split("abcdef"):')
    print("\t", x)


def _demo_sub():
    """Substitute matches with a fixed string and with a callback."""
    # say where we are going
    print("Now here's how you do substitutions using patterns")
    # compile the pattern to make things easier
    print("the pattern is 'red|green|blue'")
    c = re.compile("red|green|blue")
    # now do the substitutions
    x = c.sub('primary color', "red face, blue sea, green grass")
    print("c.sub('primary color', 'red face, blue sea, green grass'):")
    print("\t", x)
    x = c.sub('primary color', "red face, blue sea, green grass", count=2)
    print("c.sub('primary color', 'red face, blue sea, green grass', count=2):")
    print("\t", x)
    x = c.sub('primary color', "orange face, azure sea, olive grass")
    print("c.sub('primary color', 'orange face, azure sea, olive grass'):")
    print("\t", x)
    # here the replacement is computed by calling pend on each match
    x = c.sub(pend, "red face, orange orange, blue sea, pink cat")
    print("c.sub(pend, 'red face, orange orange, blue sea, pink cat'):")
    print("\t", x)


def _report_match(label, pattern, text):
    """Print the span and text matched by pattern, or say there is none."""
    summary = match_summary(pattern, text)
    if summary is None:
        print(label + " match: no match")
        return
    span, matched = summary
    print(label + " match span =", span)
    print(label + " match string = '" + matched + "'")


def _demo_greedy():
    """Contrast greedy and non-greedy quantifiers."""
    # now some pattern matching stuff
    print("And finally some pattern matching things")
    # the string we use
    # NOTE(review): the patterns below look for <...>, so the sample needs
    # markup; these tags are a reconstruction -- confirm the intended text
    s = '<h1>RegEx</h1>'
    # and now the pattern matching
    print("string = '" + s + "'\nlength =", len(s))
    _report_match("greedy", '<.*>', s)
    _report_match("non-greedy", '<.*?>', s)
    # the question mark ...
    for pattern in (".", ".?", ".*", ".*?"):
        print("Here's " + pattern + " for 'hello':",
              re.match(pattern, "hello").group())


def main():
    """Run every demonstration, pausing between sections."""
    _demo_groups()
    _pause()   # pause so the user can read the above output
    _demo_raw_pattern()
    _pause()
    _demo_escaped_pattern()
    _pause()
    _demo_split()
    _pause()
    _demo_sub()
    _pause()
    _demo_greedy()


if __name__ == "__main__":
    main()