# Program to print all web links on a web page
# It assumes the resource that the URL points to returns
# UTF-8 encoded characters
# Matt Bishop, MHI 289I, Fall 2021
#
import urllib.request
import re

# set to True to dump the fetched page source before listing the links
debug = False

#
# pattern for the href attribute of an anchor tag
# it matches "<a" in any letter case, allows other attributes before
# href, and accepts double-quoted, single-quoted, or unquoted values;
# exactly one of the three groups is set on each match
#
_ANCHOR_HREF = re.compile(
    r"""<a\s+(?:[^>]*?\s)?href\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))""",
    re.IGNORECASE,
)


def extract_links(page):
    """Return the href value of every anchor tag found in *page*.

    page -- the text of a web page (a str)

    The values are returned in document order, exactly as written in
    the page (no entity decoding, no resolution of relative links).
    An empty href yields an empty string.
    """
    return [
        # pick whichever quoting alternative matched; "" is a valid value
        next(g for g in match.groups() if g is not None)
        for match in _ANCHOR_HREF.finditer(page)
    ]


def fetch_page(urlname):
    """Fetch the resource at *urlname* and return its contents as text.

    urlname -- the URL to retrieve (a str)

    The charset declared in the response's Content-Type header is used
    to decode the body; UTF-8 is assumed when none is declared or the
    declared one is unknown to Python. Undecodable bytes are replaced
    rather than raising. Errors from opening or reading the URL
    propagate to the caller.
    """
    # the with statement closes the connection even if read() fails
    with urllib.request.urlopen(urlname) as webpage:
        raw = webpage.read()
        charset = webpage.headers.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # server named a charset Python does not know
        return raw.decode("utf-8", errors="replace")


def main():
    """Ask for a URL, fetch that page, and print each link on it."""
    #
    # ask for the URL
    #
    try:
        urlname = input("Please enter the URL: ")
    except (EOFError, KeyboardInterrupt):
        # user ended the input or interrupted the prompt
        print("Bye!")
        return
    except Exception:
        print("That's not a valid string. ")
        return
    #
    # got it -- now try to read from the URL
    #
    try:
        s = fetch_page(urlname)
    except Exception as msg:
        print("URL retrieval failed!", msg)
        return
    ### debug: print out the web page
    if debug:
        print('Web page source code:[')
        print(s)
        print('----------')
    #
    # look for URLs on the web page and print each one
    #
    for sp in extract_links(s):
        print(sp)


if __name__ == "__main__":
    main()