#
# a program to get some PubMed article IDs for keywords
# currently set to produce 3 IDs from the keyword(s) that
# the user enters
#
# This is for the project for MHI 289I, Fall 2021
#

# import network format modules
import ssl
import urllib.parse
import urllib.request
# import various web format modules
import json
import xml.etree.ElementTree as ET
# some standard stuff
import string
import sys

#
# constants
#
# number of ids to return
numret = 3
# these are the strings to construct the query to get the id list;
# the number of ids goes between the two pieces and the (URL-encoded)
# search term goes after the second one
# NOTE: NCBI documents the E-utilities base URL with the https scheme,
# so the endpoints are addressed over https directly
qid = [
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=",
    "&sort=relevance&term=",
]
# this is the string to construct fetching the information associated
# with the list of ids; the comma-separated ids are appended to it
qxml = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id="

#
# ignore SSL/TLS certificate errors
#
# NOTE(review): this replaces the ssl module's default HTTPS context
# factory with the non-verifying one, so HTTPS connections that rely on
# the default context no longer check the server certificate; confirm
# that this trade-off is still wanted before reusing the code elsewhere
#
# look the factory up defensively: a legacy Python that doesn't verify
# HTTPS certificates by default has no such attribute and needs no change
_create_unverified_https_context = getattr(
    ssl, "_create_unverified_context", None
)
if _create_unverified_https_context is not None:
    # handle target environment that doesn't support HTTPS verification
    ssl._create_default_https_context = _create_unverified_https_context


#
# read in the keywords, look up the matching PubMed ids, and print the
# query that fetches the information associated with those ids
#

# translation table that deletes every character in string.whitespace
_WHITESPACE_TABLE = str.maketrans("", "", string.whitespace)


def clean_keywords(raw):
    """Return *raw* with every character in ``string.whitespace`` removed.

    The prompt asks for a comma-separated list without spaces; this is
    just in case the user couldn't follow those instructions.
    """
    return raw.translate(_WHITESPACE_TABLE)


def build_search_url(keywords, count, parts):
    """Return the esearch URL asking for *count* ids matching *keywords*.

    *parts* is the two-element sequence of URL pieces (see ``qid``): the
    count goes between them and the search term after the second one.

    The term is percent-encoded so that characters such as ``&`` or ``#``
    or non-ASCII letters cannot corrupt the query string.  Commas and
    ``+`` are left untouched so that terms which were already valid in a
    URL are sent exactly as before.
    """
    term = urllib.parse.quote(keywords, safe=",+")
    return parts[0] + str(count) + parts[1] + term


def parse_id_list(payload):
    """Return the id list contained in the esearch JSON text *payload*.

    The list is a value in a dictionary that is itself the value of the
    "esearchresult" entry of the top-level dictionary.

    Raises ValueError if *payload* is not valid JSON, and KeyError or
    TypeError if the expected entries are missing.
    """
    jsondata = json.loads(payload)
    return jsondata["esearchresult"]["idlist"]


def build_fetch_url(base, ids):
    """Return *base* followed by the comma-separated *ids*."""
    return base + ",".join(ids)


def fetch_id_list(keywords):
    """Query the database for *keywords* and return the list of ids found.

    Uses the module-level ``qid`` and ``numret`` constants.  Raises
    OSError (which includes urllib's URLError) on network failures and
    whatever ``parse_id_list`` raises on an unexpected reply.
    """
    url = build_search_url(keywords, numret, qid)
    # the with statement closes the connection even if reading fails
    with urllib.request.urlopen(url) as response:
        cont = response.read().decode()
    return parse_id_list(cont)


def main():
    """Run the program; return 0 on success and 1 on any failure.

    Error messages go to standard error so that standard output only
    ever carries the resulting query.
    """
    #
    # read in the keywords
    #
    try:
        inwords = input("Enter a comma-separated list of keywords (no spaces!): ")
    except (EOFError, OSError, UnicodeError) as msg:
        # oops . . . something failed; print error message and quit with error
        print(msg, file=sys.stderr)
        return 1

    keywords = clean_keywords(inwords)
    if not keywords:
        # an empty search term cannot produce an id list
        print("no keywords were entered", file=sys.stderr)
        return 1

    #
    # ask the database for the ids matching the keywords
    #
    try:
        idlist = fetch_id_list(keywords)
    except (OSError, ValueError, KeyError, TypeError) as msg:
        print("PubMed search failed:", repr(msg), file=sys.stderr)
        return 1

    if not idlist:
        # a fetch query without any ids would be useless
        print("no articles found for:", keywords, file=sys.stderr)
        return 1

    #
    # construct the query to get the associated information and print it out
    #
    print(build_fetch_url(qxml, idlist))
    return 0


#
# run only when executed as a script; exit with the code main() reports
#
if __name__ == "__main__":
    sys.exit(main())