"""Get some PubMed article IDs for keywords.

Prompts the user for a comma-separated list of keywords, asks the NCBI
E-utilities ``esearch`` endpoint for the most relevant PubMed IDs
(currently ``numret`` = 3 of them) and prints the ``efetch`` URL that
retrieves the XML records for those IDs.

This is for the project for MHI 289I, Fall 2021
"""

# import network format modules
import ssl
import urllib.parse
import urllib.request

# import various web format modules
import json
import xml.etree.ElementTree as ET

# some standard stuff
import string
import sys

#
# constants
#
# number of ids to return
numret = 3

# these are the strings to construct the query to get the id list
qid = [
    "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=",
    "&sort=relevance&term=",
]

# this is the string to construct fetching the information associated
# with the list of ids
qxml = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id="

# translation table that deletes every character in string.whitespace
_STRIP_WHITESPACE = str.maketrans("", "", string.whitespace)


def ignore_certificate_errors():
    """Make urllib's default HTTPS context skip certificate verification.

    NOTE(review): this turns off TLS certificate checking for every
    HTTPS request made through the default context in this process.
    It is kept because the original script did it deliberately, but it
    should be removed if the target environment has working CA
    certificates.
    """
    try:
        unverified_context = ssl._create_unverified_context
    except AttributeError:
        # Legacy Python that doesn't verify HTTPS certificates by default
        return
    # Handle target environment that doesn't support HTTPS verification
    ssl._create_default_https_context = unverified_context


def clean_keywords(raw):
    """Return *raw* with every ``string.whitespace`` character removed.

    The prompt asks for no spaces; this is just in case the user
    couldn't follow those instructions.
    """
    return raw.translate(_STRIP_WHITESPACE)


def build_search_url(keywords, count=numret):
    """Return the esearch URL asking for *count* IDs matching *keywords*.

    The keywords are percent-encoded so that characters with a meaning
    inside a URL ('&', '#', '+', '%', non-ASCII text, ...) reach the
    server as part of the search term instead of corrupting the query.
    Commas are left as they are, so plain comma-separated keywords
    produce the same URL as simple concatenation would.
    """
    term = urllib.parse.quote(keywords, safe=",")
    return qid[0] + str(count) + qid[1] + term


def extract_ids(payload):
    """Return the ID list from the esearch JSON text *payload*.

    The list is a value in a dictionary that is itself the value of the
    "esearchresult" entry of the top-level dictionary.

    Raises ValueError (json.JSONDecodeError) if *payload* is not JSON,
    and KeyError if the expected entries are missing.
    """
    return json.loads(payload)["esearchresult"]["idlist"]


def build_fetch_url(ids):
    """Return the efetch URL for *ids*, joined with commas.

    An empty *ids* gives the bare efetch prefix with nothing after
    ``id=``.
    """
    return qxml + ",".join(ids)


def main():
    """Run the interactive lookup; return the process exit code.

    Returns 1 if the keywords cannot be read, 0 on success.  Network
    and parsing errors are not caught here and propagate to the caller.
    """
    ignore_certificate_errors()

    #
    # read in the keywords
    #
    try:
        raw = input("Enter a comma-separated list of keywords (no spaces!): ")
    except Exception as msg:
        # oops . . . something failed; print error message and quit with error
        print(msg)
        return 1
    keywords = clean_keywords(raw)

    #
    # open the database and read in the JSON data; the with-block makes
    # sure the connection is closed once the body has been read
    #
    with urllib.request.urlopen(build_search_url(keywords)) as response:
        payload = response.read().decode()

    #
    # construct the query to get the associated information and print it
    #
    print(build_fetch_url(extract_ids(payload)))
    return 0


if __name__ == "__main__":
    # exit with the code main() reports (0 on success)
    sys.exit(main())