MichaelaEBI · December 7, 2018 12:06
diff --git a/GetAdverseEventsForOTdrugs.py b/GetAdverseEventsForOTdrugs.py
 # exec(open('GetAdverseEventsForOTdrugs.py').read())

 import requests
 import json

 # Drug names are keys in all three dictionaries
 Drugs = {}           # Drug name -> number of different adverse events for that drug
 DrugsAE = {}         # Drug name -> the adverse event records for that drug
 Drugs_ChEMBLID = {}  # Drug name -> ChEMBL ID

 # Read in the ChEMBL tsv file to pull out the list of unique drugs
 filename = "Hackathon_July2018/evs_chembl.tsv"

 # The handle is deliberately not called `input`: that would rebind the builtin
 # input() for the rest of the script and for any session that exec()s this file.
 with open(filename, 'r') as tsv_file:
    n = 0  # number of rows read; not used elsewhere in the visible script
    for row in tsv_file:
        n += 1
        # Strip only the line terminator. A bare rstrip() would also remove
        # trailing tabs, so a row whose last columns are empty would no longer
        # split into 11 fields. Rows with any other column count raise ValueError.
        (DiseaseId, Disease, DrugName, DrugType, ChEMBL_ID, maxPhaseForDisease,
         DrugAction, DrugDescription, GeneID, GeneSymbol, EvidenceID) = row.rstrip('\r\n').split('\t')
        # Keep the ID from the first row seen for each drug name.
        # NOTE(review): [39:] presumably drops a fixed-length URI prefix from the
        # ChEMBL column -- confirm against the input file.
        if DrugName not in Drugs_ChEMBLID:
            Drugs_ChEMBLID[DrugName] = ChEMBL_ID[39:]


 # Go through unique drug names and check for which API call to openFDA returns data.
 # The loop runs over Drugs_ChEMBLID (filled while reading the TSV); Drugs is still
 # empty at this point, so iterating over it would never issue a request.
 for currDrug in Drugs_ChEMBLID:
    print(currDrug)
    # NOTE(review): the drug name is appended to the search term unquoted -- check
    # how openFDA interprets names that contain spaces.
    response = requests.get("https://api.fda.gov/drug/event.json",
                            params={"search": "patient.drug.medicinalproduct:"+currDrug,
                                    "count": "patient.reaction.reactionmeddrapt.exact"})
    # Only drugs with an HTTP 200 response are recorded; all others are left out
    # of both Drugs and DrugsAE.
    if response.status_code == 200:
        # Decode the JSON body once and reuse it.
        results = response.json()['results']
        Drugs[currDrug] = len(results)
        print(len(results))
        DrugsAE[currDrug] = results

 import collections

 # Tally how many drugs share each adverse-event count:
 # key = number of different adverse events, value = number of drugs with that many.
 # The per-drug counts are the values of Drugs.
 c=collections.Counter(Drugs.values())
 print(c)

 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np

 # Scatter plot of the tally in c: x = number of different adverse events,
 # y = number of drugs with that many. Saved as a PNG, then shown on screen.
 fig, ax = plt.subplots()
 # Turn the dict views into lists: scatter expects array-likes, and dict views
 # are not sequences, so not every matplotlib/numpy version converts them.
 ax.scatter(list(c.keys()), list(c.values()))
 #plt.gca().set_ylim([-5, 50])
 ax.set(xlabel='Number of different adverse events', ylabel='Number of drugs',
        title='Distribution of number of adverse events')
 ax.grid()
 fig.savefig("AdverseEventCounts_all.png")
 plt.show()

 #  *** Save the data in TSV format: DrugName, ChEMBL_ID, AdverseEvent, Count ***
 with open('drug_adverse_events.tsv', 'w') as tsv_out:
    # Header row
    tsv_out.write('\t'.join(('drug_name', 'drug_ChEMBL_ID', 'adverse_event', 'adverse_event_count')) + '\n')
    # One line per (drug, adverse event) pair, for every drug recorded in Drugs
    for drug_name in Drugs:
        for event in DrugsAE[drug_name]:
            columns = (drug_name, Drugs_ChEMBLID[drug_name], event['term'], str(event['count']))
            tsv_out.write('\t'.join(columns) + '\n')


 # *** Write JSON lines into a file ***
 with open('drug_adverse_events.json', 'w') as json_out:
    # One JSON object per line, one line per drug recorded in Drugs
    for drug_name, event_count in Drugs.items():
        record = {'drug_name': drug_name, 'drug_id': Drugs_ChEMBLID[drug_name], 'adverse_events': DrugsAE[drug_name]}
        # Drugs whose entry in Drugs is the empty string are not written
        if event_count == '':
            continue
        json_out.write(json.dumps(record) + '\n')
	# exec(open('GetAdverseEventsForOTdrugs.py').read())

	import requests
	import json

	# Drug names are keys in all three dictionaries
	Drugs = {}           # Drug name -> number of different adverse events for that drug
	DrugsAE = {}         # Drug name -> the adverse event records for that drug
	Drugs_ChEMBLID = {}  # Drug name -> ChEMBL ID

	# Read in the ChEMBL tsv file to pull out the list of unique drugs
	filename = "Hackathon_July2018/evs_chembl.tsv"

	# The handle is deliberately not called `input`: that would rebind the builtin
	# input() for the rest of the script and for any session that exec()s this file.
	with open(filename, 'r') as tsv_file:
	    n = 0  # number of rows read; not used elsewhere in the visible script
	    for row in tsv_file:
	        n += 1
	        # Strip only the line terminator. A bare rstrip() would also remove
	        # trailing tabs, so a row whose last columns are empty would no longer
	        # split into 11 fields. Rows with any other column count raise ValueError.
	        (DiseaseId, Disease, DrugName, DrugType, ChEMBL_ID, maxPhaseForDisease,
	         DrugAction, DrugDescription, GeneID, GeneSymbol, EvidenceID) = row.rstrip('\r\n').split('\t')
	        # Keep the ID from the first row seen for each drug name.
	        # NOTE(review): [39:] presumably drops a fixed-length URI prefix from the
	        # ChEMBL column -- confirm against the input file.
	        if DrugName not in Drugs_ChEMBLID:
	            Drugs_ChEMBLID[DrugName] = ChEMBL_ID[39:]


	# Go through unique drug names and check for which API call to openFDA returns data.
	# The loop runs over Drugs_ChEMBLID (filled while reading the TSV); Drugs is still
	# empty at this point, so iterating over it would never issue a request.
	for currDrug in Drugs_ChEMBLID:
	    print(currDrug)
	    # NOTE(review): the drug name is appended to the search term unquoted -- check
	    # how openFDA interprets names that contain spaces.
	    response = requests.get("https://api.fda.gov/drug/event.json",
	                            params={"search": "patient.drug.medicinalproduct:"+currDrug,
	                                    "count": "patient.reaction.reactionmeddrapt.exact"})
	    # Only drugs with an HTTP 200 response are recorded; all others are left out
	    # of both Drugs and DrugsAE.
	    if response.status_code == 200:
	        # Decode the JSON body once and reuse it.
	        results = response.json()['results']
	        Drugs[currDrug] = len(results)
	        print(len(results))
	        DrugsAE[currDrug] = results

	import collections

	# Tally how many drugs share each adverse-event count:
	# key = number of different adverse events, value = number of drugs with that many.
	# The per-drug counts are the values of Drugs.
	c=collections.Counter(Drugs.values())
	print(c)

	import matplotlib
	import matplotlib.pyplot as plt
	import numpy as np

	# Scatter plot of the tally in c: x = number of different adverse events,
	# y = number of drugs with that many. Saved as a PNG, then shown on screen.
	fig, ax = plt.subplots()
	# Turn the dict views into lists: scatter expects array-likes, and dict views
	# are not sequences, so not every matplotlib/numpy version converts them.
	ax.scatter(list(c.keys()), list(c.values()))
	#plt.gca().set_ylim([-5, 50])
	ax.set(xlabel='Number of different adverse events', ylabel='Number of drugs',
	       title='Distribution of number of adverse events')
	ax.grid()
	fig.savefig("AdverseEventCounts_all.png")
	plt.show()

	# * Save the data in TSV format: DrugName, ChEMBL_ID, AdverseEvent, Count *
	with open('drug_adverse_events.tsv', 'w') as f:
	    # Header row
	    f.write('drug_name' + '\t' + 'drug_ChEMBL_ID' + '\t' + 'adverse_event' + '\t' + 'adverse_event_count' + '\n')
	    # Go through all drugs in the Drug dictionary
	    for currDrug in Drugs:
	        # One line per adverse event recorded for currDrug in DrugsAE
	        for advEvent in DrugsAE[currDrug]:
	            f.write(currDrug + '\t' + Drugs_ChEMBLID[currDrug] + '\t' + advEvent['term'] + '\t' + str(advEvent['count']) + '\n')


	# * Write JSON lines into a file *
	with open('drug_adverse_events.json', 'w') as f:
	    # One JSON object per line, one line per drug recorded in Drugs
	    for currDrug in Drugs:
	        d = {'drug_name': currDrug, 'drug_id': Drugs_ChEMBLID[currDrug], 'adverse_events': DrugsAE[currDrug]}
	        # Drugs whose entry in Drugs is the empty string are not written
	        if Drugs[currDrug] != '':
	            f.write(json.dumps(d))
	            f.write('\n')
No results found