Last active
December 7, 2018 12:06
-
-
Save MichaelaEBI/c0711af2f7ba3dfa7bdf13c2785dcf4d to your computer and use it in GitHub Desktop.
Get side effects for all drugs in ChEMBL evidence strings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # exec(open('GetAdverseEventsForOTdrugs.py').read()) | |
| import requests | |
| import json | |
# Drug names are keys in all three dictionaries.
Drugs = {}           # Count of the different adverse events per drug
DrugsAE = {}         # The adverse events for each drug
Drugs_ChEMBLID = {}  # Drug name to ChEMBL ID mapping

# Read in the ChEMBL tsv file to pull out the list of unique drugs.
# Every data row must have exactly these 11 tab-separated columns, in order:
# DiseaseId, Disease, DrugName, DrugType, ChEMBL_ID, maxPhaseForDisease,
# DrugAction, DrugDescription, GeneID, GeneSymbol, EvidenceID.
# A row with a different column count still raises ValueError on unpacking.
filename = "Hackathon_July2018/evs_chembl.tsv"
with open(filename, 'r') as tsv_file:
    n = 0  # number of lines read from the file
    for n, row in enumerate(tsv_file, start=1):
        if not row.strip():
            # Blank lines carry no record; skip them instead of failing the
            # 11-way unpack below.
            continue
        # Strip only the line terminator. A bare rstrip() would also remove
        # trailing tabs, dropping empty columns at the end of the row and
        # breaking the unpack.
        (DiseaseId, Disease, DrugName, DrugType, ChEMBL_ID, maxPhaseForDisease,
         DrugAction, DrugDescription, GeneID, GeneSymbol,
         EvidenceID) = row.rstrip('\r\n').split('\t')
        # Keep the first ChEMBL ID seen for each drug name.
        if DrugName not in Drugs_ChEMBLID:
            # NOTE(review): the slice drops the first 39 characters, presumably
            # a URL prefix such as "http://identifiers.org/chembl.compound/"
            # -- confirm against the input file.
            Drugs_ChEMBLID[DrugName] = ChEMBL_ID[39:]
# Go through unique drug names and check for which API call to openFDA returns data.
# Iterate over Drugs_ChEMBLID, which holds every drug name read from the TSV.
# Drugs is only filled inside this loop, so looping over Drugs itself (as the
# code did before) would never send a single request.
for currDrug in Drugs_ChEMBLID:
    print(currDrug)
    # NOTE(review): the drug name is appended to the search expression as-is;
    # names containing spaces or query syntax may need quoting -- confirm
    # against the openFDA query syntax.
    response = requests.get(
        "https://api.fda.gov/drug/event.json",
        params={"search": "patient.drug.medicinalproduct:" + currDrug,
                "count": "patient.reaction.reactionmeddrapt.exact"},
        # Without a timeout a stalled connection would block the run forever.
        timeout=60,
    )
    # Only drugs with an HTTP 200 answer are recorded; any other status leaves
    # the drug out of both Drugs and DrugsAE.
    if response.status_code == 200:
        # Decode the JSON body once and reuse it.
        results = response.json()['results']
        # NOTE(review): openFDA count queries appear to return a limited number
        # of terms by default, so this length may be capped -- confirm against
        # the API documentation.
        Drugs[currDrug] = len(results)
        print(len(results))
        DrugsAE[currDrug] = results
import collections
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Tally how many drugs share each "number of different adverse events" value.
# Drugs maps drug name -> number of different adverse events, so its values are
# what the plot's x axis describes. The name previously used here (Targets) is
# not defined anywhere in the script and raised NameError.
c = collections.Counter(Drugs.values())
print(c)

# Scatter plot: x = number of different adverse events, y = number of drugs
# having that many.
fig, ax = plt.subplots()
# Pass plain lists rather than dict views so the plotting call receives
# ordinary sequences.
ax.scatter(list(c.keys()), list(c.values()))
ax.set(xlabel='Number of different adverse events', ylabel='Number of drugs',
       title='Distribution of number of adverse events')
ax.grid()
# Save the figure to disk before showing it.
fig.savefig("AdverseEventCounts_all.png")
plt.show()
# *** Save the data in TSV format: DrugName, ChEMBL_ID, AdverseEvent, Count ***
with open('drug_adverse_events.tsv', 'w') as tsv_out:
    # Header row naming the four columns.
    header = ('drug_name', 'drug_ChEMBL_ID', 'adverse_event', 'adverse_event_count')
    tsv_out.write('\t'.join(header) + '\n')
    # One output line per (drug, adverse event) pair, for every drug that has
    # an entry in Drugs.
    for drug in Drugs:
        tsv_out.writelines(
            '\t'.join((drug, Drugs_ChEMBLID[drug], event['term'], str(event['count']))) + '\n'
            for event in DrugsAE[drug]
        )
# *** Write JSON lines into a file ***
# One JSON object per line: drug name, ChEMBL ID and that drug's adverse events.
with open('drug_adverse_events.json', 'w') as jsonl_out:
    for drug in Drugs:
        record = {
            'drug_name': drug,
            'drug_id': Drugs_ChEMBLID[drug],
            'adverse_events': DrugsAE[drug],
        }
        # Drugs whose entry in Drugs is the empty string are not written.
        if Drugs[drug] == '':
            continue
        jsonl_out.write(json.dumps(record) + '\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment