Skip to content

Instantly share code, notes, and snippets.

@Dbyrum
Last active July 4, 2018 16:41
Show Gist options
  • Select an option

  • Save Dbyrum/09c395f9584ddc3a45957721f5a6915c to your computer and use it in GitHub Desktop.

Select an option

Save Dbyrum/09c395f9584ddc3a45957721f5a6915c to your computer and use it in GitHub Desktop.
first task
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Import libraries up front\n",
"import json\n",
"\n",
"# From Table S13 in Plaisier et al., Cell Systems 2016\n",
"# These are Entrez IDs (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3013746/)\n",
"input = ['430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733', '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551', '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464', '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269', '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971', '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496', '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416', '7707', '7764', '23528', '201516']\n",
"\n",
"# Loading JSON file\n",
"# https://www.safaribooksonline.com/library/view/python-cookbook-3rd/9781449357337/ch06s02.html\n",
"# Example:\n",
"# import json\n",
"#\n",
"# # Reading data back\n",
"# with open('data.json', 'r') as f:\n",
"# data = json.load(f)\n",
"\n",
"# Reading TF regulator to TF target gene relationships into Python\n",
"# The json library we import takes care of most of the work\n",
"with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:\n",
" tfbsDb = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['SOX10_HMG_full_dimeric_16_1', 'V_AP2ALPHA_01_M00469', 'V_SIX6_01_M01345', 'Pitx1.1', 'ELF1_ETS_full_monomeric_12_1']\n"
]
}
],
"source": [
"# Example set of keys in tfbsDb, they are Motif IDs (http://jaspar.genereg.net/search?q=Homo%20sapiens&collection=CORE&tax_group=vertebrates)\n",
"print(list(tfbsDb.keys())[0:5])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['10', '100131211', '100288797', '100302736', '10057']\n"
]
}
],
"source": [
"# Example set of values under a specific Motif ID, they are Entrez IDs\n",
"print(tfbsDb[list(tfbsDb.keys())[1]][0:5])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1185\n"
]
}
],
"source": [
"print(len(tfbsDb[list(tfbsDb.keys())[0]]))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Motif Name', 'Gene Symbol', 'Entrez ID']\n"
]
}
],
"source": [
"# Read in humanTFs file\n",
" \n",
"id2motif = {}\n",
"motif2id = {}\n",
"with open('id_conversion/humanTFs_All.csv','r') as inFile:\n",
" # Use the readline() function to read in a single line\n",
" # strip() gets rid of the newline character at the end of the line\n",
" # split(',') splits up the line into columns based on commas\n",
" header = inFile.readline().strip().split(',')\n",
" print (header)\n",
" while 1:\n",
" inLine = inFile.readline()\n",
" if not inLine:\n",
" break\n",
" split = inLine.strip().split(',') \n",
" \n",
" # TODO Fill out the id2motif dictionary (key = Entrez ID, value = Motif Name)\n",
" # if split[2]\n",
" \n",
" if not split[2] in id2motif:\n",
" id2motif[split[2]] = []\n",
" id2motif[split[2]].append(split[0]) \n",
" # TODO Fill out the motif2id dictionary (key = Motif Name, value = Entrez ID)\n",
" motif2id[split[0]]=split[2]\n",
" \n",
" \n",
" \n",
"\n",
"#print(len(motif2id))\n",
"#print(len(id2motif.keys()))\n",
"#print(id2motif)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"\n",
"## To build a TF regulator to TF target gene network (constrained to TFs within the input list).\n",
"## This will require mapping from:\n",
"## 1. Input list of potential TF regulator Entrez Gene IDs (input) \n",
"## 2. List of Motif IDs for an Entrez Gene ID in the input list (either id2motif or motif2id)\n",
"## 3. TF target genes that are Entrez Gene IDs that are the values under a specific Motif ID in tfbsDb\n",
"## 4. Restrict TF target genes to only those in the input list\n",
"## 5. Add new entry to tfNetwor dictionary that has as the key the TF regulator and the values all the TF target genes\n",
"tfNetwork = {}\n",
"\n",
"for eachTfReg in input: # for loop that assigns each iteration to eachTfReg\n",
" if eachTfReg in id2motif:\n",
" for eachMotif in id2motif[eachTfReg]: # loop function that checks motif2id in id2motif[eachTfReg]\n",
" if eachMotif in tfbsDb:\n",
" targets = tfbsDb[eachMotif] # assign targets from id2motif[eachTfReg]/eachTfreg\n",
" \n",
" for eachTarget in targets:\n",
" if not eachTfReg in tfNetwork:\n",
" tfNetwork[eachTfReg] = []\n",
" \n",
" if eachTarget in input and not eachTarget in tfNetwork[eachTfReg]:\n",
" tfNetwork[eachTfReg].append(eachTarget)\n",
" \n",
"#print (tfNetwork)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12\n",
"['84699', '9421', '3726', '128209', '10365', '1316', '5971', '4093', '347853', '8463', '23051', '85416']\n"
]
}
],
"source": [
"l1 = []\n",
"for eachTfReg in input:\n",
" if not eachTfReg in tfNetwork:\n",
" #print (eachTfReg)\n",
" l1.append(eachTfReg)\n",
" \n",
"print(len(l1))\n",
"print(l1)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"#sorted(id2motif.keys())"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Ascl2.1',\n",
" 'Ascl2.2',\n",
" 'Ascl2_bHLH_DBD_dimeric_10_1',\n",
" 'V_ASCL2_03_M02737',\n",
" 'V_ASCL2_04_M02841']"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#motif2id['430']\n",
"id2motif['430']"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'430'"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"motif2id['Ascl2.1']"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"74\n"
]
}
],
"source": [
"print (len(input))"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['142', '190', '196', '257', '326']\n"
]
}
],
"source": [
"print (list(id2motif.keys())[0:5])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'142' in tfbsDb['Ascl2.1']"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"#print(tfbsDb.keys())"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"#for each in input:\n",
" # print (each)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"62"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(tfNetwork)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment