fadliaulawi
committed on
Commit
•
31eb136
1
Parent(s):
63bec36
Tidy up interface
Browse files- app.py +118 -108
- resources/experiment.ipynb +584 -77
app.py
CHANGED
@@ -11,17 +11,20 @@ from langchain_text_splitters import TokenTextSplitter
|
|
11 |
from process import Process
|
12 |
from tempfile import NamedTemporaryFile
|
13 |
from stqdm import stqdm
|
|
|
14 |
|
15 |
buffer = io.BytesIO()
|
16 |
|
17 |
st.cache_data()
|
18 |
st.set_page_config(page_title="NutriGenMe Paper Extractor")
|
19 |
-
st.title("NutriGenMe - Paper
|
20 |
-
st.markdown("<div style='text-align:
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
25 |
|
26 |
with col1:
|
27 |
models = (
|
@@ -30,9 +33,7 @@ with col1:
|
|
30 |
# 'llama-3-sonar-large-32k-chat',
|
31 |
# 'mixtral-8x7b-instruct',
|
32 |
)
|
33 |
-
model = st.selectbox(
|
34 |
-
'Model selection:', models, key='model'
|
35 |
-
)
|
36 |
|
37 |
with col2:
|
38 |
tokens = (
|
@@ -40,118 +41,127 @@ with col2:
|
|
40 |
16000,
|
41 |
24000
|
42 |
)
|
43 |
-
chunk_option = st.selectbox(
|
44 |
-
'Token amounts per process:', tokens, key='token'
|
45 |
-
)
|
46 |
chunk_overlap = 0
|
47 |
|
48 |
with col3:
|
49 |
models_val = (
|
50 |
-
'gemini-1.5-pro-latest',
|
51 |
'gpt-4-turbo',
|
|
|
52 |
'mixtral-8x7b-instruct',
|
53 |
# 'llama-3-sonar-large-32k-chat',
|
54 |
)
|
55 |
-
model_val = st.selectbox(
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
|
|
|
|
|
|
59 |
|
60 |
if uploaded_files:
|
61 |
-
|
62 |
-
parseButtonHV = st.button("Get Result", key='table_HV')
|
63 |
|
64 |
-
|
65 |
-
with st.status("Extraction in progress ...", expanded=True) as status:
|
66 |
-
start_time = datetime.now()
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
)
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
result[k]
|
128 |
-
|
129 |
-
#
|
130 |
-
result =
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
146 |
df.to_excel(writer, sheet_name='Result Cleaned API LLM')
|
147 |
df_no_llm.to_excel(writer, sheet_name='Result Cleaned API')
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
)
|
|
|
|
|
|
11 |
from process import Process
|
12 |
from tempfile import NamedTemporaryFile
|
13 |
from stqdm import stqdm
|
14 |
+
from validate import Validation
|
15 |
|
16 |
buffer = io.BytesIO()
|
17 |
|
18 |
st.cache_data()
|
19 |
st.set_page_config(page_title="NutriGenMe Paper Extractor")
|
20 |
+
st.title("NutriGenMe - Paper Extractor")
|
21 |
+
st.markdown("<div style='text-align: justify;text-justify: inter-word;'>NutriGenMe Paper Extractor is a tool designed to extract relevant information from genomic papers related to the NutriGenMe project. It utilizes natural language processing techniques to parse through documents and extract key data points, enabling researchers and practitioners to efficiently gather insights from a large corpus of literature.</div>", unsafe_allow_html=True)
|
22 |
+
st.divider()
|
23 |
|
24 |
+
st.markdown("<h4>Extraction</h4>", unsafe_allow_html=True)
|
25 |
+
col1, col2 = st.columns(2)
|
26 |
+
st.markdown("<h4>Validation</h4>", unsafe_allow_html=True)
|
27 |
+
col3, col4 = st.columns(2)
|
28 |
|
29 |
with col1:
|
30 |
models = (
|
|
|
33 |
# 'llama-3-sonar-large-32k-chat',
|
34 |
# 'mixtral-8x7b-instruct',
|
35 |
)
|
36 |
+
model = st.selectbox('Model selection:', models, key='model')
|
|
|
|
|
37 |
|
38 |
with col2:
|
39 |
tokens = (
|
|
|
41 |
16000,
|
42 |
24000
|
43 |
)
|
44 |
+
chunk_option = st.selectbox('Token amounts per process:', tokens, key='token')
|
|
|
|
|
45 |
chunk_overlap = 0
|
46 |
|
47 |
with col3:
|
48 |
models_val = (
|
|
|
49 |
'gpt-4-turbo',
|
50 |
+
'gemini-1.5-pro-latest',
|
51 |
'mixtral-8x7b-instruct',
|
52 |
# 'llama-3-sonar-large-32k-chat',
|
53 |
)
|
54 |
+
model_val = st.selectbox('Model validator selection:', models_val, key='model_val')
|
55 |
+
|
56 |
+
with col4:
|
57 |
+
api = st.toggle('Validate with API')
|
58 |
+
|
59 |
+
if api:
|
60 |
+
st.warning("""This validation process leverage external application programming interfaces (APIs) from NCBI and EBI to verify information.
|
61 |
+
These APIs may have limitations on their usage, so please exercise responsible use of this functionality.
|
62 |
+
If you opt to employ API validation and the process takes a long time (more than 1 hour), consider refreshing the page and proceeding without API validation.""", icon="⚠️")
|
63 |
|
64 |
+
st.divider()
|
65 |
+
st.markdown("<h4>Process</h4>", unsafe_allow_html=True)
|
66 |
+
uploaded_files = st.file_uploader("Upload Paper(s) here :", type="pdf", accept_multiple_files=True)
|
67 |
|
68 |
if uploaded_files:
|
69 |
+
submit = st.button("Get Result", key='submit')
|
|
|
70 |
|
71 |
+
if uploaded_files and submit:
|
|
|
|
|
72 |
|
73 |
+
with st.status("Extraction in progress ...", expanded=True) as status:
|
74 |
+
for uploaded_file in stqdm(uploaded_files):
|
75 |
+
start_time = datetime.now()
|
76 |
+
with NamedTemporaryFile(dir='.', suffix=".pdf", delete=eval(os.getenv('DELETE_TEMP_PDF', 'True'))) as pdf:
|
77 |
+
|
78 |
+
pdf.write(uploaded_file.getbuffer())
|
79 |
+
st.markdown(f"Start Extraction process at <code>{datetime.now().strftime('%H:%M')}</code>", unsafe_allow_html=True)
|
80 |
+
|
81 |
+
# Load Documents
|
82 |
+
loader = PyPDFLoader(pdf.name)
|
83 |
+
pages = loader.load()
|
84 |
+
|
85 |
+
chunk_size = 120000
|
86 |
+
chunk_overlap = 0
|
87 |
+
docs = pages
|
88 |
+
|
89 |
+
# Split Documents
|
90 |
+
if chunk_option:
|
91 |
+
docs = [Document('\n'.join([page.page_content for page in pages]))]
|
92 |
+
docs[0].metadata = {'source': pages[0].metadata['source']}
|
93 |
+
|
94 |
+
chunk_size = chunk_option
|
95 |
+
chunk_overlap = int(0.25 * chunk_size)
|
96 |
+
|
97 |
+
text_splitter = TokenTextSplitter.from_tiktoken_encoder(
|
98 |
+
chunk_size=chunk_size, chunk_overlap=chunk_overlap
|
99 |
+
)
|
100 |
+
chunks = text_splitter.split_documents(docs)
|
101 |
+
|
102 |
+
# Start extraction process in parallel
|
103 |
+
process = Process(model)
|
104 |
+
with ThreadPoolExecutor() as executor:
|
105 |
+
result_gsd = executor.submit(process.get_entity, (chunks, 'gsd'))
|
106 |
+
result_summ = executor.submit(process.get_entity, (chunks, 'summ'))
|
107 |
+
result = executor.submit(process.get_entity, (chunks, 'all'))
|
108 |
+
result_one = executor.submit(process.get_entity_one, [c.page_content for c in chunks[:1]])
|
109 |
+
result_table = executor.submit(process.get_table, pdf.name)
|
110 |
+
|
111 |
+
result_gsd = result_gsd.result()
|
112 |
+
result_summ = result_summ.result()
|
113 |
+
result = result.result()
|
114 |
+
result_one = result_one.result()
|
115 |
+
res_gene, res_snp, res_dis = result_table.result()
|
116 |
+
|
117 |
+
# Combine Result
|
118 |
+
result['Genes'] = res_gene + result_gsd['Genes']
|
119 |
+
result['SNPs'] = res_snp + result_gsd['SNPs']
|
120 |
+
result['Diseases'] = res_dis + result_gsd['Diseases']
|
121 |
+
result['Conclusion'] = result_summ
|
122 |
+
for k in result_one.keys():
|
123 |
+
result[k] = result_one[k]
|
124 |
+
|
125 |
+
if len(result['Genes']) == 0:
|
126 |
+
result['Genes'] = ['']
|
127 |
+
|
128 |
+
# Adjust Genes, SNPs, Diseases
|
129 |
+
num_rows = max(max(len(result['Genes']), len(result['SNPs'])), len(result['Diseases']))
|
130 |
+
for k in ['Genes', 'SNPs', 'Diseases']:
|
131 |
+
while len(result[k]) < num_rows:
|
132 |
+
result[k].append('')
|
133 |
+
|
134 |
+
# Temporary handling
|
135 |
+
result[k] = result[k][:num_rows]
|
136 |
+
|
137 |
+
# Arrange Column
|
138 |
+
result = {key: value if isinstance(value, list) else [value] * num_rows for key, value in result.items()}
|
139 |
+
dataframe = pd.DataFrame(result)
|
140 |
+
dataframe = dataframe[['Genes', 'SNPs', 'Diseases', 'Title', 'Authors', 'Publisher Name', 'Publication Year', 'Population', 'Sample Size', 'Study Methodology', 'Study Level', 'Conclusion']]
|
141 |
+
dataframe = dataframe[dataframe['Genes'].astype(bool)].reset_index(drop=True)
|
142 |
+
dataframe.reset_index(drop=True, inplace=True)
|
143 |
+
|
144 |
+
# Validate Result
|
145 |
+
st.markdown(f"Start Validation process at <code>{datetime.now().strftime('%H:%M')}</code>", unsafe_allow_html=True)
|
146 |
+
validation = Validation(model_val)
|
147 |
+
df, df_no_llm, df_clean = validation.validate(dataframe, api)
|
148 |
+
df.drop_duplicates(['Genes', 'SNPs'], inplace=True)
|
149 |
+
st.write("Success in ", round((datetime.now().timestamp() - start_time.timestamp()) / 60, 2), "minutes")
|
150 |
+
|
151 |
+
st.dataframe(df)
|
152 |
+
with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
|
153 |
+
if api:
|
154 |
df.to_excel(writer, sheet_name='Result Cleaned API LLM')
|
155 |
df_no_llm.to_excel(writer, sheet_name='Result Cleaned API')
|
156 |
+
else:
|
157 |
+
df.to_excel(writer, sheet_name='Result Cleaned LLM')
|
158 |
+
df_clean.to_excel(writer, sheet_name='Result Cleaned')
|
159 |
+
dataframe.to_excel(writer, sheet_name='Original')
|
160 |
+
writer.close()
|
161 |
+
|
162 |
+
st.download_button(
|
163 |
+
label="Save Result",
|
164 |
+
data=buffer,
|
165 |
+
file_name=f"{uploaded_file.name.replace('.pdf', '')}_{chunk_option}_{model.split('-')[0]}_{model_val.split('-')[0]}.xlsx",
|
166 |
+
mime='application/vnd.ms-excel'
|
167 |
+
)
|
resources/experiment.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
8 |
{
|
@@ -38,23 +38,23 @@
|
|
38 |
},
|
39 |
{
|
40 |
"cell_type": "code",
|
41 |
-
"execution_count":
|
42 |
"metadata": {},
|
43 |
"outputs": [],
|
44 |
"source": [
|
45 |
-
"image = Image('
|
46 |
]
|
47 |
},
|
48 |
{
|
49 |
"cell_type": "code",
|
50 |
-
"execution_count":
|
51 |
"metadata": {},
|
52 |
"outputs": [
|
53 |
{
|
54 |
"name": "stdout",
|
55 |
"output_type": "stream",
|
56 |
"text": [
|
57 |
-
"
|
58 |
]
|
59 |
},
|
60 |
{
|
@@ -83,108 +83,196 @@
|
|
83 |
" <th>2</th>\n",
|
84 |
" <th>3</th>\n",
|
85 |
" <th>4</th>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
" </tr>\n",
|
87 |
" </thead>\n",
|
88 |
" <tbody>\n",
|
89 |
" <tr>\n",
|
90 |
" <th>0</th>\n",
|
91 |
-
" <td>
|
92 |
-
" <td>
|
93 |
-
" <td>
|
94 |
-
" <td>
|
95 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
" </tr>\n",
|
97 |
" <tr>\n",
|
98 |
" <th>1</th>\n",
|
99 |
-
" <td>
|
100 |
-
" <td>
|
101 |
-
" <td>
|
102 |
-
" <td>
|
103 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
" </tr>\n",
|
105 |
" <tr>\n",
|
106 |
" <th>2</th>\n",
|
107 |
-
" <td>
|
108 |
-
" <td>
|
109 |
-
" <td>
|
110 |
-
" <td>
|
111 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
" </tr>\n",
|
113 |
" <tr>\n",
|
114 |
" <th>3</th>\n",
|
115 |
" <td>None</td>\n",
|
116 |
-
" <td>potassium channel subunits (Kir6.2)</td>\n",
|
117 |
-
" <td>and TNDM, CHI, MODY</td>\n",
|
118 |
" <td>None</td>\n",
|
119 |
" <td>None</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
" </tr>\n",
|
121 |
" <tr>\n",
|
122 |
" <th>4</th>\n",
|
123 |
-
" <td>
|
124 |
-
" <td>
|
125 |
-
" <td>
|
126 |
-
" <td>
|
127 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
" </tr>\n",
|
129 |
" <tr>\n",
|
130 |
" <th>5</th>\n",
|
131 |
-
" <td>
|
132 |
-
" <td>
|
133 |
-
" <td>
|
134 |
-
" <td>
|
135 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
" </tr>\n",
|
137 |
" <tr>\n",
|
138 |
" <th>6</th>\n",
|
139 |
-
" <td>
|
140 |
-
" <td>
|
141 |
-
" <td>
|
142 |
-
" <td>
|
143 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
" </tr>\n",
|
145 |
" <tr>\n",
|
146 |
" <th>7</th>\n",
|
147 |
-
" <td>
|
148 |
-
" <td>
|
149 |
-
" <td>
|
150 |
-
" <td>
|
151 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
" </tr>\n",
|
153 |
" <tr>\n",
|
154 |
" <th>8</th>\n",
|
155 |
" <td>None</td>\n",
|
156 |
-
" <td>glucose transporter</td>\n",
|
157 |
" <td>None</td>\n",
|
158 |
-
" <td>(CC) in the promoter region</td>\n",
|
159 |
" <td>None</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
" </tr>\n",
|
161 |
" <tr>\n",
|
162 |
" <th>9</th>\n",
|
163 |
" <td>None</td>\n",
|
164 |
" <td>None</td>\n",
|
165 |
" <td>None</td>\n",
|
166 |
-
" <td>and SNPS rs5400 (T1101) and</td>\n",
|
167 |
" <td>None</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
" </tr>\n",
|
169 |
" </tbody>\n",
|
170 |
"</table>\n",
|
171 |
"</div>"
|
172 |
],
|
173 |
"text/plain": [
|
174 |
-
"
|
175 |
-
"0
|
176 |
-
"1
|
177 |
-
"2
|
178 |
-
"3
|
179 |
-
"4
|
180 |
-
"5
|
181 |
-
"6
|
182 |
-
"7
|
183 |
-
"8
|
184 |
-
"9
|
185 |
]
|
186 |
},
|
187 |
-
"execution_count":
|
188 |
"metadata": {},
|
189 |
"output_type": "execute_result"
|
190 |
}
|
@@ -204,30 +292,15 @@
|
|
204 |
},
|
205 |
{
|
206 |
"cell_type": "code",
|
207 |
-
"execution_count":
|
208 |
"metadata": {},
|
209 |
"outputs": [
|
210 |
{
|
211 |
"name": "stdout",
|
212 |
"output_type": "stream",
|
213 |
"text": [
|
214 |
-
"
|
215 |
-
"0
|
216 |
-
"1 Gene Name Major Function Syndromes T1D and/or T2D Refs.\n",
|
217 |
-
"2 KCNJ11 Encodes pore-forming inwardly-rectifying potas... PNDM (most common cause) and TNDM, CHI, MODY E23K 42-46\n",
|
218 |
-
"3 ABCC8 Encodes regulatory SUR1 subunits PNDM and TNDM, CHI, MODY A1369S, 1273AGA, R1420H 46,47,52\n",
|
219 |
-
"4 GCK A key glucose-phosphoryating enzyme; a glucose... GCK-MODY (MODY2), PNDM, CHI rs1799884 (G/A), rs4607517 (A/G), 3'UTR SNP, c... 75,78,79\n",
|
220 |
-
"5 SLC2A2 Encodes GLUT2, a high-capacity facilitative gl... FBS SNPS rs5393 (AA) and rs5394 (CC) in the promot... 93-100\n",
|
221 |
-
"6 HNF1A/TCF1 TF; regulator of pancreatic B-cell differentia... HNF1A-MODY (MODY3), most common cause of MODY,... G319S, C.1522G>A (p.E508K) 114, 118, 119\n",
|
222 |
-
"7 HNF4A Key TF for early fetal development HNF4A MODY (MODY1), CHI SNPS rs2144908, rs3818247 and rs884614, rs4810... 121-124, 274\n",
|
223 |
-
"8 HNF1B/TCF2 TF; required for the generation of pancreatic ... RCAD syndrome, or MODY5; TNDM and PNDM (rare) SNP rs757210 A, TS4430796 A, and TS7501939 C 141, 144\n",
|
224 |
-
"9 PDX1 TF; required for pancreas development, B-cell ... PNDM, MODY4 C18R, Q59L, D76N, R197H, G212R, P239Q, InsCCG2... 163-165, 167\n",
|
225 |
-
"10 PAX4 Islet TF that functions mainly as a transcript... MODY9 R121W, R133W, R37W, rs10229583 G 180, 181, 187\n",
|
226 |
-
"11 NEUROD1/BETA2 TF; required for the development of the endocr... MODY6 and PNDM R111L and 206 + C; A45T variant at rs1801262 (... 204-208\n",
|
227 |
-
"12 WFS1 A transmembrane protein; a negative regulator ... WFS1, sometimes referred to as DIDMOAD R456 and H611, SNPS at rs10010131, rs6446482; ... 223-225\n",
|
228 |
-
"13 PPARG TF; master regulator of adipogenesis, energy b... Monogenic diabetes Monogenic Diabetes Genes ... Pro12Ala variant (rs1801282), SNP at rs4684847... 240-243, 250\n",
|
229 |
-
"14 INS Predominant glucose-lowering hormone PNDM (2nd most common cause), TNDM, MODY10 Class I alleles of INS VNTR associated with T1... 273, 274, 276-281\n",
|
230 |
-
"15 GLIS3 TF; regulator of islet development, insulin ge... Neonatal diabetes syndrome associated with con... rs7020673 G associated with T1D; rs7034200 A a... 78, 214, 289, 291, 292, 295-308\n"
|
231 |
]
|
232 |
}
|
233 |
],
|
@@ -254,6 +327,440 @@
|
|
254 |
"print(dfc)"
|
255 |
]
|
256 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
{
|
258 |
"cell_type": "markdown",
|
259 |
"metadata": {},
|
@@ -263,14 +770,14 @@
|
|
263 |
},
|
264 |
{
|
265 |
"cell_type": "code",
|
266 |
-
"execution_count":
|
267 |
"metadata": {},
|
268 |
"outputs": [
|
269 |
{
|
270 |
"name": "stdout",
|
271 |
"output_type": "stream",
|
272 |
"text": [
|
273 |
-
"
|
274 |
]
|
275 |
}
|
276 |
],
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
8 |
{
|
|
|
38 |
},
|
39 |
{
|
40 |
"cell_type": "code",
|
41 |
+
"execution_count": 24,
|
42 |
"metadata": {},
|
43 |
"outputs": [],
|
44 |
"source": [
|
45 |
+
"image = Image('testing/ukmss-2.png')"
|
46 |
]
|
47 |
},
|
48 |
{
|
49 |
"cell_type": "code",
|
50 |
+
"execution_count": 25,
|
51 |
"metadata": {},
|
52 |
"outputs": [
|
53 |
{
|
54 |
"name": "stdout",
|
55 |
"output_type": "stream",
|
56 |
"text": [
|
57 |
+
"1\n"
|
58 |
]
|
59 |
},
|
60 |
{
|
|
|
83 |
" <th>2</th>\n",
|
84 |
" <th>3</th>\n",
|
85 |
" <th>4</th>\n",
|
86 |
+
" <th>5</th>\n",
|
87 |
+
" <th>6</th>\n",
|
88 |
+
" <th>7</th>\n",
|
89 |
+
" <th>8</th>\n",
|
90 |
+
" <th>9</th>\n",
|
91 |
+
" <th>10</th>\n",
|
92 |
+
" <th>11</th>\n",
|
93 |
+
" <th>12</th>\n",
|
94 |
" </tr>\n",
|
95 |
" </thead>\n",
|
96 |
" <tbody>\n",
|
97 |
" <tr>\n",
|
98 |
" <th>0</th>\n",
|
99 |
+
" <td>SNP</td>\n",
|
100 |
+
" <td>Chr.</td>\n",
|
101 |
+
" <td>Position\\nB36\\n(bp)</td>\n",
|
102 |
+
" <td>Nearby\\ngenea</td>\n",
|
103 |
+
" <td>Risk\\nalleleb</td>\n",
|
104 |
+
" <td>Gene (transcript)</td>\n",
|
105 |
+
" <td>Tissue</td>\n",
|
106 |
+
" <td>Effect (s.e.m.)C</td>\n",
|
107 |
+
" <td>P value</td>\n",
|
108 |
+
" <td>P d\\nadj</td>\n",
|
109 |
+
" <td>SNP(2f</td>\n",
|
110 |
+
" <td>Pvalue</td>\n",
|
111 |
+
" <td>P g\\nadj</td>\n",
|
112 |
" </tr>\n",
|
113 |
" <tr>\n",
|
114 |
" <th>1</th>\n",
|
115 |
+
" <td>Novel loci reported in this study</td>\n",
|
116 |
+
" <td>Novel loci reported in this study</td>\n",
|
117 |
+
" <td>Novel loci reported in this study</td>\n",
|
118 |
+
" <td>None</td>\n",
|
119 |
+
" <td>None</td>\n",
|
120 |
+
" <td>None</td>\n",
|
121 |
+
" <td>None</td>\n",
|
122 |
+
" <td>None</td>\n",
|
123 |
+
" <td>None</td>\n",
|
124 |
+
" <td>None</td>\n",
|
125 |
+
" <td>None</td>\n",
|
126 |
+
" <td>None</td>\n",
|
127 |
+
" <td>None</td>\n",
|
128 |
" </tr>\n",
|
129 |
" <tr>\n",
|
130 |
" <th>2</th>\n",
|
131 |
+
" <td>rs4457053</td>\n",
|
132 |
+
" <td>5</td>\n",
|
133 |
+
" <td>76,460,705</td>\n",
|
134 |
+
" <td>ZBED3</td>\n",
|
135 |
+
" <td>G</td>\n",
|
136 |
+
" <td>PDE8B(NM 003719)</td>\n",
|
137 |
+
" <td>Adipose</td>\n",
|
138 |
+
" <td>0.302 (0.070)</td>\n",
|
139 |
+
" <td>2.8 X 10-5</td>\n",
|
140 |
+
" <td>0.80</td>\n",
|
141 |
+
" <td>rs6864250 (0.18)</td>\n",
|
142 |
+
" <td>3.1 X 10-17</td>\n",
|
143 |
+
" <td>5.8 X 10-13</td>\n",
|
144 |
" </tr>\n",
|
145 |
" <tr>\n",
|
146 |
" <th>3</th>\n",
|
147 |
" <td>None</td>\n",
|
|
|
|
|
148 |
" <td>None</td>\n",
|
149 |
" <td>None</td>\n",
|
150 |
+
" <td>None</td>\n",
|
151 |
+
" <td>None</td>\n",
|
152 |
+
" <td>ZBED3(NM 032367)</td>\n",
|
153 |
+
" <td>Adipose</td>\n",
|
154 |
+
" <td>0.429 (0.068)</td>\n",
|
155 |
+
" <td>1.0: x 10-9</td>\n",
|
156 |
+
" <td>0.011</td>\n",
|
157 |
+
" <td>rs4704389 (0.20)</td>\n",
|
158 |
+
" <td>3.9 x 10-16</td>\n",
|
159 |
+
" <td>6.0 X 10-9</td>\n",
|
160 |
" </tr>\n",
|
161 |
" <tr>\n",
|
162 |
" <th>4</th>\n",
|
163 |
+
" <td>rs972283</td>\n",
|
164 |
+
" <td>7</td>\n",
|
165 |
+
" <td>130,117,394</td>\n",
|
166 |
+
" <td>KLF14</td>\n",
|
167 |
+
" <td>G</td>\n",
|
168 |
+
" <td>KLF14(NM_138693)</td>\n",
|
169 |
+
" <td>Adipose</td>\n",
|
170 |
+
" <td>-0.387 (0.058)</td>\n",
|
171 |
+
" <td>8.1 X 10-11</td>\n",
|
172 |
+
" <td>0.058</td>\n",
|
173 |
+
" <td>rs738134 (0.30)</td>\n",
|
174 |
+
" <td>2.2 X 10-12</td>\n",
|
175 |
+
" <td>0.0014</td>\n",
|
176 |
" </tr>\n",
|
177 |
" <tr>\n",
|
178 |
" <th>5</th>\n",
|
179 |
+
" <td>rs896854</td>\n",
|
180 |
+
" <td>8</td>\n",
|
181 |
+
" <td>96,029,687</td>\n",
|
182 |
+
" <td>TP53INPI</td>\n",
|
183 |
+
" <td>T</td>\n",
|
184 |
+
" <td>CCNE2 (NM 057749)</td>\n",
|
185 |
+
" <td>Blood</td>\n",
|
186 |
+
" <td>0.225 (0.053)</td>\n",
|
187 |
+
" <td>3.8 X 10-5</td>\n",
|
188 |
+
" <td>0.78</td>\n",
|
189 |
+
" <td>rs4735339 (0.61)</td>\n",
|
190 |
+
" <td>5.8 X 10-7</td>\n",
|
191 |
+
" <td>0.0051</td>\n",
|
192 |
" </tr>\n",
|
193 |
" <tr>\n",
|
194 |
" <th>6</th>\n",
|
195 |
+
" <td>rs1552224</td>\n",
|
196 |
+
" <td>11</td>\n",
|
197 |
+
" <td>72,110,746</td>\n",
|
198 |
+
" <td>CENTD2</td>\n",
|
199 |
+
" <td>A</td>\n",
|
200 |
+
" <td>STARDIO(NM 006645)</td>\n",
|
201 |
+
" <td>Blood</td>\n",
|
202 |
+
" <td>0.337 (0.066)</td>\n",
|
203 |
+
" <td>8.6 x 10-7</td>\n",
|
204 |
+
" <td>0.026</td>\n",
|
205 |
+
" <td>rs519790 (0.04)</td>\n",
|
206 |
+
" <td>2.7x 10-24</td>\n",
|
207 |
+
" <td>1.6 X 10-1</td>\n",
|
208 |
" </tr>\n",
|
209 |
" <tr>\n",
|
210 |
" <th>7</th>\n",
|
211 |
+
" <td>rs7957197</td>\n",
|
212 |
+
" <td>12</td>\n",
|
213 |
+
" <td>119,945,069</td>\n",
|
214 |
+
" <td>HNFIA</td>\n",
|
215 |
+
" <td>T</td>\n",
|
216 |
+
" <td>ACADS (NM 000017)</td>\n",
|
217 |
+
" <td>Adipose</td>\n",
|
218 |
+
" <td>0.248 (0.067)</td>\n",
|
219 |
+
" <td>3.7 x 10-4</td>\n",
|
220 |
+
" <td>0.29</td>\n",
|
221 |
+
" <td>rs9204\\n(0.02)</td>\n",
|
222 |
+
" <td>1.3x 10-53</td>\n",
|
223 |
+
" <td>5.9 X 10-50</td>\n",
|
224 |
" </tr>\n",
|
225 |
" <tr>\n",
|
226 |
" <th>8</th>\n",
|
227 |
" <td>None</td>\n",
|
|
|
228 |
" <td>None</td>\n",
|
|
|
229 |
" <td>None</td>\n",
|
230 |
+
" <td>None</td>\n",
|
231 |
+
" <td>None</td>\n",
|
232 |
+
" <td>PSMD9 (NM 002813)</td>\n",
|
233 |
+
" <td>Blood</td>\n",
|
234 |
+
" <td>0.240 (0.065)</td>\n",
|
235 |
+
" <td>3.9 X 10-4</td>\n",
|
236 |
+
" <td>0.0088</td>\n",
|
237 |
+
" <td>rs3741593\\n(0.00)</td>\n",
|
238 |
+
" <td>8.3x 10-8</td>\n",
|
239 |
+
" <td>1.7 X 10-6</td>\n",
|
240 |
" </tr>\n",
|
241 |
" <tr>\n",
|
242 |
" <th>9</th>\n",
|
243 |
" <td>None</td>\n",
|
244 |
" <td>None</td>\n",
|
245 |
" <td>None</td>\n",
|
|
|
246 |
" <td>None</td>\n",
|
247 |
+
" <td>None</td>\n",
|
248 |
+
" <td>OASL (NM_003733)</td>\n",
|
249 |
+
" <td>Adipose</td>\n",
|
250 |
+
" <td>0.318 (0.068)</td>\n",
|
251 |
+
" <td>6.4 X 10-6</td>\n",
|
252 |
+
" <td>0.13</td>\n",
|
253 |
+
" <td>rs2259883\\n(0.19)</td>\n",
|
254 |
+
" <td>1.1x1 10-7</td>\n",
|
255 |
+
" <td>0.0018</td>\n",
|
256 |
" </tr>\n",
|
257 |
" </tbody>\n",
|
258 |
"</table>\n",
|
259 |
"</div>"
|
260 |
],
|
261 |
"text/plain": [
|
262 |
+
" 0 1 2 3 4 5 6 7 8 9 10 11 12\n",
|
263 |
+
"0 SNP Chr. Position\\nB36\\n(bp) Nearby\\ngenea Risk\\nalleleb Gene (transcript) Tissue Effect (s.e.m.)C P value P d\\nadj SNP(2f Pvalue P g\\nadj\n",
|
264 |
+
"1 Novel loci reported in this study Novel loci reported in this study Novel loci reported in this study None None None None None None None None None None\n",
|
265 |
+
"2 rs4457053 5 76,460,705 ZBED3 G PDE8B(NM 003719) Adipose 0.302 (0.070) 2.8 X 10-5 0.80 rs6864250 (0.18) 3.1 X 10-17 5.8 X 10-13\n",
|
266 |
+
"3 None None None None None ZBED3(NM 032367) Adipose 0.429 (0.068) 1.0: x 10-9 0.011 rs4704389 (0.20) 3.9 x 10-16 6.0 X 10-9\n",
|
267 |
+
"4 rs972283 7 130,117,394 KLF14 G KLF14(NM_138693) Adipose -0.387 (0.058) 8.1 X 10-11 0.058 rs738134 (0.30) 2.2 X 10-12 0.0014\n",
|
268 |
+
"5 rs896854 8 96,029,687 TP53INPI T CCNE2 (NM 057749) Blood 0.225 (0.053) 3.8 X 10-5 0.78 rs4735339 (0.61) 5.8 X 10-7 0.0051\n",
|
269 |
+
"6 rs1552224 11 72,110,746 CENTD2 A STARDIO(NM 006645) Blood 0.337 (0.066) 8.6 x 10-7 0.026 rs519790 (0.04) 2.7x 10-24 1.6 X 10-1\n",
|
270 |
+
"7 rs7957197 12 119,945,069 HNFIA T ACADS (NM 000017) Adipose 0.248 (0.067) 3.7 x 10-4 0.29 rs9204\\n(0.02) 1.3x 10-53 5.9 X 10-50\n",
|
271 |
+
"8 None None None None None PSMD9 (NM 002813) Blood 0.240 (0.065) 3.9 X 10-4 0.0088 rs3741593\\n(0.00) 8.3x 10-8 1.7 X 10-6\n",
|
272 |
+
"9 None None None None None OASL (NM_003733) Adipose 0.318 (0.068) 6.4 X 10-6 0.13 rs2259883\\n(0.19) 1.1x1 10-7 0.0018"
|
273 |
]
|
274 |
},
|
275 |
+
"execution_count": 25,
|
276 |
"metadata": {},
|
277 |
"output_type": "execute_result"
|
278 |
}
|
|
|
292 |
},
|
293 |
{
|
294 |
"cell_type": "code",
|
295 |
+
"execution_count": 8,
|
296 |
"metadata": {},
|
297 |
"outputs": [
|
298 |
{
|
299 |
"name": "stdout",
|
300 |
"output_type": "stream",
|
301 |
"text": [
|
302 |
+
" 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18\n",
|
303 |
+
"0 SNP rs584438 IGFBP4 rs6662509 H6PD rs2362965 R... Gene C T T C A C T A T T A A Effect Other allele allele A C A T G T C G C... OR 0.98 1.00 0.95 1.03 1.09 1.08 1.27 1.09 1... BMI tails 0.52 0.95 0.02 0.33 0.0001 0.0001... P 1.02 1.11 0.97 1.06 1.11 1.125 5.41 X 10-5 ... OR 0.64 0.07 0.25 0.11 0.0006 1.125 5.41 X 10... Obesity class III P 1.01 1.01 0.98 1.01 1.10... OR 0.47 0.83 0.20 0.58 1.10 1.06 X 10-8 1.125... Obesity class II P 1.00 0.99 0.99 1.00 1.04 ... OR 0.75 1.00 0.34 0.99 0.37 0.99 0.82 1.01 9.... Obesity class I P 0.59 0.005 0.35 -0.006 0.21... Overweight OR 0.22 0.27 0.05 0.33 8.80 X 10... Overweight class P 1.18 1.23 1.12 1.15 1.00 ... BMI (continuous)a Effect P 5.22 X 10-12 3.19 ... OR 0.025 9.43 X 10-11 0.031 7.76 X 10-12 0.017... Height tails 0.025 9.43 X 10-11 0.031 7.76 X... Height P Height (continuous)a Effect P\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
]
|
305 |
}
|
306 |
],
|
|
|
327 |
"print(dfc)"
|
328 |
]
|
329 |
},
|
330 |
+
{
|
331 |
+
"cell_type": "code",
|
332 |
+
"execution_count": 26,
|
333 |
+
"metadata": {},
|
334 |
+
"outputs": [
|
335 |
+
{
|
336 |
+
"data": {
|
337 |
+
"text/html": [
|
338 |
+
"<div>\n",
|
339 |
+
"<style scoped>\n",
|
340 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
341 |
+
" vertical-align: middle;\n",
|
342 |
+
" }\n",
|
343 |
+
"\n",
|
344 |
+
" .dataframe tbody tr th {\n",
|
345 |
+
" vertical-align: top;\n",
|
346 |
+
" }\n",
|
347 |
+
"\n",
|
348 |
+
" .dataframe thead th {\n",
|
349 |
+
" text-align: right;\n",
|
350 |
+
" }\n",
|
351 |
+
"</style>\n",
|
352 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
353 |
+
" <thead>\n",
|
354 |
+
" <tr style=\"text-align: right;\">\n",
|
355 |
+
" <th></th>\n",
|
356 |
+
" <th>0</th>\n",
|
357 |
+
" <th>1</th>\n",
|
358 |
+
" <th>2</th>\n",
|
359 |
+
" <th>3</th>\n",
|
360 |
+
" <th>4</th>\n",
|
361 |
+
" <th>5</th>\n",
|
362 |
+
" <th>6</th>\n",
|
363 |
+
" <th>7</th>\n",
|
364 |
+
" <th>8</th>\n",
|
365 |
+
" <th>9</th>\n",
|
366 |
+
" <th>10</th>\n",
|
367 |
+
" <th>11</th>\n",
|
368 |
+
" <th>12</th>\n",
|
369 |
+
" </tr>\n",
|
370 |
+
" </thead>\n",
|
371 |
+
" <tbody>\n",
|
372 |
+
" <tr>\n",
|
373 |
+
" <th>0</th>\n",
|
374 |
+
" <td>SNP</td>\n",
|
375 |
+
" <td>Chr.</td>\n",
|
376 |
+
" <td>Position\\nB36\\n(bp)</td>\n",
|
377 |
+
" <td>Nearby\\ngenea</td>\n",
|
378 |
+
" <td>Risk\\nalleleb</td>\n",
|
379 |
+
" <td>Gene (transcript)</td>\n",
|
380 |
+
" <td>Tissue</td>\n",
|
381 |
+
" <td>Effect (s.e.m.)C</td>\n",
|
382 |
+
" <td>P value</td>\n",
|
383 |
+
" <td>P d\\nadj</td>\n",
|
384 |
+
" <td>SNP(2f</td>\n",
|
385 |
+
" <td>Pvalue</td>\n",
|
386 |
+
" <td>P g\\nadj</td>\n",
|
387 |
+
" </tr>\n",
|
388 |
+
" <tr>\n",
|
389 |
+
" <th>1</th>\n",
|
390 |
+
" <td>Novel loci reported in this study</td>\n",
|
391 |
+
" <td>Novel loci reported in this study</td>\n",
|
392 |
+
" <td>Novel loci reported in this study</td>\n",
|
393 |
+
" <td></td>\n",
|
394 |
+
" <td></td>\n",
|
395 |
+
" <td></td>\n",
|
396 |
+
" <td></td>\n",
|
397 |
+
" <td></td>\n",
|
398 |
+
" <td></td>\n",
|
399 |
+
" <td></td>\n",
|
400 |
+
" <td></td>\n",
|
401 |
+
" <td></td>\n",
|
402 |
+
" <td></td>\n",
|
403 |
+
" </tr>\n",
|
404 |
+
" <tr>\n",
|
405 |
+
" <th>2</th>\n",
|
406 |
+
" <td>rs4457053</td>\n",
|
407 |
+
" <td>5</td>\n",
|
408 |
+
" <td>76,460,705</td>\n",
|
409 |
+
" <td>ZBED3</td>\n",
|
410 |
+
" <td>G</td>\n",
|
411 |
+
" <td>PDE8B(NM 003719)</td>\n",
|
412 |
+
" <td>Adipose</td>\n",
|
413 |
+
" <td>0.302 (0.070)</td>\n",
|
414 |
+
" <td>2.8 X 10-5</td>\n",
|
415 |
+
" <td>0.80</td>\n",
|
416 |
+
" <td>rs6864250 (0.18)</td>\n",
|
417 |
+
" <td>3.1 X 10-17</td>\n",
|
418 |
+
" <td>5.8 X 10-13</td>\n",
|
419 |
+
" </tr>\n",
|
420 |
+
" <tr>\n",
|
421 |
+
" <th>3</th>\n",
|
422 |
+
" <td></td>\n",
|
423 |
+
" <td></td>\n",
|
424 |
+
" <td></td>\n",
|
425 |
+
" <td></td>\n",
|
426 |
+
" <td></td>\n",
|
427 |
+
" <td>ZBED3(NM 032367)</td>\n",
|
428 |
+
" <td>Adipose</td>\n",
|
429 |
+
" <td>0.429 (0.068)</td>\n",
|
430 |
+
" <td>1.0: x 10-9</td>\n",
|
431 |
+
" <td>0.011</td>\n",
|
432 |
+
" <td>rs4704389 (0.20)</td>\n",
|
433 |
+
" <td>3.9 x 10-16</td>\n",
|
434 |
+
" <td>6.0 X 10-9</td>\n",
|
435 |
+
" </tr>\n",
|
436 |
+
" <tr>\n",
|
437 |
+
" <th>4</th>\n",
|
438 |
+
" <td>rs972283</td>\n",
|
439 |
+
" <td>7</td>\n",
|
440 |
+
" <td>130,117,394</td>\n",
|
441 |
+
" <td>KLF14</td>\n",
|
442 |
+
" <td>G</td>\n",
|
443 |
+
" <td>KLF14(NM_138693)</td>\n",
|
444 |
+
" <td>Adipose</td>\n",
|
445 |
+
" <td>-0.387 (0.058)</td>\n",
|
446 |
+
" <td>8.1 X 10-11</td>\n",
|
447 |
+
" <td>0.058</td>\n",
|
448 |
+
" <td>rs738134 (0.30)</td>\n",
|
449 |
+
" <td>2.2 X 10-12</td>\n",
|
450 |
+
" <td>0.0014</td>\n",
|
451 |
+
" </tr>\n",
|
452 |
+
" <tr>\n",
|
453 |
+
" <th>5</th>\n",
|
454 |
+
" <td>rs896854</td>\n",
|
455 |
+
" <td>8</td>\n",
|
456 |
+
" <td>96,029,687</td>\n",
|
457 |
+
" <td>TP53INPI</td>\n",
|
458 |
+
" <td>T</td>\n",
|
459 |
+
" <td>CCNE2 (NM 057749)</td>\n",
|
460 |
+
" <td>Blood</td>\n",
|
461 |
+
" <td>0.225 (0.053)</td>\n",
|
462 |
+
" <td>3.8 X 10-5</td>\n",
|
463 |
+
" <td>0.78</td>\n",
|
464 |
+
" <td>rs4735339 (0.61)</td>\n",
|
465 |
+
" <td>5.8 X 10-7</td>\n",
|
466 |
+
" <td>0.0051</td>\n",
|
467 |
+
" </tr>\n",
|
468 |
+
" <tr>\n",
|
469 |
+
" <th>6</th>\n",
|
470 |
+
" <td>rs1552224</td>\n",
|
471 |
+
" <td>11</td>\n",
|
472 |
+
" <td>72,110,746</td>\n",
|
473 |
+
" <td>CENTD2</td>\n",
|
474 |
+
" <td>A</td>\n",
|
475 |
+
" <td>STARDIO(NM 006645)</td>\n",
|
476 |
+
" <td>Blood</td>\n",
|
477 |
+
" <td>0.337 (0.066)</td>\n",
|
478 |
+
" <td>8.6 x 10-7</td>\n",
|
479 |
+
" <td>0.026</td>\n",
|
480 |
+
" <td>rs519790 (0.04)</td>\n",
|
481 |
+
" <td>2.7x 10-24</td>\n",
|
482 |
+
" <td>1.6 X 10-1</td>\n",
|
483 |
+
" </tr>\n",
|
484 |
+
" <tr>\n",
|
485 |
+
" <th>7</th>\n",
|
486 |
+
" <td>rs7957197</td>\n",
|
487 |
+
" <td>12</td>\n",
|
488 |
+
" <td>119,945,069</td>\n",
|
489 |
+
" <td>HNFIA</td>\n",
|
490 |
+
" <td>T</td>\n",
|
491 |
+
" <td>ACADS (NM 000017)</td>\n",
|
492 |
+
" <td>Adipose</td>\n",
|
493 |
+
" <td>0.248 (0.067)</td>\n",
|
494 |
+
" <td>3.7 x 10-4</td>\n",
|
495 |
+
" <td>0.29</td>\n",
|
496 |
+
" <td>rs9204\\n(0.02)</td>\n",
|
497 |
+
" <td>1.3x 10-53</td>\n",
|
498 |
+
" <td>5.9 X 10-50</td>\n",
|
499 |
+
" </tr>\n",
|
500 |
+
" <tr>\n",
|
501 |
+
" <th>8</th>\n",
|
502 |
+
" <td></td>\n",
|
503 |
+
" <td></td>\n",
|
504 |
+
" <td></td>\n",
|
505 |
+
" <td></td>\n",
|
506 |
+
" <td></td>\n",
|
507 |
+
" <td>PSMD9 (NM 002813)</td>\n",
|
508 |
+
" <td>Blood</td>\n",
|
509 |
+
" <td>0.240 (0.065)</td>\n",
|
510 |
+
" <td>3.9 X 10-4</td>\n",
|
511 |
+
" <td>0.0088</td>\n",
|
512 |
+
" <td>rs3741593\\n(0.00)</td>\n",
|
513 |
+
" <td>8.3x 10-8</td>\n",
|
514 |
+
" <td>1.7 X 10-6</td>\n",
|
515 |
+
" </tr>\n",
|
516 |
+
" <tr>\n",
|
517 |
+
" <th>9</th>\n",
|
518 |
+
" <td></td>\n",
|
519 |
+
" <td></td>\n",
|
520 |
+
" <td></td>\n",
|
521 |
+
" <td></td>\n",
|
522 |
+
" <td></td>\n",
|
523 |
+
" <td>OASL (NM_003733)</td>\n",
|
524 |
+
" <td>Adipose</td>\n",
|
525 |
+
" <td>0.318 (0.068)</td>\n",
|
526 |
+
" <td>6.4 X 10-6</td>\n",
|
527 |
+
" <td>0.13</td>\n",
|
528 |
+
" <td>rs2259883\\n(0.19)</td>\n",
|
529 |
+
" <td>1.1x1 10-7</td>\n",
|
530 |
+
" <td>0.0018</td>\n",
|
531 |
+
" </tr>\n",
|
532 |
+
" <tr>\n",
|
533 |
+
" <th>10</th>\n",
|
534 |
+
" <td></td>\n",
|
535 |
+
" <td></td>\n",
|
536 |
+
" <td></td>\n",
|
537 |
+
" <td></td>\n",
|
538 |
+
" <td></td>\n",
|
539 |
+
" <td>OASL (NM_ _003733)</td>\n",
|
540 |
+
" <td>Blood</td>\n",
|
541 |
+
" <td>0.319 (0.064)</td>\n",
|
542 |
+
" <td>1.3 X 10-6</td>\n",
|
543 |
+
" <td>0.37</td>\n",
|
544 |
+
" <td>rs4556628\\n(0.21)</td>\n",
|
545 |
+
" <td>4.4> X 10-22</td>\n",
|
546 |
+
" <td>1.4 X 10-16</td>\n",
|
547 |
+
" </tr>\n",
|
548 |
+
" <tr>\n",
|
549 |
+
" <th>11</th>\n",
|
550 |
+
" <td></td>\n",
|
551 |
+
" <td></td>\n",
|
552 |
+
" <td></td>\n",
|
553 |
+
" <td></td>\n",
|
554 |
+
" <td></td>\n",
|
555 |
+
" <td>COQ5(NM_032314)</td>\n",
|
556 |
+
" <td>Blood</td>\n",
|
557 |
+
" <td>0.248 (0.065)</td>\n",
|
558 |
+
" <td>2.1 x1 10-4</td>\n",
|
559 |
+
" <td>0.92</td>\n",
|
560 |
+
" <td>rs10774561\\n(0.02)</td>\n",
|
561 |
+
" <td>8.7x 10-39</td>\n",
|
562 |
+
" <td>4.9 X 10 -</td>\n",
|
563 |
+
" </tr>\n",
|
564 |
+
" <tr>\n",
|
565 |
+
" <th>12</th>\n",
|
566 |
+
" <td></td>\n",
|
567 |
+
" <td></td>\n",
|
568 |
+
" <td></td>\n",
|
569 |
+
" <td></td>\n",
|
570 |
+
" <td></td>\n",
|
571 |
+
" <td>UNCI19B(NM 032661)</td>\n",
|
572 |
+
" <td>Blood</td>\n",
|
573 |
+
" <td>0.254 (0.064)</td>\n",
|
574 |
+
" <td>1.4x 10-4</td>\n",
|
575 |
+
" <td>0.048</td>\n",
|
576 |
+
" <td>rs11065202\\n(0.09)</td>\n",
|
577 |
+
" <td>7.8 x 10-12</td>\n",
|
578 |
+
" <td>2.3 X 10-9</td>\n",
|
579 |
+
" </tr>\n",
|
580 |
+
" <tr>\n",
|
581 |
+
" <th>13</th>\n",
|
582 |
+
" <td></td>\n",
|
583 |
+
" <td></td>\n",
|
584 |
+
" <td></td>\n",
|
585 |
+
" <td></td>\n",
|
586 |
+
" <td></td>\n",
|
587 |
+
" <td>CAMKK2 (NM 17 72215)</td>\n",
|
588 |
+
" <td>Adipose</td>\n",
|
589 |
+
" <td>0.497 (0.068)</td>\n",
|
590 |
+
" <td>1.2 x 10-12</td>\n",
|
591 |
+
" <td>0.18</td>\n",
|
592 |
+
" <td>rs11065504\\n(0.08)</td>\n",
|
593 |
+
" <td>2.7x 10-117</td>\n",
|
594 |
+
" <td>3.8 X 10-98</td>\n",
|
595 |
+
" </tr>\n",
|
596 |
+
" <tr>\n",
|
597 |
+
" <th>14</th>\n",
|
598 |
+
" <td></td>\n",
|
599 |
+
" <td></td>\n",
|
600 |
+
" <td></td>\n",
|
601 |
+
" <td></td>\n",
|
602 |
+
" <td></td>\n",
|
603 |
+
" <td>CAMKK2 (NM_ 1 172215)</td>\n",
|
604 |
+
" <td>Blood</td>\n",
|
605 |
+
" <td>0.360 (0.063)</td>\n",
|
606 |
+
" <td>3.4 X 10-8</td>\n",
|
607 |
+
" <td>0.68</td>\n",
|
608 |
+
" <td>rs11065504\\n(0.08)</td>\n",
|
609 |
+
" <td>7.0 X 10-105</td>\n",
|
610 |
+
" <td>5.7 X 10-94</td>\n",
|
611 |
+
" </tr>\n",
|
612 |
+
" <tr>\n",
|
613 |
+
" <th>15</th>\n",
|
614 |
+
" <td></td>\n",
|
615 |
+
" <td></td>\n",
|
616 |
+
" <td></td>\n",
|
617 |
+
" <td></td>\n",
|
618 |
+
" <td></td>\n",
|
619 |
+
" <td>P2RX4(NM 175568)</td>\n",
|
620 |
+
" <td>Blood</td>\n",
|
621 |
+
" <td>0.312 (0.065)</td>\n",
|
622 |
+
" <td>3.4 x 10-6</td>\n",
|
623 |
+
" <td>2.0 x 10-6</td>\n",
|
624 |
+
" <td>rs25644\\n(0.03)</td>\n",
|
625 |
+
" <td>3.4 x 10-17</td>\n",
|
626 |
+
" <td>1.9 x 10-17</td>\n",
|
627 |
+
" </tr>\n",
|
628 |
+
" <tr>\n",
|
629 |
+
" <th>16</th>\n",
|
630 |
+
" <td>rs8042680</td>\n",
|
631 |
+
" <td>15</td>\n",
|
632 |
+
" <td>89,322,341</td>\n",
|
633 |
+
" <td>PRCI</td>\n",
|
634 |
+
" <td>A</td>\n",
|
635 |
+
" <td>VPS33B (NM_018668)</td>\n",
|
636 |
+
" <td>Blood</td>\n",
|
637 |
+
" <td>0.371 (0.057)</td>\n",
|
638 |
+
" <td>2.9 x 10-10</td>\n",
|
639 |
+
" <td>0.50</td>\n",
|
640 |
+
" <td>rs12595616\\n(0.57)</td>\n",
|
641 |
+
" <td>2.3 x 10-21</td>\n",
|
642 |
+
" <td>4.5 X 10-1</td>\n",
|
643 |
+
" </tr>\n",
|
644 |
+
" <tr>\n",
|
645 |
+
" <th>17</th>\n",
|
646 |
+
" <td>Previously reported loci</td>\n",
|
647 |
+
" <td>Previously reported loci</td>\n",
|
648 |
+
" <td>Previously reported loci</td>\n",
|
649 |
+
" <td></td>\n",
|
650 |
+
" <td></td>\n",
|
651 |
+
" <td></td>\n",
|
652 |
+
" <td></td>\n",
|
653 |
+
" <td></td>\n",
|
654 |
+
" <td></td>\n",
|
655 |
+
" <td></td>\n",
|
656 |
+
" <td></td>\n",
|
657 |
+
" <td></td>\n",
|
658 |
+
" <td></td>\n",
|
659 |
+
" </tr>\n",
|
660 |
+
" <tr>\n",
|
661 |
+
" <th>18</th>\n",
|
662 |
+
" <td>rs7578326</td>\n",
|
663 |
+
" <td>2</td>\n",
|
664 |
+
" <td>226,728,897</td>\n",
|
665 |
+
" <td>IRSI</td>\n",
|
666 |
+
" <td>A</td>\n",
|
667 |
+
" <td>IRS/(Contig50189RC)</td>\n",
|
668 |
+
" <td>Adipose</td>\n",
|
669 |
+
" <td>-0.251 (0.059)</td>\n",
|
670 |
+
" <td>3.7 x 10-5</td>\n",
|
671 |
+
" <td>0.89</td>\n",
|
672 |
+
" <td>rs2943653 (0.93)</td>\n",
|
673 |
+
" <td>3.4 X 10-5</td>\n",
|
674 |
+
" <td>0.69</td>\n",
|
675 |
+
" </tr>\n",
|
676 |
+
" <tr>\n",
|
677 |
+
" <th>19</th>\n",
|
678 |
+
" <td></td>\n",
|
679 |
+
" <td></td>\n",
|
680 |
+
" <td></td>\n",
|
681 |
+
" <td></td>\n",
|
682 |
+
" <td></td>\n",
|
683 |
+
" <td>IRSI(NM 005544)</td>\n",
|
684 |
+
" <td>Adipose</td>\n",
|
685 |
+
" <td>0.331 (0.059)</td>\n",
|
686 |
+
" <td>5.7 X 10-8</td>\n",
|
687 |
+
" <td>0.58</td>\n",
|
688 |
+
" <td>rs2176040 (0.74)</td>\n",
|
689 |
+
" <td>7.8 X 10-10</td>\n",
|
690 |
+
" <td>0.0042</td>\n",
|
691 |
+
" </tr>\n",
|
692 |
+
" <tr>\n",
|
693 |
+
" <th>20</th>\n",
|
694 |
+
" <td>rs13081389</td>\n",
|
695 |
+
" <td>3</td>\n",
|
696 |
+
" <td>12,264,800</td>\n",
|
697 |
+
" <td>PPARG</td>\n",
|
698 |
+
" <td>A</td>\n",
|
699 |
+
" <td>IQSECI (NM 014869)</td>\n",
|
700 |
+
" <td>Adipose</td>\n",
|
701 |
+
" <td>-0.630(0.131)</td>\n",
|
702 |
+
" <td>2.9 x 10-6</td>\n",
|
703 |
+
" <td>1.4> x 10-4</td>\n",
|
704 |
+
" <td>rs9211\\n(0.01)</td>\n",
|
705 |
+
" <td>1.1x 10-96</td>\n",
|
706 |
+
" <td>7.4 X 10-94</td>\n",
|
707 |
+
" </tr>\n",
|
708 |
+
" <tr>\n",
|
709 |
+
" <th>21</th>\n",
|
710 |
+
" <td>rs6795735</td>\n",
|
711 |
+
" <td>3</td>\n",
|
712 |
+
" <td>64,680,405</td>\n",
|
713 |
+
" <td>ADAMTS9</td>\n",
|
714 |
+
" <td>C</td>\n",
|
715 |
+
" <td>BC040632(AK022320)</td>\n",
|
716 |
+
" <td>Adipose</td>\n",
|
717 |
+
" <td>0.229 (0.056)</td>\n",
|
718 |
+
" <td>7.6 X 10-5</td>\n",
|
719 |
+
" <td>0.28</td>\n",
|
720 |
+
" <td>rs4521216\\n(0.02)</td>\n",
|
721 |
+
" <td>3.0 X 10-13</td>\n",
|
722 |
+
" <td>8.7 x 10-10</td>\n",
|
723 |
+
" </tr>\n",
|
724 |
+
" </tbody>\n",
|
725 |
+
"</table>\n",
|
726 |
+
"</div>"
|
727 |
+
],
|
728 |
+
"text/plain": [
|
729 |
+
" 0 1 2 3 4 5 6 7 8 9 10 11 12\n",
|
730 |
+
"0 SNP Chr. Position\\nB36\\n(bp) Nearby\\ngenea Risk\\nalleleb Gene (transcript) Tissue Effect (s.e.m.)C P value P d\\nadj SNP(2f Pvalue P g\\nadj\n",
|
731 |
+
"1 Novel loci reported in this study Novel loci reported in this study Novel loci reported in this study \n",
|
732 |
+
"2 rs4457053 5 76,460,705 ZBED3 G PDE8B(NM 003719) Adipose 0.302 (0.070) 2.8 X 10-5 0.80 rs6864250 (0.18) 3.1 X 10-17 5.8 X 10-13\n",
|
733 |
+
"3 ZBED3(NM 032367) Adipose 0.429 (0.068) 1.0: x 10-9 0.011 rs4704389 (0.20) 3.9 x 10-16 6.0 X 10-9\n",
|
734 |
+
"4 rs972283 7 130,117,394 KLF14 G KLF14(NM_138693) Adipose -0.387 (0.058) 8.1 X 10-11 0.058 rs738134 (0.30) 2.2 X 10-12 0.0014\n",
|
735 |
+
"5 rs896854 8 96,029,687 TP53INPI T CCNE2 (NM 057749) Blood 0.225 (0.053) 3.8 X 10-5 0.78 rs4735339 (0.61) 5.8 X 10-7 0.0051\n",
|
736 |
+
"6 rs1552224 11 72,110,746 CENTD2 A STARDIO(NM 006645) Blood 0.337 (0.066) 8.6 x 10-7 0.026 rs519790 (0.04) 2.7x 10-24 1.6 X 10-1\n",
|
737 |
+
"7 rs7957197 12 119,945,069 HNFIA T ACADS (NM 000017) Adipose 0.248 (0.067) 3.7 x 10-4 0.29 rs9204\\n(0.02) 1.3x 10-53 5.9 X 10-50\n",
|
738 |
+
"8 PSMD9 (NM 002813) Blood 0.240 (0.065) 3.9 X 10-4 0.0088 rs3741593\\n(0.00) 8.3x 10-8 1.7 X 10-6\n",
|
739 |
+
"9 OASL (NM_003733) Adipose 0.318 (0.068) 6.4 X 10-6 0.13 rs2259883\\n(0.19) 1.1x1 10-7 0.0018\n",
|
740 |
+
"10 OASL (NM_ _003733) Blood 0.319 (0.064) 1.3 X 10-6 0.37 rs4556628\\n(0.21) 4.4> X 10-22 1.4 X 10-16\n",
|
741 |
+
"11 COQ5(NM_032314) Blood 0.248 (0.065) 2.1 x1 10-4 0.92 rs10774561\\n(0.02) 8.7x 10-39 4.9 X 10 -\n",
|
742 |
+
"12 UNCI19B(NM 032661) Blood 0.254 (0.064) 1.4x 10-4 0.048 rs11065202\\n(0.09) 7.8 x 10-12 2.3 X 10-9\n",
|
743 |
+
"13 CAMKK2 (NM 17 72215) Adipose 0.497 (0.068) 1.2 x 10-12 0.18 rs11065504\\n(0.08) 2.7x 10-117 3.8 X 10-98\n",
|
744 |
+
"14 CAMKK2 (NM_ 1 172215) Blood 0.360 (0.063) 3.4 X 10-8 0.68 rs11065504\\n(0.08) 7.0 X 10-105 5.7 X 10-94\n",
|
745 |
+
"15 P2RX4(NM 175568) Blood 0.312 (0.065) 3.4 x 10-6 2.0 x 10-6 rs25644\\n(0.03) 3.4 x 10-17 1.9 x 10-17\n",
|
746 |
+
"16 rs8042680 15 89,322,341 PRCI A VPS33B (NM_018668) Blood 0.371 (0.057) 2.9 x 10-10 0.50 rs12595616\\n(0.57) 2.3 x 10-21 4.5 X 10-1\n",
|
747 |
+
"17 Previously reported loci Previously reported loci Previously reported loci \n",
|
748 |
+
"18 rs7578326 2 226,728,897 IRSI A IRS/(Contig50189RC) Adipose -0.251 (0.059) 3.7 x 10-5 0.89 rs2943653 (0.93) 3.4 X 10-5 0.69\n",
|
749 |
+
"19 IRSI(NM 005544) Adipose 0.331 (0.059) 5.7 X 10-8 0.58 rs2176040 (0.74) 7.8 X 10-10 0.0042\n",
|
750 |
+
"20 rs13081389 3 12,264,800 PPARG A IQSECI (NM 014869) Adipose -0.630(0.131) 2.9 x 10-6 1.4> x 10-4 rs9211\\n(0.01) 1.1x 10-96 7.4 X 10-94\n",
|
751 |
+
"21 rs6795735 3 64,680,405 ADAMTS9 C BC040632(AK022320) Adipose 0.229 (0.056) 7.6 X 10-5 0.28 rs4521216\\n(0.02) 3.0 X 10-13 8.7 x 10-10"
|
752 |
+
]
|
753 |
+
},
|
754 |
+
"execution_count": 26,
|
755 |
+
"metadata": {},
|
756 |
+
"output_type": "execute_result"
|
757 |
+
}
|
758 |
+
],
|
759 |
+
"source": [
|
760 |
+
"dfc = df.fillna('')\n",
|
761 |
+
"dfc"
|
762 |
+
]
|
763 |
+
},
|
764 |
{
|
765 |
"cell_type": "markdown",
|
766 |
"metadata": {},
|
|
|
770 |
},
|
771 |
{
|
772 |
"cell_type": "code",
|
773 |
+
"execution_count": 19,
|
774 |
"metadata": {},
|
775 |
"outputs": [
|
776 |
{
|
777 |
"name": "stdout",
|
778 |
"output_type": "stream",
|
779 |
"text": [
|
780 |
+
"41 [('rs4607517', 'MODY2'), ('rs5400', 'T1101'), ('rs1799884', 'PNDM'), ('rs4607517', 'MODY'), ('rs5400', 'SNPS'), ('rs5394', 'SNPS'), ('rs2144908', 'MODY'), ('rs7020673', 'GLIS3'), ('rs5393', 'SLC2A2'), ('rs4684847', 'SNP'), ('rs5394', 'GLUT2'), ('rs7034200', 'T1D'), ('rs4607517', 'GCK'), ('rs3818247', 'SNPS'), ('rs2144908', 'CHI'), ('rs2144908', 'MODY1'), ('rs3818247', 'HNF4A'), ('rs6446482', 'DIDMOAD'), ('rs757210', 'MODY5'), ('rs1799884', 'MODY2'), ('rs757210', 'HNF1B'), ('rs5393', 'FBS'), ('rs757210', 'RCAD'), ('rs1799884', 'MODY'), ('rs757210', 'TS4430796'), ('rs5394', 'SLC2A2'), ('rs5404', 'T198T'), ('rs1799884', 'GCK'), ('rs4607517', 'PNDM'), ('rs3818247', 'MODY'), ('rs2144908', 'SNPS'), ('rs10010131', 'DIDMOAD'), ('rs5393', 'SNPS'), ('rs2144908', 'HNF4A'), ('rs5394', 'FBS'), ('rs3818247', 'CHI'), ('rs5393', 'GLUT2'), ('rs757210', 'SNP'), ('rs3818247', 'MODY1'), ('rs757210', 'TCF2'), ('rs1801282', 'PPARG')]\n"
|
781 |
]
|
782 |
}
|
783 |
],
|