Satoc committed on
Commit
e096153
·
1 Parent(s): 3a0dc06
app.py CHANGED
@@ -13,7 +13,7 @@ LLAMA3_8B = "Llama3-8b-8192"
13
  LLAMA3_70B = "Llama3-70b-8192"
14
  Mixtral = "mixtral-8x7b-32768"
15
 
16
- def custom_completion_to_prompt(completion:str) ->str:
17
  return completion_to_prompt(
18
  completion, system_prompt=(
19
  "You are a Q&A assistant. Your goal is to answer questions as "
@@ -22,34 +22,41 @@ def custom_completion_to_prompt(completion:str) ->str:
22
  )
23
 
24
  def getMutationEffect(cancer_name, gene_name):
25
- searchWords= "(" +str(cancer_name)+ ") AND " + "(" + str(gene_name) + ") AND(treatment)"
26
  studies = search(searchWords)
27
- df, abstracts= GetPubmedSummaryDf(studies)
28
- #Define LLM
 
29
  llm = Groq(
30
- model=LLAMA3_8B,
31
- temperature=0.01,
32
- context_window=4096,
33
- completion_to_prompt=custom_completion_to_prompt,
34
- messages_to_prompt=messages_to_prompt,)
35
- #set global service context
36
- #ctx = ServiceContext.from_defaults(llm=llm)
37
- #ctx = Settings(llm=llm)
38
  Settings.llm = llm
39
- #set_global_service_context(ctx)
40
  documents = [Document(text=t) for t in abstracts[:10]]
41
  index = SummaryIndex.from_documents(documents)
42
  query_engine = index.as_query_engine(response_mode="tree_summarize")
43
- prompt = "Please prepare a single summary of the abstracts of the following papers. Pay particular attention to the {} gene".format(gene_name)
44
  response = query_engine.query(prompt)
45
- return response
46
-
47
# Two free-text inputs (cancer and gene); the summary is shown as plain text.
demo = gr.Interface(
    fn=getMutationEffect,
    inputs=[gr.Textbox(label="CancerName"), gr.Textbox(label="GeneName")],
    outputs="text",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
 
13
  LLAMA3_70B = "Llama3-70b-8192"
14
  Mixtral = "mixtral-8x7b-32768"
15
 
16
+ def custom_completion_to_prompt(completion: str) -> str:
17
  return completion_to_prompt(
18
  completion, system_prompt=(
19
  "You are a Q&A assistant. Your goal is to answer questions as "
 
22
  )
23
 
24
  def getMutationEffect(cancer_name, gene_name):
25
+ searchWords = "(" + str(cancer_name) + ") AND " + "(" + str(gene_name) + ") AND(treatment)"
26
  studies = search(searchWords)
27
+ df, abstracts = GetPubmedSummaryDf(studies)
28
+
29
+ # Define LLM
30
  llm = Groq(
31
+ model=LLAMA3_8B,
32
+ temperature=0.01,
33
+ context_window=4096,
34
+ completion_to_prompt=custom_completion_to_prompt,
35
+ messages_to_prompt=messages_to_prompt,
36
+ )
37
+
38
+ # グローバルサービスコンテキストの設定
39
  Settings.llm = llm
 
40
  documents = [Document(text=t) for t in abstracts[:10]]
41
  index = SummaryIndex.from_documents(documents)
42
  query_engine = index.as_query_engine(response_mode="tree_summarize")
43
+ prompt = f"Please prepare a single summary of the abstracts of the following papers. Pay particular attention to the {gene_name} gene"
44
  response = query_engine.query(prompt)
45
+
46
+ # テキストをファイルに保存
47
+ summary_text = str(response)
48
+ outputname = cancer_name + "_" + gene_name + "_" + "mutation_effect_summary.txt"
49
+ with open(outputname, "w") as file:
50
+ file.write(summary_text)
51
+
52
+ return summary_text, outputname # テキストとダウンロード用ファイルを返す
53
 
54
# Gradio interface configuration
demo = gr.Interface(
    fn=getMutationEffect,
    inputs=[
        gr.Textbox(label="CancerName"),
        gr.Textbox(label="GeneName"),
    ],
    # Show the summary text and offer the saved file as a download.
    outputs=[
        gr.Textbox(label="Summary"),
        gr.File(label="Download Summary as .txt"),
    ],
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
dev/dev.ipynb ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Users/satoc/miniforge3/envs/gradio/lib/python3.12/site-packages/pydantic/_internal/_fields.py:132: UserWarning: Field \"model_url\" in LlamaCPP has conflict with protected namespace \"model_\".\n",
13
+ "\n",
14
+ "You may be able to resolve this warning by setting `model_config['protected_namespaces'] = ()`.\n",
15
+ " warnings.warn(\n",
16
+ "/Users/satoc/miniforge3/envs/gradio/lib/python3.12/site-packages/pydantic/_internal/_fields.py:132: UserWarning: Field \"model_path\" in LlamaCPP has conflict with protected namespace \"model_\".\n",
17
+ "\n",
18
+ "You may be able to resolve this warning by setting `model_config['protected_namespaces'] = ()`.\n",
19
+ " warnings.warn(\n",
20
+ "/Users/satoc/miniforge3/envs/gradio/lib/python3.12/site-packages/pydantic/_internal/_fields.py:132: UserWarning: Field \"model_kwargs\" in LlamaCPP has conflict with protected namespace \"model_\".\n",
21
+ "\n",
22
+ "You may be able to resolve this warning by setting `model_config['protected_namespaces'] = ()`.\n",
23
+ " warnings.warn(\n"
24
+ ]
25
+ },
26
+ {
27
+ "name": "stdout",
28
+ "output_type": "stream",
29
+ "text": [
30
+ "* Running on local URL: http://127.0.0.1:7860\n",
31
+ "\n",
32
+ "To create a public link, set `share=True` in `launch()`.\n"
33
+ ]
34
+ },
35
+ {
36
+ "data": {
37
+ "text/html": [
38
+ "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
39
+ ],
40
+ "text/plain": [
41
+ "<IPython.core.display.HTML object>"
42
+ ]
43
+ },
44
+ "metadata": {},
45
+ "output_type": "display_data"
46
+ },
47
+ {
48
+ "data": {
49
+ "text/plain": []
50
+ },
51
+ "execution_count": 1,
52
+ "metadata": {},
53
+ "output_type": "execute_result"
54
+ }
55
+ ],
56
+ "source": [
57
+ "from OpenAITools.ExpertTools import GetPubmedSummaryDf, generate, search\n",
58
+ "from llama_index.core import SummaryIndex\n",
59
+ "from llama_index.core import Document\n",
60
+ "from llama_index.llms.groq import Groq\n",
61
+ "from llama_index.core import ServiceContext, set_global_service_context\n",
62
+ "from llama_index.llms.llama_cpp.llama_utils import messages_to_prompt, completion_to_prompt\n",
63
+ "#from llama_index.settings import Settings\n",
64
+ "from llama_index.core import Settings\n",
65
+ "import gradio as gr\n",
66
+ "\n",
67
+ "#models\n",
68
+ "LLAMA3_8B = \"Llama3-8b-8192\"\n",
69
+ "LLAMA3_70B = \"Llama3-70b-8192\"\n",
70
+ "Mixtral = \"mixtral-8x7b-32768\" \n",
71
+ "\n",
72
+ "\n",
73
+ "def custom_completion_to_prompt(completion: str) -> str:\n",
74
+ " return completion_to_prompt(\n",
75
+ " completion, system_prompt=(\n",
76
+ " \"You are a Q&A assistant. Your goal is to answer questions as \"\n",
77
+ " \"accurately as possible is the instructions and context provided.\"\n",
78
+ " ),\n",
79
+ " )\n",
80
+ "\n",
81
+ "def getMutationEffect(cancer_name, gene_name):\n",
82
+ " searchWords = \"(\" + str(cancer_name) + \") AND \" + \"(\" + str(gene_name) + \") AND(treatment)\"\n",
83
+ " studies = search(searchWords)\n",
84
+ " df, abstracts = GetPubmedSummaryDf(studies)\n",
85
+ " \n",
86
+ " # Define LLM\n",
87
+ " llm = Groq(\n",
88
+ " model=LLAMA3_8B,\n",
89
+ " temperature=0.01,\n",
90
+ " context_window=4096,\n",
91
+ " completion_to_prompt=custom_completion_to_prompt,\n",
92
+ " messages_to_prompt=messages_to_prompt,\n",
93
+ " )\n",
94
+ " \n",
95
+ " # グローバルサービスコンテキストの設定\n",
96
+ " Settings.llm = llm\n",
97
+ " documents = [Document(text=t) for t in abstracts[:10]]\n",
98
+ " index = SummaryIndex.from_documents(documents)\n",
99
+ " query_engine = index.as_query_engine(response_mode=\"tree_summarize\")\n",
100
+ " prompt = f\"Please prepare a single summary of the abstracts of the following papers. Pay particular attention to the {gene_name} gene\"\n",
101
+ " response = query_engine.query(prompt)\n",
102
+ " \n",
103
+ " # テキストをファイルに保存\n",
104
+ " with open(\"mutation_effect_summary.txt\", \"w\") as file:\n",
105
+ " file.write(str(response)) # responseを文字列に変換して書き込み\n",
106
+ " \n",
107
+ " return \"mutation_effect_summary.txt\" # ダウンロードするファイル名を返す\n",
108
+ "\n",
109
+ "# Gradioインターフェース設定\n",
110
+ "demo = gr.Interface(\n",
111
+ " fn=getMutationEffect,\n",
112
+ " inputs=[gr.Textbox(label=\"CancerName\"), gr.Textbox(label=\"GeneName\")],\n",
113
+ " outputs=gr.File(label=\"Download Summary as .txt\") # ダウンロードボタンを表示\n",
114
+ ")\n",
115
+ "\n",
116
+ "demo.launch()"
117
+ ]
118
+ },
119
+ {
120
+ "cell_type": "code",
121
+ "execution_count": 3,
122
+ "metadata": {},
123
+ "outputs": [
124
+ {
125
+ "name": "stdout",
126
+ "output_type": "stream",
127
+ "text": [
128
+ "* Running on local URL: http://127.0.0.1:7862\n",
129
+ "\n",
130
+ "To create a public link, set `share=True` in `launch()`.\n"
131
+ ]
132
+ },
133
+ {
134
+ "data": {
135
+ "text/html": [
136
+ "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
137
+ ],
138
+ "text/plain": [
139
+ "<IPython.core.display.HTML object>"
140
+ ]
141
+ },
142
+ "metadata": {},
143
+ "output_type": "display_data"
144
+ },
145
+ {
146
+ "data": {
147
+ "text/plain": []
148
+ },
149
+ "execution_count": 3,
150
+ "metadata": {},
151
+ "output_type": "execute_result"
152
+ }
153
+ ],
154
+ "source": [
155
+ "import gradio as gr\n",
156
+ "\n",
157
+ "# モデルの定義\n",
158
+ "LLAMA3_8B = \"Llama3-8b-8192\"\n",
159
+ "LLAMA3_70B = \"Llama3-70b-8192\"\n",
160
+ "Mixtral = \"mixtral-8x7b-32768\"\n",
161
+ "\n",
162
+ "def custom_completion_to_prompt(completion: str) -> str:\n",
163
+ " return completion_to_prompt(\n",
164
+ " completion, system_prompt=(\n",
165
+ " \"You are a Q&A assistant. Your goal is to answer questions as \"\n",
166
+ " \"accurately as possible is the instructions and context provided.\"\n",
167
+ " ),\n",
168
+ " )\n",
169
+ "\n",
170
+ "def getMutationEffect(cancer_name, gene_name):\n",
171
+ " searchWords = \"(\" + str(cancer_name) + \") AND \" + \"(\" + str(gene_name) + \") AND(treatment)\"\n",
172
+ " studies = search(searchWords)\n",
173
+ " df, abstracts = GetPubmedSummaryDf(studies)\n",
174
+ " \n",
175
+ " # Define LLM\n",
176
+ " llm = Groq(\n",
177
+ " model=LLAMA3_8B,\n",
178
+ " temperature=0.01,\n",
179
+ " context_window=4096,\n",
180
+ " completion_to_prompt=custom_completion_to_prompt,\n",
181
+ " messages_to_prompt=messages_to_prompt,\n",
182
+ " )\n",
183
+ " \n",
184
+ " # グローバルサービスコンテキストの設定\n",
185
+ " Settings.llm = llm\n",
186
+ " documents = [Document(text=t) for t in abstracts[:10]]\n",
187
+ " index = SummaryIndex.from_documents(documents)\n",
188
+ " query_engine = index.as_query_engine(response_mode=\"tree_summarize\")\n",
189
+ " prompt = f\"Please prepare a single summary of the abstracts of the following papers. Pay particular attention to the {gene_name} gene\"\n",
190
+ " response = query_engine.query(prompt)\n",
191
+ " \n",
192
+ " # テキストをファイルに保存\n",
193
+ " summary_text = str(response)\n",
194
+ " outputname = cancer_name + \"_\" + gene_name + \"_\" + \"mutation_effect_summary.txt\"\n",
195
+ " with open(outputname, \"w\") as file:\n",
196
+ " file.write(summary_text)\n",
197
+ " \n",
198
+ " return summary_text, outputname # テキストとダウンロード用ファイルを返す\n",
199
+ "\n",
200
+ "# Gradioインターフェース設定\n",
201
+ "demo = gr.Interface(\n",
202
+ " fn=getMutationEffect,\n",
203
+ " inputs=[gr.Textbox(label=\"CancerName\"), gr.Textbox(label=\"GeneName\")],\n",
204
+ " outputs=[gr.Textbox(label=\"Summary\"), gr.File(label=\"Download Summary as .txt\")] # テキスト表示とダウンロードボタンを両方表示\n",
205
+ ")\n",
206
+ "\n",
207
+ "demo.launch()\n"
208
+ ]
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": null,
213
+ "metadata": {},
214
+ "outputs": [],
215
+ "source": []
216
+ }
217
+ ],
218
+ "metadata": {
219
+ "kernelspec": {
220
+ "display_name": "gradio",
221
+ "language": "python",
222
+ "name": "python3"
223
+ },
224
+ "language_info": {
225
+ "codemirror_mode": {
226
+ "name": "ipython",
227
+ "version": 3
228
+ },
229
+ "file_extension": ".py",
230
+ "mimetype": "text/x-python",
231
+ "name": "python",
232
+ "nbconvert_exporter": "python",
233
+ "pygments_lexer": "ipython3",
234
+ "version": "3.12.3"
235
+ }
236
+ },
237
+ "nbformat": 4,
238
+ "nbformat_minor": 2
239
+ }
dev/glioma_IDH2_mutation_effect_summary.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Here is a summary of the abstracts, focusing on the IDH2 gene:
2
+
3
+ IDH2 mutations are a common feature of various cancers, including gliomas, acute myeloid leukemia, and chondrosarcoma. In gliomas, IDH2 mutations are often detected in combination with IDH1 mutations, and are associated with distinct clinical features. The IDH2 gene encodes a cytoplasmic and mitochondrial form of isocitrate dehydrogenase, which catalyzes the conversion of isocitrate to α-ketoglutarate. Mutations in IDH2 lead to the production of the oncometabolite D-2-hydroxyglutarate, which has profound effects on epigenetic, differentiation, and metabolic programs.
4
+
5
+ In gliomas, IDH2 mutations are more common than IDH1 mutations, and are often detected in combination with 1p/19q codeletion. The IDH2 gene is a key driver of gliomagenesis, and its mutations are associated with distinct clinical and molecular features. IDH2 mutations have been shown to increase the conversion of α-KG to 2-HG, leading to epigenetic dysregulation, altered gene expression, and impaired cell differentiation.
6
+
7
+ Targeting IDH2 mutations is a promising therapeutic approach for gliomas, and several small molecule inhibitors are currently being developed and tested in clinical trials. These inhibitors have shown promise in preclinical studies, decreasing intracellular 2-HG levels, reversing epigenetic dysregulation, and inducing cellular differentiation. Further research is needed to fully understand the role of IDH2 mutations in gliomagenesis and to develop effective therapeutic strategies for patients with IDH2-mutant gliomas.
dev/glioma_TERT_mutation_effect_summary.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Here is a summary of the abstracts, focusing on the TERT gene:
2
+
3
+ The TERT gene plays a crucial role in telomere maintenance and has been implicated in various cancers, including gliomas. Recent studies have shown that TERT promoter mutations are associated with increased telomere length and are a hallmark of glioma development. In one study, TERT promoter mutations were found to be negatively associated with ATRX expression in WHO grade II to IV gliomas. This suggests that TERT promoter mutations may be a key driver of glioma development and progression.
4
+
5
+ In another study, TERT promoter mutations were shown to correlate with elevated mRNA expression, supporting a role in telomerase reactivation. Additionally, TERT promoter mutations were found to be associated with increased telomere length, which is a hallmark of glioma development.
6
+
7
+ Furthermore, a study on medulloblastoma found that TERT promoter mutations are a key driver of this disease, and that inhibition of TERT promoter mutations can lead to increased median survival and immunological memory.
8
+
9
+ In summary, the TERT gene is a key player in glioma development and progression, and its mutations are associated with increased telomere length, telomerase reactivation, and poor prognosis. Further research on the TERT gene and its role in glioma development and progression may lead to the development of new therapeutic strategies for this disease.
dev/mutation_effect_summary.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Here is a summary of the abstracts, focusing on the IDH2 gene:
2
+
3
+ IDH2 mutations are a common feature of various cancers, including gliomas, acute myeloid leukemia, and chondrosarcoma. In gliomas, IDH2 mutations are often found in combination with IDH1 mutations, and are associated with distinct clinical features. The IDH2 gene encodes a cytoplasmic and mitochondrial enzyme that catalyzes the conversion of isocitrate to α-ketoglutarate. Mutations in IDH2 result in the production of the oncometabolite D-2-hydroxyglutarate, which has profound effects on epigenetic, differentiation, and metabolic programs.
4
+
5
+ In gliomas, IDH2 mutations are more commonly found in lower-grade gliomas, particularly in combination with 1p/19q codeletion. IDH2 mutations are also associated with a better prognosis compared to IDH1 mutations. The IDH2 gene is a key therapeutic target, and small molecule inhibitors of mutant IDH2 enzymes are being developed for the treatment of IDH2-mutated cancers.
6
+
7
+ Overall, IDH2 mutations play a crucial role in the development and progression of various cancers, and targeting IDH2 mutations with small molecule inhibitors shows promise as a therapeutic approach.