gegegegeg
- .gitignore +135 -0
- OpenAITools/.ipynb_checkpoints/ECarteTools-checkpoint.py +73 -0
- OpenAITools/.ipynb_checkpoints/ExpertTools-checkpoint.py +245 -0
- OpenAITools/.ipynb_checkpoints/FetchTools-checkpoint.py +158 -0
- OpenAITools/.ipynb_checkpoints/scrapeThisData-checkpoint.py +237 -0
- OpenAITools/CrinicalTrialTools.py +311 -0
- OpenAITools/ECarteTools.py +73 -0
- OpenAITools/ExpertTools.py +246 -0
- OpenAITools/FetchTools.py +158 -0
- OpenAITools/ReviewPaperTools.py +42 -0
- OpenAITools/scrapeThisData.py +237 -0
- PATHtoOriginaltool.ipynb +208 -0
- app.py +100 -0
- dev/ClinicalTrialApp.ipynb +169 -0
- dev/filtered_data.csv +59 -0
- environment.yml +487 -0
- requirements.txt +300 -0
.gitignore
ADDED
@@ -0,0 +1,135 @@
# Add any directories, files, or patterns you don't want to be tracked by version control


# Byte-compiled / optimized / DLL files
__pycache__/
#*.py[cod]
#*$py.class
#*.txt
#*.tsv
#*.csv
*.xlsx
*.pdf
*.nii
#*.nii.gz
*.DS_Store
#*.png
#*.pyn
*.jpg
*.nii.gz
*.pkl
*-checkpoint.ipynb
*.pkls
*.pth
*.yaml
*.ckpt
# C extensions
#*.so

# Distribution / packaging
#.Python
#build/
#develop-eggs/
#dist/
#downloads/
#eggs/
#.eggs/
#lib/
#lib64/
#parts/
#sdist/
#var/
#wheels/
#*.egg-info/
#.installed.cfg
#*.egg
#MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
#*.manifest
#*.spec

# Installer logs
#pip-log.txt
#pip-delete-this-directory.txt

# Unit test / coverage reports
#htmlcov/
#.tox/
#.coverage
#.coverage.*
#.cache
#nosetests.xml
#coverage.xml
#*.cover
#.hypothesis/
#.pytest_cache/

# Translations
#*.mo
#*.pot

# Django stuff:
#*.log
#.static_storage/
#.media/
#local_settings.py

# Flask stuff:
#instance/
#.webassets-cache

# Scrapy stuff:
#.scrapy

# Sphinx documentation
#docs/_build/

# PyBuilder
#target/

# Jupyter Notebook
.ipynb_checkpoint/*

# pyenv
#.python-version

# celery beat schedule file
#celerybeat-schedule

# SageMath parsed files
#*.sage.py

# Environments
#.env
#.venv
#env/
#venv/
#ENV/
#env.bak/
#venv.bak/

# Spyder project settings
#.spyderproject
#.spyproject

# Rope project settings
#.ropeproject

# mkdocs documentation
#/site
/models/
# mypy
#.mypy_cache/
#over 100MB

# Add any directories, files, or patterns you don't want to be tracked by version control


#deep settings
*.h5

.OpenAITools/chromedriver
/OpenAITools/chromedriver
OpenAITools/.ipynb_checkpoints/ECarteTools-checkpoint.py
ADDED
@@ -0,0 +1,73 @@
import openai
import time
import wikipedia
import random
import re
import requests
from bs4 import BeautifulSoup
import os
import glob
from natsort import natsorted
import xml.etree.ElementTree as ET
import pandas as pd
from pytrials.client import ClinicalTrials

wikipedia.set_lang("ja")
# Set the API key
openai.api_key = os.environ['OPENAI_API_KEY']
engine = "gpt-3.5-turbo"


def generate(system_template, prompt, engine="gpt-3.5-turbo"):
    while True:  # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": system_template},
                    {"role": "user", "content": prompt},
                ]
            )
            result = response["choices"][0]["message"]["content"]
            return result
        except:
            print("リトライ")  # "retrying"
            time.sleep(30)

def generate_carte(prompt, engine="gpt-3.5-turbo"):
    while True:  # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are useful assistant"},
                    # Prompt (Japanese): "Summarize the following patient-doctor conversation
                    # concisely, in Japanese, as a SOAP-format chart with sections S), O), A), P)."
                    {"role": "user", "content": "%s\n・・・という患者と医師の会話をSOAP形式のカルテとして日本語で端的にまとめて下さい。各セクションはS),O), A),P)として下さい " % prompt},
                ]
            )
            result = response["choices"][0]["message"]["content"]
            return result
        except:
            print("リトライ")  # "retrying"
            time.sleep(30)

def get_selected_fileds(texts):
    ct = ClinicalTrials()  # pytrials client; ct is used below
    input_name = texts.replace(' ', "+")
    corona_fields = ct.get_study_fields(
        search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (input_name),
        fields=["NCTId", "Condition", "BriefTitle", 'BriefSummary', 'EligibilityCriteria'],
        max_studies=500,
        fmt="csv")
    return corona_fields

def get_retriever_str(fields):
    retriever_str = ''
    for i in range(1, len(fields)):
        colnames = fields[0]
        targetCol = fields[i]
        for f in range(len(fields[0])):
            retriever_str += colnames[f] + ":" + targetCol[f] + "\n"
        retriever_str += '\n'
    return retriever_str
OpenAITools/.ipynb_checkpoints/ExpertTools-checkpoint.py
ADDED
@@ -0,0 +1,245 @@
import os
import openai
import time
import wikipedia
import random
import re
import requests
from bs4 import BeautifulSoup
import glob
from natsort import natsorted
import xml.etree.ElementTree as ET
from pytrials.client import ClinicalTrials
from Bio import Entrez
import pandas as pd
import numpy as np
#from langchain.agents import create_pandas_dataframe_agent
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain.llms import OpenAI

# Set the API key
openai.api_key = os.environ['OPENAI_API_KEY']
gptengine = "gpt-3.5-turbo"


"""def get_selected_fileds(texts):
    ct = ClinicalTrials()
    input_name = texts.replace(' ', "+")
    corona_fields = ct.get_study_fields(
        search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (input_name),
        fields=["NCTId", "Condition", "BriefTitle", 'BriefSummary', 'EligibilityCriteria'],
        max_studies=500,
        fmt="csv")
    return corona_fields"""

def get_retriever_str(fields):
    retriever_str = ''
    for i in range(1, len(fields)):
        colnames = fields[0]
        targetCol = fields[i]
        for f in range(len(fields[0])):
            retriever_str += colnames[f] + ":" + targetCol[f] + "\n"
        retriever_str += '\n'
    return retriever_str

def get_chanked_retriever(fields):
    retriever_list = []
    for i in range(1, len(fields)):
        retriever_str = ''
        colnames = fields[0]
        targetCol = fields[i]
        for f in range(len(fields[0])):
            retriever_str += colnames[f] + ":" + targetCol[f] + "\n"
        retriever_list.append(retriever_str)
    return retriever_list

def get_selected_fields(texts, split_criteria=False,
                        split_word_number=False, split_number=700):
    ct = ClinicalTrials()
    input_name = texts.replace(' ', "+")
    corona_fields = ct.get_study_fields(
        search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (input_name),
        fields=["NCTId", "Condition", "BriefTitle", 'BriefSummary', 'EligibilityCriteria'],
        max_studies=500,
        fmt="csv")

    if split_criteria:
        new_fields = []

        # Strings to search for
        target_string1 = 'Exclusion Criteria'
        target_string2 = 'Exclusion criteria'

        # Look for the target string in each element, split just before it,
        # and store the pieces in a new list
        for corona_field in corona_fields:
            new_list = []
            for item in corona_field:
                if target_string1 in item:
                    split_position = item.index(target_string1)
                    new_list.append(item[:split_position])
                    new_list.append(item[split_position:])
                elif target_string2 in item:
                    split_position = item.index(target_string2)
                    new_list.append(item[:split_position])
                    new_list.append(item[split_position:])
                else:
                    new_list.append(item)
            new_fields.append(new_list)
    else:
        new_fields = corona_fields

    if split_word_number:
        split_fields = []
        for new_field in new_fields:
            new_list = []

            # Split any element longer than split_number characters
            # and store the pieces in a new list
            for item in new_field:
                item_length = len(item)
                if item_length > split_number:
                    num_parts = -(-item_length // split_number)  # ceiling division to get the part count
                    for i in range(num_parts):
                        start_index = i * split_number
                        end_index = min((i + 1) * split_number, item_length)  # don't run past the end of the string
                        new_list.append(item[start_index:end_index])
                else:
                    new_list.append(item)

            split_fields.append(new_list)
        new_fields = split_fields

    return new_fields


def print_agent_results(df, Ids,
                        interesteds=['Condition', 'BriefTitle', 'BriefSummary', 'EligibilityCriteria'],
                        translater=None):
    results = ""
    for Id in Ids:
        print("%s\n" % Id)
        sdf = df[df['NCTId'] == Id]
        for interested in interesteds:
            # Take the first element
            results += '%s: \n %s \n' % (interested, sdf[interested].iloc[0])
            #print('%s: \n %s \n' % (interested, sdf[interested].iloc[0]))
    if translater:
        to_be_printed = translater.translate(results)
    else:
        to_be_printed = results
    print(to_be_printed)

def search(query):
    Entrez.email = os.getenv('MAIL_ADRESS')
    #Entrez.email='[email protected]'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='20',
                            retmode='xml',
                            term=query)
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = os.getenv('MAIL_ADRESS')
    #Entrez.email = '[email protected]'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results

'''def generate(prompt,engine=None):
    if engine is None:
        engine=gptengine
    while True:  # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are useful assistant"},
                    {"role": "user", "content":prompt},
                ]
            )
            result=response["choices"][0]["message"]["content"]
            return result
        except Exception as e:
            print(e)
            print("リトライ")
            time.sleep(30)
            pass
'''

def generate(prompt, engine=None):
    if engine is None:
        engine = gptengine
    while True:  # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.chat.completions.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are useful assistant"},
                    {"role": "user", "content": prompt},
                ]
            )
            #result=response["choices"][0]["message"]["content"]
            result = response.choices[0].message.content
            return result
        except Exception as e:
            print(e)
            print("リトライ")  # "retrying"
            time.sleep(30)

def GetPubmedSummaryDf(studies):
    title_list = []
    abstract_list = []
    journal_list = []
    language_list = []
    pubdate_year_list = []
    pubdate_month_list = []
    studiesIdList = studies['IdList']
    chunk_size = 10000
    for chunk_i in range(0, len(studiesIdList), chunk_size):
        chunk = studiesIdList[chunk_i:chunk_i + chunk_size]

        try:
            papers = fetch_details(chunk)
            for i, paper in enumerate(papers['PubmedArticle']):
                title_list.append(paper['MedlineCitation']['Article']['ArticleTitle'])
                try:
                    abstract_list.append(paper['MedlineCitation']['Article']['Abstract']['AbstractText'][0])
                except:
                    abstract_list.append('No Abstract')
                journal_list.append(paper['MedlineCitation']['Article']['Journal']['Title'])
                language_list.append(paper['MedlineCitation']['Article']['Language'][0])
                try:
                    pubdate_year_list.append(paper['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate']['Year'])
                except:
                    pubdate_year_list.append('No Data')
                try:
                    pubdate_month_list.append(paper['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate']['Month'])
                except:
                    pubdate_month_list.append('No Data')
        except:  # occasionally a chunk might annoy your parser
            pass
    df = pd.DataFrame(list(zip(
        title_list, abstract_list, journal_list, language_list, pubdate_year_list,
        pubdate_month_list)),
        columns=['Title', 'Abstract', 'Journal', 'Language', 'Year', 'Month'])
    return df, abstract_list

def ClinicalAgent(fileds, verbose=False):
    df = pd.DataFrame.from_records(fileds[1:], columns=fileds[0])
    return create_pandas_dataframe_agent(OpenAI(temperature=0, model='gpt-3.5-turbo-16k'), df, verbose=verbose)

def GetNCTID(results):
    # Regex for words beginning with NCT
    pattern = r'\bNCT\d+\b'
    # Extract the matching words
    nct_words = re.findall(pattern, results)
    return nct_words
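A minimal sketch of how the PubMed helpers in this module chain together (hypothetical query; assumes Biopython is installed, the MAIL_ADRESS environment variable is set since NCBI Entrez requires a contact email, and that you import from the non-checkpoint module that mirrors this file):

# Hypothetical usage of search / GetPubmedSummaryDf from ExpertTools.
from OpenAITools.ExpertTools import search, GetPubmedSummaryDf

results = search('glioma immunotherapy')      # Entrez.esearch, top 20 hits by relevance
df, abstracts = GetPubmedSummaryDf(results)   # efetch each ID chunk and collect metadata
print(df[['Title', 'Journal', 'Year']].head())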
OpenAITools/.ipynb_checkpoints/FetchTools-checkpoint.py
ADDED
@@ -0,0 +1,158 @@
import os
import pandas as pd
#from llama_index.llms.replicate import Replicate
import requests
import re


def extract_japan_cities(text):
    # Use a regex to extract city names that end with " - Japan"
    pattern = r'(\b\w+\s*\w*\b) - Japan'
    cities = re.findall(pattern, text)
    unique_cities = list(set(cities))
    # Sort the unique city names and join them into a comma-separated string
    unique_cities.sort()
    return ', '.join(unique_cities)

def fetch_clinical_trials(cancer_name):
    search_expr = "%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (cancer_name)
    # Initial URL for the first API call
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }

    # Initialize an empty list to store the data
    data_list = []
    # Loop until there is no nextPageToken
    while True:
        # Print the current URL (for debugging purposes)
        print("Fetching data from:", base_url + '?' + '&'.join([f"{k}={v}" for k, v in params.items()]))

        # Send a GET request to the API
        response = requests.get(base_url, params=params)

        # Check if the request was successful
        if response.status_code == 200:
            data = response.json()  # Parse JSON response
            studies = data.get('studies', [])  # Extract the list of studies

            # Loop through each study and extract specific information
            for study in studies:
                # Safely access nested keys
                nctId = study['protocolSection']['identificationModule'].get('nctId', 'Unknown')
                startDate = study['protocolSection']['statusModule'].get('startDateStruct', {}).get('date', 'Unknown Date')
                conditions = ', '.join(study['protocolSection']['conditionsModule'].get('conditions', ['No conditions listed']))
                title = study['protocolSection']['identificationModule'].get('briefTitle', 'no title')
                summary = study['protocolSection']['descriptionModule'].get('briefSummary', 'no summary')

                # Extract locations safely
                locations_list = study['protocolSection'].get('contactsLocationsModule', {}).get('locations', [])
                locations = ', '.join([f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}" for location in locations_list]) if locations_list else "No locations listed"

                JapanesLocations = extract_japan_cities(locations)
                # Extract dates and phases
                primaryCompletionDate = study['protocolSection']['statusModule'].get('primaryCompletionDateStruct', {}).get('date', 'Unknown Date')

                phases = ', '.join(study['protocolSection']['designModule'].get('phases', ['Not Available']))
                eligibilityCriteria = study['protocolSection']['eligibilityModule'].get('eligibilityCriteria', 'Unknown')

                # Append the data to the list as a dictionary
                data_list.append({
                    "NCTID": nctId,
                    "Title": title,
                    #"Start Date": startDate,
                    "Primary Completion Date": primaryCompletionDate,
                    #"Conditions": conditions,
                    "Cancer": conditions,
                    "Summary": summary,
                    "Japanes Locations": JapanesLocations,
                    #"Phases": phases,
                    "Eligibility Criteria": eligibilityCriteria
                })

            # Check for nextPageToken and update the params or break the loop
            nextPageToken = data.get('nextPageToken')
            if nextPageToken:
                params['pageToken'] = nextPageToken  # Set the pageToken for the next request
            else:
                break  # Exit the loop if no nextPageToken is present
        else:
            print("Failed to fetch data. Status code:", response.status_code)
            break

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_list)
    return df

def fetch_clinical_trials_jp(cancer_name):
    search_expr = "%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (cancer_name)
    # Initial URL for the first API call
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }

    # Initialize an empty list to store the data
    data_list = []
    # Loop until there is no nextPageToken
    while True:
        # Print the current URL (for debugging purposes)
        print("Fetching data from:", base_url + '?' + '&'.join([f"{k}={v}" for k, v in params.items()]))

        # Send a GET request to the API
        response = requests.get(base_url, params=params)

        # Check if the request was successful
        if response.status_code == 200:
            data = response.json()  # Parse JSON response
            studies = data.get('studies', [])  # Extract the list of studies

            # Loop through each study and extract specific information
            for study in studies:
                # Safely access nested keys
                nctId = study['protocolSection']['identificationModule'].get('nctId', 'Unknown')
                startDate = study['protocolSection']['statusModule'].get('startDateStruct', {}).get('date', 'Unknown Date')
                conditions = ', '.join(study['protocolSection']['conditionsModule'].get('conditions', ['No conditions listed']))
                title = study['protocolSection']['identificationModule'].get('briefTitle', 'no title')
                summary = study['protocolSection']['descriptionModule'].get('briefSummary', 'no summary')

                # Extract locations safely
                locations_list = study['protocolSection'].get('contactsLocationsModule', {}).get('locations', [])
                locations = ', '.join([f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}" for location in locations_list]) if locations_list else "No locations listed"

                JapanesLocations = extract_japan_cities(locations)
                # Extract dates and phases
                primaryCompletionDate = study['protocolSection']['statusModule'].get('primaryCompletionDateStruct', {}).get('date', 'Unknown Date')

                phases = ', '.join(study['protocolSection']['designModule'].get('phases', ['Not Available']))
                eligibilityCriteria = study['protocolSection']['eligibilityModule'].get('eligibilityCriteria', 'Unknown')

                # Append the data to the list as a dictionary
                # (Japanese column names: タイトル = title, 対象となる癌 = target cancer,
                #  サマリー = summary, 場所 = locations, クライテリア = criteria)
                data_list.append({
                    "NCTID": nctId,
                    "タイトル": title,
                    #"Start Date": startDate,
                    #"Primary Completion Date": primaryCompletionDate,
                    "対象となる癌": conditions,
                    "サマリー": summary,
                    "場所": JapanesLocations,
                    #"Phases": phases,
                    "クライテリア": eligibilityCriteria
                })

            # Check for nextPageToken and update the params or break the loop
            nextPageToken = data.get('nextPageToken')
            if nextPageToken:
                params['pageToken'] = nextPageToken  # Set the pageToken for the next request
            else:
                break  # Exit the loop if no nextPageToken is present
        else:
            print("Failed to fetch data. Status code:", response.status_code)
            break

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_list)
    return df
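A minimal usage sketch for the fetcher above (the tumor name is a hypothetical example; assumes network access to the ClinicalTrials.gov v2 API, with column names matching the dict keys built in the loop):

# Hypothetical usage of fetch_clinical_trials.
from OpenAITools.FetchTools import fetch_clinical_trials

df = fetch_clinical_trials('glioma')   # pages through results 100 studies at a time
print(len(df), 'recruiting studies in Japan')
print(df[['NCTID', 'Title', 'Japanes Locations']].head())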
OpenAITools/.ipynb_checkpoints/scrapeThisData-checkpoint.py
ADDED
@@ -0,0 +1,237 @@
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

import requests
from bs4 import BeautifulSoup
import re

import os
import time

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import chromedriver_autoinstaller

class ScrapeThatData:

    def __init__(self, time_threshold=10):

        try:
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--no-sandbox')
            self.driver = webdriver.Chrome(options=chrome_options)

        except:
            chromedriver_autoinstaller.install()
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--no-sandbox')
            self.driver = webdriver.Chrome(options=chrome_options)

        self.wait = WebDriverWait(self.driver, time_threshold)
        self.attribute_dict = {'status': 1, 'conditions': 2, 'interventions': 3, 'study type': 4,
                               'phase': 5, 'sponsor': 6, 'funder type': 7, 'study design': 8,
                               'outcome measures': 9, 'number enrolled': 10, 'sex': 11, 'age': 12,
                               'nct number': 13, 'other ids': 14, 'title acronym': 15, 'study start': 16,
                               'primary completion': 17, 'study completion': 18, 'first posted': 19,
                               'last update posted': 20, 'results first posted': 21, 'locations': 22, 'study documents': 23}

        self.status_dict = {'not yet recruiting': 'notYetRecrCB',
                            'recruiting': 'recruitingCB',
                            'enrolling by invitation': 'enrollingByInvCB',
                            'active, not recruiting': 'activeCB',
                            'suspended': 'suspendedCB',
                            'terminated': 'terminatedCB',
                            'completed': 'completedCB',
                            'withdrawn': 'withdrawnCB',
                            'unknown status': 'unknownCB'}

    def clicking_show_hide_cols(self, driver):
        columns = driver.find_element(By.XPATH, '//*[@id="theDataTable_wrapper"]/div[3]/button')
        action_chain = ActionChains(driver)
        action_chain.move_to_element(columns).click()
        action_chain.perform()

    def select_attributes_to_show(self, listed_attributes, attribute_dict):
        ll = [value.lower() for value in listed_attributes if value.lower() in ['status', 'conditions', 'interventions', 'locations']]
        if ll:
            to_show = [value.lower() for value in listed_attributes if value.lower() not in ll]
            to_hide = [value for value in ['status', 'conditions', 'interventions', 'locations'] if value not in ll]
            to_click = to_hide + to_show
            for att in to_click:
                self.clicking_show_hide_cols(self.driver)
                time.sleep(1)
                self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="theDataTable_wrapper"]/div[3]/div[2]/button[' + str(attribute_dict[att]) + ']'))).click()
                time.sleep(1)
        else:
            for att in listed_attributes:
                self.clicking_show_hide_cols(self.driver)
                time.sleep(1)
                self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="theDataTable_wrapper"]/div[3]/div[2]/button[' + str(attribute_dict[att.lower()]) + ']'))).click()
                time.sleep(1)

    def select_by_status(self, listed_states, status_dict):
        if listed_states:
            for status in listed_states:
                self.driver.find_element(By.ID, status_dict[status.lower()]).click()

            self.driver.find_element(By.XPATH, '//*[@id="FiltersBody"]/div[1]/input[1]').click()
            time.sleep(3)

        # Show 100 rows per page (find_element_by_name is gone in Selenium 4)
        select = Select(self.driver.find_element(By.NAME, 'theDataTable_length'))
        select.select_by_value('100')

    def collect_data_search_page(self, l_ordered, amount_of_data=None):

        class_name = ''
        page_index = 1

        elements = [l_ordered]

        while 'disabled' not in class_name:

            time.sleep(10)

            print('Getting data from page {}'.format(page_index))

            # Count how many rows of the table appear
            table = self.driver.find_element(By.ID, 'theDataTable')
            row_count = len(table.find_elements(By.TAG_NAME, "tr"))

            # Loop over the table page
            for index in range(1, row_count):
                row = []
                if 'status' in l_ordered:
                    # These lookups use CSS selectors, so locate with By.CSS_SELECTOR and a
                    # single element (the original find_elements/By.CLASS_NAME calls could not work)
                    self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(3)')))
                    status_element = self.driver.find_element(By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(3) > span')
                    row.append(status_element.text.strip())
                    for i, val in enumerate(l_ordered):
                        if val == 'status':
                            continue

                        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(' + str(4 + i) + ')')))
                        element = self.driver.find_element(By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(' + str(4 + i) + ')')
                        try:
                            row.append(element.text.strip())
                        except:
                            print(i, element)
                else:
                    for i, val in enumerate(l_ordered):
                        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(' + str(3 + i) + ')')))
                        element = self.driver.find_element(By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(' + str(3 + i) + ')')
                        try:
                            row.append(element.text.strip())
                        except:
                            print(i, element)
                elements.append(row)

            # Get the next-page button
            next_page = self.driver.find_element(By.ID, "theDataTable_next")

            # Get the class attribute of the next-page button
            class_name = next_page.get_attribute('class')

            # Go to the next page
            next_page.click()
            page_index += 1

            if amount_of_data:
                if len(elements) >= amount_of_data or row_count < amount_of_data:
                    break
                else:
                    continue

        return elements

    def get_criteria(self, NCTnumber):

        url = 'https://clinicaltrials.gov/ct2/show/' + NCTnumber
        ClinicalTrialpage = requests.get(url)
        soup = BeautifulSoup(ClinicalTrialpage.text, 'html.parser')

        wrapping_crit_class = soup.find_all("div", {"class": "tr-indent2"})
        list_elements = wrapping_crit_class[1].find_all(re.compile("(ul|ol)"))
        inclusion, exclusion = ('', '')

        if not list_elements:
            print("WARNING: Study number " + NCTnumber + " doesn't have eligibility criteria or HTML tag format is not a list")
        else:

            if len(list_elements) == 1:
                try:
                    if wrapping_crit_class[1].find(text='Inclusion Criteria:'):
                        inclusion = list_elements[0].find_all("li")

                    elif wrapping_crit_class[1].find(text='Exclusion Criteria:'):
                        exclusion = list_elements[0].find_all("li")
                except:
                    print("criteria doesn't exist")
            else:
                inclusion = list_elements[0].find_all("li")
                exclusion = list_elements[1].find_all("li")

        inclusion = ' '.join([t.text.strip() for t in inclusion])
        exclusion = ' '.join([t.text.strip() for t in exclusion])

        return (inclusion, exclusion)

    # Function that gets the number of patients enrolled in a study
    def get_enrollment(self, NCTnumber):
        url = 'https://clinicaltrials.gov/ct2/show/' + NCTnumber
        ClinicalTrialpage = requests.get(url)
        soup = BeautifulSoup(ClinicalTrialpage.text, 'html.parser')
        enrollment = ''
        wrapping_enrol_class = soup.find_all('td', {'headers': 'studyInfoColData', 'style': "padding-left:1em"})
        if not wrapping_enrol_class:
            print('WARNING: Number of Participants in Study number ' + NCTnumber + ' is unavailable')
        else:
            enrollment = wrapping_enrol_class[1]
            enrollment = enrollment.text.split()[0]
            if enrollment.isdigit() == False:
                print('WARNING: Number of Participants in Study number ' + NCTnumber + ' is unavailable')
            else:
                return enrollment

    def __call__(self, condition, listed_attributes, listed_states, amount_of_data):

        self.driver.get('https://clinicaltrials.gov/ct2/results?cond=' + condition + '&rank=1&view=record#rowId0')
        self.select_attributes_to_show(listed_attributes, self.attribute_dict)

        try:
            self.select_by_status(listed_states, self.status_dict)
        except:
            print('select by status is a problem')
        n = []
        for i in listed_attributes:
            n.append(self.attribute_dict[i.lower()])
        attribute_ordered = [list(self.attribute_dict.keys())[list(self.attribute_dict.values()).index(i)] for i in sorted(n)]

        search_data = self.collect_data_search_page(attribute_ordered, amount_of_data=amount_of_data)
        nct_numbers = [e[search_data[0].index('nct number')] for e in search_data[1:]]
        search_data[0].extend(['inclusion', 'exclusion', 'enrollment'])
        for index, nct in enumerate(nct_numbers):
            if index % 100 == 0 and index != 0:
                print("Collected Data from {} Studies: ".format(index))

            inc, exc = self.get_criteria(nct)
            enrol = self.get_enrollment(nct)
            search_data[index + 1].extend([inc, exc, enrol])
        return search_data
        # except:
        #     print('no data available with the specified status')
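A minimal sketch of driving the scraper class above (hypothetical arguments; assumes Chrome with a compatible chromedriver, and note the class targets the legacy clinicaltrials.gov/ct2 pages, which may no longer be served):

# Hypothetical usage of ScrapeThatData; keys must come from attribute_dict / status_dict above.
from OpenAITools.scrapeThisData import ScrapeThatData

scraper = ScrapeThatData(time_threshold=10)
rows = scraper('glioma',
               listed_attributes=['nct number', 'status', 'conditions'],
               listed_states=['recruiting'],
               amount_of_data=50)   # rows[0] is the header, rows[1:] the data rows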
OpenAITools/CrinicalTrialTools.py
ADDED
@@ -0,0 +1,311 @@
from langchain_community.agent_toolkits import create_sql_agent
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
import pandas as pd
from pydantic import BaseModel, Field  # note: shadows the langchain_core.pydantic_v1 import above

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


gpt = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
#agent_gpt_executor = create_sql_agent(gpt, db=db, agent_type="tool-calling", verbose=True)

## make database
from langchain_community.utilities import SQLDatabase
from sqlalchemy import create_engine

from langchain.prompts import ChatPromptTemplate
from langchain.schema import SystemMessage
from langchain_core.prompts import MessagesPlaceholder
#agent_groq_executor = create_sql_agent(llm, db=db, agent_type="tool-calling", verbose=True)

from OpenAITools.FetchTools import fetch_clinical_trials, fetch_clinical_trials_jp


## Tumor-name extraction
class ExtractTumorName(BaseModel):
    """Extract tumor name from the user's question."""
    tumor_name: str = Field(description="Extracted tumor name from the question, or 'None' if no tumor found")

class TumorNameExtractor:
    def __init__(self, llm):
        self.llm = llm

        # Configure the LLM to return structured output
        self.structured_llm_extractor = self.llm.with_structured_output(ExtractTumorName)

        # System prompt (Japanese: "You are a system that extracts tumor names in English from the
        # user's question. If the question names a tumor, return it in English; otherwise return 'None'.")
        self.system_prompt = """あなたは、ユーザーの質問に基づいて腫瘍名を英語で抽出するシステムです。\n
        質問文に腫瘍の種類や名前が含まれている場合、それを英語で返してください。\n
        質問文に腫瘍名がない場合は 'None' と返答してください。"""

        # Prompt template
        self.grade_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("human", "ユーザーの質問: {question}"),
            ]
        )

    def extract_tumor_name(self, question: str) -> str:
        """
        Extract the tumor name.
        :param question: the question text
        :return: the extracted tumor name
        """
        # Extract the tumor name from the question
        tumor_extractor = self.grade_prompt | self.structured_llm_extractor
        result = tumor_extractor.invoke({"question": question})
        return result.tumor_name

### Question-rewriting system

# Output schema for ModifyQuestion
class ModifyQuestion(BaseModel):
    """Class for modifying a question by inserting NCTID."""
    modified_question: str = Field(description="The modified question with the inserted NCTID.")

class QuestionModifier:
    def __init__(self, llm):
        self.llm = llm

        # Configure the LLM to return structured output
        self.structured_llm_modifier = self.llm.with_structured_output(ModifyQuestion)

        # System prompt (Japanese: rewrite the question so it asks whether the patient can
        # participate in trial {nct_id}, e.g. "Can a 16-year-old male glioma patient join {nct_id}?")
        self.system_prompt = """あなたは、ユーザーの質問に対して適切なNCTIDを挿入して質問を変更するシステムです。\n
        質問文にNCTIDを挿入し、形式に基づいて新しい質問を生成してください。\n
        例えば16歳男性の神経膠腫の患者さんが参加できる臨床治験を教えて下さいという質問に対しては\n
        16歳男性の神経膠腫の患者さんは{nct_id}に参加できますか?と変更して下さい\n
        NCTIDは {nct_id} を使用してください。"""

        # Prompt template
        self.modify_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("human", "ユーザーの質問: {question}"),
            ]
        )

    def modify_question(self, question: str, nct_id: str) -> str:
        """
        Modify the question.
        :param question: the question text
        :param nct_id: the NCTID
        :return: a new question with the NCTID inserted
        """
        # Run the rewriting process
        question_modifier = self.modify_prompt | self.structured_llm_modifier
        result = question_modifier.invoke({"question": question, "nct_id": nct_id})
        modify_question = result.modified_question
        return modify_question

class QuestionModifierSecond:
    def __init__(self, llm):
        self.llm = llm

        # Configure the LLM to return structured output
        self.structured_llm_modifier = self.llm.with_structured_output(ModifyQuestion)

        # System prompt (Japanese: rewrite the question so it asks whether the patient can
        # participate in "this trial", without naming an NCTID)
        self.system_prompt = """あなたは、ユーザーの質問を変更するシステムです。\n
        形式に基づいて新しい質問を生成してください。\n
        例えば16歳男性の神経膠腫の患者さんが参加できる臨床治験を教えて下さいという質問に対しては\n
        16歳男性の神経膠腫の患者さんはこの治験に参加できますか?と変更して下さい\n
        """

        # Prompt template
        self.modify_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("human", "ユーザーの質問: {question}"),
            ]
        )

    def modify_question(self, question: str) -> str:
        """
        Modify the question.
        :param question: the question text
        :return: the rewritten question
        """
        # Run the rewriting process
        question_modifier = self.modify_prompt | self.structured_llm_modifier
        result = question_modifier.invoke({"question": question})
        modify_question = result.modified_question
        return modify_question

class QuestionModifierEnglish:
    def __init__(self, llm):
        self.llm = llm

        # Configure the LLM to return structured output
        self.structured_llm_modifier = self.llm.with_structured_output(ModifyQuestion)

        # System prompt (Japanese: rewrite the question and translate it into English,
        # e.g. "Can a 16 year old male patient with glioma participate in this clinical trial?")
        self.system_prompt = """あなたは、ユーザーの質問を変更し英語に翻訳するシステムです。\n
        形式に基づいて新しい質問を生成してください。\n
        例えば16歳男性の神経膠腫の患者さんが参加できる臨床治験を教えて下さいという質問に対しては\n
        Can a 16 year old male patient with glioma participate in this clinical trial?と変更して下さい\n
        """

        # Prompt template
        self.modify_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("human", "ユーザーの質問: {question}"),
            ]
        )

    def modify_question(self, question: str) -> str:
        """
        Modify the question.
        :param question: the question text
        :return: the rewritten, English-translated question
        """
        # Run the rewriting process
        question_modifier = self.modify_prompt | self.structured_llm_modifier
        result = question_modifier.invoke({"question": question})
        modify_question = result.modified_question
        return modify_question


### Make criteria check Agent

class ClinicalTrialAgent:
    def __init__(self, llm, db):
        self.llm = llm
        self.db = db

        # System prompt (Japanese: "You are an agent that finds suitable clinical trials for a
        # patient. Check the Eligibility Criteria in the database and answer whether the patient
        # can take part in the trial.")
        self.system_prompt = """
        あなたは患者さんに適した治験を探すエージェントです。
        データベースのEligibility Criteriaをチェックして患者さんがその治験を受けることが可能かどうか答えて下さい
        """

        # Build the prompt template
        self.prompt = ChatPromptTemplate.from_messages(
            [("system", self.system_prompt),
             ("human", "{input}"),
             MessagesPlaceholder("agent_scratchpad")]
        )

        # Set up the SQL agent
        self.agent_executor = self.create_sql_agent(self.llm, self.db, self.prompt)

    def create_sql_agent(self, llm, db, prompt):
        """Create the SQL agent."""
        agent_executor = create_sql_agent(
            llm,
            db=db,
            prompt=prompt,
            agent_type="tool-calling",
            verbose=True
        )
        return agent_executor

    def get_agent_judgment(self, modify_question: str) -> str:
        """
        Get the agent's judgment, based on the modified question, of whether the patient can join the trial.
        :param modify_question: the question with the NCTID inserted
        :return: the agent's judgment (AgentJudgment)
        """
        # Pass the question to the LLM and get its judgment
        result = self.agent_executor.invoke({"input": modify_question})
        return result


class SimpleClinicalTrialAgent:
    def __init__(self, llm):
        self.llm = llm

    def evaluate_eligibility(self, TargetCriteria: str, question: str) -> str:
        """
        Evaluate eligibility for a clinical trial.
        :param TargetCriteria: the trial's inclusion/exclusion criteria
        :param question: the question about the patient's condition
        :return: the LLM's answer on whether the patient can participate
        """

        # Define the prompt
        prompt_template = """
        You are an agent looking for a suitable clinical trial for a patient.
        Please answer whether the patient is eligible for this clinical trial based on the following criteria. If you do not know the answer, say you do not know. Your answer should be brief, no more than 3 sentences.
        Question: {question}
        Criteria:
        """ + TargetCriteria

        # Build the prompt template
        criteria_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "human",
                    prompt_template
                )
            ]
        )

        # Build the RAG chain
        rag_chain = (
            {"question": RunnablePassthrough()}
            | criteria_prompt
            | self.llm
            | StrOutputParser()
        )

        # Pass the question through the chain and get the response
        response = rag_chain.invoke(question)
        return response


### Output grading system
class TrialEligibilityGrader(BaseModel):
    """Three-way grade: yes, no, unclear"""
    score: str = Field(
        description="The eligibility of the patient for the clinical trial based on the document. Options are: 'yes', 'no', or 'unclear'."
    )

class GraderAgent:
    def __init__(self, llm):
        self.llm = llm

        # Configure the LLM to return structured output
        self.structured_llm_grader = self.llm.with_structured_output(TrialEligibilityGrader)

        # Grader system prompt (Japanese: "You are a grader assessing a patient's suitability for a
        # trial. Read the document below and answer 'yes' (can participate), 'no' (cannot), or
        # 'unclear' (cannot be determined).")
        self.system_prompt = """
        あなたは治験に参加する患者の適合性を評価するGraderです。
        以下のドキュメントを読み、患者が治験に参加可能かどうかを判断してください。
        'yes'(参加可能)、'no'(参加不可能)、'unclear'(判断できない)の3値で答えてください。
        """

        # Build the grading prompt
        self.grade_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                (
                    "human",
                    "取得したドキュメント: \n\n {document} ",
                ),
            ]
        )

    def evaluate_eligibility(self, AgentJudgment_output: str) -> str:
        """
        Grade whether the patient can join the trial, based on AgentJudgment['output'], and return a score (AgentGrade).
        :param AgentJudgment_output: the 'output' value of the agent judgment
        :return: the grade ('yes', 'no', or 'unclear')
        """
        GraderAgent = self.grade_prompt | self.structured_llm_grader
        result = GraderAgent.invoke({"document": AgentJudgment_output})
        AgentGrade = result.score
        return AgentGrade
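A minimal sketch of how these classes are meant to compose into one screening pass (hypothetical question and model choice; assumes an OPENAI_API_KEY is configured):

# Hypothetical end-to-end pass: extract tumor name, fetch trials, ask per-trial eligibility, grade the answer.
from langchain_openai import ChatOpenAI
from OpenAITools.CrinicalTrialTools import (TumorNameExtractor, QuestionModifierSecond,
                                            SimpleClinicalTrialAgent, GraderAgent)
from OpenAITools.FetchTools import fetch_clinical_trials

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
question = "16歳男性の神経膠腫の患者さんが参加できる臨床治験を教えて下さい"

tumor = TumorNameExtractor(llm).extract_tumor_name(question)      # e.g. "glioma"
df = fetch_clinical_trials(tumor)
modified = QuestionModifierSecond(llm).modify_question(question)  # "...can they join this trial?"

agent = SimpleClinicalTrialAgent(llm)
grader = GraderAgent(llm)
for _, trial in df.head(3).iterrows():
    judgment = agent.evaluate_eligibility(trial["Eligibility Criteria"], modified)
    print(trial["NCTID"], grader.evaluate_eligibility(judgment))  # 'yes' / 'no' / 'unclear'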
OpenAITools/ECarteTools.py
ADDED
@@ -0,0 +1,73 @@
import openai
import time
import wikipedia
import random
import re
import requests
from bs4 import BeautifulSoup
import os
import glob
from natsort import natsorted
import xml.etree.ElementTree as ET
import pandas as pd
from pytrials.client import ClinicalTrials

wikipedia.set_lang("ja")
# Set the API key
openai.api_key = os.environ['OPENAI_API_KEY']
engine = "gpt-3.5-turbo"


def generate(system_template, prompt, engine="gpt-3.5-turbo"):
    while True:  # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": system_template},
                    {"role": "user", "content": prompt},
                ]
            )
            result = response["choices"][0]["message"]["content"]
            return result
        except:
            print("リトライ")  # "retrying"
            time.sleep(30)

def generate_carte(prompt, engine="gpt-3.5-turbo"):
    while True:  # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are useful assistant"},
                    # Prompt (Japanese): "Summarize the following patient-doctor conversation
                    # concisely, in Japanese, as a SOAP-format chart with sections S), O), A), P)."
                    {"role": "user", "content": "%s\n・・・という患者と医師の会話をSOAP形式のカルテとして日本語で端的にまとめて下さい。各セクションはS),O), A),P)として下さい " % prompt},
                ]
            )
            result = response["choices"][0]["message"]["content"]
            return result
        except:
            print("リトライ")  # "retrying"
            time.sleep(30)

def get_selected_fileds(texts):
    ct = ClinicalTrials()  # pytrials client; ct is used below
    input_name = texts.replace(' ', "+")
    corona_fields = ct.get_study_fields(
        search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (input_name),
        fields=["NCTId", "Condition", "BriefTitle", 'BriefSummary', 'EligibilityCriteria'],
        max_studies=500,
        fmt="csv")
    return corona_fields

def get_retriever_str(fields):
    retriever_str = ''
    for i in range(1, len(fields)):
        colnames = fields[0]
        targetCol = fields[i]
        for f in range(len(fields[0])):
            retriever_str += colnames[f] + ":" + targetCol[f] + "\n"
        retriever_str += '\n'
    return retriever_str
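A minimal sketch of calling the SOAP-note helper above (hypothetical transcript; assumes OPENAI_API_KEY is set and the legacy openai<1.0 ChatCompletion interface these functions rely on):

# Hypothetical usage of generate_carte: turn a doctor-patient transcript into a SOAP-format chart.
from OpenAITools.ECarteTools import generate_carte

transcript = "医師: 今日はどうされましたか? 患者: 三日前から頭痛が続いています。"
print(generate_carte(transcript))   # returns a chart with S), O), A), P) sections in Japanese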
OpenAITools/ExpertTools.py
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import openai
|
3 |
+
import time
|
4 |
+
import wikipedia
|
5 |
+
import random
|
6 |
+
import re
|
7 |
+
import requests
|
8 |
+
from bs4 import BeautifulSoup
|
9 |
+
import os
|
10 |
+
import glob
|
11 |
+
from natsort import natsorted
|
12 |
+
import requests
|
13 |
+
from bs4 import BeautifulSoup
|
14 |
+
import xml.etree.ElementTree as ET
|
15 |
+
from pytrials.client import ClinicalTrials
|
16 |
+
from Bio import Entrez
|
17 |
+
import pandas as pd
|
18 |
+
import numpy as np
|
19 |
+
import time
|
20 |
+
#from langchain.agents import create_pandas_dataframe_agent
|
21 |
+
from langchain_experimental.agents import create_pandas_dataframe_agent
|
22 |
+
#from langchain.llms import OpenAI
|
23 |
+
from langchain_community.llms import OpenAI
|
24 |
+
|
25 |
+
# APIキーの設定
|
26 |
+
openai.api_key = os.environ['OPENAI_API_KEY']
|
27 |
+
gptengine="gpt-3.5-turbo"
|
28 |
+
|
29 |
+
|
30 |
+
"""def get_selected_fileds(texts):
|
31 |
+
ct = ClinicalTrials()
|
32 |
+
input_name = texts.replace(' ' , "+")
|
33 |
+
corona_fields = ct.get_study_fields(
|
34 |
+
search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)"%(input_name),
|
35 |
+
fields=["NCTId", "Condition", "BriefTitle",'BriefSummary','EligibilityCriteria'],
|
36 |
+
max_studies=500,
|
37 |
+
fmt="csv")
|
38 |
+
return corona_fields"""
|
39 |
+
|
40 |
+
def get_retriever_str(fields):
|
41 |
+
retriever_str=''
|
42 |
+
for i in range(1,len(fields)):
|
43 |
+
colnames = fields[0]
|
44 |
+
targetCol = fields[i]
|
45 |
+
for f in range(len(fields[0])):
|
46 |
+
retriever_str+=colnames[f] + ":" + targetCol[f] +"\n"
|
47 |
+
retriever_str+='\n'
|
48 |
+
return retriever_str
|
49 |
+
|
50 |
+
def get_chanked_retriever(fields):
|
51 |
+
retriever_list =[]
|
52 |
+
for i in range(1,len(fields)):
|
53 |
+
retriever_str=''
|
54 |
+
colnames = fields[0]
|
55 |
+
targetCol = fields[i]
|
56 |
+
for f in range(len(fields[0])):
|
57 |
+
retriever_str+=colnames[f] + ":" + targetCol[f] +"\n"
|
58 |
+
retriever_list.append(retriever_str)
|
59 |
+
return retriever_list
|
60 |
+
|
61 |
+
from pytrials.client import ClinicalTrials
def get_selected_fields(texts, split_criteria=False,
                        split_word_number=False, split_number=700):
    ct = ClinicalTrials()
    input_name = texts.replace(' ', "+")
    corona_fields = ct.get_study_fields(
        search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (input_name),
        fields=["NCTId", "Condition", "BriefTitle", 'BriefSummary', 'EligibilityCriteria'],
        max_studies=500,
        fmt="csv")

    if split_criteria:
        new_fields = []

        # Strings to search for
        target_string1 = 'Exclusion Criteria'
        target_string2 = 'Exclusion criteria'

        # Look for the target string in each element and split just before it,
        # collecting the pieces in a new list
        for corona_field in corona_fields:
            new_list = []
            for item in corona_field:
                if target_string1 in item:
                    split_position = item.index(target_string1)
                    new_list.append(item[:split_position])
                    new_list.append(item[split_position:])
                elif target_string2 in item:
                    split_position = item.index(target_string2)
                    new_list.append(item[:split_position])
                    new_list.append(item[split_position:])
                else:
                    new_list.append(item)
            new_fields.append(new_list)
    else:
        new_fields = corona_fields

    if split_word_number:
        split_fields = []
        for new_field in new_fields:
            new_list = []

            # Split any element longer than split_number characters into chunks
            for item in new_field:
                item_length = len(item)
                if item_length > split_number:
                    num_parts = -(-item_length // split_number)  # ceiling division to get the chunk count
                    for i in range(num_parts):
                        start_index = i * split_number
                        end_index = min((i + 1) * split_number, item_length)  # don't run past the end of the string
                        new_list.append(item[start_index:end_index])
                else:
                    new_list.append(item)

            split_fields.append(new_list)
        new_fields = split_fields

    return new_fields

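A minimal usage sketch for get_selected_fields; "lung cancer" is an arbitrary example query, live network access to ClinicalTrials.gov via pytrials is assumed, and the result shape follows from the code above (header row first):

fields = get_selected_fields("lung cancer", split_criteria=True, split_word_number=True)
print(fields[0])        # header row: NCTId, Condition, BriefTitle, ...
print(len(fields) - 1)  # number of study rows returned (capped at max_studies=500)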
def print_agent_results(df, Ids,
                        interesteds=['Condition', 'BriefTitle', 'BriefSummary', 'EligibilityCriteria'],
                        translater=None):
    results = ""
    for Id in Ids:
        print("%s\n" % Id)
        sdf = df[df['NCTId'] == Id]
        for interested in interesteds:
            # Take the first matching element
            results += '%s: \n %s \n' % (interested, sdf[interested].iloc[0])
            #print('%s: \n %s \n' % (interested, sdf[interested].iloc[0]))
        if translater:
            to_be_printed = translater.translate(results)
        else:
            to_be_printed = results
        print(to_be_printed)

def search(query):
    Entrez.email = os.getenv('MAIL_ADRESS')
    #Entrez.email='[email protected]'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='20',
                            retmode='xml',
                            term=query)
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = os.getenv('MAIL_ADRESS')
    #Entrez.email = '[email protected]'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results
'''def generate(prompt, engine=None):
    if engine is None:
        engine = gptengine
    while True:  # careful: this retries forever if the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are a useful assistant"},
                    {"role": "user", "content": prompt},
                ]
            )
            result = response["choices"][0]["message"]["content"]
            return result
        except Exception as e:
            print(e)
            print("Retrying")
            time.sleep(30)
            pass
'''

def generate(prompt, engine=None):
    if engine is None:
        engine = gptengine
    while True:  # careful: this retries forever if the OpenAI API is down
        try:
            response = openai.chat.completions.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are a useful assistant"},
                    {"role": "user", "content": prompt},
                ]
            )
            #result = response["choices"][0]["message"]["content"]
            result = response.choices[0].message.content
            return result
        except Exception as e:
            print(e)
            print("Retrying")
            time.sleep(30)

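A minimal sketch of calling generate, assuming the OPENAI_API_KEY environment variable is set; note that the retry loop above never gives up, so a persistent API outage will block the caller indefinitely:

answer = generate("Summarize the purpose of eligibility criteria in clinical trials.")
print(answer)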
def GetPubmedSummaryDf(studies):
    title_list = []
    abstract_list = []
    journal_list = []
    language_list = []
    pubdate_year_list = []
    pubdate_month_list = []
    studiesIdList = studies['IdList']
    chunk_size = 10000
    for chunk_i in range(0, len(studiesIdList), chunk_size):
        chunk = studiesIdList[chunk_i:chunk_i + chunk_size]

        try:
            papers = fetch_details(chunk)
            for i, paper in enumerate(papers['PubmedArticle']):
                title_list.append(paper['MedlineCitation']['Article']['ArticleTitle'])
                try:
                    abstract_list.append(paper['MedlineCitation']['Article']['Abstract']['AbstractText'][0])
                except:
                    abstract_list.append('No Abstract')
                journal_list.append(paper['MedlineCitation']['Article']['Journal']['Title'])
                language_list.append(paper['MedlineCitation']['Article']['Language'][0])
                try:
                    pubdate_year_list.append(paper['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate']['Year'])
                except:
                    pubdate_year_list.append('No Data')
                try:
                    pubdate_month_list.append(paper['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate']['Month'])
                except:
                    pubdate_month_list.append('No Data')
        except:  # occasionally a chunk might annoy your parser
            pass
    df = pd.DataFrame(list(zip(
        title_list, abstract_list, journal_list, language_list, pubdate_year_list,
        pubdate_month_list)),
        columns=['Title', 'Abstract', 'Journal', 'Language', 'Year', 'Month'])
    return df, abstract_list

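The PubMed helpers compose into a small pipeline: search returns an Entrez result whose IdList feeds fetch_details via GetPubmedSummaryDf. A rough sketch with an arbitrary query; it assumes network access and a contact address in the MAIL_ADRESS environment variable, which NCBI requires:

studies = search('glioblastoma immunotherapy')
df, abstracts = GetPubmedSummaryDf(studies)
print(df[['Title', 'Journal', 'Year']].head())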
def ClinicalAgent(fields, verbose=False):
    df = pd.DataFrame.from_records(fields[1:], columns=fields[0])
    return create_pandas_dataframe_agent(OpenAI(temperature=0, model='gpt-3.5-turbo-16k'), df, verbose=verbose)

def GetNCTID(results):
    # Regular expression matching words that start with NCT
    pattern = r'\bNCT\d+\b'
    # Extract the matching words
    nct_words = re.findall(pattern, results)
    return nct_words
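A quick offline illustration of GetNCTID on made-up text (the IDs are placeholders, and re is assumed to be imported at the top of this module):

text = "Relevant registrations include NCT01234567 and NCT07654321."
print(GetNCTID(text))  # ['NCT01234567', 'NCT07654321']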
OpenAITools/FetchTools.py
ADDED
@@ -0,0 +1,158 @@
import os
import pandas as pd
#from llama_index.llms.replicate import Replicate
import requests
import re


def extract_japan_cities(text):
    # Use a regular expression to pull out city names that end with " - Japan"
    pattern = r'(\b\w+\s*\w*\b) - Japan'
    cities = re.findall(pattern, text)
    unique_cities = list(set(cities))
    # Sort the unique city names and join them into a comma-separated string
    unique_cities.sort()
    return ', '.join(unique_cities)

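A quick offline check of extract_japan_cities, using a toy string in the same "<city> - <country>" format that fetch_clinical_trials builds below:

locations = "Tokyo - Japan, Boston - United States, Osaka - Japan"
print(extract_japan_cities(locations))  # "Osaka, Tokyo"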
def fetch_clinical_trials(cancer_name):
    search_expr = "%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (cancer_name)
    # Initial URL for the first API call
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }

    # Initialize an empty list to store the data
    data_list = []
    # Loop until there is no nextPageToken
    while True:
        # Print the current URL (for debugging purposes)
        print("Fetching data from:", base_url + '?' + '&'.join([f"{k}={v}" for k, v in params.items()]))

        # Send a GET request to the API
        response = requests.get(base_url, params=params)

        # Check if the request was successful
        if response.status_code == 200:
            data = response.json()  # Parse JSON response
            studies = data.get('studies', [])  # Extract the list of studies

            # Loop through each study and extract specific information
            for study in studies:
                # Safely access nested keys
                nctId = study['protocolSection']['identificationModule'].get('nctId', 'Unknown')
                startDate = study['protocolSection']['statusModule'].get('startDateStruct', {}).get('date', 'Unknown Date')
                conditions = ', '.join(study['protocolSection']['conditionsModule'].get('conditions', ['No conditions listed']))
                title = study['protocolSection']['identificationModule'].get('briefTitle', 'no title')
                summary = study['protocolSection']['descriptionModule'].get('briefSummary', 'no summary')

                # Extract locations safely
                locations_list = study['protocolSection'].get('contactsLocationsModule', {}).get('locations', [])
                locations = ', '.join([f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}" for location in locations_list]) if locations_list else "No locations listed"

                JapanesLocations = extract_japan_cities(locations)
                # Extract dates and phases
                primaryCompletionDate = study['protocolSection']['statusModule'].get('primaryCompletionDateStruct', {}).get('date', 'Unknown Date')

                phases = ', '.join(study['protocolSection']['designModule'].get('phases', ['Not Available']))
                eligibilityCriteria = study['protocolSection']['eligibilityModule'].get('eligibilityCriteria', 'Unknown')

                # Append the data to the list as a dictionary
                data_list.append({
                    "NCTID": nctId,
                    "Title": title,
                    #"Start Date": startDate,
                    "Primary Completion Date": primaryCompletionDate,
                    #"Conditions": conditions,
                    "Cancer": conditions,
                    "Summary": summary,
                    "Japanes Locations": JapanesLocations,
                    #"Phases": phases,
                    "Eligibility Criteria": eligibilityCriteria
                })

            # Check for nextPageToken and update the params or break the loop
            nextPageToken = data.get('nextPageToken')
            if nextPageToken:
                params['pageToken'] = nextPageToken  # Set the pageToken for the next request
            else:
                break  # Exit the loop if no nextPageToken is present
        else:
            print("Failed to fetch data. Status code:", response.status_code)
            break

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_list)
    return df

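A minimal usage sketch; this hits the live ClinicalTrials.gov v2 API, so network access is assumed and the results change over time ("pancreatic cancer" is an arbitrary example):

trials = fetch_clinical_trials("pancreatic cancer")
print(trials.columns.tolist())  # NCTID, Title, Primary Completion Date, Cancer, ...
print(len(trials), "studies returned")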
def fetch_clinical_trials_jp(cancer_name):
    search_expr = "%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (cancer_name)
    # Initial URL for the first API call
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }

    # Initialize an empty list to store the data
    data_list = []
    # Loop until there is no nextPageToken
    while True:
        # Print the current URL (for debugging purposes)
        print("Fetching data from:", base_url + '?' + '&'.join([f"{k}={v}" for k, v in params.items()]))

        # Send a GET request to the API
        response = requests.get(base_url, params=params)

        # Check if the request was successful
        if response.status_code == 200:
            data = response.json()  # Parse JSON response
            studies = data.get('studies', [])  # Extract the list of studies

            # Loop through each study and extract specific information
            for study in studies:
                # Safely access nested keys
                nctId = study['protocolSection']['identificationModule'].get('nctId', 'Unknown')
                startDate = study['protocolSection']['statusModule'].get('startDateStruct', {}).get('date', 'Unknown Date')
                conditions = ', '.join(study['protocolSection']['conditionsModule'].get('conditions', ['No conditions listed']))
                title = study['protocolSection']['identificationModule'].get('briefTitle', 'no title')
                summary = study['protocolSection']['descriptionModule'].get('briefSummary', 'no summary')

                # Extract locations safely
                locations_list = study['protocolSection'].get('contactsLocationsModule', {}).get('locations', [])
                locations = ', '.join([f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}" for location in locations_list]) if locations_list else "No locations listed"

                JapanesLocations = extract_japan_cities(locations)
                # Extract dates and phases
                primaryCompletionDate = study['protocolSection']['statusModule'].get('primaryCompletionDateStruct', {}).get('date', 'Unknown Date')

                phases = ', '.join(study['protocolSection']['designModule'].get('phases', ['Not Available']))
                eligibilityCriteria = study['protocolSection']['eligibilityModule'].get('eligibilityCriteria', 'Unknown')

                # Append the data to the list as a dictionary
                # (same as fetch_clinical_trials, but with Japanese column labels)
                data_list.append({
                    "NCTID": nctId,
                    "タイトル": title,
                    #"Start Date": startDate,
                    #"Primary Completion Date": primaryCompletionDate,
                    "対象となる癌": conditions,
                    "サマリー": summary,
                    "場所": JapanesLocations,
                    #"Phases": phases,
                    "クライテリア": eligibilityCriteria
                })

            # Check for nextPageToken and update the params or break the loop
            nextPageToken = data.get('nextPageToken')
            if nextPageToken:
                params['pageToken'] = nextPageToken  # Set the pageToken for the next request
            else:
                break  # Exit the loop if no nextPageToken is present
        else:
            print("Failed to fetch data. Status code:", response.status_code)
            break

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_list)
    return df
OpenAITools/ReviewPaperTools.py
ADDED
@@ -0,0 +1,42 @@
import re
import pandas as pd

def parse_text_file(text):
    # Regular expression pattern used to split the text into sections;
    # \d+ matches one or more digits
    pattern = re.compile(r'\n\n\n\d+\.')

    # Split the text into sections
    sections = pattern.split(text)[1:]  # drop the leading empty section

    # Strip surrounding whitespace from each section
    sections = [section.strip() for section in sections]

    return sections

def split_sections(text):
    contents = text.split('\n\n')
    contents = [section.strip() for section in contents if section.strip()]
    if len(contents) == 8:
        keys = ['PublishInfo', 'Title', 'AuthorName', 'AuthorInfo', 'Abstract', 'Copyrights', 'DOI', 'COI']
    elif len(contents) == 7:
        keys = ['PublishInfo', 'Title', 'AuthorName', 'AuthorInfo', 'Abstract', 'Copyrights', 'DOI']
    elif len(contents) == 6:
        keys = ['PublishInfo', 'Title', 'AuthorName', 'AuthorInfo', 'Abstract', 'DOI']
    elif len(contents) == 5:
        keys = ['PublishInfo', 'Title', 'AuthorName', 'Abstract', 'DOI']
    else:
        # Fallback for unexpected layouts, so `keys` is always defined
        keys = ['PublishInfo', 'Title', 'AuthorName', 'AuthorInfo', 'Abstract', 'Copyrights', 'DOI', 'COI']

    # Build the dictionary, filling missing keys with an empty string
    section_dict = {key: contents[i] if i < len(contents) else "" for i, key in enumerate(keys)}
    return section_dict


def GetSummaryDf(textdir):
    with open(textdir, 'r', encoding='utf-8') as f:
        content = f.read()
    sections = parse_text_file(content)
    dicts = []
    for section in sections:
        splited_dic = split_sections(section)
        dicts.append(splited_dic)
    return pd.DataFrame(dicts)
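A minimal sketch of GetSummaryDf; 'pubmed_export.txt' is a hypothetical path to a PubMed-style text export in which each record starts with a numbered heading ("\n\n\n1.") and the blocks within a record are separated by blank lines, as the parsers above expect:

summary_df = GetSummaryDf('pubmed_export.txt')
print(summary_df[['Title', 'DOI']].head())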
OpenAITools/scrapeThisData.py
ADDED
@@ -0,0 +1,237 @@
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

import requests
from bs4 import BeautifulSoup
import re

import os
import time

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import chromedriver_autoinstaller

class ScrapeThatData:

    def __init__(self, time_threshold=10):

        try:
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--no-sandbox')
            self.driver = webdriver.Chrome(options=chrome_options)

        except:
            chromedriver_autoinstaller.install()
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--no-sandbox')
            self.driver = webdriver.Chrome(options=chrome_options)

        self.wait = WebDriverWait(self.driver, time_threshold)
        self.attribute_dict = {'status': 1, 'conditions': 2, 'interventions': 3, 'study type': 4,
                               'phase': 5, 'sponsor': 6, 'funder type': 7, 'study design': 8,
                               'outcome measures': 9, 'number enrolled': 10, 'sex': 11, 'age': 12,
                               'nct number': 13, 'other ids': 14, 'title acronym': 15, 'study start': 16,
                               'primary completion': 17, 'study completion': 18, 'first posted': 19,
                               'last update posted': 20, 'results first posted': 21, 'locations': 22, 'study documents': 23}

        self.status_dict = {'not yet recruiting': 'notYetRecrCB',
                            'recruiting': 'recruitingCB',
                            'enrolling by invitation': 'enrollingByInvCB',
                            'active, not recruiting': 'activeCB',
                            'suspended': 'suspendedCB',
                            'terminated': 'terminatedCB',
                            'completed': 'completedCB',
                            'withdrawn': 'withdrawnCB',
                            'unknown status': 'unknownCB'}

    def clicking_show_hide_cols(self, driver):
        columns = driver.find_element(By.XPATH, '//*[@id="theDataTable_wrapper"]/div[3]/button')
        action_chain = ActionChains(driver)
        action_chain.move_to_element(columns).click()
        action_chain.perform()

    def select_attributes_to_show(self, listed_attributes, attribute_dict):
        ll = [value.lower() for value in listed_attributes if value.lower() in ['status', 'conditions', 'interventions', 'locations']]
        if ll:
            to_show = [value.lower() for value in listed_attributes if value.lower() not in ll]
            to_hide = [value for value in ['status', 'conditions', 'interventions', 'locations'] if value not in ll]
            to_click = to_hide + to_show
            for att in to_click:
                self.clicking_show_hide_cols(self.driver)
                time.sleep(1)
                self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="theDataTable_wrapper"]/div[3]/div[2]/button[' + str(attribute_dict[att]) + ']'))).click()
                time.sleep(1)
        else:
            for att in listed_attributes:
                self.clicking_show_hide_cols(self.driver)
                time.sleep(1)
                self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="theDataTable_wrapper"]/div[3]/div[2]/button[' + str(attribute_dict[att.lower()]) + ']'))).click()
                time.sleep(1)

    def select_by_status(self, listed_states, status_dict):
        if listed_states:
            for status in listed_states:
                self.driver.find_element(By.ID, status_dict[status.lower()]).click()

            self.driver.find_element(By.XPATH, '//*[@id="FiltersBody"]/div[1]/input[1]').click()
            time.sleep(3)

        # find_element_by_name was removed in Selenium 4; use By.NAME instead
        select = Select(self.driver.find_element(By.NAME, 'theDataTable_length'))
        select.select_by_value('100')

    def collect_data_search_page(self, l_ordered, amount_of_data=None):

        class_name = ''
        page_index = 1

        elements = [l_ordered]

        while 'disabled' not in class_name:

            time.sleep(10)

            print('Getting data from page {}'.format(page_index))

            # Counting how many rows of the table appear
            table = self.driver.find_element(By.ID, 'theDataTable')
            row_count = len(table.find_elements(By.TAG_NAME, "tr"))

            # Looping table page
            for index in range(1, row_count):
                row = []
                if 'status' in l_ordered:
                    self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(3)')))
                    # These are CSS selectors, so By.CSS_SELECTOR (not By.CLASS_NAME) is required,
                    # and find_element (not find_elements) is needed to get a single element with .text
                    status_element = self.driver.find_element(By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(3) > span')
                    row.append(status_element.text.strip())
                    for i, val in enumerate(l_ordered):
                        if val == 'status':
                            continue

                        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(' + str(4 + i) + ')')))
                        element = self.driver.find_element(By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(' + str(4 + i) + ')')
                        try:
                            row.append(element.text.strip())
                        except:
                            print(i, element)
                else:
                    for i, val in enumerate(l_ordered):
                        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(' + str(3 + i) + ')')))
                        element = self.driver.find_element(By.CSS_SELECTOR, '#theDataTable > tbody > tr:nth-child(' + str(index) + ') > td:nth-child(' + str(3 + i) + ')')
                        try:
                            row.append(element.text.strip())
                        except:
                            print(i, element)
                elements.append(row)

            # Getting next page button
            next_page = self.driver.find_element(By.ID, "theDataTable_next")

            # Getting the class attribute of the next page button
            class_name = next_page.get_attribute('class')

            # Going to the next page
            next_page.click()
            page_index += 1

            if amount_of_data:
                if len(elements) >= amount_of_data or row_count < amount_of_data:
                    break
                else:
                    continue

        return elements

    def get_criteria(self, NCTnumber):

        url = 'https://clinicaltrials.gov/ct2/show/' + NCTnumber
        ClinicalTrialpage = requests.get(url)
        soup = BeautifulSoup(ClinicalTrialpage.text, 'html.parser')

        wrapping_crit_class = soup.find_all("div", {"class": "tr-indent2"})
        list_elements = wrapping_crit_class[1].find_all(re.compile("(ul|ol)"))
        inclusion, exclusion = ('', '')

        if not list_elements:
            print("WARNING: Study number " + NCTnumber + " doesn't have eligibility criteria or HTML tag format is not a list")
        else:

            if len(list_elements) == 1:
                try:
                    if wrapping_crit_class[1].find(text='Inclusion Criteria:'):
                        inclusion = list_elements[0].find_all("li")

                    elif wrapping_crit_class[1].find(text='Exclusion Criteria:'):
                        exclusion = list_elements[0].find_all("li")
                except:
                    print("criteria doesn't exist")
            else:
                inclusion = list_elements[0].find_all("li")
                exclusion = list_elements[1].find_all("li")

        inclusion = ' '.join([t.text.strip() for t in inclusion])
        exclusion = ' '.join([t.text.strip() for t in exclusion])

        return (inclusion, exclusion)

    # function that gets the number of patients enrolled in a study
    def get_enrollment(self, NCTnumber):
        url = 'https://clinicaltrials.gov/ct2/show/' + NCTnumber
        ClinicalTrialpage = requests.get(url)
        soup = BeautifulSoup(ClinicalTrialpage.text, 'html.parser')
        enrollment = ''
        wrapping_enrol_class = soup.find_all('td', {'headers': 'studyInfoColData', 'style': "padding-left:1em"})
        if not wrapping_enrol_class:
            print('WARNING: Number of Participants in Study number ' + NCTnumber + ' is unavailable')
        else:
            enrollment = wrapping_enrol_class[1]
            enrollment = enrollment.text.split()[0]
            if enrollment.isdigit() == False:
                print('WARNING: Number of Participants in Study number ' + NCTnumber + ' is unavailable')
            else:
                return (enrollment)

    def __call__(self, condition, listed_attributes, listed_states, amount_of_data):

        self.driver.get('https://clinicaltrials.gov/ct2/results?cond=' + condition + '&rank=1&view=record#rowId0')
        self.select_attributes_to_show(listed_attributes, self.attribute_dict)

        try:
            self.select_by_status(listed_states, self.status_dict)
        except:
            print('select by status is a problem')
        n = []
        for i in listed_attributes:
            n.append(self.attribute_dict[i.lower()])
        attribute_ordered = [list(self.attribute_dict.keys())[list(self.attribute_dict.values()).index(i)] for i in sorted(n)]

        search_data = self.collect_data_search_page(attribute_ordered, amount_of_data=amount_of_data)
        nct_numbers = [e[search_data[0].index('nct number')] for e in search_data[1:]]
        search_data[0].extend(['inclusion', 'exclusion', 'enrollment'])
        for index, nct in enumerate(nct_numbers):
            if index % 100 == 0 and index != 0:
                print("Collected Data from {} Studies: ".format(index))

            inc, exc = self.get_criteria(nct)
            enrol = self.get_enrollment(nct)
            search_data[index + 1].extend([inc, exc, enrol])
        return search_data
        # except:
        #     print('no data available with the specified status')
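A rough sketch of driving the scraper end to end. It needs a local Chrome installation, and it targets the legacy ct2 pages, which ClinicalTrials.gov has since retired, so treat this as illustrative rather than something expected to run today:

scraper = ScrapeThatData()
rows = scraper(condition='glioblastoma',
               listed_attributes=['nct number', 'status', 'conditions'],
               listed_states=['recruiting'],
               amount_of_data=50)
print(rows[0])  # header row, extended with inclusion / exclusion / enrollment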
PATHtoOriginaltool.ipynb
ADDED
@@ -0,0 +1,208 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['/Users/satoc/miniforge3/envs/gradio/lib/python312.zip',\n",
       " '/Users/satoc/miniforge3/envs/gradio/lib/python3.12',\n",
       " '/Users/satoc/miniforge3/envs/gradio/lib/python3.12/lib-dynload',\n",
       " '',\n",
       " '/Users/satoc/.local/lib/python3.12/site-packages',\n",
       " '/Users/satoc/miniforge3/envs/gradio/lib/python3.12/site-packages',\n",
       " '/Users/satoc/Dropbox/programing/python/gradio']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "paths = sys.path\n",
    "paths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/satoc/Dropbox/programing/python/gradio'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "paths[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/satoc/Dropbox/programing/python/gradio/original_tools.pth'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fileName = paths[-1] + '/' + 'original_tools.pth'\n",
    "fileName"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "name = ! pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "name = os.getcwd()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/satoc/Dropbox/programing/python/ClinicalTrialV2'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(fileName, mode='w') as f:\n",
    "    f.write(name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['/Users/satoc/miniforge3/envs/gradio/lib/python312.zip', '/Users/satoc/miniforge3/envs/gradio/lib/python3.12', '/Users/satoc/miniforge3/envs/gradio/lib/python3.12/lib-dynload', '', '/Users/satoc/.local/lib/python3.12/site-packages', '/Users/satoc/miniforge3/envs/gradio/lib/python3.12/site-packages', '/Users/satoc/Dropbox/programing/python/gradio']\n",
      "/Users/satoc/Dropbox/programing/python/gradio/original_tools.pth\n",
      "/Users/satoc/Dropbox/programing/python/ClinicalTrialV2\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "paths = sys.path\n",
    "print(paths)\n",
    "fileName = paths[-1] + '/' + 'original_tools.pth'\n",
    "print(fileName)\n",
    "cwd = os.getcwd()\n",
    "print(cwd)\n",
    "with open(fileName, mode='w') as f:\n",
    "    f.write(cwd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "cwd = os.getcwd()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pytrials.client import ClinicalTrials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
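The notebook above works because CPython's site module reads every .pth file in site-packages at interpreter startup and appends each listed directory to sys.path, which is what makes the OpenAITools package importable from any notebook in that environment. A minimal sketch of the same idea outside a notebook ('/path/to/project' is a placeholder):

import os
import site

# A .pth file in site-packages adds the directory it lists to sys.path
# on the next interpreter start; '/path/to/project' is a placeholder.
pth_file = os.path.join(site.getsitepackages()[0], 'original_tools.pth')
with open(pth_file, 'w') as f:
    f.write('/path/to/project')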
app.py
ADDED
@@ -0,0 +1,100 @@
import gradio as gr
import pandas as pd
from OpenAITools.FetchTools import fetch_clinical_trials, fetch_clinical_trials_jp
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from OpenAITools.CrinicalTrialTools import QuestionModifierEnglish, TumorNameExtractor, SimpleClinicalTrialAgent, GraderAgent

# Initialize the model and agents
groq = ChatGroq(model_name="llama3-70b-8192", temperature=0)
modifier = QuestionModifierEnglish(groq)
extractor = TumorNameExtractor(groq)
CriteriaCheckAgent = SimpleClinicalTrialAgent(groq)
grader_agent = GraderAgent(groq)

# Function that builds the DataFrame
def generate_dataframe_from_question(ex_question):
    # Modify and extract tumor name
    modified_question = modifier.modify_question(ex_question)
    tumor_name = extractor.extract_tumor_name(ex_question)

    # Get clinical trials data based on tumor name
    df = fetch_clinical_trials(tumor_name)
    df['AgentJudgment'] = None
    df['AgentGrade'] = None

    # Build the list of NCTIDs and show a progress bar
    NCTIDs = list(df['NCTID'])
    progress = gr.Progress(track_tqdm=True)

    for i, nct_id in enumerate(NCTIDs):
        target_criteria = df.loc[df['NCTID'] == nct_id, 'Eligibility Criteria'].values[0]
        agent_judgment = CriteriaCheckAgent.evaluate_eligibility(target_criteria, modified_question)
        agent_grade = grader_agent.evaluate_eligibility(agent_judgment)

        # Update DataFrame
        df.loc[df['NCTID'] == nct_id, 'AgentJudgment'] = agent_judgment
        df.loc[df['NCTID'] == nct_id, 'AgentGrade'] = agent_grade

        # Update the progress bar (pass the progress as a float)
        progress((i + 1) / len(NCTIDs))

    # Reorder the columns
    columns_order = ['NCTID', 'AgentGrade', 'Title', 'AgentJudgment', 'Japanes Locations',
                     'Primary Completion Date', 'Cancer', 'Summary', 'Eligibility Criteria']
    df = df[columns_order]

    return df, df  # return the DataFrame twice: once for filtering, once for display

# Select only the rows whose AgentGrade has a given value (yes, no, unclear)
def filter_rows_by_grade(original_df, grade):
    df_filtered = original_df[original_df['AgentGrade'] == grade]
    return df_filtered, df_filtered  # return the filtered result twice

# Save as CSV and serve it for download
def download_filtered_csv(df):
    file_path = "filtered_data.csv"  # saved in the current working directory
    df.to_csv(file_path, index=False)  # write as a CSV file
    return file_path

# Build the Gradio interface
with gr.Blocks() as demo:
    # Description
    gr.Markdown("## 質問を入力して、患者さんが参加可能な臨床治験の情報を収集。参加可能か否かを判断根拠も含めて提示します。結果はcsvとしてダウンロード可能です")

    # Question input box
    question_input = gr.Textbox(label="質問を入力してください", placeholder="例: 65歳男性でBRCA遺伝子の変異がある前立腺癌患者さんが参加できる臨床治験を教えて下さい。")

    # DataFrame display area
    dataframe_output = gr.DataFrame()

    # State holding the source DataFrame
    original_df = gr.State()
    filtered_df = gr.State()

    # Button that builds the DataFrame
    generate_button = gr.Button("日本で行われている患者さんの癌腫の臨床治験を全て取得する")

    # Buttons that show only rows whose AgentGrade is yes, no, or unclear
    yes_button = gr.Button("AI Agentが患者さんが参加可能であると判断した臨床治験のみを表示")
    no_button = gr.Button("AI Agentが患者さんが参加不可であると判断した臨床治験のみを表示")
    unclear_button = gr.Button("AI Agentが与えられた情報だけでは判断不可能とした臨床治験のみを表示")

    # Button that downloads the filtered result
    download_button = gr.Button("フィルタ結果をCSVとしてダウンロード")
    download_output = gr.File()  # output area for the download

    # Build the DataFrame and store it
    generate_button.click(fn=generate_dataframe_from_question, inputs=question_input, outputs=[dataframe_output, original_df])

    # Show the filtered data when the yes / no / unclear button is pressed
    yes_button.click(fn=filter_rows_by_grade, inputs=[original_df, gr.State("yes")], outputs=[dataframe_output, filtered_df])
    no_button.click(fn=filter_rows_by_grade, inputs=[original_df, gr.State("no")], outputs=[dataframe_output, filtered_df])
    unclear_button.click(fn=filter_rows_by_grade, inputs=[original_df, gr.State("unclear")], outputs=[dataframe_output, filtered_df])

    # Download the filtered result as CSV when the download button is pressed
    download_button.click(fn=download_filtered_csv, inputs=filtered_df, outputs=download_output)


if __name__ == "__main__":
    demo.launch()
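A quick offline illustration of filter_rows_by_grade from the app above, using a toy DataFrame in place of live API results:

import pandas as pd  # already imported in app.py; repeated here so the snippet stands alone

toy = pd.DataFrame({'NCTID': ['NCT1', 'NCT2'], 'AgentGrade': ['yes', 'no']})
shown, stored = filter_rows_by_grade(toy, 'yes')
print(shown)  # only the NCT1 row remains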
dev/ClinicalTrialApp.ipynb
ADDED
@@ -0,0 +1,169 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* Running on local URL: http://127.0.0.1:7862\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import gradio as gr\n",
    "import pandas as pd\n",
    "from OpenAITools.FetchTools import fetch_clinical_trials, fetch_clinical_trials_jp\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain_groq import ChatGroq\n",
    "from OpenAITools.CrinicalTrialTools import QuestionModifierEnglish, TumorNameExtractor, SimpleClinicalTrialAgent, GraderAgent\n",
    "\n",
    "# Initialize the model and agents\n",
    "groq = ChatGroq(model_name=\"llama3-70b-8192\", temperature=0)\n",
    "modifier = QuestionModifierEnglish(groq)\n",
    "extractor = TumorNameExtractor(groq)\n",
    "CriteriaCheckAgent = SimpleClinicalTrialAgent(groq)\n",
    "grader_agent = GraderAgent(groq)\n",
    "\n",
    "# Function that builds the DataFrame\n",
    "def generate_dataframe_from_question(ex_question):\n",
    "    # Modify and extract tumor name\n",
    "    modified_question = modifier.modify_question(ex_question)\n",
    "    tumor_name = extractor.extract_tumor_name(ex_question)\n",
    "\n",
    "    # Get clinical trials data based on tumor name\n",
    "    df = fetch_clinical_trials(tumor_name)\n",
    "    df['AgentJudgment'] = None\n",
    "    df['AgentGrade'] = None\n",
    "\n",
    "    # Build the list of NCTIDs and show a progress bar\n",
    "    NCTIDs = list(df['NCTID'])\n",
    "    progress = gr.Progress(track_tqdm=True)\n",
    "\n",
    "    for i, nct_id in enumerate(NCTIDs):\n",
    "        target_criteria = df.loc[df['NCTID'] == nct_id, 'Eligibility Criteria'].values[0]\n",
    "        agent_judgment = CriteriaCheckAgent.evaluate_eligibility(target_criteria, modified_question)\n",
    "        agent_grade = grader_agent.evaluate_eligibility(agent_judgment)\n",
    "\n",
    "        # Update DataFrame\n",
    "        df.loc[df['NCTID'] == nct_id, 'AgentJudgment'] = agent_judgment\n",
    "        df.loc[df['NCTID'] == nct_id, 'AgentGrade'] = agent_grade\n",
    "\n",
    "        # Update the progress bar (pass the progress as a float)\n",
    "        progress((i + 1) / len(NCTIDs))\n",
    "\n",
    "    # Reorder the columns\n",
    "    columns_order = ['NCTID', 'AgentGrade', 'Title', 'AgentJudgment', 'Japanes Locations', \n",
    "                     'Primary Completion Date', 'Cancer', 'Summary', 'Eligibility Criteria']\n",
    "    df = df[columns_order]\n",
    "\n",
    "    return df, df  # return the DataFrame twice: once for filtering, once for display\n",
    "\n",
    "# Select only the rows whose AgentGrade has a given value (yes, no, unclear)\n",
    "def filter_rows_by_grade(original_df, grade):\n",
    "    df_filtered = original_df[original_df['AgentGrade'] == grade]\n",
    "    return df_filtered, df_filtered  # return the filtered result twice\n",
    "\n",
    "# Save as CSV and serve it for download\n",
    "def download_filtered_csv(df):\n",
    "    file_path = \"filtered_data.csv\"  # saved in the current working directory\n",
    "    df.to_csv(file_path, index=False)  # write as a CSV file\n",
    "    return file_path\n",
    "\n",
    "# Build the Gradio interface\n",
    "with gr.Blocks() as demo:\n",
    "    # Description\n",
    "    gr.Markdown(\"## 質問を入力して、患者さんが参加可能な臨床治験の情報を収集。参加可能か否かを判断根拠も含めて提示します。結果はcsvとしてダウンロード可能です\")\n",
    "\n",
    "    # Question input box\n",
    "    question_input = gr.Textbox(label=\"質問を入力してください\", placeholder=\"例: 65歳男性でBRCA遺伝子の変異がある前立腺癌患者さんが参加できる臨床治験を教えて下さい。\")\n",
    "\n",
    "    # DataFrame display area\n",
    "    dataframe_output = gr.DataFrame()\n",
    "\n",
    "    # State holding the source DataFrame\n",
    "    original_df = gr.State()\n",
    "    filtered_df = gr.State()\n",
    "\n",
    "    # Button that builds the DataFrame\n",
    "    generate_button = gr.Button(\"日本で行われている患者さんの癌腫の臨床治験を全て取得する\")\n",
    "\n",
    "    # Buttons that show only rows whose AgentGrade is yes, no, or unclear\n",
    "    yes_button = gr.Button(\"AI Agentが患者さんが参加可能であると判断した臨床治験のみを表示\")\n",
    "    no_button = gr.Button(\"AI Agentが患者さんが参加不可であると判断した臨床治験のみを表示\")\n",
    "    unclear_button = gr.Button(\"AI Agentが与えられた情報だけでは判断不可能とした臨床治験のみを表示\")\n",
    "\n",
    "    # Button that downloads the filtered result\n",
    "    download_button = gr.Button(\"フィルタ結果をCSVとしてダウンロード\")\n",
    "    download_output = gr.File()  # output area for the download\n",
    "\n",
    "    # Build the DataFrame and store it\n",
    "    generate_button.click(fn=generate_dataframe_from_question, inputs=question_input, outputs=[dataframe_output, original_df])\n",
    "\n",
    "    # Show the filtered data when the yes / no / unclear button is pressed\n",
    "    yes_button.click(fn=filter_rows_by_grade, inputs=[original_df, gr.State(\"yes\")], outputs=[dataframe_output, filtered_df])\n",
    "    no_button.click(fn=filter_rows_by_grade, inputs=[original_df, gr.State(\"no\")], outputs=[dataframe_output, filtered_df])\n",
    "    unclear_button.click(fn=filter_rows_by_grade, inputs=[original_df, gr.State(\"unclear\")], outputs=[dataframe_output, filtered_df])\n",
    "\n",
    "    # Download the filtered result as CSV when the download button is pressed\n",
    "    download_button.click(fn=download_filtered_csv, inputs=filtered_df, outputs=download_output)\n",
    "\n",
    "# Launch the Gradio interface\n",
    "demo.launch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "gradio",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
dev/filtered_data.csv
ADDED
@@ -0,0 +1,59 @@
NCTID,AgentGrade,Title,AgentJudgment,Japanes Locations,Primary Completion Date,Cancer,Summary,Eligibility Criteria
NCT03423628,yes,A Study to Assess the Safety and Tolerability of AZD1390 Given With Radiation Therapy in Patients With Brain Cancer,"Based on the provided criteria, the 30-year-old male patient with glioblastoma is eligible for this clinical trial, specifically for Arm A or Arm C, as he meets the inclusion criteria for histologically proven diagnosis of GBM and Karnofsky Performance Score of ≥60.","ku, shi",2026-04-22,"Recurrent Glioblastoma Multiforme, Primary Glioblastoma Multiforme, Brain Neoplasms, Malignant, Leptomeningeal Disease (LMD)","This study will test an investigational drug called AZD1390 in combination with radiation therapy for the treatment of brain tumors. This is the first time AZD1390 is being given to patients. This study will test safety, tolerability and PK (how the drug is absorbed, distributed and eliminated) of ascending doses of AZD1390 in combination with distinct regimens of radiation therapy","Inclusion Criteria:

* Provision of formalin-fixed paraffin embedded tissue sample from primary or metastatic disease
* Karnofsky Performance Score of ≥60.
* Additional Inclusion Criteria Specific for Arm A and Japan:

* Histologically proven diagnosis of GBM. Patients who have had RT for low-grade glioma (LGG) or grade 3 glioma and have subsequently relapsed to histologically confirmed GBM can be considered
* A radiological diagnosis of recurrent/relapsed or progressive disease according to RANO criteria.
* Completion of first-line radiation at least 6 months prior to Cycle 1 Day 1.
* Patients with tumor-induced seizures must be well controlled on a stable anti-epileptic treatment
* Willing to receive anti-epileptic prophylaxis for the duration of study drug administration.
* Additional Inclusion Criteria Specific for Arm B:

\*\*Arm B has now closed to recruitment\*\*

* Histologically proven diagnosis of solid tumor malignancy and Magnetic Resonance (MR) imaging documenting brain lesions.
* Not eligible for Stereotactic Radiosurgery (SRS) treatment of brain tumor.
* Patient has not received any previous brain RT to the area that is to be irradiated. Prior PBRT may be allowed if there is not significant overlap between the prior and new radiation fields.
* Non-CNS malignant disease must be sufficiently controlled so that patients can be without additional systemic therapy for the required washout period before starting therapy until 5 days after the end of RT. Required washout period before starting the first dose of AZD1390 (Cycle 1) is 28 days for immune checkpoint inhibitors and 7 days for all other agents
* Not received radiation to the lung fields within the past 8 weeks.
* No history of seizures related to the brain metastases or LMD.
* Receiving PBRT (rather than WBRT) during Cycle 1 as standard of care for brain metastases

• Additional Inclusion Criteria Specific for Arm C:
* Histologically proven primary diagnosis of GBM with unmethylated O6-methylguanine-DNA methyltransferase (MGMT). Grade 4 astrocytoma or histology with molecular features of GBM can be considered.
* Determination of MGMT promoter status by methylation-specific polymerase chain reaction (PCR) or pyrosequencing per local institutional guidelines is required to assess eligibility for this Arm.
* Patients will have to undergo mutational testing for Isocitrate dehydrogenase 1 (IDH1) on a tumor specimen before entering study. Patients are eligible for Arm C regardless of their IDH1 mutational status.
* No history of uncontrolled seizures after surgery for primary GBM (despite adequate antiepileptic therapy) or with need for concurrent administration of more than 2 antiepileptic drugs.
* Willing to receive anti-epileptic prophylaxis for the duration of study drug administration

Additional Inclusion criteria for Food Effect Assessment (Arm A) (Not applicable for Japan part):

* For the fed assessment portion: fast overnight (for at least 10 hours) prior to consuming a high-fat meal consisting of approximately 800 to 1000 calories, with around 54% of the calories coming from fat.
* For the fasted assessment portion: fast overnight (for at least 10 hours prior to dosing) and until 4 hours after dosing.

\*Note: the optional food effect assessment is currently not open to enrolment\*

Exclusion Criteria:

* Administration of chemotherapy or any investigational drug in the 28 days or carmustine (CCNU) or lomustine (BCNU) in the 6 weeks prior to receiving the first dose of treatment in Arms A and C. Administration of checkpoint inhibitors within 28 days prior to first dose of treatment and any other agent within 7 days of beginning study treatment in Arm B. Hormonal therapies are allowed during study treatment for patients in Arm B.
* History of severe brain-injury or stroke.
* Patient not eligible for sequential MRI evaluations are not eligible for this study.
* History of epileptic disorder or any seizure history unrelated to tumor
* Treatment with Strong inhibitors or inducers of CYP3A4 within 2 weeks prior to receiving study drug
* Concurrent therapy with other seizurogenic medications.
* Past medical history of interstitial lung disease (ILD), drug-induced ILD, radiation pneumonitis which required steroid treatment, or any evidence of clinically active ILD.
* Concurrent severe and/or uncontrolled medical condition (e.g., severe COPD).
* Prior treatment with pneumotoxic drugs, e.g. busulfan, bleomycin, within the past year. If prior therapy in lifetime, then excluded if history of pulmonary toxicities from administration. Patients who have received treatment with nitrosoureas (e.g., carmustine, lomustine) in the year before study entry without experiencing lung toxicity are allowed on study.
* History or presence of myopathy or raised creatine kinase (CK) \>5 x upper limit of normal (ULN) on 2 occasions at screening.
* Cardiac dysfunction defined as: Myocardial infarction within six months of study entry, NYHA (New York Heart Association) Class II/III/IV heart failure, unstable angina, unstable cardiac arrhythmias
* Evidence of severe pulmonary infections, as judged by the investigator (For Japan part only this includes active infection including tuberculosis, chronic active or uncontrolled Hep B or Hep C)
* With the exception of alopecia, any unresolved toxicities from prior therapy greater than National Cancer Institute Common Terminology Criteria for Adverse Events (NCI CTCAE 4.03) Grade 1 at the time of starting study treatment and patients with chronic Grade 2 unresolved toxicities may be eligible Additional exclusion criteria for Arm A and Japan Part
* Has previously received ATM inhibitor with concurrent RT

Additional Exclusion criteria for Food Effect Assessment (Arm A) (Not applicable for the Japan Part):

* Diabetes Type I, Type II, or steroid-induced diabetes.
* Undergoing systemic steroid treatment \*Note: the optional food effect assessment is currently not open to enrolment\*"
environment.yml
ADDED
@@ -0,0 +1,487 @@
name: gradio
channels:
  - conda-forge
dependencies:
  - aiofiles=23.2.1
  - altair=5.3.0
  - annotated-types=0.7.0
  - anyio=4.3.0
  - aom=3.9.0
  - appnope=0.1.4
  - argon2-cffi=23.1.0
  - argon2-cffi-bindings=21.2.0
  - arrow=1.3.0
  - asttokens=2.4.1
  - async-lru=2.0.4
  - attrs=23.2.0
  - babel=2.14.0
  - beautifulsoup4=4.12.3
  - bleach=6.1.0
  - blinker=1.8.2
  - brotli=1.1.0
  - brotli-bin=1.1.0
  - brotli-python=1.1.0
  - bzip2=1.0.8
  - ca-certificates=2024.8.30
  - cached-property=1.5.2
  - cached_property=1.5.2
  - cairo=1.18.0
  - certifi=2024.8.30
  - cffi=1.16.0
  - charset-normalizer=3.3.2
  - click=8.1.7
  - colorama=0.4.6
  - comm=0.2.2
  - contourpy=1.2.1
  - cycler=0.12.1
  - dav1d=1.2.1
  - debugpy=1.8.1
  - decorator=5.1.1
  - defusedxml=0.7.1
  - dnspython=2.6.1
  - email-validator=2.1.1
  - email_validator=2.1.1
  - entrypoints=0.4
  - exceptiongroup=1.2.0
  - executing=2.0.1
  - expat=2.6.2
  - fastapi=0.111.0
  - fastapi-cli=0.0.4
  - ffmpeg=7.0.1
  - ffmpy=0.3.0
  - filelock=3.14.0
  - flask=3.0.3
  - font-ttf-dejavu-sans-mono=2.37
  - font-ttf-inconsolata=3.000
  - font-ttf-source-code-pro=2.038
  - font-ttf-ubuntu=0.83
  - fontconfig=2.14.2
  - fonts-conda-ecosystem=1
  - fonts-conda-forge=1
  - fonttools=4.53.0
  - fqdn=1.5.1
  - freetype=2.12.1
  - fribidi=1.0.10
  - fsspec=2024.6.0
  - gettext=0.22.5
  - gettext-tools=0.22.5
  - gmp=6.3.0
  - gnutls=3.7.9
  - gradio=4.33.0
  - gradio-client=0.17.0
  - graphite2=1.3.13
  - h11=0.14.0
  - h2=4.1.0
  - harfbuzz=8.5.0
  - hpack=4.0.0
  - httpcore=1.0.5
  - httpx=0.27.0
  - huggingface_hub=0.23.2
  - hyperframe=6.0.1
  - icu=73.2
  - idna=3.7
  - importlib-metadata=7.1.0
  - importlib-resources=6.4.0
  - importlib_metadata=7.1.0
  - importlib_resources=6.4.0
  - ipykernel=6.29.3
  - ipython=8.25.0
  - ipywidgets=8.1.3
  - isoduration=20.11.0
  - itsdangerous=2.2.0
  - jedi=0.19.1
  - jinja2=3.1.4
  - joblib=1.4.2
  - json5=0.9.25
  - jsonpointer=2.4
  - jsonschema=4.22.0
  - jsonschema-specifications=2023.12.1
  - jsonschema-with-format-nongpl=4.22.0
  - jupyter=1.1.1
  - jupyter-lsp=2.2.5
  - jupyter_client=8.6.2
  - jupyter_console=6.6.3
  - jupyter_core=5.7.2
  - jupyter_events=0.10.0
  - jupyter_server=2.14.1
  - jupyter_server_terminals=0.5.3
  - jupyterlab=4.2.5
  - jupyterlab_pygments=0.3.0
  - jupyterlab_server=2.27.2
  - jupyterlab_widgets=3.0.11
  - kiwisolver=1.4.5
  - krb5=1.21.2
  - lame=3.100
  - lcms2=2.16
  - lerc=4.0.0
  - libabseil=20240116.2
  - libasprintf=0.22.5
  - libasprintf-devel=0.22.5
  - libass=0.17.1
  - libblas=3.9.0
  - libbrotlicommon=1.1.0
  - libbrotlidec=1.1.0
  - libbrotlienc=1.1.0
  - libcblas=3.9.0
  - libcxx=17.0.6
  - libdeflate=1.20
  - libedit=3.1.20191231
  - libexpat=2.6.2
  - libffi=3.4.2
  - libgettextpo=0.22.5
  - libgettextpo-devel=0.22.5
  - libgfortran=5.0.0
  - libgfortran5=13.2.0
  - libglib=2.80.2
  - libhwloc=2.10.0
  - libiconv=1.17
  - libidn2=2.3.7
  - libintl=0.22.5
  - libintl-devel=0.22.5
  - libjpeg-turbo=3.0.0
  - liblapack=3.9.0
  - libopenblas=0.3.27
  - libopenvino=2024.1.0
  - libopenvino-arm-cpu-plugin=2024.1.0
  - libopenvino-auto-batch-plugin=2024.1.0
  - libopenvino-auto-plugin=2024.1.0
  - libopenvino-hetero-plugin=2024.1.0
  - libopenvino-ir-frontend=2024.1.0
  - libopenvino-onnx-frontend=2024.1.0
  - libopenvino-paddle-frontend=2024.1.0
  - libopenvino-pytorch-frontend=2024.1.0
  - libopenvino-tensorflow-frontend=2024.1.0
  - libopenvino-tensorflow-lite-frontend=2024.1.0
  - libopus=1.3.1
  - libpng=1.6.43
  - libprotobuf=4.25.3
  - libsodium=1.0.18
  - libsqlite=3.45.3
  - libtasn1=4.19.0
  - libtiff=4.6.0
  - libunistring=0.9.10
  - libvpx=1.14.0
  - libwebp-base=1.4.0
  - libxcb=1.15
  - libxml2=2.12.7
  - libzlib=1.3.1
  - llvm-openmp=18.1.6
  - markdown-it-py=3.0.0
  - markupsafe=2.1.5
  - matplotlib=3.8.4
  - matplotlib-base=3.8.4
  - matplotlib-inline=0.1.7
  - mdurl=0.1.2
  - mistune=3.0.2
  - munkres=1.1.4
  - natsort=8.4.0
  - nbclient=0.10.0
  - nbconvert=7.16.4
  - nbconvert-core=7.16.4
  - nbconvert-pandoc=7.16.4
  - nbformat=5.10.4
  - ncurses=6.5
  - nest-asyncio=1.6.0
  - nettle=3.9.1
  - notebook=7.2.0
  - notebook-shim=0.2.4
  - numpy=1.26.4
  - openh264=2.4.1
  - openjpeg=2.5.2
  - openssl=3.3.2
  - orjson=3.10.3
  - overrides=7.7.0
  - p11-kit=0.24.1
  - pandoc=3.2
  - pandocfilters=1.5.0
  - parso=0.8.4
  - patsy=0.5.6
  - pcre2=10.43
  - pexpect=4.9.0
  - pickleshare=0.7.5
  - pillow=10.3.0
  - pip=24.0
  - pixman=0.43.4
  - pkgutil-resolve-name=1.3.10
  - platformdirs=4.2.2
  - prometheus_client=0.20.0
  - prompt-toolkit=3.0.42
  - prompt_toolkit=3.0.42
  - psutil=5.9.8
  - pthread-stubs=0.4
  - ptyprocess=0.7.0
  - pugixml=1.14
  - pure_eval=0.2.2
  - pycparser=2.22
  - pydub=0.25.1
  - pygments=2.18.0
  - pyobjc-core=10.2
  - pyobjc-framework-cocoa=10.2
  - pyparsing=3.1.2
  - pysocks=1.7.1
  - python=3.12.3
  - python-fastjsonschema=2.19.1
  - python-json-logger=2.0.7
  - python-multipart=0.0.9
  - python-tzdata=2024.1
  - python_abi=3.12
  - pytz=2024.1
  - pyyaml=6.0.1
  - pyzmq=26.0.3
  - qtconsole-base=5.5.2
  - qtpy=2.4.1
  - readline=8.2
  - referencing=0.35.1
  - requests=2.32.3
  - rfc3339-validator=0.1.4
  - rfc3986-validator=0.1.1
  - rich=13.7.1
  - rpds-py=0.18.1
  - ruff=0.4.7
  - scikit-learn=1.5.2
  - scipy=1.13.1
  - seaborn=0.13.2
  - seaborn-base=0.13.2
  - semantic_version=2.10.0
  - send2trash=1.8.3
  - setuptools=70.0.0
  - shellingham=1.5.4
  - six=1.16.0
  - snappy=1.2.0
  - sniffio=1.3.1
  - soupsieve=2.5
  - stack_data=0.6.2
  - starlette=0.37.2
  - statsmodels=0.14.2
  - svt-av1=2.1.0
  - tbb=2021.12.0
  - terminado=0.18.1
  - threadpoolctl=3.5.0
  - tinycss2=1.3.0
  - tk=8.6.13
  - tomli=2.0.1
  - tomlkit=0.12.0
  - toolz=0.12.1
  - tornado=6.4
  - tqdm=4.66.4
  - traitlets=5.14.3
  - typer=0.12.3
  - typer-slim=0.12.3
  - typer-slim-standard=0.12.3
  - types-python-dateutil=2.9.0.20240316
  - typing-extensions=4.12.1
  - typing_extensions=4.12.1
  - typing_utils=0.1.0
  - tzdata=2024a
  - ujson=5.10.0
  - uri-template=1.3.0
  - urllib3=2.2.1
  - uvicorn=0.30.1
  - wcwidth=0.2.13
  - webcolors=1.13
  - webencodings=0.5.1
  - websocket-client=1.8.0
  - websockets=11.0.3
  - werkzeug=3.0.3
  - wheel=0.43.0
  - widgetsnbextension=4.0.11
  - wikipedia=1.4.0
  - wtforms=3.1.2
  - x264=1!164.3095
  - x265=3.5
  - xorg-libxau=1.0.11
  - xorg-libxdmcp=1.1.3
  - xz=5.2.6
  - yaml=0.2.5
  - zeromq=4.3.5
  - zipp=3.17.0
  - zlib=1.3.1
  - zstd=1.5.6
  - pip:
      - aiohttp==3.9.5
      - aioitertools==0.12.0
      - aiosignal==1.3.1
      - aiosmtplib==2.0.2
      - aiosqlite==0.20.0
      - alembic==1.13.3
      - anthropic==0.34.2
      - arize-phoenix==5.1.2
      - arize-phoenix-evals==0.16.1
      - arize-phoenix-otel==0.5.1
      - asgiref==3.8.1
      - astor==0.8.1
      - authlib==1.3.2
      - azure-core==1.31.0
      - azure-identity==1.17.1
      - backoff==2.2.1
      - bcrypt==4.2.0
      - bio==1.7.1
      - biopython==1.83
      - biothings-client==0.3.1
      - boto3==1.35.18
      - botocore==1.35.18
      - build==1.2.2
      - cachetools==5.5.0
      - chroma-hnswlib==0.7.6
      - chromadb==0.5.11
      - cohere==5.9.1
      - coloredlogs==15.0.1
      - cryptography==43.0.1
      - dataclasses-json==0.6.6
      - deprecated==1.2.14
      - dirtyjson==1.0.8
      - diskcache==5.6.3
      - distro==1.9.0
      - duckdb==1.1.1
      - duckduckgo-search==6.2.13
      - durationpy==0.9
      - faker==19.13.0
      - fastapi-mail==1.4.1
      - fastavro==1.9.7
      - flatbuffers==24.3.25
      - frozenlist==1.4.1
      - google-auth==2.35.0
      - google-search-results==2.4.2
      - googleapis-common-protos==1.65.0
      - gprofiler-official==1.0.0
      - graphql-core==3.2.4
      - greenlet==3.0.3
      - groq==0.11.0
      - grpc-interceptor==0.15.4
      - grpcio==1.66.2
      - grpcio-tools==1.66.2
      - hdbscan==0.8.38.post1
      - httptools==0.6.1
      - httpx-sse==0.4.0
      - humanfriendly==10.0
      - jiter==0.5.0
      - jmespath==1.0.1
      - jsonpatch==1.33
      - jsonpath-python==1.0.6
      - kubernetes==31.0.0
      - langchain==0.3.1
      - langchain-anthropic==0.2.1
      - langchain-community==0.3.1
      - langchain-core==0.3.9
      - langchain-experimental==0.0.60
      - langchain-groq==0.2.0
      - langchain-openai==0.2.2
      - langchain-text-splitters==0.3.0
      - langgraph==0.2.34
      - langgraph-checkpoint==2.0.0
      - langsmith==0.1.130
      - llama-cloud==0.1.0
      - llama-cpp-python==0.2.77
      - llama-index==0.11.14
      - llama-index-agent-openai==0.3.4
      - llama-index-callbacks-arize-phoenix==0.2.1
      - llama-index-cli==0.3.1
      - llama-index-core==0.11.14
      - llama-index-embeddings-adapter==0.2.1
      - llama-index-embeddings-openai==0.2.5
      - llama-index-experimental==0.3.1
      - llama-index-finetuning==0.2.0
      - llama-index-indices-managed-llama-cloud==0.4.0
      - llama-index-legacy==0.9.48
      - llama-index-llms-azure-openai==0.2.1
      - llama-index-llms-groq==0.2.0
      - llama-index-llms-llama-cpp==0.1.3
      - llama-index-llms-mistralai==0.2.3
      - llama-index-llms-openai==0.2.9
      - llama-index-llms-openai-like==0.2.0
      - llama-index-llms-replicate==0.1.3
      - llama-index-multi-modal-llms-openai==0.2.1
      - llama-index-postprocessor-cohere-rerank==0.2.0
      - llama-index-program-openai==0.2.0
      - llama-index-question-gen-openai==0.2.0
      - llama-index-readers-file==0.2.2
      - llama-index-readers-llama-parse==0.3.0
      - llama-parse==0.5.6
      - llamaindex-py-client==0.1.19
      - llvmlite==0.43.0
      - mako==1.3.5
      - marshmallow==3.21.2
      - mistralai==1.0.3
      - mmh3==5.0.1
      - monotonic==1.6
      - mpmath==1.3.0
      - msal==1.31.0
      - msal-extensions==1.2.0
      - msgpack==1.1.0
      - multidict==6.0.5
      - mygene==3.2.2
      - mypy-extensions==1.0.0
      - networkx==3.3
      - nltk==3.9.1
      - numba==0.60.0
      - numexpr==2.10.1
      - oauthlib==3.2.2
      - onnxruntime==1.19.2
      - openai==1.51.0
      - openinference-instrumentation==0.1.18
      - openinference-instrumentation-llama-index==3.0.2
      - openinference-semantic-conventions==0.1.10
      - opentelemetry-api==1.27.0
      - opentelemetry-exporter-otlp==1.27.0
      - opentelemetry-exporter-otlp-proto-common==1.27.0
      - opentelemetry-exporter-otlp-proto-grpc==1.27.0
      - opentelemetry-exporter-otlp-proto-http==1.27.0
      - opentelemetry-instrumentation==0.48b0
      - opentelemetry-instrumentation-asgi==0.48b0
      - opentelemetry-instrumentation-fastapi==0.48b0
      - opentelemetry-proto==1.27.0
      - opentelemetry-sdk==1.27.0
      - opentelemetry-semantic-conventions==0.48b0
      - opentelemetry-util-http==0.48b0
      - packaging==23.2
      - pandas==1.5.3
      - pandasai==2.2.15
      - parameterized==0.9.0
      - pooch==1.8.1
      - portalocker==2.10.1
      - posthog==3.7.0
      - primp==0.6.3
      - protobuf==4.25.5
      - pyarrow==17.0.0
      - pyasn1==0.6.1
      - pyasn1-modules==0.4.1
      - pydantic==2.9.2
      - pydantic-core==2.23.4
      - pydantic-settings==2.5.2
      - pyjwt==2.9.0
      - pynndescent==0.5.13
      - pypdf==4.2.0
      - pypika==0.48.9
      - pyproject-hooks==1.2.0
      - python-dateutil==2.9.0.post0
      - python-dotenv==1.0.1
      - pytrials==1.0.0
      - qdrant-client==1.11.3
      - regex==2024.5.15
      - replicate==0.26.0
      - requests-oauthlib==2.0.0
      - requests-toolbelt==1.0.0
      - rsa==4.9
      - s3transfer==0.10.2
      - safetensors==0.4.3
      - sentence-transformers==2.7.0
      - sqlalchemy==2.0.30
      - sqlean-py==3.45.1
      - sqlglot==25.24.3
      - sqlglotrs==0.2.12
      - strawberry-graphql==0.236.0
      - striprtf==0.0.26
      - sympy==1.13.2
      - tenacity==8.3.0
      - tiktoken==0.7.0
      - tokenizers==0.19.1
      - torch==2.4.1
      - transformers==4.41.2
      - types-requests==2.32.0.20240907
      - typing-inspect==0.9.0
      - umap-learn==0.5.6
      - uvloop==0.20.0
      - watchfiles==0.24.0
      - wrapt==1.16.0
      - yarl==1.9.4
prefix: /Users/satoc/miniforge3/envs/gradio
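The spec above was exported from a local miniforge install, so the trailing prefix: line is machine-specific; conda ignores it when an environment name is supplied explicitly. Below is a minimal sketch of recreating the environment, assuming a conda installation is on PATH; the helper script itself is illustrative and not part of this repository.

# Minimal sketch (not part of the repo): rebuild the "gradio" environment
# from the YAML spec above. Assumes a conda install (e.g. miniforge) on PATH.
import subprocess

def create_env(spec: str = "environment.yml", name: str = "gradio") -> None:
    # "conda env create -f <spec> -n <name>" builds the environment from the
    # YAML file; passing -n overrides the exported machine-specific prefix.
    subprocess.run(["conda", "env", "create", "-f", spec, "-n", name], check=True)

if __name__ == "__main__":
    create_env()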
requirements.txt
ADDED
@@ -0,0 +1,300 @@
aiofiles @ file:///home/conda/feedstock_root/build_artifacts/aiofiles_1698945915105/work
aiohappyeyeballs==2.4.3
aiohttp==3.10.9
aiosignal==1.3.1
altair @ file:///home/conda/feedstock_root/build_artifacts/altair-split_1711824856061/work
annotated-types @ file:///home/conda/feedstock_root/build_artifacts/annotated-types_1716290248287/work
anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1708355285029/work
appnope @ file:///home/conda/feedstock_root/build_artifacts/appnope_1707233003401/work
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1692818318753/work
argon2-cffi-bindings @ file:///Users/runner/miniforge3/conda-bld/argon2-cffi-bindings_1725356639941/work
arrow @ file:///home/conda/feedstock_root/build_artifacts/arrow_1696128962909/work
asgiref==3.8.1
asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work
async-lru @ file:///home/conda/feedstock_root/build_artifacts/async-lru_1690563019058/work
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1704011227531/work
Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1702422572539/work
backoff==2.2.1
bcrypt==4.2.0
beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1705564648255/work
bio==1.7.1
biopython==1.84
biothings-client==0.3.1
bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1696630167146/work
blinker @ file:///home/conda/feedstock_root/build_artifacts/blinker_1715091184126/work
Brotli @ file:///Users/runner/miniforge3/conda-bld/brotli-split_1725267563793/work
build==1.2.2.post1
cached-property @ file:///home/conda/feedstock_root/build_artifacts/cached_property_1615209429212/work
cachetools==5.5.0
certifi @ file:///home/conda/feedstock_root/build_artifacts/certifi_1725278078093/work/certifi
cffi @ file:///Users/runner/miniforge3/conda-bld/cffi_1696001825047/work
chardet==3.0.4
charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1698833585322/work
chroma-hnswlib==0.7.6
chromadb==0.5.11
click @ file:///home/conda/feedstock_root/build_artifacts/click_1692311806742/work
colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
coloredlogs==15.0.1
comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1710320294760/work
contourpy @ file:///Users/runner/miniforge3/conda-bld/contourpy_1712429958595/work
cycler @ file:///home/conda/feedstock_root/build_artifacts/cycler_1696677705766/work
dataclasses-json==0.6.7
debugpy @ file:///Users/runner/miniforge3/conda-bld/debugpy_1707444662218/work
decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work
Deprecated==1.2.14
dirtyjson==1.0.8
diskcache==5.6.3
distro==1.9.0
dnspython @ file:///home/conda/feedstock_root/build_artifacts/dnspython_1709190728464/work
durationpy==0.9
email_validator @ file:///home/conda/feedstock_root/build_artifacts/email-validator-meta_1709024422129/work
entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work
exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1704921103267/work
executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work
fastapi==0.115.3
fastapi-cli @ file:///home/conda/feedstock_root/build_artifacts/fastapi-cli_1716205772095/work
fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1703780968325/work/dist
ffmpy @ file:///home/conda/feedstock_root/build_artifacts/ffmpy_1659474992694/work
filelock @ file:///home/conda/feedstock_root/build_artifacts/filelock_1714422806336/work
Flask @ file:///home/conda/feedstock_root/build_artifacts/flask_1712667726126/work
flatbuffers==24.3.25
fonttools @ file:///Users/runner/miniforge3/conda-bld/fonttools_1717209310468/work
fqdn @ file:///home/conda/feedstock_root/build_artifacts/fqdn_1638810296540/work/dist
frozenlist==1.4.1
fsspec @ file:///home/conda/feedstock_root/build_artifacts/fsspec_1717498510112/work
google-auth==2.35.0
googleapis-common-protos==1.65.0
googletrans==3.1.0a0
gprofiler-official==1.0.0
gradio==5.3.0
gradio_client==1.4.2
greenlet==3.1.1
groq==0.11.0
grpcio==1.66.2
h11==0.14.0
h2==3.2.0
hpack==3.0.0
hstspreload==2024.10.1
httpcore==1.0.6
httptools==0.6.1
httpx==0.27.2
huggingface-hub==0.26.1
humanfriendly==10.0
hyperframe==5.2.0
idna==2.10
importlib_metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1710971335535/work
importlib_resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1711040877059/work
ipykernel @ file:///Users/runner/miniforge3/conda-bld/ipykernel_1708996616394/work
ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1717182742060/work
ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1716897651763/work
isoduration @ file:///home/conda/feedstock_root/build_artifacts/isoduration_1638811571363/work/dist
itsdangerous @ file:///home/conda/feedstock_root/build_artifacts/itsdangerous_1713372668944/work
jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work
Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1715127149914/work
jiter==0.5.0
joblib @ file:///home/conda/feedstock_root/build_artifacts/joblib_1714665484399/work
json5 @ file:///home/conda/feedstock_root/build_artifacts/json5_1712986206667/work
jsonpatch==1.33
jsonpointer @ file:///Users/runner/miniforge3/conda-bld/jsonpointer_1695397393385/work
jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1714573116818/work
jsonschema-specifications @ file:///tmp/tmpkv1z7p57/src
jupyter @ file:///home/conda/feedstock_root/build_artifacts/jupyter_1725037521377/work
jupyter-console @ file:///home/conda/feedstock_root/build_artifacts/jupyter_console_1678118109161/work
jupyter-events @ file:///home/conda/feedstock_root/build_artifacts/jupyter_events_1710805637316/work
jupyter-lsp @ file:///home/conda/feedstock_root/build_artifacts/jupyter-lsp-meta_1712707420468/work/jupyter-lsp
jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1716472197302/work
jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1727163409502/work
jupyter_server @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_1720529946765/work
jupyter_server_terminals @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_terminals_1710262634903/work
jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1724745148804/work
jupyterlab_pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1707149102966/work
jupyterlab_server @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_server-split_1716433953404/work
jupyterlab_widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1716891641122/work
kiwisolver @ file:///Users/runner/miniforge3/conda-bld/kiwisolver_1724956774301/work
kubernetes==31.0.0
langchain==0.3.2
langchain-community==0.3.1
langchain-core==0.3.9
langchain-experimental==0.3.2
langchain-groq==0.2.0
langchain-openai==0.2.2
langchain-text-splitters==0.3.0
langchainhub==0.1.21
langgraph==0.2.34
langgraph-checkpoint==2.0.1
langsmith==0.1.131
llama-cloud==0.1.4
llama-index==0.11.20
llama-index-agent-openai==0.3.4
llama-index-cli==0.3.1
llama-index-core==0.11.20
llama-index-embeddings-openai==0.2.5
llama-index-indices-managed-llama-cloud==0.4.0
llama-index-legacy==0.9.48.post3
llama-index-llms-groq==0.2.0
llama-index-llms-llama-cpp==0.2.3
llama-index-llms-openai==0.2.16
llama-index-llms-openai-like==0.2.0
llama-index-multi-modal-llms-openai==0.2.3
llama-index-program-openai==0.2.0
llama-index-question-gen-openai==0.2.0
llama-index-readers-file==0.2.2
llama-index-readers-llama-parse==0.3.0
llama-parse==0.5.12
llama_cpp_python==0.2.90
llamaindex-py-client==0.1.19
markdown-it-py @ file:///home/conda/feedstock_root/build_artifacts/markdown-it-py_1686175045316/work
MarkupSafe @ file:///Users/runner/miniforge3/conda-bld/markupsafe_1724959568784/work
marshmallow==3.22.0
matplotlib @ file:///Users/runner/miniforge3/conda-bld/matplotlib-suite_1715976209092/work
matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1713250518406/work
mdurl @ file:///home/conda/feedstock_root/build_artifacts/mdurl_1704317613764/work
mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1698947099619/work
mmh3==5.0.1
monotonic==1.6
mpmath==1.3.0
msgpack==1.1.0
multidict==6.1.0
munkres==1.1.4
mygene==3.2.2
mypy-extensions==1.0.0
natsort @ file:///home/conda/feedstock_root/build_artifacts/natsort_1687263093793/work
nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1710317608672/work
nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert-meta_1718135430380/work
nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1712238998817/work
nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1705850609492/work
networkx==3.3
nltk==3.9.1
notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1715879655750/work
notebook_shim @ file:///home/conda/feedstock_root/build_artifacts/notebook-shim_1707957777232/work
numpy @ file:///Users/runner/miniforge3/conda-bld/numpy_1707225421156/work/dist/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl#sha256=011d57633d659db8280a5811d62d0c5a615719a6d7cbd46a04f6ffce0c2a7db3
oauthlib==3.2.2
onnxruntime==1.19.2
openai==1.51.0
opentelemetry-api==1.27.0
opentelemetry-exporter-otlp-proto-common==1.27.0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-instrumentation==0.48b0
opentelemetry-instrumentation-asgi==0.48b0
opentelemetry-instrumentation-fastapi==0.48b0
opentelemetry-proto==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-semantic-conventions==0.48b0
opentelemetry-util-http==0.48b0
orjson @ file:///Users/runner/miniforge3/conda-bld/orjson_1714769754085/work/target/wheels/orjson-3.10.3-cp312-cp312-macosx_11_0_arm64.whl#sha256=aa5de6049aaf224d3534a8f34f905ac75ce81c4ad36473b8ae76effaea5c40a2
overrides @ file:///home/conda/feedstock_root/build_artifacts/overrides_1706394519472/work
packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1718189413536/work
pandas @ file:///Users/runner/miniforge3/conda-bld/pandas_1726878422361/work
pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work
parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1712320355065/work
patsy @ file:///home/conda/feedstock_root/build_artifacts/patsy_1704469236901/work
pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1706113125309/work
pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
pillow @ file:///Users/runner/miniforge3/conda-bld/pillow_1712154527146/work
pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1694617248815/work
platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1715777629804/work
pooch==1.8.2
posthog==3.7.0
prometheus_client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1707932675456/work
prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1702399386289/work
protobuf==4.25.5
psutil @ file:///Users/runner/miniforge3/conda-bld/psutil_1705722460205/work
ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
pyasn1==0.6.1
pyasn1_modules==0.4.1
pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1711811537435/work
pydantic @ file:///home/conda/feedstock_root/build_artifacts/pydantic_1726601062926/work
pydantic-settings==2.5.2
pydantic_core @ file:///Users/runner/miniforge3/conda-bld/pydantic-core_1726525028433/work
pydub @ file:///home/conda/feedstock_root/build_artifacts/pydub_1615612442567/work
Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1714846767233/work
pyobjc-core @ file:///Users/runner/miniforge3/conda-bld/pyobjc-core_1710590594941/work
pyobjc-framework-Cocoa @ file:///Users/runner/miniforge3/conda-bld/pyobjc-framework-cocoa_1710597239745/work
pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1709721012883/work
pypdf==4.3.1
PyPika==0.48.9
pyproject_hooks==1.2.0
PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work
python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1709299778482/work
python-dotenv==1.0.1
python-json-logger @ file:///home/conda/feedstock_root/build_artifacts/python-json-logger_1677079630776/work
python-multipart @ file:///home/conda/feedstock_root/build_artifacts/python-multipart_1707760088566/work
pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1706886791323/work
PyYAML @ file:///Users/runner/miniforge3/conda-bld/pyyaml_1695373531920/work
pyzmq @ file:///Users/runner/miniforge3/conda-bld/pyzmq_1715024418681/work
qtconsole @ file:///home/conda/feedstock_root/build_artifacts/qtconsole-base_1714942934316/work
QtPy @ file:///home/conda/feedstock_root/build_artifacts/qtpy_1698112029416/work
referencing @ file:///home/conda/feedstock_root/build_artifacts/referencing_1714619483868/work
regex==2024.9.11
requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1717057054362/work
requests-oauthlib==2.0.0
requests-toolbelt==1.0.0
rfc3339-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3339-validator_1638811747357/work
rfc3986==1.5.0
rfc3986-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3986-validator_1598024191506/work
rich @ file:///home/conda/feedstock_root/build_artifacts/rich-split_1709150387247/work/dist
rpds-py @ file:///Users/runner/miniforge3/conda-bld/rpds-py_1715090025913/work
rsa==4.9
ruff @ file:///Users/runner/miniforge3/conda-bld/ruff_1717203589742/work
safetensors==0.4.5
scikit-learn @ file:///Users/runner/miniforge3/conda-bld/scikit-learn_1726082669650/work/dist/scikit_learn-1.5.2-cp312-cp312-macosx_11_0_arm64.whl#sha256=c8ea39773eafcbeeb602482bb255def5febff2e4642648568c8cac21cb5b3839
scipy @ file:///Users/runner/miniforge3/conda-bld/scipy-split_1716470295081/work/dist/scipy-1.13.1-cp312-cp312-macosx_11_0_arm64.whl#sha256=86ef5aef692e7c18a0cc387509e32862b283077a7d535281455caad95420fd22
seaborn @ file:///home/conda/feedstock_root/build_artifacts/seaborn-split_1714494649443/work
semantic-version @ file:///home/conda/feedstock_root/build_artifacts/semantic_version_1653579368137/work
Send2Trash @ file:///Users/runner/miniforge3/conda-bld/send2trash_1712585336584/work
sentence-transformers==3.1.1
setuptools==70.0.0
shellingham @ file:///home/conda/feedstock_root/build_artifacts/shellingham_1698144360966/work
six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1708952932303/work
soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1693929250441/work
SQLAlchemy==2.0.35
stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
starlette==0.41.0
statsmodels @ file:///Users/runner/miniforge3/conda-bld/statsmodels_1715941250846/work
striprtf==0.0.26
sympy==1.13.3
tavily-python==0.5.0
tenacity==8.5.0
terminado @ file:///Users/runner/miniforge3/conda-bld/terminado_1710263781917/work
threadpoolctl @ file:///home/conda/feedstock_root/build_artifacts/threadpoolctl_1714400101435/work
tiktoken==0.8.0
tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1713974937325/work
tokenizers==0.20.0
tomli @ file:///home/conda/feedstock_root/build_artifacts/tomli_1644342247877/work
tomlkit @ file:///home/conda/feedstock_root/build_artifacts/tomlkit_1690458286251/work
toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1706112571092/work
torch==2.4.1
tornado @ file:///Users/runner/miniforge3/conda-bld/tornado_1724956150763/work
tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1714854870413/work
traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1713535121073/work
transformers==4.45.1
typer==0.12.3
typer-slim==0.12.3
types-python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/types-python-dateutil_1710589910274/work
types-requests==2.32.0.20240914
typing-inspect==0.9.0
typing-utils @ file:///home/conda/feedstock_root/build_artifacts/typing_utils_1622899189314/work
typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1717287769032/work
tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1707747584337/work
ujson @ file:///Users/runner/miniforge3/conda-bld/ujson_1724954437531/work
uri-template @ file:///home/conda/feedstock_root/build_artifacts/uri-template_1688655812972/work/dist
urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1708239446578/work
uvicorn @ file:///Users/runner/miniforge3/conda-bld/uvicorn-split_1717405019600/work
uvloop==0.20.0
watchfiles==0.24.0
wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1704731205417/work
webcolors @ file:///home/conda/feedstock_root/build_artifacts/webcolors_1679900785843/work
webencodings @ file:///home/conda/feedstock_root/build_artifacts/webencodings_1694681268211/work
websocket-client @ file:///home/conda/feedstock_root/build_artifacts/websocket-client_1713923384721/work
websockets @ file:///Users/runner/miniforge3/conda-bld/websockets_1695410204039/work
Werkzeug @ file:///home/conda/feedstock_root/build_artifacts/werkzeug_1715000201436/work
wheel==0.43.0
widgetsnbextension @ file:///home/conda/feedstock_root/build_artifacts/widgetsnbextension_1716891659446/work
wikipedia==1.4.0
wrapt==1.16.0
WTForms @ file:///home/conda/feedstock_root/build_artifacts/wtforms_1708541191515/work
yarl==1.13.1
zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work
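Note that this file is a pip freeze taken inside the conda environment above: many entries are pinned via "name @ file:///..." direct references to conda-build artifacts that exist only on the exporting machine, so the file is not directly pip-installable elsewhere. A minimal sketch of installing just the portable, version-pinned entries; the helper is illustrative and not part of this repository:

# Minimal sketch (not part of the repo): install only the portable, pinned
# entries from requirements.txt. Lines with "@ file:///..." point at build
# artifacts on the exporting machine and are skipped, since those paths
# will not resolve on another system.
import subprocess
import sys

def install_portable(path: str = "requirements.txt") -> None:
    with open(path) as f:
        pinned = [ln.strip() for ln in f if ln.strip() and "@ file://" not in ln]
    # Install into the current interpreter's environment via pip.
    subprocess.run([sys.executable, "-m", "pip", "install", *pinned], check=True)

if __name__ == "__main__":
    install_portable()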