高橋慧 committed
Commit 5703564 · 1 Parent(s): cebc20f

Move chromedriver to Git LFS

.gitignore ADDED
@@ -0,0 +1,135 @@
# Add any directories, files, or patterns you don't want to be tracked by version control


# Byte-compiled / optimized / DLL files
__pycache__/
#*.py[cod]
#*$py.class
#*.txt
#*.tsv
#*.csv
*.xlsx
*.pdf
*.nii
#*.nii.gz
*.DS_Store
#*.png
#*.pyn
*.jpg
*.nii.gz
*.pkl
*-checkpoint.ipynb
*.pkls
*.pth
*.yaml
*.ckpt
# C extensions
#*.so

# Distribution / packaging
#.Python
#build/
#develop-eggs/
#dist/
#downloads/
#eggs/
#.eggs/
#lib/
#lib64/
#parts/
#sdist/
#var/
#wheels/
#*.egg-info/
#.installed.cfg
#*.egg
#MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
#*.manifest
#*.spec

# Installer logs
#pip-log.txt
#pip-delete-this-directory.txt

# Unit test / coverage reports
#htmlcov/
#.tox/
#.coverage
#.coverage.*
#.cache
#nosetests.xml
#coverage.xml
#*.cover
#.hypothesis/
#.pytest_cache/

# Translations
#*.mo
#*.pot

# Django stuff:
#*.log
#.static_storage/
#.media/
#local_settings.py

# Flask stuff:
#instance/
#.webassets-cache

# Scrapy stuff:
#.scrapy

# Sphinx documentation
#docs/_build/

# PyBuilder
#target/

# Jupyter Notebook
.ipynb_checkpoint/*

# pyenv
#.python-version

# celery beat schedule file
#celerybeat-schedule

# SageMath parsed files
#*.sage.py

# Environments
#.env
#.venv
#env/
#venv/
#ENV/
#env.bak/
#venv.bak/

# Spyder project settings
#.spyderproject
#.spyproject

# Rope project settings
#.ropeproject

# mkdocs documentation
#/site
/models/
# mypy
#.mypy_cache/
#over 100MB

# Add any directories, files, or patterns you don't want to be tracked by version control


#deep settings
*.h5

.OpenAITools/chromedriver
/OpenAITools/chromedriver
OpenAITools/ECarteTools.py ADDED
@@ -0,0 +1,73 @@
import openai
import time
import wikipedia
import random
import re
import requests
from bs4 import BeautifulSoup
import os
import glob
from natsort import natsorted
import xml.etree.ElementTree as ET
import pandas as pd
from pytrials.client import ClinicalTrials  # used by get_selected_fileds below

wikipedia.set_lang("ja")
# Set the API key
openai.api_key = os.environ['OPENAI_API_KEY']
engine="gpt-3.5-turbo"


def generate(system_template,prompt,engine="gpt-3.5-turbo"):
    while True: # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": system_template},
                    {"role": "user", "content":prompt},
                ]
            )
            result=response["choices"][0]["message"]["content"]
            return result
        except Exception:
            print("リトライ")  # retry
            time.sleep(30)

def generate_carte(prompt,engine="gpt-3.5-turbo"):
    while True: # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are useful assistant"},
                    {"role": "user", "content":"%s\n・・・という患者と医師の会話をSOAP形式のカルテとして日本語で端的にまとめて下さい。各セクションはS),O), A),P)として下さい "%prompt},
                ]
            )
            result=response["choices"][0]["message"]["content"]
            return result
        except Exception:
            print("リトライ")  # retry
            time.sleep(30)

def get_selected_fileds(texts):
    ct = ClinicalTrials()  # the client object was referenced but never created in the original
    input_name = texts.replace(' ' , "+")
    corona_fields = ct.get_study_fields(
        search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)"%(input_name),
        fields=["NCTId", "Condition", "BriefTitle",'BriefSummary','EligibilityCriteria'],
        max_studies=500,
        fmt="csv")
    return corona_fields

def get_retriever_str(fields):
    retriever_str=''
    for i in range(1,len(fields)):
        colnames = fields[0]
        targetCol = fields[i]
        for f in range(len(fields[0])):
            retriever_str+=colnames[f] + ":" + targetCol[f] +"\n"
        retriever_str+='\n'
    return retriever_str
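A minimal usage sketch for ECarteTools, assuming OPENAI_API_KEY is set and an openai release that still exposes the legacy openai.ChatCompletion interface used above; the sample dialogue is invented for illustration.

# Hypothetical example: turn an invented doctor-patient exchange into a SOAP-style note.
from OpenAITools.ECarteTools import generate, generate_carte

dialogue = "医師: 今日はどうされましたか。 患者: 3日前から咳と微熱が続いています。"
print(generate_carte(dialogue))  # Japanese SOAP-format summary
print(generate("You are a helpful assistant.", "Summarize this conversation in one English sentence: " + dialogue))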
OpenAITools/ExpertTools.py ADDED
@@ -0,0 +1,243 @@
import os
import openai
import time
import wikipedia
import random
import re
import requests
from bs4 import BeautifulSoup
import glob
from natsort import natsorted
import xml.etree.ElementTree as ET
from pytrials.client import ClinicalTrials
from Bio import Entrez
import pandas as pd
import numpy as np
#from langchain.agents import create_pandas_dataframe_agent
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain.llms import OpenAI

# Set the API key
openai.api_key = os.environ['OPENAI_API_KEY']
gptengine="gpt-3.5-turbo"


"""def get_selected_fileds(texts):
    ct = ClinicalTrials()
    input_name = texts.replace(' ' , "+")
    corona_fields = ct.get_study_fields(
        search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)"%(input_name),
        fields=["NCTId", "Condition", "BriefTitle",'BriefSummary','EligibilityCriteria'],
        max_studies=500,
        fmt="csv")
    return corona_fields"""

def get_retriever_str(fields):
    retriever_str=''
    for i in range(1,len(fields)):
        colnames = fields[0]
        targetCol = fields[i]
        for f in range(len(fields[0])):
            retriever_str+=colnames[f] + ":" + targetCol[f] +"\n"
        retriever_str+='\n'
    return retriever_str

def get_chanked_retriever(fields):
    retriever_list =[]
    for i in range(1,len(fields)):
        retriever_str=''
        colnames = fields[0]
        targetCol = fields[i]
        for f in range(len(fields[0])):
            retriever_str+=colnames[f] + ":" + targetCol[f] +"\n"
        retriever_list.append(retriever_str)
    return retriever_list

def get_selected_fields(texts, split_criteria=False,
                        split_word_number = False, split_number=700):
    ct = ClinicalTrials()
    input_name = texts.replace(' ', "+")
    corona_fields = ct.get_study_fields(
        search_expr="%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (input_name),
        fields=["NCTId", "Condition", "BriefTitle", 'BriefSummary', 'EligibilityCriteria'],
        max_studies=500,
        fmt="csv")

    if split_criteria:
        new_fields = []

        # Strings to search for
        target_string1 = 'Exclusion Criteria'
        target_string2 = 'Exclusion criteria'

        # Look for the target string in each element and split just before it
        for corona_field in corona_fields:
            new_list = []
            for item in corona_field:
                if target_string1 in item:
                    split_position = item.index(target_string1)
                    new_list.append(item[:split_position])
                    new_list.append(item[split_position:])
                elif target_string2 in item:
                    split_position = item.index(target_string2)
                    new_list.append(item[:split_position])
                    new_list.append(item[split_position:])
                else:
                    new_list.append(item)
            new_fields.append(new_list)
    else:
        new_fields = corona_fields

    if split_word_number:
        split_fields = []
        for new_field in new_fields:
            new_list= []

            # Split any element longer than split_number characters
            for item in new_field:
                item_length = len(item)
                if item_length > split_number:
                    num_parts = -(-item_length // split_number)  # ceiling division to get the number of chunks
                    for i in range(num_parts):
                        start_index = i * split_number
                        end_index = min((i + 1) * split_number, item_length)  # do not run past the end of the string
                        new_list.append(item[start_index:end_index])
                else:
                    new_list.append(item)

            split_fields.append(new_list)
        new_fields = split_fields

    return new_fields


def print_agent_results(df, Ids,
                        interesteds = ['Condition', 'BriefTitle', 'BriefSummary', 'EligibilityCriteria'],
                        translater=None):
    results = ""
    for Id in Ids:
        print("%s\n"%Id)
        sdf = df[df['NCTId'] == Id]
        for interested in interesteds:
            # Take the first element
            results += '%s: \n %s \n' % (interested, sdf[interested].iloc[0])
            #print('%s: \n %s \n' % (interested, sdf[interested].iloc[0]))
    if translater:
        to_be_printed = translater.translate(results)
    else:
        to_be_printed =results
    print(to_be_printed)

def search(query):
    Entrez.email = os.getenv('MAIL_ADRESS')
    handle = Entrez.esearch(db='pubmed',
                            sort = 'relevance',
                            retmax = '20',
                            retmode = 'xml',
                            term = query)
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = os.getenv('MAIL_ADRESS')
    handle = Entrez.efetch(db = 'pubmed',
                           retmode = 'xml',
                           id = ids)
    results = Entrez.read(handle)
    return results

'''def generate(prompt,engine=None):
    if engine is None:
        engine=gptengine
    while True: # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are useful assistant"},
                    {"role": "user", "content":prompt},
                ]
            )
            result=response["choices"][0]["message"]["content"]
            return result
        except Exception as e:
            print(e)
            print("リトライ")
            time.sleep(30)
'''

def generate(prompt,engine=None):
    if engine is None:
        engine=gptengine
    while True: # Caution: retries forever while the OpenAI API is down
        try:
            response = openai.chat.completions.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are useful assistant"},
                    {"role": "user", "content":prompt},
                ]
            )
            #result=response["choices"][0]["message"]["content"]
            result=response.choices[0].message.content
            return result
        except Exception as e:
            print(e)
            print("リトライ")  # retry
            time.sleep(30)

def GetPubmedSummaryDf(studies):
    title_list= []
    abstract_list=[]
    journal_list = []
    language_list =[]
    pubdate_year_list = []
    pubdate_month_list = []
    studiesIdList = studies['IdList']
    chunk_size = 10000
    for chunk_i in range(0, len(studiesIdList), chunk_size):
        chunk = studiesIdList[chunk_i:chunk_i + chunk_size]

        try:
            papers = fetch_details(chunk)
            for i, paper in enumerate(papers['PubmedArticle']):
                title_list.append(paper['MedlineCitation']['Article']['ArticleTitle'])
                try:
                    abstract_list.append(paper['MedlineCitation']['Article']['Abstract']['AbstractText'][0])
                except Exception:
                    abstract_list.append('No Abstract')
                journal_list.append(paper['MedlineCitation']['Article']['Journal']['Title'])
                language_list.append(paper['MedlineCitation']['Article']['Language'][0])
                try:
                    pubdate_year_list.append(paper['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate']['Year'])
                except Exception:
                    pubdate_year_list.append('No Data')
                try:
                    pubdate_month_list.append(paper['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate']['Month'])
                except Exception:
                    pubdate_month_list.append('No Data')
        except Exception:  # occasionally a chunk might annoy your parser
            pass
    df = pd.DataFrame(list(zip(
        title_list, abstract_list, journal_list, language_list, pubdate_year_list,
        pubdate_month_list)),
        columns=['Title', 'Abstract', 'Journal', 'Language', 'Year','Month'])
    return df, abstract_list

def ClinicalAgent(fileds, verbose=False):
    df = pd.DataFrame.from_records(fileds[1:], columns=fileds[0])
    return create_pandas_dataframe_agent(OpenAI(temperature=0, model='gpt-3.5-turbo-16k'), df, verbose=verbose)

def GetNCTID(results):
    # Regular expression matching words that start with NCT
    pattern = r'\bNCT\d+\b'
    # Extract the matching words
    nct_words = re.findall(pattern,results)
    return nct_words
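A sketch of how the ExpertTools helpers might be chained, assuming the OPENAI_API_KEY and MAIL_ADRESS environment variables are set; the condition and gene below are illustrative only.

from OpenAITools.ExpertTools import (GetNCTID, GetPubmedSummaryDf,
                                     get_retriever_str, get_selected_fields, search)

# Recruiting trials in Japan for an example condition, split at the exclusion criteria.
fields = get_selected_fields("lung cancer", split_criteria=True)
print(get_retriever_str(fields)[:500])

# PubMed abstracts on the same topic, as a DataFrame plus the raw abstract list.
studies = search("(lung cancer) AND (EGFR)")
df, abstracts = GetPubmedSummaryDf(studies)
print(df.head())

# Pull NCT identifiers out of free text returned by an agent.
print(GetNCTID("The most relevant study is NCT01234567."))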
OpenAITools/ReviewPaperTools.py ADDED
@@ -0,0 +1,42 @@
import re
import pandas as pd

def parse_text_file(text):
    # Regular expression pattern used to split the text into sections
    # \d+ matches one or more digits
    pattern = re.compile(r'\n\n\n\d+\.')

    # Split the text into sections
    sections = pattern.split(text)[1:]  # drop the leading empty section

    # Strip whitespace around each section
    sections = [section.strip() for section in sections]

    return sections

def split_sections(text):
    contents = text.split('\n\n')
    contents = [section.strip() for section in contents if section.strip()]
    keys = []  # fallback so an unexpected section count no longer raises a NameError
    if len(contents) == 8:
        keys = ['PublishInfo', 'Title', 'AuthorName', 'AuthorInfo', 'Abstract', 'Copyrights', 'DOI', 'COI']
    elif len(contents) == 7:
        keys = ['PublishInfo', 'Title', 'AuthorName', 'AuthorInfo', 'Abstract', 'Copyrights', 'DOI']
    elif len(contents) == 6:
        keys = ['PublishInfo', 'Title', 'AuthorName', 'AuthorInfo', 'Abstract', 'DOI']
    elif len(contents) == 5:
        keys = ['PublishInfo', 'Title', 'AuthorName', 'Abstract', 'DOI']

    # Build the dictionary; keys without a matching element get an empty string
    section_dict = {key: contents[i] if i < len(contents) else "" for i, key in enumerate(keys)}
    return section_dict


def GetSummaryDf(textdir):
    with open(textdir, 'r', encoding='utf-8') as f:
        content = f.read()
    sections = parse_text_file(content)
    dicts = []
    for section in sections:
        splited_dic = split_sections(section)
        dicts.append(splited_dic)
    return pd.DataFrame(dicts)
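A short usage sketch for ReviewPaperTools, assuming a hypothetical PubMed-style text export reviews.txt whose records are separated by numbered headings and blank lines, as parse_text_file expects.

from OpenAITools.ReviewPaperTools import GetSummaryDf

summary_df = GetSummaryDf("reviews.txt")  # hypothetical export file
print(summary_df[["Title", "DOI"]].head())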
OpenAITools/scrapeThisData.py ADDED
@@ -0,0 +1,237 @@
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

import requests
from bs4 import BeautifulSoup
import re

import os
import time

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import chromedriver_autoinstaller

class ScrapeThatData:

    def __init__(self, time_threshold = 10):

        try:
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--no-sandbox')
            self.driver = webdriver.Chrome(options=chrome_options)

        except Exception:
            chromedriver_autoinstaller.install()
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--no-sandbox')
            self.driver = webdriver.Chrome(options=chrome_options)

        self.wait = WebDriverWait(self.driver, time_threshold)
        self.attribute_dict = {'status':1 ,'conditions':2, 'interventions': 3, 'study type':4,
                               'phase':5, 'sponsor':6, 'funder type':7 , 'study design': 8,
                               'outcome measures':9, 'number enrolled':10, 'sex':11, 'age':12,
                               'nct number': 13, 'other ids':14, 'title acronym': 15 , 'study start': 16,
                               'primary completion': 17, 'study completion': 18 , 'first posted': 19,
                               'last update posted': 20 , 'results first posted': 21 , 'locations':22, 'study documents': 23}

        self.status_dict = {'not yet recruiting' : 'notYetRecrCB',
                            'recruiting' : 'recruitingCB',
                            'enrolling by invitation':'enrollingByInvCB',
                            'active, not recruiting': 'activeCB',
                            'suspended': 'suspendedCB',
                            'terminated':'terminatedCB',
                            'completed':'completedCB',
                            'withdrawn': 'withdrawnCB',
                            'unknown status': 'unknownCB'}

    def clicking_show_hide_cols(self, driver):
        columns = driver.find_element(By.XPATH,'//*[@id="theDataTable_wrapper"]/div[3]/button')
        action_chain = ActionChains(driver)
        action_chain.move_to_element(columns).click()
        action_chain.perform()

    def select_attributes_to_show(self, listed_attributes, attribute_dict):
        ll = [value.lower() for value in listed_attributes if value.lower() in ['status', 'conditions', 'interventions', 'locations']]
        if ll:
            to_show = [value.lower() for value in listed_attributes if value.lower() not in ll]
            to_hide = [value for value in ['status', 'conditions', 'interventions', 'locations'] if value not in ll]
            to_click = to_hide + to_show
            for att in to_click:
                self.clicking_show_hide_cols(self.driver)
                time.sleep(1)
                self.wait.until(EC.presence_of_element_located((By.XPATH,'//*[@id="theDataTable_wrapper"]/div[3]/div[2]/button['+ str(attribute_dict[att]) + ']'))).click()
                time.sleep(1)
        else:
            for att in listed_attributes:
                self.clicking_show_hide_cols(self.driver)
                time.sleep(1)
                self.wait.until(EC.presence_of_element_located((By.XPATH,'//*[@id="theDataTable_wrapper"]/div[3]/div[2]/button['+ str(attribute_dict[att.lower()]) + ']'))).click()
                time.sleep(1)

    def select_by_status(self, listed_states, status_dict):
        if listed_states:
            for status in listed_states:
                self.driver.find_element(By.ID, status_dict[status.lower()]).click()

            self.driver.find_element(By.XPATH,'//*[@id="FiltersBody"]/div[1]/input[1]').click()
            time.sleep(3)

        # find_element_by_name was removed in Selenium 4; use the By locator instead
        select = Select(self.driver.find_element(By.NAME, 'theDataTable_length'))
        select.select_by_value('100')

    def collect_data_search_page(self, l_ordered, amount_of_data = None):

        class_name = ''
        page_index = 1

        elements = [l_ordered]

        while 'disabled' not in class_name:

            time.sleep(10)

            print('Getting data from page {}'.format(page_index))

            # Count how many rows the table currently shows
            table = self.driver.find_element(By.ID, 'theDataTable')
            row_count = len(table.find_elements(By.TAG_NAME, "tr"))

            # Loop over the rows of the current table page
            for index in range(1, row_count):
                row = []
                if 'status' in l_ordered:
                    self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#theDataTable > tbody > tr:nth-child('+str(index)+') > td:nth-child(3)')))
                    # CSS selectors need By.CSS_SELECTOR and a single element, not find_elements(By.CLASS_NAME, ...)
                    status_element = self.driver.find_element(By.CSS_SELECTOR,'#theDataTable > tbody > tr:nth-child('+str(index)+') > td:nth-child(3) > span')
                    row.append(status_element.text.strip())
                    for i, val in enumerate(l_ordered):
                        if val == 'status':
                            continue

                        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#theDataTable > tbody > tr:nth-child('+str(index)+') > td:nth-child('+str(4+i)+')')))
                        element = self.driver.find_element(By.CSS_SELECTOR,'#theDataTable > tbody > tr:nth-child('+str(index)+') > td:nth-child('+str(4+i)+')')
                        try:
                            row.append(element.text.strip())
                        except Exception:
                            print(i, element)
                else:
                    for i, val in enumerate(l_ordered):
                        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#theDataTable > tbody > tr:nth-child('+str(index)+') > td:nth-child('+str(3+i)+')')))
                        element = self.driver.find_element(By.CSS_SELECTOR,'#theDataTable > tbody > tr:nth-child('+str(index)+') > td:nth-child('+str(3+i)+')')
                        try:
                            row.append(element.text.strip())
                        except Exception:
                            print(i, element)
                elements.append(row)

            # Get the next-page button
            next_page = self.driver.find_element(By.ID, "theDataTable_next")

            # Read its class attribute ('disabled' means this was the last page)
            class_name = next_page.get_attribute('class')

            # Go to the next page
            next_page.click()
            page_index += 1

            if amount_of_data:
                if len(elements) >= amount_of_data or row_count < amount_of_data:
                    break
                else:
                    continue

        return elements

    def get_criteria(self, NCTnumber):

        url = 'https://clinicaltrials.gov/ct2/show/' + NCTnumber
        ClinicalTrialpage = requests.get(url)
        soup = BeautifulSoup(ClinicalTrialpage.text, 'html.parser')

        wrapping_crit_class = soup.find_all("div", {"class": "tr-indent2"})
        list_elements = wrapping_crit_class[1].find_all(re.compile("(ul|ol)"))
        inclusion, exclusion = ('', '')

        if not list_elements:
            print("WARNING: Study number " + NCTnumber + " doesn't have eligibility criteria or HTML tag format is not a list")
        else:
            if len(list_elements) == 1:
                try:
                    if wrapping_crit_class[1].find(text = 'Inclusion Criteria:'):
                        inclusion = list_elements[0].find_all("li")

                    elif wrapping_crit_class[1].find(text = 'Exclusion Criteria:'):
                        exclusion = list_elements[0].find_all("li")
                except Exception:
                    print('criteria doesnt exist')
            else:
                inclusion = list_elements[0].find_all("li")
                exclusion = list_elements[1].find_all("li")

        inclusion = ' '.join([t.text.strip() for t in inclusion])
        exclusion = ' '.join([t.text.strip() for t in exclusion])

        return (inclusion, exclusion)

    # Get the number of patients enrolled in a study
    def get_enrollment(self, NCTnumber):
        url = 'https://clinicaltrials.gov/ct2/show/' + NCTnumber
        ClinicalTrialpage = requests.get(url)
        soup = BeautifulSoup(ClinicalTrialpage.text, 'html.parser')
        enrollment = ''
        wrapping_enrol_class = soup.find_all('td', {'headers':'studyInfoColData','style':"padding-left:1em"})
        if not wrapping_enrol_class:
            print('WARNING: Number of Participants in Study number ' + NCTnumber + ' is unavailable')
        else:
            enrollment = wrapping_enrol_class[1]
            enrollment = enrollment.text.split()[0]
            if enrollment.isdigit() == False:
                print('WARNING: Number of Participants in Study number ' + NCTnumber + ' is unavailable')
            else:
                return (enrollment)

    def __call__(self, condition, listed_attributes, listed_states, amount_of_data):

        self.driver.get('https://clinicaltrials.gov/ct2/results?cond=' + condition + '&rank=1&view=record#rowId0')
        self.select_attributes_to_show(listed_attributes, self.attribute_dict)

        try:
            self.select_by_status(listed_states, self.status_dict)
        except Exception:
            print('select by status is a problem')
        n = []
        for i in listed_attributes:
            n.append(self.attribute_dict[i.lower()])
        attribute_ordered = [list(self.attribute_dict.keys())[list(self.attribute_dict.values()).index(i)] for i in sorted(n)]

        search_data = self.collect_data_search_page(attribute_ordered, amount_of_data=amount_of_data)
        nct_numbers = [e[search_data[0].index('nct number')] for e in search_data[1:]]
        search_data[0].extend(['inclusion', 'exclusion', 'enrollment'])
        for index, nct in enumerate(nct_numbers):
            if index % 100 == 0 and index != 0:
                print("Collected Data from {} Studies: ".format(index))

            inc, exc = self.get_criteria(nct)
            enrol = self.get_enrollment(nct)
            search_data[index + 1].extend([inc, exc, enrol])
        return search_data
        # except:
        #     print('no data available with the specified status')
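An illustrative (untested) sketch of the scraper above; it assumes a working local Chrome/chromedriver setup and the legacy clinicaltrials.gov ct2 pages that these selectors were written against, so treat it as a sketch only.

from OpenAITools.scrapeThisData import ScrapeThatData

scraper = ScrapeThatData(time_threshold=15)
rows = scraper(condition="lung+cancer",
               listed_attributes=["NCT Number", "Conditions", "Status"],
               listed_states=["Recruiting"],
               amount_of_data=50)
print(rows[0])  # header row, extended with inclusion / exclusion / enrollment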
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
 title: GetMutationInfo
-emoji: 💻
-colorFrom: pink
-colorTo: red
+emoji: 🔥
+colorFrom: green
+colorTo: green
 sdk: gradio
-sdk_version: 4.36.0
+sdk_version: 4.33.0
 app_file: app.py
 pinned: false
 ---
app.py ADDED
@@ -0,0 +1,51 @@
from OpenAITools.ExpertTools import GetPubmedSummaryDf, generate, search
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, SummaryIndex
from llama_index.core import Document
from llama_index.llms.groq import Groq
from llama_index.core import ServiceContext, set_global_service_context
from llama_index.llms.llama_cpp.llama_utils import messages_to_prompt, completion_to_prompt
import gradio as gr

# Models
LLAMA3_8B = "Llama3-8b-8192"
LLAMA3_70B = "Llama3-70b-8192"
Mixtral = "mixtral-8x7b-32768"

def custom_completion_to_prompt(completion: str) -> str:
    return completion_to_prompt(
        completion, system_prompt=(
            "You are a Q&A assistant. Your goal is to answer questions as "
            "accurately as possible based on the instructions and context provided."
        ),
    )

def getMutationEffect(cancer_name, gene_name):
    searchWords = "(" + str(cancer_name) + ") AND " + "(" + str(gene_name) + ") AND (treatment)"
    studies = search(searchWords)
    df, abstracts = GetPubmedSummaryDf(studies)
    # Define the LLM
    llm = Groq(
        model=LLAMA3_8B,
        temperature=0.01,
        context_window=4096,
        completion_to_prompt=custom_completion_to_prompt,
        messages_to_prompt=messages_to_prompt,)
    # Set the global service context
    ctx = ServiceContext.from_defaults(llm=llm)
    set_global_service_context(ctx)
    documents = [Document(text=t) for t in abstracts[:10]]
    index = SummaryIndex.from_documents(documents)
    query_engine = index.as_query_engine(response_mode="tree_summarize")
    prompt = "Please prepare a single summary of the abstracts of the following papers. Pay particular attention to the {} gene".format(gene_name)
    response = query_engine.query(prompt)
    return response

demo = gr.Interface(fn=getMutationEffect,
                    inputs=[gr.Textbox(label="CancerName"),
                            gr.Textbox(label="GeneName"),
                            ],
                    outputs="text")


if __name__ == "__main__":
    demo.launch()
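For a quick check without the Gradio UI, the underlying function can be called directly; this assumes GROQ_API_KEY, OPENAI_API_KEY, and MAIL_ADRESS are set and the pinned llama-index release that still provides ServiceContext. The inputs are illustrative.

from app import getMutationEffect

summary = getMutationEffect("non-small cell lung cancer", "EGFR")
print(summary)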
environment.yml ADDED
@@ -0,0 +1,369 @@
name: gradio
channels:
  - conda-forge
dependencies:
  - aiofiles=23.2.1
  - altair=5.3.0
  - annotated-types=0.7.0
  - anyio=4.3.0
  - aom=3.9.0
  - appnope=0.1.4
  - argon2-cffi=23.1.0
  - argon2-cffi-bindings=21.2.0
  - arrow=1.3.0
  - asttokens=2.4.1
  - async-lru=2.0.4
  - attrs=23.2.0
  - babel=2.14.0
  - beautifulsoup4=4.12.3
  - bleach=6.1.0
  - blinker=1.8.2
  - brotli=1.1.0
  - brotli-bin=1.1.0
  - brotli-python=1.1.0
  - bzip2=1.0.8
  - ca-certificates=2024.6.2
  - cached-property=1.5.2
  - cached_property=1.5.2
  - cairo=1.18.0
  - certifi=2024.2.2
  - cffi=1.16.0
  - charset-normalizer=3.3.2
  - click=8.1.7
  - colorama=0.4.6
  - comm=0.2.2
  - contourpy=1.2.1
  - cycler=0.12.1
  - dav1d=1.2.1
  - debugpy=1.8.1
  - decorator=5.1.1
  - defusedxml=0.7.1
  - dnspython=2.6.1
  - email-validator=2.1.1
  - email_validator=2.1.1
  - entrypoints=0.4
  - exceptiongroup=1.2.0
  - executing=2.0.1
  - expat=2.6.2
  - fastapi=0.111.0
  - fastapi-cli=0.0.4
  - ffmpeg=7.0.1
  - ffmpy=0.3.0
  - filelock=3.14.0
  - flask=3.0.3
  - font-ttf-dejavu-sans-mono=2.37
  - font-ttf-inconsolata=3.000
  - font-ttf-source-code-pro=2.038
  - font-ttf-ubuntu=0.83
  - fontconfig=2.14.2
  - fonts-conda-ecosystem=1
  - fonts-conda-forge=1
  - fonttools=4.53.0
  - fqdn=1.5.1
  - freetype=2.12.1
  - fribidi=1.0.10
  - fsspec=2024.6.0
  - gettext=0.22.5
  - gettext-tools=0.22.5
  - gmp=6.3.0
  - gnutls=3.7.9
  - gradio=4.33.0
  - gradio-client=0.17.0
  - graphite2=1.3.13
  - h11=0.14.0
  - h2=4.1.0
  - harfbuzz=8.5.0
  - hpack=4.0.0
  - httpcore=1.0.5
  - httpx=0.27.0
  - huggingface_hub=0.23.2
  - hyperframe=6.0.1
  - icu=73.2
  - idna=3.7
  - importlib-metadata=7.1.0
  - importlib-resources=6.4.0
  - importlib_metadata=7.1.0
  - importlib_resources=6.4.0
  - ipykernel=6.29.3
  - ipython=8.25.0
  - ipywidgets=8.1.3
  - isoduration=20.11.0
  - itsdangerous=2.2.0
  - jedi=0.19.1
  - jinja2=3.1.4
  - joblib=1.4.2
  - json5=0.9.25
  - jsonpointer=2.4
  - jsonschema=4.22.0
  - jsonschema-specifications=2023.12.1
  - jsonschema-with-format-nongpl=4.22.0
  - jupyter=1.0.0
  - jupyter-lsp=2.2.5
  - jupyter_client=8.6.2
  - jupyter_console=6.6.3
  - jupyter_core=5.7.2
  - jupyter_events=0.10.0
  - jupyter_server=2.14.1
  - jupyter_server_terminals=0.5.3
  - jupyterlab=4.2.1
  - jupyterlab_pygments=0.3.0
  - jupyterlab_server=2.27.2
  - jupyterlab_widgets=3.0.11
  - kiwisolver=1.4.5
  - krb5=1.21.2
  - lame=3.100
  - lcms2=2.16
  - lerc=4.0.0
  - libabseil=20240116.2
  - libasprintf=0.22.5
  - libasprintf-devel=0.22.5
  - libass=0.17.1
  - libblas=3.9.0
  - libbrotlicommon=1.1.0
  - libbrotlidec=1.1.0
  - libbrotlienc=1.1.0
  - libcblas=3.9.0
  - libcxx=17.0.6
  - libdeflate=1.20
  - libedit=3.1.20191231
  - libexpat=2.6.2
  - libffi=3.4.2
  - libgettextpo=0.22.5
  - libgettextpo-devel=0.22.5
  - libgfortran=5.0.0
  - libgfortran5=13.2.0
  - libglib=2.80.2
  - libhwloc=2.10.0
  - libiconv=1.17
  - libidn2=2.3.7
  - libintl=0.22.5
  - libintl-devel=0.22.5
  - libjpeg-turbo=3.0.0
  - liblapack=3.9.0
  - libopenblas=0.3.27
  - libopenvino=2024.1.0
  - libopenvino-arm-cpu-plugin=2024.1.0
  - libopenvino-auto-batch-plugin=2024.1.0
  - libopenvino-auto-plugin=2024.1.0
  - libopenvino-hetero-plugin=2024.1.0
  - libopenvino-ir-frontend=2024.1.0
  - libopenvino-onnx-frontend=2024.1.0
  - libopenvino-paddle-frontend=2024.1.0
  - libopenvino-pytorch-frontend=2024.1.0
  - libopenvino-tensorflow-frontend=2024.1.0
  - libopenvino-tensorflow-lite-frontend=2024.1.0
  - libopus=1.3.1
  - libpng=1.6.43
  - libprotobuf=4.25.3
  - libsodium=1.0.18
  - libsqlite=3.45.3
  - libtasn1=4.19.0
  - libtiff=4.6.0
  - libunistring=0.9.10
  - libvpx=1.14.0
  - libwebp-base=1.4.0
  - libxcb=1.15
  - libxml2=2.12.7
  - libzlib=1.3.1
  - llvm-openmp=18.1.6
  - markdown-it-py=3.0.0
  - markupsafe=2.1.5
  - matplotlib=3.8.4
  - matplotlib-base=3.8.4
  - matplotlib-inline=0.1.7
  - mdurl=0.1.2
  - mistune=3.0.2
  - munkres=1.1.4
  - natsort=8.4.0
  - nbclient=0.10.0
  - nbconvert=7.16.4
  - nbconvert-core=7.16.4
  - nbconvert-pandoc=7.16.4
  - nbformat=5.10.4
  - ncurses=6.5
  - nest-asyncio=1.6.0
  - nettle=3.9.1
  - notebook=7.2.0
  - notebook-shim=0.2.4
  - numpy=1.26.4
  - openh264=2.4.1
  - openjpeg=2.5.2
  - openssl=3.3.1
  - orjson=3.10.3
  - overrides=7.7.0
  - p11-kit=0.24.1
  - pandas=2.2.2
  - pandoc=3.2
  - pandocfilters=1.5.0
  - parso=0.8.4
  - patsy=0.5.6
  - pcre2=10.43
  - pexpect=4.9.0
  - pickleshare=0.7.5
  - pillow=10.3.0
  - pip=24.0
  - pixman=0.43.4
  - pkgutil-resolve-name=1.3.10
  - platformdirs=4.2.2
  - prometheus_client=0.20.0
  - prompt-toolkit=3.0.42
  - prompt_toolkit=3.0.42
  - psutil=5.9.8
  - pthread-stubs=0.4
  - ptyprocess=0.7.0
  - pugixml=1.14
  - pure_eval=0.2.2
  - pycparser=2.22
  - pydantic=2.7.3
  - pydantic-core=2.18.4
  - pydub=0.25.1
  - pygments=2.18.0
  - pyobjc-core=10.2
  - pyobjc-framework-cocoa=10.2
  - pyparsing=3.1.2
  - pysocks=1.7.1
  - python=3.12.3
  - python-dateutil=2.9.0
  - python-fastjsonschema=2.19.1
  - python-json-logger=2.0.7
  - python-multipart=0.0.9
  - python-tzdata=2024.1
  - python_abi=3.12
  - pytz=2024.1
  - pyyaml=6.0.1
  - pyzmq=26.0.3
  - qtconsole-base=5.5.2
  - qtpy=2.4.1
  - readline=8.2
  - referencing=0.35.1
  - requests=2.32.3
  - rfc3339-validator=0.1.4
  - rfc3986-validator=0.1.1
  - rich=13.7.1
  - rpds-py=0.18.1
  - ruff=0.4.7
  - scikit-learn=1.5.0
  - scipy=1.13.1
  - seaborn=0.13.2
  - seaborn-base=0.13.2
  - semantic_version=2.10.0
  - send2trash=1.8.3
  - setuptools=70.0.0
  - shellingham=1.5.4
  - six=1.16.0
  - snappy=1.2.0
  - sniffio=1.3.1
  - soupsieve=2.5
  - stack_data=0.6.2
  - starlette=0.37.2
  - statsmodels=0.14.2
  - svt-av1=2.1.0
  - tbb=2021.12.0
  - terminado=0.18.1
  - threadpoolctl=3.5.0
  - tinycss2=1.3.0
  - tk=8.6.13
  - tomli=2.0.1
  - tomlkit=0.12.0
  - toolz=0.12.1
  - tornado=6.4
  - tqdm=4.66.4
  - traitlets=5.14.3
  - typer=0.12.3
  - typer-slim=0.12.3
  - typer-slim-standard=0.12.3
  - types-python-dateutil=2.9.0.20240316
  - typing-extensions=4.12.1
  - typing_extensions=4.12.1
  - typing_utils=0.1.0
  - tzdata=2024a
  - ujson=5.10.0
  - uri-template=1.3.0
  - urllib3=2.2.1
  - uvicorn=0.30.1
  - wcwidth=0.2.13
  - webcolors=1.13
  - webencodings=0.5.1
  - websocket-client=1.8.0
  - websockets=11.0.3
  - werkzeug=3.0.3
  - wheel=0.43.0
  - widgetsnbextension=4.0.11
  - wikipedia=1.4.0
  - wtforms=3.1.2
  - x264=1!164.3095
  - x265=3.5
  - xorg-libxau=1.0.11
  - xorg-libxdmcp=1.1.3
  - xz=5.2.6
  - yaml=0.2.5
  - zeromq=4.3.5
  - zipp=3.17.0
  - zlib=1.3.1
  - zstd=1.5.6
  - pip:
      - aiohttp==3.9.5
      - aiosignal==1.3.1
      - bio==1.7.1
      - biopython==1.83
      - biothings-client==0.3.1
      - dataclasses-json==0.6.6
      - deprecated==1.2.14
      - dirtyjson==1.0.8
      - diskcache==5.6.3
      - distro==1.9.0
      - frozenlist==1.4.1
      - gprofiler-official==1.0.0
      - greenlet==3.0.3
      - jsonpatch==1.33
      - langchain==0.2.2
      - langchain-community==0.2.2
      - langchain-core==0.2.4
      - langchain-experimental==0.0.60
      - langchain-openai==0.1.8
      - langchain-text-splitters==0.2.1
      - langsmith==0.1.71
      - llama-cpp-python==0.2.77
      - llama-index==0.10.43
      - llama-index-agent-openai==0.2.7
      - llama-index-cli==0.1.12
      - llama-index-core==0.10.43
      - llama-index-embeddings-openai==0.1.10
      - llama-index-indices-managed-llama-cloud==0.1.6
      - llama-index-legacy==0.9.48
      - llama-index-llms-groq==0.1.4
      - llama-index-llms-llama-cpp==0.1.3
      - llama-index-llms-openai==0.1.22
      - llama-index-llms-openai-like==0.1.3
      - llama-index-llms-replicate==0.1.3
      - llama-index-multi-modal-llms-openai==0.1.6
      - llama-index-program-openai==0.1.6
      - llama-index-question-gen-openai==0.1.3
      - llama-index-readers-file==0.1.23
      - llama-index-readers-llama-parse==0.1.4
      - llama-parse==0.4.4
      - llamaindex-py-client==0.1.19
      - marshmallow==3.21.2
      - multidict==6.0.5
      - mygene==3.2.2
      - mypy-extensions==1.0.0
      - networkx==3.3
      - nltk==3.8.1
      - openai==1.31.0
      - packaging==23.2
      - pooch==1.8.1
      - pypdf==4.2.0
      - pytrials==1.0.0
      - regex==2024.5.15
      - replicate==0.26.0
      - safetensors==0.4.3
      - sqlalchemy==2.0.30
      - striprtf==0.0.26
      - tenacity==8.3.0
      - tiktoken==0.7.0
      - tokenizers==0.19.1
      - transformers==4.41.2
      - typing-inspect==0.9.0
      - wrapt==1.16.0
      - yarl==1.9.4
prefix: /Users/satoc/miniforge3/envs/gradio
requirements.txt ADDED
@@ -0,0 +1,71 @@
aiohttp==3.9.5
aiosignal==1.3.1
bio==1.7.1
biopython==1.83
biothings-client==0.3.1
dataclasses-json==0.6.6
Deprecated==1.2.14
dirtyjson==1.0.8
diskcache==5.6.3
distro==1.9.0
frozenlist==1.4.1
gprofiler-official==1.0.0
greenlet==3.0.3
hpack==4.0.0
jsonpatch==1.33
langchain==0.2.2
langchain-community==0.2.2
langchain-core==0.2.4
langchain-experimental==0.0.60
langchain-openai==0.1.8
langchain-text-splitters==0.2.1
langsmith==0.1.71
llama-index==0.10.43
llama-index-agent-openai==0.2.7
llama-index-cli==0.1.12
llama-index-core==0.10.43
llama-index-embeddings-openai==0.1.10
llama-index-indices-managed-llama-cloud==0.1.6
llama-index-legacy==0.9.48
llama-index-llms-groq==0.1.4
llama-index-llms-llama-cpp==0.1.3
llama-index-llms-openai==0.1.22
llama-index-llms-openai-like==0.1.3
llama-index-llms-replicate==0.1.3
llama-index-multi-modal-llms-openai==0.1.6
llama-index-program-openai==0.1.6
llama-index-question-gen-openai==0.1.3
llama-index-readers-file==0.1.23
llama-index-readers-llama-parse==0.1.4
llama-parse==0.4.4
llama_cpp_python==0.2.77
llamaindex-py-client==0.1.19
marshmallow==3.21.2
multidict==6.0.5
munkres==1.1.4
mygene==3.2.2
mypy-extensions==1.0.0
networkx==3.3
nltk
openai
packaging==23.2
pooch==1.8.1
pypdf==4.2.0
pytrials==1.0.0
regex==2024.5.15
replicate==0.26.0
safetensors
setuptools==70.0.0
SQLAlchemy==2.0.30
striprtf==0.0.26
tenacity==8.3.0
tiktoken==0.7.0
tokenizers==0.19.1
transformers==4.41.2
typer==0.12.3
typer-slim==0.12.3
typing-inspect==0.9.0
wheel==0.43.0
wikipedia==1.4.0
wrapt==1.16.0
yarl==1.9.4