Spaces:

Satoc
/

ClinicalTrialV2

Running

File size: 10,187 Bytes
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/yw/qz00x75d7kb98f7vm8dkhkvw0000gn/T/ipykernel_57352/998829134.py:6: LangChainDeprecationWarning: As of langchain-core 0.3.0, LangChain uses pydantic v2 internally. The langchain_core.pydantic_v1 module was a compatibility shim for pydantic v1, and should no longer be used. Please update the code to import from Pydantic directly.\n",
      "\n",
      "For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`\n",
      "with: `from pydantic import BaseModel`\n",
      "or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. \tfrom pydantic.v1 import BaseModel\n",
      "\n",
      "  from OpenAITools.CrinicalTrialTools import SimpleClinicalTrialAgent, GraderAgent, LLMTranslator, generate_ex_question_English\n",
      "/Users/satoc/miniforge3/envs/gradio/lib/python3.12/site-packages/transformers/tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* Running on local URL:  http://127.0.0.1:7861\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/yw/qz00x75d7kb98f7vm8dkhkvw0000gn/T/ipykernel_57352/998829134.py:29: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df['AgentJudgment'] = None\n",
      "/var/folders/yw/qz00x75d7kb98f7vm8dkhkvw0000gn/T/ipykernel_57352/998829134.py:30: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df['AgentGrade'] = None\n"
     ]
    }
   ],
   "source": [
    "import gradio as gr\n",
    "import pandas as pd\n",
    "from OpenAITools.FetchTools import fetch_clinical_trials\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain_groq import ChatGroq\n",
    "from OpenAITools.CrinicalTrialTools import SimpleClinicalTrialAgent, GraderAgent, LLMTranslator, generate_ex_question_English\n",
    "from OpenAITools.JRCTTools import get_matched_df,GetJRCTCriteria\n",
    "from sentence_transformers import SentenceTransformer\n",
    "from sentence_transformers import util\n",
    "\n",
    "# Initialise the models and agents once, when the cell runs; the functions below read them as globals.\n",
    "# A single ChatGroq client is shared by the translator, the criteria-check agent and the grader.\n",
    "groq = ChatGroq(model_name=\"llama3-70b-8192\", temperature=0)\n",
    "translator = LLMTranslator(groq)\n",
    "CriteriaCheckAgent = SimpleClinicalTrialAgent(groq)\n",
    "grader_agent = GraderAgent(groq)\n",
    "# Sentence-embedding model passed to get_matched_df() as `model` in generate_dataframe().\n",
    "selectionModel =  SentenceTransformer('pritamdeka/S-PubMedBert-MS-MARCO')\n",
    "\n",
    "# Build the evaluated clinical-trial table for one patient.\n",
    "def generate_dataframe(age, sex, tumor_type, GeneMutation, Meseable, Biopsiable, progress=gr.Progress(track_tqdm=True)):\n",
    "    \"\"\"Select matching JRCT trials and have the agents judge and grade eligibility.\n",
    "\n",
    "    Args:\n",
    "        age, sex, tumor_type, GeneMutation, Meseable, Biopsiable: values of the\n",
    "            Gradio input widgets. tumor_type is passed through the translator;\n",
    "            all of them are forwarded to generate_ex_question_English.\n",
    "        progress: Gradio progress tracker. It is declared as a default-valued\n",
    "            parameter because Gradio only wires a tracker to the running event\n",
    "            when it finds it in the signature; callers do not pass it.\n",
    "\n",
    "    Returns:\n",
    "        The same DataFrame twice: once for the displayed table and once for the\n",
    "        state that the filter and download callbacks read.\n",
    "    \"\"\"\n",
    "    # Translate the tumor type to English (the UI accepts Japanese input)\n",
    "    TumorName = translator.translate(tumor_type)\n",
    "\n",
    "    # Build the patient question text\n",
    "    ex_question = generate_ex_question_English(age, sex, TumorName, GeneMutation, Meseable, Biopsiable)\n",
    "\n",
    "    # Load the clinical-trial table and keep the trials matching the tumor name\n",
    "    basedf = pd.read_csv(\"../ClinicalTrialCSV/JRCT20241215CancerPost.csv\", index_col=0)\n",
    "    # .copy(): the recorded run raised SettingWithCopyWarning on the two column\n",
    "    # assignments below, i.e. the matched frame is a slice of basedf.\n",
    "    df = get_matched_df(basedf=basedf, query=TumorName, model=selectionModel, threshold=0.925).copy()\n",
    "    df['AgentJudgment'] = None\n",
    "    df['AgentGrade'] = None\n",
    "\n",
    "    # Evaluate eligibility trial by trial\n",
    "    total = len(df)\n",
    "    for i in range(total):\n",
    "        TargetCriteria = GetJRCTCriteria(df, i)\n",
    "        AgentJudgment = CriteriaCheckAgent.evaluate_eligibility(TargetCriteria, ex_question)\n",
    "        AgentGrade = grader_agent.evaluate_eligibility(AgentJudgment)\n",
    "        # Assign through df.loc using the row label and column name\n",
    "        df.loc[df.index[i], 'AgentJudgment'] = AgentJudgment\n",
    "        df.loc[df.index[i], 'AgentGrade'] = AgentGrade\n",
    "        progress((i + 1) / total)\n",
    "\n",
    "    # Reorder (and restrict) the columns for display\n",
    "    columns_order = ['JRCT ID', 'Title', '研究・治験の目的','AgentJudgment', 'AgentGrade','主たる選択基準', '主たる除外基準','Inclusion Criteria','Exclusion Criteria','NCT No', 'JapicCTI No']\n",
    "    df = df[columns_order]\n",
    "\n",
    "    return df, df  # one copy for the table, one for the filter/download state\n",
    "\n",
    "# 特定のAgentGrade（yes, no, unclear）に基づいて行をフィルタリングする関数\n",
    "def filter_rows_by_grade(original_df, grade):\n",
    "    df_filtered = original_df[original_df['AgentGrade'] == grade]\n",
    "    return df_filtered, df_filtered\n",
    "\n",
    "# CSVとして保存しダウンロードする関数\n",
    "def download_filtered_csv(df):\n",
    "    file_path = \"filtered_data.csv\"\n",
    "    df.to_csv(file_path, index=False)\n",
    "    return file_path\n",
    "\n",
    "# 全体結果をCSVとして保存しダウンロードする関数\n",
    "def download_full_csv(df):\n",
    "    file_path = \"full_data.csv\"\n",
    "    df.to_csv(file_path, index=False)\n",
    "    return file_path\n",
    "\n",
    "# Assemble the Gradio interface\n",
    "with gr.Blocks() as demo:\n",
    "    gr.Markdown(\"## 臨床試験適格性評価インターフェース\")\n",
    "\n",
    "    # Patient input fields\n",
    "    age_input = gr.Textbox(label=\"Age\", placeholder=\"例: 65\")\n",
    "    sex_input = gr.Dropdown(choices=[\"男性\", \"女性\"], label=\"Sex\")\n",
    "    tumor_type_input = gr.Textbox(\n",
    "        label=\"Tumor Type\",\n",
    "        placeholder=\"例: gastric cancer, 日本でも良いですが英語の方が精度が高いです。\",\n",
    "    )\n",
    "    gene_mutation_input = gr.Textbox(label=\"Gene Mutation\", placeholder=\"例: HER2\")\n",
    "    measurable_input = gr.Dropdown(choices=[\"有り\", \"無し\", \"不明\"], label=\"Measurable Tumor\")\n",
    "    biopsiable_input = gr.Dropdown(choices=[\"有り\", \"無し\", \"不明\"], label=\"Biopsiable Tumor\")\n",
    "\n",
    "    # Result table plus the two per-session states (full result, last filtered view)\n",
    "    dataframe_output = gr.DataFrame()\n",
    "    original_df = gr.State()\n",
    "    filtered_df = gr.State()\n",
    "\n",
    "    # Button that triggers the evaluation\n",
    "    generate_button = gr.Button(\"Generate Clinical Trials Data\")\n",
    "\n",
    "    # Buttons that filter the table by grade\n",
    "    yes_button = gr.Button(\"Show Eligible Trials\")\n",
    "    no_button = gr.Button(\"Show Ineligible Trials\")\n",
    "    unclear_button = gr.Button(\"Show Unclear Trials\")\n",
    "\n",
    "    # Download buttons, each paired with the file component that receives the CSV\n",
    "    download_filtered_button = gr.Button(\"Download Filtered Data\")\n",
    "    download_filtered_output = gr.File(label=\"Download Filtered Data\")\n",
    "\n",
    "    download_full_button = gr.Button(\"Download Full Data\")\n",
    "    download_full_output = gr.File(label=\"Download Full Data\")\n",
    "\n",
    "    # Wire the callbacks\n",
    "    patient_inputs = [\n",
    "        age_input,\n",
    "        sex_input,\n",
    "        tumor_type_input,\n",
    "        gene_mutation_input,\n",
    "        measurable_input,\n",
    "        biopsiable_input,\n",
    "    ]\n",
    "    generate_button.click(fn=generate_dataframe, inputs=patient_inputs, outputs=[dataframe_output, original_df])\n",
    "\n",
    "    # Each filter button passes its grade to filter_rows_by_grade through a constant State\n",
    "    for grade_button, grade_value in ((yes_button, \"yes\"), (no_button, \"no\"), (unclear_button, \"unclear\")):\n",
    "        grade_button.click(\n",
    "            fn=filter_rows_by_grade,\n",
    "            inputs=[original_df, gr.State(grade_value)],\n",
    "            outputs=[dataframe_output, filtered_df],\n",
    "        )\n",
    "\n",
    "    download_filtered_button.click(fn=download_filtered_csv, inputs=filtered_df, outputs=download_filtered_output)\n",
    "    download_full_button.click(fn=download_full_csv, inputs=original_df, outputs=download_full_output)\n",
    "\n",
    "\n",
    "# Start the interface\n",
    "demo.launch()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "gradio",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}