Spaces:
Running
Running
高橋慧
committed on
Commit
·
c308431
1
Parent(s):
5e2744f
stage3a
Browse files- app.py +141 -26
- requirements.txt +1 -26
- requirements_stage3b.txt +28 -0
app.py
CHANGED
@@ -3,19 +3,29 @@ import pandas as pd
|
|
3 |
import time
|
4 |
import traceback
|
5 |
import os
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
# 完全版のimportエラー対策
|
8 |
try:
|
9 |
-
from OpenAITools.FetchTools import fetch_clinical_trials
|
10 |
-
from langchain_openai import ChatOpenAI
|
11 |
from langchain_groq import ChatGroq
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
from OpenAITools.CrinicalTrialTools import SimpleClinicalTrialAgent, GraderAgent, LLMTranslator, generate_ex_question_English
|
13 |
-
|
14 |
-
|
|
|
15 |
except ImportError as e:
|
16 |
print(f"⚠️ 完全版モジュールのインポートに失敗: {e}")
|
17 |
print("軽量版モードで動作します")
|
18 |
-
FULL_VERSION = False
|
19 |
|
20 |
# 環境変数チェック
|
21 |
def check_environment():
|
@@ -57,6 +67,61 @@ def safe_init_agents():
|
|
57 |
# エージェント初期化
|
58 |
translator, CriteriaCheckAgent, grader_agent = safe_init_agents()
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
# エラーハンドリング付きでエージェント評価を実行する関数
|
61 |
def evaluate_with_retry(agent, criteria, question, max_retries=3):
|
62 |
"""エラーハンドリング付きでエージェント評価を実行"""
|
@@ -156,11 +221,44 @@ def generate_sample_dataframe(age, sex, tumor_type, GeneMutation, Meseable, Biop
|
|
156 |
print(f"サンプルデータ生成エラー: {e}")
|
157 |
return pd.DataFrame()
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
# 完全版データ生成関数
|
160 |
def generate_full_dataframe(age, sex, tumor_type, GeneMutation, Meseable, Biopsiable):
|
161 |
-
"""完全版のデータ生成(実際のAPI
|
162 |
try:
|
163 |
-
# 入力検証
|
164 |
if not all([age, sex, tumor_type]):
|
165 |
return pd.DataFrame()
|
166 |
|
@@ -228,7 +326,7 @@ def generate_full_dataframe(age, sex, tumor_type, GeneMutation, Meseable, Biopsi
|
|
228 |
available_columns = [col for col in columns_order if col in df.columns]
|
229 |
df = df[available_columns]
|
230 |
|
231 |
-
print(f"
|
232 |
return df
|
233 |
|
234 |
except Exception as e:
|
@@ -259,25 +357,35 @@ def download_full_csv(df):
|
|
259 |
print(f"CSV保存エラー: {e}")
|
260 |
return None
|
261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
# Gradioインターフェースの作成
|
263 |
with gr.Blocks(title="臨床試験適格性評価", theme=gr.themes.Soft()) as demo:
|
264 |
gr.Markdown("## 🏥 臨床試験適格性評価インターフェース")
|
265 |
|
266 |
-
#
|
267 |
-
|
268 |
-
|
269 |
-
elif FULL_VERSION and not env_ok:
|
270 |
-
gr.Markdown("⚠️ **モード**: 完全版(API制限あり)")
|
271 |
-
else:
|
272 |
-
gr.Markdown("🔧 **モード**: 軽量版(サンプルデータ)")
|
273 |
|
274 |
-
#
|
275 |
-
if
|
276 |
-
gr.Markdown("✅
|
|
|
|
|
277 |
else:
|
278 |
-
gr.Markdown("
|
279 |
|
280 |
-
gr.Markdown("💡 **使用方法**:
|
281 |
|
282 |
# 各種入力フィールド
|
283 |
with gr.Row():
|
@@ -304,8 +412,10 @@ with gr.Blocks(title="臨床試験適格性評価", theme=gr.themes.Soft()) as d
|
|
304 |
|
305 |
# ボタン類
|
306 |
with gr.Row():
|
307 |
-
if FULL_VERSION
|
308 |
-
generate_button = gr.Button("🔍 Generate Clinical Trials Data (
|
|
|
|
|
309 |
else:
|
310 |
generate_button = gr.Button("📋 Generate Sample Data", variant="primary")
|
311 |
|
@@ -330,9 +440,12 @@ with gr.Blocks(title="臨床試験適格性評価", theme=gr.themes.Soft()) as d
|
|
330 |
def update_dataframe_and_state(age, sex, tumor_type, gene_mutation, measurable, biopsiable):
|
331 |
"""データフレーム生成と状態更新"""
|
332 |
try:
|
333 |
-
if FULL_VERSION
|
334 |
-
progress_text.value = "🔍
|
335 |
df = generate_full_dataframe(age, sex, tumor_type, gene_mutation, measurable, biopsiable)
|
|
|
|
|
|
|
336 |
else:
|
337 |
progress_text.value = "📋 サンプルデータを生成中..."
|
338 |
df = generate_sample_dataframe(age, sex, tumor_type, gene_mutation, measurable, biopsiable)
|
@@ -408,7 +521,9 @@ with gr.Blocks(title="臨床試験適格性評価", theme=gr.themes.Soft()) as d
|
|
408 |
|
409 |
# フッター情報
|
410 |
gr.Markdown("---")
|
411 |
-
gr.
|
|
|
|
|
412 |
|
413 |
if __name__ == "__main__":
|
414 |
demo.launch(
|
|
|
3 |
import time
|
4 |
import traceback
|
5 |
import os
|
6 |
+
import requests
|
7 |
+
|
8 |
+
# 完全版のimportエラー対策(段階的フォールバック)
|
9 |
+
LANGCHAIN_AVAILABLE = False
|
10 |
+
FULL_VERSION = False
|
11 |
|
|
|
12 |
try:
|
|
|
|
|
13 |
from langchain_groq import ChatGroq
|
14 |
+
from langchain_openai import ChatOpenAI
|
15 |
+
LANGCHAIN_AVAILABLE = True
|
16 |
+
print("✅ LangChain基本ライブラリが利用可能です")
|
17 |
+
except ImportError as e:
|
18 |
+
print(f"⚠️ LangChain基本ライブラリが利用できません: {e}")
|
19 |
+
|
20 |
+
try:
|
21 |
+
from OpenAITools.FetchTools import fetch_clinical_trials
|
22 |
from OpenAITools.CrinicalTrialTools import SimpleClinicalTrialAgent, GraderAgent, LLMTranslator, generate_ex_question_English
|
23 |
+
if LANGCHAIN_AVAILABLE:
|
24 |
+
FULL_VERSION = True
|
25 |
+
print("✅ 完全版モジュールが正常にロードされました")
|
26 |
except ImportError as e:
|
27 |
print(f"⚠️ 完全版モジュールのインポートに失敗: {e}")
|
28 |
print("軽量版モードで動作します")
|
|
|
29 |
|
30 |
# 環境変数チェック
|
31 |
def check_environment():
|
|
|
67 |
# エージェント初期化
|
68 |
translator, CriteriaCheckAgent, grader_agent = safe_init_agents()
|
69 |
|
70 |
+
# 基本的なClinicalTrials.gov API呼び出し(軽量版)
|
71 |
+
def fetch_clinical_trials_basic(cancer_name):
    """Fetch recruiting trials located in Japan for ``cancer_name`` using only ``requests``.

    Queries the ClinicalTrials.gov v2 ``/studies`` endpoint and flattens each
    returned study into one row.

    Args:
        cancer_name: Free-text tumor/cancer name inserted into the search
            expression.

    Returns:
        A ``pandas.DataFrame`` with the columns ``NCTID``, ``Title``,
        ``Primary Completion Date``, ``Cancer``, ``Summary``,
        ``Japanes Locations`` and ``Eligibility Criteria``. An empty
        DataFrame is returned when the HTTP status is not 200 or when any
        error occurs; errors are printed, never raised, so the UI callback
        that calls this function keeps running.
    """
    search_expr = f"{cancer_name} SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)"
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 20,  # lightweight mode is capped at 20 studies
    }

    try:
        print(f"基本API呼び出し: {cancer_name}")
        # Without a timeout a stalled connection would block the request
        # handler indefinitely.
        response = requests.get(base_url, params=params, timeout=30)

        if response.status_code != 200:
            print(f"API呼び出し失敗: {response.status_code}")
            return pd.DataFrame()

        studies = response.json().get('studies', [])
        return pd.DataFrame([_study_to_row(study) for study in studies])

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall back
        # to "no results" instead of crashing the app.
        print(f"基本API呼び出しエラー: {e}")
        return pd.DataFrame()


def _study_to_row(study):
    """Flatten one study record from the API response into a result-table row.

    Every module is read with ``.get`` so that a study lacking an optional
    section yields placeholder values instead of raising ``KeyError`` and
    discarding the whole result set.
    """
    protocol = study.get('protocolSection', {})
    identification = protocol.get('identificationModule', {})
    conditions = protocol.get('conditionsModule', {}).get('conditions', ['No conditions listed'])
    locations = protocol.get('contactsLocationsModule', {}).get('locations', [])

    # De-duplicate city names while keeping first-seen order so the rendered
    # cell is stable between runs (a plain set has no defined order).
    japan_cities = list(dict.fromkeys(
        location.get('city', 'Unknown City')
        for location in locations
        if location.get('country') == 'Japan'
    ))

    return {
        "NCTID": identification.get('nctId', 'Unknown'),
        "Title": identification.get('briefTitle', 'no title'),
        "Primary Completion Date": protocol.get('statusModule', {})
        .get('primaryCompletionDateStruct', {})
        .get('date', 'Unknown Date'),
        "Cancer": ', '.join(conditions),
        "Summary": protocol.get('descriptionModule', {}).get('briefSummary', 'no summary'),
        "Japanes Locations": ', '.join(japan_cities) if japan_cities else "No Japan locations",
        "Eligibility Criteria": protocol.get('eligibilityModule', {}).get('eligibilityCriteria', 'Unknown'),
    }
|
124 |
+
|
125 |
# エラーハンドリング付きでエージェント評価を実行する関数
|
126 |
def evaluate_with_retry(agent, criteria, question, max_retries=3):
|
127 |
"""エラーハンドリング付きでエージェント評価を実行"""
|
|
|
221 |
print(f"サンプルデータ生成エラー: {e}")
|
222 |
return pd.DataFrame()
|
223 |
|
224 |
+
# 基本版データ生成関数(ClinicalTrials.gov API使用、AI評価なし)
|
225 |
+
def generate_basic_dataframe(age, sex, tumor_type, GeneMutation, Meseable, Biopsiable):
    """Build the basic-mode result table: live API search, no AI evaluation.

    Args:
        age, sex, tumor_type: Required patient inputs; if any of them is
            falsy an empty DataFrame is returned without calling the API.
        GeneMutation, Meseable, Biopsiable: Accepted so the signature matches
            ``generate_full_dataframe``; not used by this function.

    Returns:
        A ``pandas.DataFrame`` of trials from ``fetch_clinical_trials_basic``
        with placeholder ``AgentJudgment`` / ``AgentGrade`` columns added and
        the columns arranged in display order. An empty DataFrame is returned
        when inputs are missing, no trials are found, or any error occurs.
    """
    try:
        required_inputs = (age, sex, tumor_type)
        if not all(required_inputs):
            return pd.DataFrame()

        # Live lookup against ClinicalTrials.gov.
        trials = fetch_clinical_trials_basic(tumor_type)
        if trials.empty:
            print("臨床試験データが見つかりませんでした")
            return pd.DataFrame()

        # Placeholders standing in for the AI judgment that basic mode lacks.
        placeholder_judgment = f'基本版:{age}歳{sex}の{tumor_type}患者への詳細評価にはAI機能が必要です'
        trials['AgentJudgment'] = placeholder_judgment
        trials['AgentGrade'] = 'unclear'

        # Preferred display order; only columns actually present are kept.
        preferred_order = (
            'NCTID', 'AgentGrade', 'Title', 'AgentJudgment', 'Japanes Locations',
            'Primary Completion Date', 'Cancer', 'Summary', 'Eligibility Criteria',
        )
        present = [name for name in preferred_order if name in trials.columns]
        trials = trials[present]

        print(f"基本版評価完了。結果: {len(trials)} 件")
        return trials

    except Exception as e:
        # UI boundary: log the failure with its traceback and return "no data".
        print(f"基本版データフレーム生成中に予期しないエラー: {e}")
        traceback.print_exc()
        return pd.DataFrame()
|
257 |
+
|
258 |
# 完全版データ生成関数
|
259 |
def generate_full_dataframe(age, sex, tumor_type, GeneMutation, Meseable, Biopsiable):
|
260 |
+
"""完全版のデータ生成(実際のAPI使用 + AI評価)"""
|
261 |
try:
|
|
|
262 |
if not all([age, sex, tumor_type]):
|
263 |
return pd.DataFrame()
|
264 |
|
|
|
326 |
available_columns = [col for col in columns_order if col in df.columns]
|
327 |
df = df[available_columns]
|
328 |
|
329 |
+
print(f"完全版評価完了。結果: {len(df)} 件")
|
330 |
return df
|
331 |
|
332 |
except Exception as e:
|
|
|
357 |
print(f"CSV保存エラー: {e}")
|
358 |
return None
|
359 |
|
360 |
+
# システム状態の確認
|
361 |
+
def get_system_status():
    """Report the current operating mode as a ``(level, message)`` pair.

    Reads the module-level flags ``FULL_VERSION``, ``LANGCHAIN_AVAILABLE``
    and ``env_ok`` and returns two display strings: a short mode label and a
    one-line description of what that mode offers.
    """
    # NOTE(review): the basic mode is gated on LANGCHAIN_AVAILABLE although
    # fetch_clinical_trials_basic is documented as using requests only --
    # confirm this gating is intended for the minimal-dependency deployment.
    if FULL_VERSION and env_ok:
        return "🟢 完全版", "全機能が利用可能です"

    if LANGCHAIN_AVAILABLE:
        # Same label either way; only the detail depends on the environment check.
        if env_ok:
            detail = "ClinicalTrials.gov API検索が可能です(AI評価機能は制限)"
        else:
            detail = "API検索可能(環境変数要設定)"
        return "🟡 基本版", detail

    return "🔴 軽量版", "サンプルデータのみ表示"
|
371 |
+
|
372 |
# Gradioインターフェースの作成
|
373 |
with gr.Blocks(title="臨床試験適格性評価", theme=gr.themes.Soft()) as demo:
|
374 |
gr.Markdown("## 🏥 臨床試験適格性評価インターフェース")
|
375 |
|
376 |
+
# システム状態表示
|
377 |
+
status_level, status_message = get_system_status()
|
378 |
+
gr.Markdown(f"**システム状態**: {status_level} - {status_message}")
|
|
|
|
|
|
|
|
|
379 |
|
380 |
+
# 機能説明
|
381 |
+
if FULL_VERSION:
|
382 |
+
gr.Markdown("✅ **利用可能機能**: リアルタイム検索 + AI適格性評価 + データエクスポート")
|
383 |
+
elif LANGCHAIN_AVAILABLE:
|
384 |
+
gr.Markdown("🔧 **利用可能機能**: ClinicalTrials.gov検索 + 基本評価 + データエクスポート")
|
385 |
else:
|
386 |
+
gr.Markdown("📋 **利用可能機能**: サンプルデータ表示 + フィルタリング")
|
387 |
|
388 |
+
gr.Markdown("💡 **使用方法**: 患者情報を入力してボタンをクリックしてください。")
|
389 |
|
390 |
# 各種入力フィールド
|
391 |
with gr.Row():
|
|
|
412 |
|
413 |
# ボタン類
|
414 |
with gr.Row():
|
415 |
+
if FULL_VERSION:
|
416 |
+
generate_button = gr.Button("🔍 Generate Clinical Trials Data (AI評価付き)", variant="primary")
|
417 |
+
elif LANGCHAIN_AVAILABLE:
|
418 |
+
generate_button = gr.Button("📡 Generate Clinical Trials Data (基本版)", variant="primary")
|
419 |
else:
|
420 |
generate_button = gr.Button("📋 Generate Sample Data", variant="primary")
|
421 |
|
|
|
440 |
def update_dataframe_and_state(age, sex, tumor_type, gene_mutation, measurable, biopsiable):
|
441 |
"""データフレーム生成と状態更新"""
|
442 |
try:
|
443 |
+
if FULL_VERSION:
|
444 |
+
progress_text.value = "🔍 実際の臨床試験データを検索中(AI評価付き)..."
|
445 |
df = generate_full_dataframe(age, sex, tumor_type, gene_mutation, measurable, biopsiable)
|
446 |
+
elif LANGCHAIN_AVAILABLE:
|
447 |
+
progress_text.value = "📡 ClinicalTrials.govから基本データを検索中..."
|
448 |
+
df = generate_basic_dataframe(age, sex, tumor_type, gene_mutation, measurable, biopsiable)
|
449 |
else:
|
450 |
progress_text.value = "📋 サンプルデータを生成中..."
|
451 |
df = generate_sample_dataframe(age, sex, tumor_type, gene_mutation, measurable, biopsiable)
|
|
|
521 |
|
522 |
# フッター情報
|
523 |
gr.Markdown("---")
|
524 |
+
with gr.Row():
|
525 |
+
gr.Markdown("🔬 **技術情報**: ClinicalTrials.gov API, LangChain, Groq/OpenAI API使用")
|
526 |
+
gr.Markdown("📝 **依存関係状況**: " + ("LangChain利用可能" if LANGCHAIN_AVAILABLE else "基本ライブラリのみ"))
|
527 |
|
528 |
if __name__ == "__main__":
|
529 |
demo.launch(
|
requirements.txt
CHANGED
@@ -1,30 +1,5 @@
|
|
1 |
-
# Stage
|
2 |
gradio==4.36.1
|
3 |
numpy==1.21.6
|
4 |
pandas==1.3.5
|
5 |
requests==2.31.0
|
6 |
-
|
7 |
-
# LangChain ecosystem
|
8 |
-
langchain==0.1.20
|
9 |
-
langchain-community==0.0.38
|
10 |
-
langchain-core==0.1.52
|
11 |
-
langchain-openai==0.1.7
|
12 |
-
langchain-groq==0.1.5
|
13 |
-
langchain-text-splitters==0.0.1
|
14 |
-
|
15 |
-
# LLM providers
|
16 |
-
openai==1.12.0
|
17 |
-
groq==0.4.2
|
18 |
-
|
19 |
-
# Database utilities
|
20 |
-
SQLAlchemy==2.0.23
|
21 |
-
|
22 |
-
# Pydantic
|
23 |
-
pydantic==2.5.3
|
24 |
-
|
25 |
-
# Text processing
|
26 |
-
tiktoken==0.5.2
|
27 |
-
|
28 |
-
# Utilities
|
29 |
-
tenacity==8.2.3
|
30 |
-
packaging==23.0.0
|
|
|
1 |
+
# Stage 3A: 最小限依存関係(段階的デプロイ用)
|
2 |
gradio==4.36.1
|
3 |
numpy==1.21.6
|
4 |
pandas==1.3.5
|
5 |
requests==2.31.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements_stage3b.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Stage 3B: LangChain追加版(段階的デプロイ用)
|
2 |
+
gradio==4.36.1
|
3 |
+
numpy==1.21.6
|
4 |
+
pandas==1.3.5
|
5 |
+
requests==2.31.0
|
6 |
+
|
7 |
+
# LangChain ecosystem(最新安定版)
|
8 |
+
langchain==0.2.16
|
9 |
+
langchain-community==0.2.16
|
10 |
+
langchain-core==0.2.38
|
11 |
+
langchain-openai==0.1.23
|
12 |
+
langchain-groq==0.1.9
|
13 |
+
|
14 |
+
# LLM providers
|
15 |
+
openai>=1.0.0,<2.0.0
|
16 |
+
groq>=0.4.0
|
17 |
+
|
18 |
+
# Database utilities
|
19 |
+
SQLAlchemy>=2.0.0,<3.0.0
|
20 |
+
|
21 |
+
# Pydantic(LangChain互換性のため)
|
22 |
+
pydantic>=2.5.0,<3.0.0
|
23 |
+
|
24 |
+
# Text processing
|
25 |
+
tiktoken>=0.4.0
|
26 |
+
|
27 |
+
# Utilities
|
28 |
+
tenacity>=8.0.0
|