{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/satoc/miniforge3/envs/gradio/lib/python3.12/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:13: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", " from tqdm.autonotebook import tqdm, trange\n" ] } ], "source": [ "from sentence_transformers import SentenceTransformer, util\n", "import pandas as pd\n", "import re" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "# Separators between target terms: comma, newline, 、 and ・ as single characters,\n",
 "# plus the conjunctions 及び / および / 又は / または matched as whole words.\n",
 "_TARGET_SEPARATORS = re.compile(r'(?:[,\\n、・]|及び|および|又は|または)+')\n",
 "\n",
 "\n",
 "def split_target(target):\n",
 "    \"\"\"Split a free-text Target value into a list of trimmed terms.\n",
 "\n",
 "    Args:\n",
 "        target: Raw text of the Target column. Non-string values such as\n",
 "            NaN or None are treated as containing no terms.\n",
 "\n",
 "    Returns:\n",
 "        list[str]: The non-empty terms with surrounding whitespace removed.\n",
 "    \"\"\"\n",
 "    # pandas represents missing cells as float NaN, which re.split cannot handle.\n",
 "    if not isinstance(target, str):\n",
 "        return []\n",
 "    # The conjunctions must be alternatives, not members of a character class:\n",
 "    # a class also splits on every single び, お, よ, は, ま, た inside ordinary\n",
 "    # words (e.g. 初発びまん性神経膠腫 was cut into 初発 / ん性神経膠腫).\n",
 "    terms = (term.strip() for term in _TARGET_SEPARATORS.split(target))\n",
 "    return [term for term in terms if term]" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
 "basedf = pd.read_csv('../ClinicalTrialCSV/JRCT20241202Cancer.csv', index_col=0)\n",
 "# Keep only the rows that have a value in the trial-phase column\n",
 "basedf = basedf.dropna(subset=['試験等のフェーズ'])\n",
 "# Split the Target column and store the resulting terms in a new TargetWord column\n",
 "basedf['TargetWord'] = basedf['Target'].apply(split_target)" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5ae79a392bff4b6284636dc3fae01e9e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "modules.json: 0%| | 0.00/229 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d77922c7458f4888b48b689b9e150b1d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config_sentence_transformers.json: 0%| | 0.00/201 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "833ce903e16b40dfb4ef29a71c56f888", "version_major": 2, "version_minor": 0 },
"text/plain": [ "README.md: 0%| | 0.00/9.63k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "de2121e1cd8045aabfbc47e200f069be", "version_major": 2, "version_minor": 0 }, "text/plain": [ "sentence_bert_config.json: 0%| | 0.00/54.0 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8f4b2b0060b2464282008cb21b24e77e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/975 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7b02beb2a3624815b7a19c6f728ad3d8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "configuration_retrieva_bert.py: 0%| | 0.00/6.56k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "A new version of the following files was downloaded from https://huggingface.co/pkshatech/RoSEtta-base:\n", "- configuration_retrieva_bert.py\n", ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3633bdeaf5bc4c389cdae1c0c3ce75a8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "retrieva_modeling.py: 0%| | 0.00/72.9k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "A new version of the following files was downloaded from https://huggingface.co/pkshatech/RoSEtta-base:\n", "- retrieva_modeling.py\n", ". Make sure to double-check they do not contain any added malicious code. 
To avoid downloading new versions of the code file, you can pin a revision.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "54456e42adbb4d4a834a2faa94beab6c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/762M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "aeb5faf6c37640f787095938c707356c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/2.08k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7844a7d1ac894faead53ab8bdb3a61d0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/6.41M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c793b2ab45f84faa8386abc875b0c153", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/1.07k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "932a623910b146e2b7bbf01bc8262c9d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "1_Pooling/config.json: 0%| | 0.00/296 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Load the sentence-embedding model\n", "# NOTE(review): trust_remote_code=True runs code fetched from the model repository;\n", "# the download log of this cell recommends pinning a revision so that new versions\n", "# of that code are not picked up silently.\n", "model = SentenceTransformer(\"pkshatech/RoSEtta-base\", trust_remote_code=True)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "# Query settings\n", "# Alternative query kept from exploration (breast cancer):\n", "#query = \"乳がん\"\n", "# Active query (glioma)\n", "query = \"神経膠腫\"\n", "# NOTE(review): presumably the similarity cut-off applied by a later cell - confirm\n", "threshold = 0.8\n", "# Alternative threshold kept from exploration:\n", "#threshold = 0.675\n", "# Encode the query text; convert_to_tensor=True requests a tensor result\n", "# NOTE(review): the RoSEtta-base model card reportedly asks for a 'query: ' /\n", "# 'passage: ' prefix on inputs - confirm whether one is needed here\n", "query_vec = model.encode(query, convert_to_tensor=True)\n" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | JRCT ID | \n", "NCT No | \n", "JapicCTI No | \n", "Title | \n", "Target | \n", "研究・治験の目的 | \n", "試験等のフェーズ | \n", "試験の種類 | \n", "無作為化 | \n", "盲検化 | \n", "... | \n", "purpose | \n", "Inclusion Criteria | \n", "Exclusion Criteria | \n", "Age Minimum | \n", "Age Maximum | \n", "Gender | \n", "Discontinuation Criteria | \n", "Keyword | \n", "Intervention(s) | \n", "TargetWord | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
72 | \n", "jRCT2051240141 | \n", "NCT05580562 | \n", "NaN | \n", "新たに診断され放射線療法を完了したH3 K27M変異を有するびまん性神経膠腫の治療のためのO... | \n", "H3 K27M 変異を有する初発びまん性神経膠腫 | \n", "H3 K27M変異びまん性神経膠腫を有する被験者を対象に、放射線療法後に投与されたONC20... | \n", "3 | \n", "NaN | \n", "無作為化比較 | \n", "二重盲検 | \n", "... | \n", "treatment purpose | \n", "1. Able to understand the study procedures and... | \n", "1. Primary spinal tumor.\\r\\n2. Diffuse intrins... | \n", "No limit | \n", "No limit | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Participants will be randomized at baseline in... | \n", "[H3 K27M 変異を有する初発, ん性神経膠腫] | \n", "
103 | \n", "jRCT2051240121 | \n", "NCT06413706 | \n", "NaN | \n", "放射線療法後の悪性神経膠腫の小児および若年成人を対象に、アベマシクリブ+テモゾロミドとテモゾ... | \n", "悪性神経膠腫 | \n", "放射線療法後の悪性神経膠腫を有する小児および若年成人を対象に、アベマシクリブとテモゾロミドの... | \n", "2 | \n", "NaN | \n", "無作為化比較 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "Subjects required to meet all the folloiwng cr... | \n", "Patients who meets any of the following criter... | \n", "No limit | \n", "21age old not | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Drug: Abemaciclib\\r\\nAdministered orally\\r\\nOt... | \n", "[悪性神経膠腫] | \n", "
224 | \n", "jRCT2031240090 | \n", "NaN | \n", "NaN | \n", "再発悪性神経膠腫に対する治療用放射性薬剤64Cu-ATSMの有効性を検証するランダム化比較医... | \n", "悪性神経膠腫 | \n", "再発・難治性悪性神経膠腫を対象として、64Cu-diacetyl-bis(N4-methyl... | \n", "3 | \n", "NaN | \n", "無作為化比較 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "1) Histologically diagnosed as high grade glio... | \n", "1) Have a history or merger of other malignanc... | \n", "18age old over | \n", "75age old under | \n", "Both | \n", "NaN | \n", "glioblastoma, grade3/4 astrocytoma, grade3 oli... | \n", "Group A: BPC Therapy\\r\\nDepending on the patie... | \n", "[悪性神経膠腫] | \n", "
473 | \n", "jRCT2051230069 | \n", "NaN | \n", "NaN | \n", "神経膠腫患者に対するロムスチン療法とプロカルバジン、ロムスチン、ビンクリスチン併⽤療法の安全... | \n", "神経膠腫 | \n", "神経膠腫患者におけるロムスチン療法とプロカルバジン、ロムスチン、ビンクリスチン併⽤療法の安全... | \n", "1 | \n", "NaN | \n", "単一群 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "<Cohort 1>\\r\\n All of the following items shal... | \n", "<Common to Cohort 1 and Cohort 2>\\r\\n1) Active... | \n", "18age old over | \n", "No limit | \n", "Both | \n", "NaN | \n", "NaN | \n", "<cohort1>\\r\\nLomustine 130 mg/m2 orally every ... | \n", "[神経膠腫] | \n", "
549 | \n", "jRCT2031230007 | \n", "NaN | \n", "NaN | \n", "BRAF融合遺伝子陽性の進行・再発の低悪性度神経膠腫または膵癌に対するビニメチニブの第Ⅱ相医... | \n", "低悪性度神経膠腫、膵癌 | \n", "BRAF融合遺伝子または遺伝子再構成陽性の切除不能または再発の低悪性度神経膠腫(コホート A... | \n", "2 | \n", "NaN | \n", "単一群 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "Inclusion criteria for both cohort A and B\\r\\n... | \n", "1) Active double primary cancer (but not (1)-(... | \n", "12age old over | \n", "No limit | \n", "Both | \n", "NaN | \n", "BRAF fusion gene, BRAF rearrangement, low-grad... | \n", "Binimetinib is administered 45 mg orally, twic... | \n", "[低悪性度神経膠腫, 膵癌] | \n", "
875 | \n", "jRCT2031210299 | \n", "NaN | \n", "NaN | \n", "再発悪性神経膠腫患者を対象としたDSP-0390の第1相試験 | \n", "再発悪性神経膠腫 | \n", "再発悪性神経膠腫患者を対象にDSP-0390を経口投与したときの安全性、薬物動態、薬力学及び... | \n", "1 | \n", "NaN | \n", "単一群 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "1.\\tEstimated life expectancy >= 3 months\\r\\n2... | \n", "1.\\tPrior therapy with bevacizumab or other an... | \n", "18age old over | \n", "No limit | \n", "Both | \n", "NaN | \n", "NaN | \n", "Patients will receive DSP-0390 orally once dai... | \n", "[再発悪性神経膠腫] | \n", "
981 | \n", "jRCT2031200153 | \n", "NaN | \n", "NaN | \n", "Cellm-001による初発膠芽腫治療効果無作為比較対照試験 | \n", "膠芽腫 | \n", "脳腫瘍のうち、初発悪性膠芽腫に対する自家脳腫瘍免疫賦活剤であるCellm-001について、術... | \n", "3 | \n", "NaN | \n", "無作為化比較 | \n", "二重盲検 | \n", "... | \n", "treatment purpose | \n", "(1) 18 years old or older and 75 years old or ... | \n", "(1) Systemic administration of corticosteroids... | \n", "18age old over | \n", "75age old under | \n", "Both | \n", "NaN | \n", "NaN | \n", "Inject the investigational drug (Cellm-001 or ... | \n", "[膠芽腫] | \n", "
7 rows × 39 columns
\n", "\n", " | JRCT ID | \n", "NCT No | \n", "JapicCTI No | \n", "Title | \n", "Target | \n", "研究・治験の目的 | \n", "試験等のフェーズ | \n", "試験の種類 | \n", "無作為化 | \n", "盲検化 | \n", "... | \n", "purpose | \n", "Inclusion Criteria | \n", "Exclusion Criteria | \n", "Age Minimum | \n", "Age Maximum | \n", "Gender | \n", "Discontinuation Criteria | \n", "Keyword | \n", "Intervention(s) | \n", "TargetWord | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
103 | \n", "jRCT2051240121 | \n", "NCT06413706 | \n", "NaN | \n", "放射線療法後の悪性神経膠腫の小児および若年成人を対象に、アベマシクリブ+テモゾロミドとテモゾ... | \n", "悪性神経膠腫 | \n", "放射線療法後の悪性神経膠腫を有する小児および若年成人を対象に、アベマシクリブとテモゾロミドの... | \n", "2 | \n", "NaN | \n", "無作為化比較 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "Subjects required to meet all the folloiwng cr... | \n", "Patients who meets any of the following criter... | \n", "No limit | \n", "21age old not | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Drug: Abemaciclib\\r\\nAdministered orally\\r\\nOt... | \n", "[悪性神経膠腫] | \n", "
224 | \n", "jRCT2031240090 | \n", "NaN | \n", "NaN | \n", "再発悪性神経膠腫に対する治療用放射性薬剤64Cu-ATSMの有効性を検証するランダム化比較医... | \n", "悪性神経膠腫 | \n", "再発・難治性悪性神経膠腫を対象として、64Cu-diacetyl-bis(N4-methyl... | \n", "3 | \n", "NaN | \n", "無作為化比較 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "1) Histologically diagnosed as high grade glio... | \n", "1) Have a history or merger of other malignanc... | \n", "18age old over | \n", "75age old under | \n", "Both | \n", "NaN | \n", "glioblastoma, grade3/4 astrocytoma, grade3 oli... | \n", "Group A: BPC Therapy\\r\\nDepending on the patie... | \n", "[悪性神経膠腫] | \n", "
473 | \n", "jRCT2051230069 | \n", "NaN | \n", "NaN | \n", "神経膠腫患者に対するロムスチン療法とプロカルバジン、ロムスチン、ビンクリスチン併⽤療法の安全... | \n", "神経膠腫 | \n", "神経膠腫患者におけるロムスチン療法とプロカルバジン、ロムスチン、ビンクリスチン併⽤療法の安全... | \n", "1 | \n", "NaN | \n", "単一群 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "<Cohort 1>\\r\\n All of the following items shal... | \n", "<Common to Cohort 1 and Cohort 2>\\r\\n1) Active... | \n", "18age old over | \n", "No limit | \n", "Both | \n", "NaN | \n", "NaN | \n", "<cohort1>\\r\\nLomustine 130 mg/m2 orally every ... | \n", "[神経膠腫] | \n", "
875 | \n", "jRCT2031210299 | \n", "NaN | \n", "NaN | \n", "再発悪性神経膠腫患者を対象としたDSP-0390の第1相試験 | \n", "再発悪性神経膠腫 | \n", "再発悪性神経膠腫患者を対象にDSP-0390を経口投与したときの安全性、薬物動態、薬力学及び... | \n", "1 | \n", "NaN | \n", "単一群 | \n", "非盲検 | \n", "... | \n", "treatment purpose | \n", "1.\\tEstimated life expectancy >= 3 months\\r\\n2... | \n", "1.\\tPrior therapy with bevacizumab or other an... | \n", "18age old over | \n", "No limit | \n", "Both | \n", "NaN | \n", "NaN | \n", "Patients will receive DSP-0390 orally once dai... | \n", "[再発悪性神経膠腫] | \n", "
981 | \n", "jRCT2031200153 | \n", "NaN | \n", "NaN | \n", "Cellm-001による初発膠芽腫治療効果無作為比較対照試験 | \n", "膠芽腫 | \n", "脳腫瘍のうち、初発悪性膠芽腫に対する自家脳腫瘍免疫賦活剤であるCellm-001について、術... | \n", "3 | \n", "NaN | \n", "無作為化比較 | \n", "二重盲検 | \n", "... | \n", "treatment purpose | \n", "(1) 18 years old or older and 75 years old or ... | \n", "(1) Systemic administration of corticosteroids... | \n", "18age old over | \n", "75age old under | \n", "Both | \n", "NaN | \n", "NaN | \n", "Inject the investigational drug (Cellm-001 or ... | \n", "[膠芽腫] | \n", "
5 rows × 39 columns
\n", "