In [1]:
import ast

import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util

from OpenAITools.JRCTTools import DfPostProcess, get_matched_df

  from tqdm.autonotebook import tqdm, trange


In [2]:
# Phrases handed to DfPostProcess as its first argument.
# NOTE(review): presumably qualifier terms to be separated from the disease
# name -- confirm against DfPostProcess.
exclusive_words = [
    "triple negative",
    "double positive",
    "Malignant",
    "recurrent",
    "IDH wild-type",
    "High-Grade",
    "Low-grade",
]

# Source CSV; its first column is used as the row index.
csv_loc = '../ClinicalTrialCSV/JRCT20241215Cancer.csv'
df = pd.read_csv(
    csv_loc,
    index_col=0,
)

# Load the sentence-embedding model.
model = SentenceTransformer(
    'pritamdeka/S-PubMedBert-MS-MARCO'
)




In [4]:
# Post-process the frame loaded above. An earlier variant of this cell passed
# csv_loc=csv_loc instead of dataframe=df.
processed_df = DfPostProcess(
    exclusive_words,
    model,
    dataframe=df,
)

In [4]:
# Write the post-processed frame to a CSV in the same directory as the input.
processed_df.to_csv(
    "../ClinicalTrialCSV/JRCT20241215CancerPost.csv"
)

In [16]:
def _parse_target_vecs(raw, like):
    """Turn one ``TargetVec`` cell into a tensor matching ``like``.

    Args:
        raw: The cell value. Either the text of a Python list (as produced
            when the frame has been written to CSV and read back), a nested
            list of numbers, or a tensor.
        like: Reference tensor; the result is cast to its dtype and device.

    Returns:
        The vectors as a tensor, or ``None`` when the cell holds nothing
        usable (``None``, NaN, a blank string, or an empty list).
    """
    # NaN is the only float that is not equal to itself.
    if raw is None or (isinstance(raw, float) and raw != raw):
        return None

    if isinstance(raw, str):
        if not raw.strip():
            return None
        # literal_eval only accepts Python literals, so the text is parsed
        # without executing arbitrary code.
        raw = ast.literal_eval(raw)

    vecs = torch.as_tensor(raw, dtype=like.dtype, device=like.device)
    if vecs.numel() == 0:
        return None
    return vecs


def get_matched_df(basedf, query, model, threshold=0.5):
    """Select the rows of ``basedf`` whose target vectors resemble ``query``.

    NOTE(review): this definition shadows the ``get_matched_df`` imported from
    ``OpenAITools.JRCTTools`` in the notebook's import cell.

    Args:
        basedf: DataFrame with a ``TargetVec`` column. Each cell holds the
            embedding vectors of one row, either as a nested list / tensor or
            as the string form of such a list.
        query: Text to embed and compare against every row.
        model: Object exposing ``encode(query, convert_to_tensor=True)`` that
            returns a tensor.
        threshold: Minimum cosine similarity for a row to count as a match.

    Returns:
        The rows of ``basedf`` (original order and index labels) for which at
        least one target vector has cosine similarity >= ``threshold``.
        Rows with a missing or empty ``TargetVec`` never match.

    Raises:
        KeyError: If ``basedf`` has no ``TargetVec`` column.
    """
    # Embed the query once and keep it on the CPU for the comparisons below.
    query_vec = model.encode(query, convert_to_tensor=True).cpu()

    matched_positions = []
    for position, raw in enumerate(basedf['TargetVec']):
        target_vecs = _parse_target_vecs(raw, query_vec)
        if target_vecs is None:
            continue

        # One similarity per target vector; a single hit is enough.
        cosine_scores = util.cos_sim(query_vec, target_vecs)
        if bool((cosine_scores >= threshold).any()):
            matched_positions.append(position)

    # Positions come from enumerate(), so select positionally, not by label.
    return basedf.iloc[matched_positions]

In [17]:
# Show the TargetVec cell of the row labelled 0.
processed_df.loc[0, 'TargetVec']

[[-0.12602144479751587,
  -0.7141546607017517,
  -0.4860573410987854,
  -0.6787610650062561,
  -0.5019499659538269,
  -0.13430769741535187,
  -0.6577554941177368,
  0.6076976656913757,
  0.12889647483825684,
  0.46750736236572266,
  0.018368367105722427,
  0.43061959743499756,
  0.1905517727136612,
  -0.32873567938804626,
  -0.43094030022621155,
  -0.21434563398361206,
  0.07665695995092392,
  0.27581971883773804,
  0.5334398150444031,
  0.1360682100057602,
  0.42274555563926697,
  0.310781329870224,
  -0.09085263311862946,
  -0.13402250409126282,
  -0.7091479301452637,
  0.3295823633670807,
  -0.017893081530928612,
  0.14529234170913696,
  0.33790770173072815,
  -0.5387213230133057,
  0.1965356171131134,
  0.43226656317710876,
  0.08384066075086594,
  1.2179083824157715,
  -0.1406896710395813,
  -0.19285115599632263,
  0.2328663468360901,
  -0.2901802659034729,
  0.21684598922729492,
  0.08382049202919006,
  0.8460990190505981,
  -0.24980926513671875,
  0.25551164150238037,
  0.210331

In [18]:
# Rows with a target vector whose cosine similarity to "breast cancer" is >= 0.925.
get_matched_df(
    processed_df,
    "breast cancer",
    model,
    threshold=0.925,
)

Unnamed: 0,JRCT ID,NCT No,JapicCTI No,Title,TargetJ,Target,TargetEnglish,研究・治験の目的,試験等のフェーズ,試験の種類,...,Inclusion Criteria,Exclusion Criteria,Age Minimum,Age Maximum,Gender,Discontinuation Criteria,Keyword,Intervention(s),TargetWord,TargetVec
31,jRCT2061240081,NCT06393374,,術前薬物療法後の手術時に病理学的完全奏効を達成していないトリプルネガティブ乳癌患者を対象に、...,トリプルネガティブ乳癌,トリプルネガティブ乳癌,Triple negative breast cancer,術前薬物療法後の手術時に病理学的完全奏効を達成していないトリプルネガティブ乳癌患者を対象に、...,3,,...,"-Has centrally confirmed TNBC, as defined by t...",-Has a known germline breast cancer gene (BRCA...,18age old over,No limit,Both,,,-Arm 1: MK-2870 4mg/kg intravenous (IV) every ...,"[Triple Negative, Breast Cancer]","[[-0.21876227855682373, -0.4577382504940033, -..."
32,jRCT2031240476,NCT06312176,,ホルモン受容体陽性・ヒト上皮成長因子受容体2陰性（HR+/HER2-）の切除不能な局所進行又...,HR+/HER2-の切除不能局所進行又は転移性乳癌の患者,HR+/HER2-の切除不能局所進行又は転移性乳癌の患者,HR+/HER2 unresectable locally advanced or meta...,HR+/HER2-の切除不能な局所進行又は転移性乳癌患者を対象にMK-2870の単剤及びペム...,3,,...,-Has unresectable locally advanced or metastat...,-Has breast cancer amenable to treatment with ...,18age old over,No limit,Both,,,Arm A:MK-2870 Participants receive 4 mg/kg of ...,"[Hr+/Her2 Unresectable Locally Advanced, Metas...","[[0.04888693988323212, -0.7584289908409119, -0..."
157,jRCT2052240059,,,遺伝子HSD17B4高メチル化を有するHER2陽性ER陰性乳癌における非手術療法の有用性を評...,乳がん,乳がん,Breast cancer,HSD17B4高メチル化(HSD17B4 hypermethylation:HH)を有するH...,2,,...,1. Histologically confirmed invasive breast ca...,1. History of other malignancy within the last...,20age old over,No limit,Female,,,Omitting breast surgery after preoperative che...,[Breast Cancer],"[[-0.09860841184854507, -0.629493236541748, -0..."
170,jRCT2031240096,NCT06380751,,本治験の主要目的は、BRCA1、BRCA2 又は PALB2 変異を有し、HR 陽性、HER...,進行乳癌,進行乳癌,Advanced Breast Cancer,Treatment,3,,...,"- Adult females, pre/peri-menopausal and/or po...",- Participants with history of MDS/AML or with...,18age old over,No limit,Both,,,Experimental: Arm 1: saruparib (AZD5305) plus ...,[Advanced Breast Cancer],"[[-0.21163925528526306, -0.5019860863685608, -..."
209,jRCT2031230750,NCT06188559,,治療歴のある HER2 陽性又は HER2 低発現の切除不能又は転移性乳癌の被験者を対象に，...,HER2 陽性又は HER2 低発現の切除不能又は転移性乳癌,HER2 陽性又は HER2 低発現の切除不能又は転移性乳癌,HER2-positive or HER2-low unresectable or meta...,治療,2,,...,"(1) Male or female, aged >=18 years at the tim...","(1) Presence of brain or subdural metastases, ...",18age old over,No limit,Both,,,Generic Name:NA Study Treatment in dose optimi...,"[Her2-Positive, Her2-Low Unresectable, Metasta...","[[-0.08892004191875458, -0.8978654146194458, -..."
215,jRCT2031230723,NCT06112379,,未治療のトリプルネガティブ又はホルモン受容体低発現／HER2陰性乳癌の成人患者を対象として、...,乳癌,乳癌,Breast Cancer,未治療のトリプルネガティブ又はホルモン受容体低発現/HER2陰性乳癌の成人患者を対象として、...,3,,...,1. Participant must be >= 18 years at the time...,"1. As judged by the investigator, any evidence...",18age old over,No limit,Both,,,- Experimental arm: Dato-DXd plus durvalumab n...,[Breast Cancer],"[[-0.09860841184854507, -0.629493236541748, -0..."
224,jRCT2051230196,NCT06105632,,CDK4/6 阻害薬による前治療で進行したホルモン受容体陽性HER2 陰性の進行または転移乳...,ホルモン受容体（HR）陽性ヒト上皮増殖因子受容体2（HER2）陰性の進行または転移乳癌,ホルモン受容体(HR)陽性ヒト上皮増殖因子受容体2(HER2)陰性の進行または転移乳癌,"HR-positive, HER2-negative Advanced or Metasta...",CDK4/6阻害薬による前治療で進行したHR陽性HER2陰性の進行または転移乳癌患者を対象に...,3,,...,* Histological confirmation of breast cancer w...,* Any medical or psychiatric condition that ma...,18age old over,No limit,Both,,,Arm A (Experimental Arm) PF-07220060 plus Fulv...,"[Hr-Positive, Her2-Negative Advanced, Metastat...","[[-0.083187997341156, -0.7571865916252136, -0...."
234,jRCT2031230677,NCT06206837,,18歳以上のER陽性／HER2陰性の進行または転移乳癌患者を対象として，経口タンパク質分解誘...,エストロゲン受容体陽性／ヒト上皮増殖因子受容体 2 陰性の進行または転移乳癌,エストロゲン受容体陽性/ヒト上皮増殖因子受容体 2 陰性の進行または転移乳癌,ER+/HER2- advanced or metastatic breast cancer,"進行乳癌患者を対象として,vepdegestrantとPF-07220060を併用投与したと...",1-2,,...,Inclusion Criteria: * Histological or cytologi...,Exclusion Criteria: * visceral crisis at risk ...,18age old over,No limit,Both,,,Drug: vepdegestrant Daily oral dosages of vepd...,"[Er+/Her2- Advanced, Metastatic Breast Cancer]","[[-0.154923215508461, -0.39443543553352356, -0..."
240,jRCT2061230102,NCT06103864,,Programmed death-ligand（PD-L1）陽性の局所再発手術不能又は転移性...,乳癌,乳癌,Breast Cancer,PD-L1陽性の局所再発手術不能または転移性TNBC患者を対象に、デュルバルマブ併用または非...,3,,...,Histologically or cytologically documented loc...,"As judged by investigator, severe or uncontrol...",18age old over,No limit,Both,,,Arm 1: Dato-DXd + durvalumab Arm 2: Investigat...,[Breast Cancer],"[[-0.09860841184854507, -0.629493236541748, -0..."
307,jRCT2061230074,NCT05952557,,根治的局所治療（化学療法の併用または非併用）を受けて疾患の兆候のない、再発リスクが中間～高リ...,乳がん、早期乳がん,乳がん、早期乳がん,"Breast Cancer, Early Breast Cancer",Treatment,3,,...,- Women and Men; 18 years or more at the time ...,- Inoperable locally advanced or metastatic br...,18age old over,No limit,,,,arm A: continue with SoC ET as directed by inv...,"[Breast Cancer, Early Breast Cancer]","[[-0.09860841184854507, -0.629493236541748, -0..."


In [19]:
# Rows with a target vector whose cosine similarity to "glioma" is >= 0.92.
get_matched_df(
    processed_df,
    "glioma",
    model,
    threshold=0.92,
)

Unnamed: 0,JRCT ID,NCT No,JapicCTI No,Title,TargetJ,Target,TargetEnglish,研究・治験の目的,試験等のフェーズ,試験の種類,...,Inclusion Criteria,Exclusion Criteria,Age Minimum,Age Maximum,Gender,Discontinuation Criteria,Keyword,Intervention(s),TargetWord,TargetVec
82,jRCT2051240121,NCT06413706,,放射線療法後の悪性神経膠腫の小児および若年成人を対象に、アベマシクリブ＋テモゾロミドとテモゾ...,悪性神経膠腫,悪性神経膠腫,High-Grade Glioma,放射線療法後の悪性神経膠腫を有する小児および若年成人を対象に、アベマシクリブとテモゾロミドの...,2,,...,Subjects required to meet all the folloiwng cr...,Patients who meets any of the following criter...,No limit,21age old not,,,,Drug: Abemaciclib Administered orally Other Na...,[High-Grade Glioma],"[[-0.12244764715433121, -0.5778073668479919, -..."
87,jRCT2051240119,NCT03423628,,多形性膠芽腫患者及び固形がんの脳転移患者を対象に、放射線療法併用下でAZD1390を漸増投与...,再発多形性膠芽腫,再発多形性膠芽腫,Recurrent Glioblastoma Multiforme,Treatment,1,,...,- Provision of formalin-fixed paraffin embedde...,- Administration of chemotherapy or any invest...,18age old over,130age old under,Both,,,AZD1390 + Radiation Therapy Drug: AZD1390 - AZ...,"[Recurrent, Glioblastoma Multif, Me]","[[-0.2750183045864105, -0.5264511704444885, -0..."
175,jRCT2031240090,,,再発悪性神経膠腫に対する治療用放射性薬剤64Cu-ATSMの有効性を検証するランダム化比較医...,悪性神経膠腫,悪性神経膠腫,Malignant glioma,再発・難治性悪性神経膠腫を対象として、64Cu-diacetyl-bis(N4-methyl...,3,,...,1) Histologically diagnosed as high grade glio...,1) Have a history or merger of other malignanc...,18age old over,75age old under,Both,,"glioblastoma, grade3/4 astrocytoma, grade3 oli...",Group A: BPC Therapy Depending on the patient'...,[Malignant Glioma],"[[-0.33767956495285034, -0.4648125171661377, -..."
263,jRCT2041230136,,,再発膠芽腫に対するTUG1を標的とした核酸医薬医師主導第I相試験,再発膠芽腫,再発膠芽腫,recurrent glioblastoma,再発膠芽腫患者を対象としてTUG1ASOを投与した場合の投与時の安全性を検討し、至適投与量を...,1,,...,(1) Patients with a histological diagnosis of ...,(1) Patients with extracerebral metastases. (2...,18age old over,75age old under,Both,,,Dosing frequency: 1 time /week Dose Levels Lev...,"[Recurrent, Glioblastoma]","[[-0.27501848340034485, -0.5264514088630676, -..."
361,jRCT2051230069,,,神経膠腫患者に対するロムスチン療法とプロカルバジン、ロムスチン、ビンクリスチン併⽤療法の安全...,神経膠腫,神経膠腫,Glioma,神経膠腫患者におけるロムスチン療法とプロカルバジン、ロムスチン、ビンクリスチン併用療法の安全...,1,,...,<Cohort 1> All of the following items shall be...,<Common to Cohort 1 and Cohort 2> 1) Active mu...,18age old over,No limit,Both,,,<cohort1> Lomustine 130 mg/m2 orally every 6 w...,[Glioma],"[[-0.21631155908107758, -0.5687925219535828, -..."
401,jRCT2032230060,,,初発IDH野生型低悪性度神経膠腫に対する交流電場腫瘍治療システムの有効性と安全性を検証する多...,IDH野生型神経膠腫（グレードⅢ）（退形成性星細胞腫）,IDH野生型神経膠腫(グレードIII)(退形成性星細胞腫),IDH wild-type glioma (grade III) (anaplastic a...,初発IDH野生型低悪性度神経膠腫を対象に、放射線化学療法後のテモゾロミド維持療法期における交...,2,,...,"At the time of enrollment, patients will be in...",Any of the following conditions shall not appl...,18age old over,No limit,Both,,Glioma,The patient using the product should shave all...,[Idh Wild-Type Glioma (Grade Iii) (Anaplastic ...,"[[-0.3403506577014923, -0.5657796859741211, -0..."
414,jRCT2031230007,,,BRAF融合遺伝子陽性の進行・再発の低悪性度神経膠腫または膵癌に対するビニメチニブの第Ⅱ相医...,低悪性度神経膠腫、膵癌,低悪性度神経膠腫、膵癌,"Low-grade glioma, pancreatic cancer",BRAF融合遺伝子または遺伝子再構成陽性の切除不能または再発の低悪性度神経膠腫(コホート A...,2,,...,Inclusion criteria for both cohort A and B 1) ...,1) Active double primary cancer (but not (1)-(...,12age old over,No limit,Both,,"BRAF fusion gene, BRAF rearrangement, low-grad...","Binimetinib is administered 45 mg orally, twic...","[Low-Grade Glioma, Pancreatic Cancer]","[[-0.18604964017868042, -0.547483503818512, -0..."
636,jRCT2031210299,,,再発悪性神経膠腫患者を対象としたDSP-0390の第1相試験,再発悪性神経膠腫,再発悪性神経膠腫,Recurrent High-Grade Glioma,再発悪性神経膠腫患者を対象にDSP-0390を経口投与したときの安全性、薬物動態、薬力学及び...,1,,...,1. Estimated life expectancy >= 3 months 2. Re...,1. Prior therapy with bevacizumab or other ant...,18age old over,No limit,Both,,,Patients will receive DSP-0390 orally once dai...,"[Recurrent, High-Grade Glioma]","[[-0.2750183045864105, -0.5264511704444885, -0..."
712,jRCT2031200153,,,Cellm-001による初発膠芽腫治療効果無作為比較対照試験,膠芽腫,膠芽腫,glioblastoma,脳腫瘍のうち、初発悪性膠芽腫に対する自家脳腫瘍免疫賦活剤であるCellm-001について、術...,3,,...,(1) 18 years old or older and 75 years old or ...,(1) Systemic administration of corticosteroids...,18age old over,75age old under,Both,,,Inject the investigational drug (Cellm-001 or ...,[Glioblastoma],"[[-0.15113382041454315, -0.5002245306968689, -..."


In [28]:
# Same "glioma" query with the cut-off lowered to 0.9.
get_matched_df(
    processed_df,
    "glioma",
    model,
    threshold=0.9,
)

Unnamed: 0,JRCT ID,NCT No,JapicCTI No,Title,TargetJ,Target,TargetEnglish,研究・治験の目的,試験等のフェーズ,試験の種類,...,Inclusion Criteria,Exclusion Criteria,Age Minimum,Age Maximum,Gender,Discontinuation Criteria,Keyword,Intervention(s),TargetWord,TargetVec
82,jRCT2051240121,NCT06413706,,放射線療法後の悪性神経膠腫の小児および若年成人を対象に、アベマシクリブ＋テモゾロミドとテモゾ...,悪性神経膠腫,悪性神経膠腫,High-Grade Glioma,放射線療法後の悪性神経膠腫を有する小児および若年成人を対象に、アベマシクリブとテモゾロミドの...,2,,...,Subjects required to meet all the folloiwng cr...,Patients who meets any of the following criter...,No limit,21age old not,,,,Drug: Abemaciclib Administered orally Other Na...,[High-Grade Glioma],"[[tensor(-0.1224), tensor(-0.5778), tensor(-0...."
87,jRCT2051240119,NCT03423628,,多形性膠芽腫患者及び固形がんの脳転移患者を対象に、放射線療法併用下でAZD1390を漸増投与...,再発多形性膠芽腫,再発多形性膠芽腫,Recurrent Glioblastoma Multiforme,Treatment,1,,...,- Provision of formalin-fixed paraffin embedde...,- Administration of chemotherapy or any invest...,18age old over,130age old under,Both,,,AZD1390 + Radiation Therapy Drug: AZD1390 - AZ...,"[Recurrent, Glioblastoma Multif, Me]","[[tensor(-0.2750), tensor(-0.5265), tensor(-0...."
175,jRCT2031240090,,,再発悪性神経膠腫に対する治療用放射性薬剤64Cu-ATSMの有効性を検証するランダム化比較医...,悪性神経膠腫,悪性神経膠腫,Malignant glioma,再発・難治性悪性神経膠腫を対象として、64Cu-diacetyl-bis(N4-methyl...,3,,...,1) Histologically diagnosed as high grade glio...,1) Have a history or merger of other malignanc...,18age old over,75age old under,Both,,"glioblastoma, grade3/4 astrocytoma, grade3 oli...",Group A: BPC Therapy Depending on the patient'...,[Malignant Glioma],"[[tensor(-0.3377), tensor(-0.4648), tensor(-0...."
263,jRCT2041230136,,,再発膠芽腫に対するTUG1を標的とした核酸医薬医師主導第I相試験,再発膠芽腫,再発膠芽腫,recurrent glioblastoma,再発膠芽腫患者を対象としてTUG1ASOを投与した場合の投与時の安全性を検討し、至適投与量を...,1,,...,(1) Patients with a histological diagnosis of ...,(1) Patients with extracerebral metastases. (2...,18age old over,75age old under,Both,,,Dosing frequency: 1 time /week Dose Levels Lev...,"[Recurrent, Glioblastoma]","[[tensor(-0.2750), tensor(-0.5265), tensor(-0...."
272,jRCT2032230554,,,初発膠芽腫に対する新型高出力中性子線源を用いた加速器BNCT装置iBNCT001及びSPM-...,初発膠芽腫,初発膠芽腫,Newly-diagnosed glioblastoma,治験機器iBNCT001及び治験薬SPM-011を用いたホウ素中性子捕捉療法(BNCT)に、...,1,,...,- Karnofsky Performance Status is greater than...,"- Multiple lesions, bilateral lesions, dissemi...",18age old over,85age old under,Both,,"boron, neutron, glioblastoma",- Intravenous administration of investigationa...,[Newly-Diagnosed Glioblastoma],"[[tensor(-0.5050), tensor(-0.8343), tensor(-0...."
361,jRCT2051230069,,,神経膠腫患者に対するロムスチン療法とプロカルバジン、ロムスチン、ビンクリスチン併⽤療法の安全...,神経膠腫,神経膠腫,Glioma,神経膠腫患者におけるロムスチン療法とプロカルバジン、ロムスチン、ビンクリスチン併用療法の安全...,1,,...,<Cohort 1> All of the following items shall be...,<Common to Cohort 1 and Cohort 2> 1) Active mu...,18age old over,No limit,Both,,,<cohort1> Lomustine 130 mg/m2 orally every 6 w...,[Glioma],"[[tensor(-0.2163), tensor(-0.5688), tensor(-0...."
401,jRCT2032230060,,,初発IDH野生型低悪性度神経膠腫に対する交流電場腫瘍治療システムの有効性と安全性を検証する多...,IDH野生型神経膠腫（グレードⅢ）（退形成性星細胞腫）,IDH野生型神経膠腫(グレードIII)(退形成性星細胞腫),IDH wild-type glioma (grade III) (anaplastic a...,初発IDH野生型低悪性度神経膠腫を対象に、放射線化学療法後のテモゾロミド維持療法期における交...,2,,...,"At the time of enrollment, patients will be in...",Any of the following conditions shall not appl...,18age old over,No limit,Both,,Glioma,The patient using the product should shave all...,[Idh Wild-Type Glioma (Grade Iii) (Anaplastic ...,"[[tensor(-0.3404), tensor(-0.5658), tensor(-0...."
414,jRCT2031230007,,,BRAF融合遺伝子陽性の進行・再発の低悪性度神経膠腫または膵癌に対するビニメチニブの第Ⅱ相医...,低悪性度神経膠腫、膵癌,低悪性度神経膠腫、膵癌,"Low-grade glioma, pancreatic cancer",BRAF融合遺伝子または遺伝子再構成陽性の切除不能または再発の低悪性度神経膠腫(コホート A...,2,,...,Inclusion criteria for both cohort A and B 1) ...,1) Active double primary cancer (but not (1)-(...,12age old over,No limit,Both,,"BRAF fusion gene, BRAF rearrangement, low-grad...","Binimetinib is administered 45 mg orally, twic...","[Low-Grade Glioma, Pancreatic Cancer]","[[tensor(-0.1860), tensor(-0.5475), tensor(-0...."
636,jRCT2031210299,,,再発悪性神経膠腫患者を対象としたDSP-0390の第1相試験,再発悪性神経膠腫,再発悪性神経膠腫,Recurrent High-Grade Glioma,再発悪性神経膠腫患者を対象にDSP-0390を経口投与したときの安全性、薬物動態、薬力学及び...,1,,...,1. Estimated life expectancy >= 3 months 2. Re...,1. Prior therapy with bevacizumab or other ant...,18age old over,No limit,Both,,,Patients will receive DSP-0390 orally once dai...,"[Recurrent, High-Grade Glioma]","[[tensor(-0.2750), tensor(-0.5265), tensor(-0...."
712,jRCT2031200153,,,Cellm-001による初発膠芽腫治療効果無作為比較対照試験,膠芽腫,膠芽腫,glioblastoma,脳腫瘍のうち、初発悪性膠芽腫に対する自家脳腫瘍免疫賦活剤であるCellm-001について、術...,3,,...,(1) 18 years old or older and 75 years old or ...,(1) Systemic administration of corticosteroids...,18age old over,75age old under,Both,,,Inject the investigational drug (Cellm-001 or ...,[Glioblastoma],"[[tensor(-0.1511), tensor(-0.5002), tensor(-0...."
