In [1]:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

  from tqdm.autonotebook import tqdm, trange


In [76]:
# Target列を分割する関数
def split_target(target):
    # 指定された区切り文字で分割
    split_words = re.split(r'[,\n、・及びおよび又はまたは]+', target)
    # 空白文字を除外してリストとして返す
    return [word.strip() for word in split_words if word.strip()]


# Target列を分割する関数（改良後）
def split_target_English(target):
    # 区切り文字を (,) or (\n) or (、) or (・) または文字列"or" として扱う
    # 正規表現では、パイプ(|)でor条件を定義し、"(?: ... )"はグルーピングのみ行う非捕捉グループ
    # [,\n、・] はいずれかの1文字とマッチ
    # or は文字列全体とマッチ
    # 複数連続した区切り文字をまとめて1回の分割として扱うために+(1回以上)とする
    split_words = re.split(r'(?:[,\n、・]|or| and)+', target)
    
    # 空白文字を除外してリストとして返す
    return [word.strip() for word in split_words if word.strip()]

# 処理プログラム
def split_triple_negative_words(target_words):
    updated_words = []
    for word in target_words:
        if 'triple negative' in word.lower():
            # 'triple negative' の部分を追加
            updated_words.append('Triple Negative')  # 大文字で統一して追加
            # 'triple negative' を除いた残りの部分を追加
            remaining = word.lower().replace('triple negative', '').strip()
            if remaining:  # 残りの単語が存在する場合のみ追加
                updated_words.append(remaining.title().strip())  # 単語の先頭を大文字化
        else:
            updated_words.append(word.strip().title())  # 単語の先頭を大文字化
    return updated_words

class WordProcessor:
    def __init__(self, target_words):
        self.target_words = target_words

    def process(self, target_words):
        """
        入力された単語のリストを処理して、ターゲット単語に基づき分割します。
        """
        updated_words = []
        for word in target_words:
            word_lower = word.lower()
            for target in self.target_words:
                if target in word_lower:
                    # ターゲット単語を追加
                    updated_words.append(target.title())
                    # ターゲット単語を除いた残りを追加
                    remaining = word_lower.replace(target, '').strip()
                    if remaining:
                        updated_words.append(remaining.title())
                    break
            else:
                # ターゲット単語に該当しない場合
                updated_words.append(word.strip().title())
        return updated_words

    def __call__(self, target_words):
        """
        インスタンスを関数として呼び出すためのエントリポイント。
        """
        return self.process(target_words)


In [77]:
exclusive_words = ["triple negative", "double positive"]

    # インスタンス作成
processor = WordProcessor(exclusive_words)

In [78]:
basedf = pd.read_csv('../ClinicalTrialCSV/JRCT20241215Cancer.csv', index_col=0)
basedf = basedf.dropna(subset=['試験等のフェーズ'])
# Target列を分割してTargetWord列を追加
basedf['TargetWord'] = basedf['TargetEnglish'].apply(split_target_English)
# 各行のTargetWord列に処理を適用
# NaN や None の場合の処理を追加
basedf['TargetWord'] = basedf['TargetWord'].apply(lambda x: processor(x) if isinstance(x, list) else x)

In [79]:
basedf.dropna(subset=['TargetEnglish'])

Unnamed: 0,JRCT ID,NCT No,JapicCTI No,Title,TargetJ,Target,TargetEnglish,研究・治験の目的,試験等のフェーズ,試験の種類,...,purpose,Inclusion Criteria,Exclusion Criteria,Age Minimum,Age Maximum,Gender,Discontinuation Criteria,Keyword,Intervention(s),TargetWord
0,jRCT2031240552,NCT06599502,,KRAS G12D 変異陽性がん患者を対象としてAZD0022 の単剤療法及び抗がん剤との併...,以下の進行固形がん患者\n・非小細胞肺癌\n・膵管腺癌\n・結腸・直腸癌,以下の進行固形がん患者 ・非小細胞肺癌 ・膵管腺癌 ・結腸・直腸癌,Non-Small Cell Lung Cancer (NSCLC) Pancreatic ...,Treatment,1-2,,...,treatment purpose,"1. Participant must be 18 years or more, or th...",1. Any significant laboratory finding or any s...,18age old over,No limit,Both,,,AZD0022,[Non-Small Cell Lung Cancer (Nsclc) Pancreatic...
3,jRCT2031240547,,,FGFR2b陽性切除不能進行・再発の胃または食道胃接合部腺癌を対象としたBemarituzu...,切除不能進行・再発の胃または食道胃接合部腺癌,切除不能進行・再発の胃または食道胃接合部腺癌,Advanced Gastric or Gastroesophageal Junction ...,フッ化ピリミジン系薬剤に不応または不耐となったFGFR2b陽性の切除不能進行・再発の胃腺癌又...,2,,...,treatment purpose,1) Histologically documented gastric or GEJ ad...,1) Prior treatment with taxanes 2) Prior treat...,18age old over,No limit,Both,,,"- Bemarituzumab (15mg/kg, intravenous, Day1, 1...","[Advanced Gastric, Gastroesophageal Junction C..."
5,jRCT2031240542,,,自家造血幹細胞移植非適応の初発多発性骨髄腫患者を対象としてベランタマブ マホドチンとレナリド...,多発性骨髄腫,多発性骨髄腫,Multiple Myeloma,自家造血幹細胞移植非適応の初発多発性骨髄腫を対象にGSK2857916、レナリドミド 及び ...,3,,...,treatment purpose,1. Is at least 18 or the legal age of consent ...,Participants are excluded from the study if an...,18age old over,No limit,Both,,,Arm A: Belantamab Mafodotin is intraveniously ...,[Multiple Myeloma]
7,jRCT2031240536,NCT06003231,,既治療の局所進行切除不能又は転移性HER2発現固形癌を有する成人患者を対象としたDisita...,頭頸部扁平上皮癌、非小細胞肺癌、卵巣癌、子宮内膜癌,頭頸部扁平上皮癌、非小細胞肺癌、卵巣癌、子宮内膜癌,Head and neck squamous cell carcinoma/Non-smal...,既治療の局所進行切除不能又は転移性(LA/m)ヒト上皮増殖因子受容体2型(HER2)発現固形...,2,,...,treatment purpose,Cohort 1: Head and neck squamous cell carcinom...,- Prior treatment with a monomethyl auristatin...,18age old over,No limit,Both,,,Experimental Arm - Disitamab vedotin 1.5 mg/kg...,"[Head, Neck Squamous Cell Carcinoma/Non-Small ..."
9,jRCT2031240530,,,HER2遺伝子増幅あるいはHER2遺伝子変異を有するII-III期非小細胞肺癌に対するTra...,HER2遺伝子増幅あるいはHER2遺伝子変異を有するII/IIIA/IIIB(T3-4N2)...,HER2遺伝子増幅あるいはHER2遺伝子変異を有するII/IIIA/IIIB(T3-4N2)...,Stage II-III HER2-Amplified or HER2-Mutant Non...,HER2遺伝子増幅あるいはHER2遺伝子変異を有するII/IIIA/IIIB(T3-4N2)...,2,,...,treatment purpose,1. Signed informed consent form 2. Age >= 18 y...,1. NSCLC that is clinically T4 by virtue of me...,18age old over,No limit,Both,,,The study drug will be administered as an IV i...,"[Stage Ii-Iii Her2-Amplified, Her2-Mutant Non-..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
729,jRCT2031200057,,,治療抵抗性乳がんを対象としたTDM-812の腫瘍内投与法の安全性評価を目的とした第I相試験,乳がん,乳がん,Breast cancer,TDM-812の腫瘍内投与の安全性および忍容性を評価し、今後の臨床評価に用いる腫瘍内投与にお...,1,,...,treatment purpose,<Disease Characteristics> 1)Patients with inop...,1)Patients correspond to either of the followi...,20age old over,No limit,Female,,Breast cancer,"For the target tumor, the dose settled at each...",[Breast Cancer]
739,jRCT2073200004,,,切除不能膵癌を対象とした腫瘍溶解性ウイルスの腫瘍内局所投与の臨床第Ⅰ/Ⅱ相試験,膵癌,膵癌,Pancreatic cancer,切除不能膵癌患者に対するSurv.m-CRA-1の腫瘍内投与の安全性及び有効性を検討する。,1-2,,...,treatment purpose,The patients with pancreatic tumors must meet ...,1) Patients with the following complications: ...,20age old over,No limit,Both,,"Pancreatic ductal carcinoma, unresectable",Intratumoral administration of oncolytic virus,[Pancreatic Cancer]
760,jRCT2033190086,,,悪性黒色腫患者を対象としたインターロイキン12発現型遺伝子組換え単純ヘルペスウイルス１型の第...,悪性黒色腫,悪性黒色腫,malignant melanoma,第I相パート:進行期の悪性黒色腫患者を対象として、ヒトIL-12発現型遺伝子組換え単純ヘルペ...,1-2,,...,treatment purpose,(Phase 1) 1) Histologically confirmed malignan...,(Phase 1 and 2) 1) Patients who have brain and...,20age old over,No limit,Both,,"malignamt melanoma, advanced stage",Test drug (T-hIL12) will be administered into ...,[Malignant Melanoma]
762,jRCT2031190072,,,切除不能な進行肝細胞癌患者を対象としたCYT001の第I相臨床試験,肝細胞癌,肝細胞癌,Hepatocellular carcinoma,切除不能な進行肝細胞癌患者を対象としたCYT001の第I相臨床試験,1,,...,treatment purpose,1) Histologically or cytologically confirmed h...,1) CNS metastases that are symptomatic or requ...,20age 0month 0week old over,No limit,Both,,Cancer peptide vaccine,Reconstitute two kinds of cancer peptide vacci...,[Hepatocellular Carcinoma]


In [80]:
print(basedf[basedf['JRCT ID'] == 'jRCT2031230090']['TargetWord'])
print(basedf[basedf['JRCT ID'] == 'jRCT2031230090']['TargetEnglish'])
print(basedf[basedf['JRCT ID'] == 'jRCT2031230090']['Exclusion Criteria'])

391    [Relapsed/Refract, Y Locally Advanced, Metasta...
Name: TargetWord, dtype: object
391    Relapsed/Refractory Locally Advanced or Metast...
Name: TargetEnglish, dtype: object
391    Dose Escalation and Expansion Cohorts 1. Women...
Name: Exclusion Criteria, dtype: object


In [81]:
# 値を直接取得して表示
print(basedf.loc[basedf['JRCT ID'] == 'jRCT2031230090', 'TargetWord'].values[0])
print(basedf.loc[basedf['JRCT ID'] == 'jRCT2031230090', 'TargetEnglish'].values[0])
print(basedf.loc[basedf['JRCT ID'] == 'jRCT2031230090', 'Exclusion Criteria'].values[0])


['Relapsed/Refract', 'Y Locally Advanced', 'Metastatic Breast Cancer', 'Triple Negative', 'Breast Cancer']
Relapsed/Refractory Locally Advanced or Metastatic Breast Cancer and Triple Negative Breast Cancer
Dose Escalation and Expansion Cohorts 1. Women who are pregnant or lactating. Women of child-bearing potential (WOCBP) not using adequate birth control see Appendix H: Forms of contraception. 2. Patients with known central nervous system (CNS) or leptomeningeal metastases not controlled by prior surgery, radiotherapy or requiring corticosteroids to control symptoms, or patients with symptoms suggesting CNS involvement for which treatment is required. 3. Patients with primary brain tumors. 4. Patients with any hematologic malignancy. This includes leukemia (any form), lymphoma, and multiple myeloma. 5. Patients with any of the following hematologic abnormalities at baseline. (Patients may have received a red blood cell product transfusion prior to study, if clinically warranted.): -Ab

In [82]:
# モデルのロード
model = SentenceTransformer('pritamdeka/S-PubMedBert-MS-MARCO')



In [89]:
# クエリ
#query = "Triple negative breast cancer"
query = "breast cancer"
#query = "glioma"
threshold = 0.925
# クエリをベクトル化
query_vec = model.encode(query, convert_to_tensor=True)


In [90]:
# ターゲットリスト全体を処理
matched_indices = []
target_vecs_list = []
cosine_scores_list = []
for idx, target_words in enumerate(basedf['TargetWord']):
    # ターゲット内の各単語をベクトル化
    target_vecs = model.encode(target_words, convert_to_tensor=True)
    # コサイン類似度を計算
    cosine_scores = util.cos_sim(query_vec, target_vecs).squeeze()
    target_vecs_list.append(target_vecs)
    cosine_scores_list.append(cosine_scores)
    # 閾値を超えるか確認
    if (cosine_scores >= threshold).any():  # いずれかが閾値を超えている場合
        matched_indices.append(idx)

# 抽出
matched_df = basedf.iloc[matched_indices]
matched_df

Unnamed: 0,JRCT ID,NCT No,JapicCTI No,Title,TargetJ,Target,TargetEnglish,研究・治験の目的,試験等のフェーズ,試験の種類,...,purpose,Inclusion Criteria,Exclusion Criteria,Age Minimum,Age Maximum,Gender,Discontinuation Criteria,Keyword,Intervention(s),TargetWord
31,jRCT2061240081,NCT06393374,,術前薬物療法後の手術時に病理学的完全奏効を達成していないトリプルネガティブ乳癌患者を対象に、...,トリプルネガティブ乳癌,トリプルネガティブ乳癌,Triple negative breast cancer,術前薬物療法後の手術時に病理学的完全奏効を達成していないトリプルネガティブ乳癌患者を対象に、...,3,,...,treatment purpose,"-Has centrally confirmed TNBC, as defined by t...",-Has a known germline breast cancer gene (BRCA...,18age old over,No limit,Both,,,-Arm 1: MK-2870 4mg/kg intravenous (IV) every ...,"[Triple Negative, Breast Cancer]"
157,jRCT2052240059,,,遺伝子HSD17B4高メチル化を有するHER2陽性ER陰性乳癌における非手術療法の有用性を評...,乳がん,乳がん,Breast cancer,HSD17B4高メチル化(HSD17B4 hypermethylation:HH)を有するH...,2,,...,diagnostic purpose,1. Histologically confirmed invasive breast ca...,1. History of other malignancy within the last...,20age old over,No limit,Female,,,Omitting breast surgery after preoperative che...,[Breast Cancer]
170,jRCT2031240096,NCT06380751,,本治験の主要目的は、BRCA1、BRCA2 又は PALB2 変異を有し、HR 陽性、HER...,進行乳癌,進行乳癌,Advanced Breast Cancer,Treatment,3,,...,treatment purpose,"- Adult females, pre/peri-menopausal and/or po...",- Participants with history of MDS/AML or with...,18age old over,No limit,Both,,,Experimental: Arm 1: saruparib (AZD5305) plus ...,[Advanced Breast Cancer]
215,jRCT2031230723,NCT06112379,,未治療のトリプルネガティブ又はホルモン受容体低発現／HER2陰性乳癌の成人患者を対象として、...,乳癌,乳癌,Breast Cancer,未治療のトリプルネガティブ又はホルモン受容体低発現/HER2陰性乳癌の成人患者を対象として、...,3,,...,treatment purpose,1. Participant must be >= 18 years at the time...,"1. As judged by the investigator, any evidence...",18age old over,No limit,Both,,,- Experimental arm: Dato-DXd plus durvalumab n...,[Breast Cancer]
240,jRCT2061230102,NCT06103864,,Programmed death-ligand（PD-L1）陽性の局所再発手術不能又は転移性...,乳癌,乳癌,Breast Cancer,PD-L1陽性の局所再発手術不能または転移性TNBC患者を対象に、デュルバルマブ併用または非...,3,,...,treatment purpose,Histologically or cytologically documented loc...,"As judged by investigator, severe or uncontrol...",18age old over,No limit,Both,,,Arm 1: Dato-DXd + durvalumab Arm 2: Investigat...,[Breast Cancer]
307,jRCT2061230074,NCT05952557,,根治的局所治療（化学療法の併用または非併用）を受けて疾患の兆候のない、再発リスクが中間～高リ...,乳がん、早期乳がん,乳がん、早期乳がん,"Breast Cancer, Early Breast Cancer",Treatment,3,,...,treatment purpose,- Women and Men; 18 years or more at the time ...,- Inoperable locally advanced or metastatic br...,18age old over,No limit,,,,arm A: continue with SoC ET as directed by inv...,"[Breast Cancer, Early Breast Cancer]"
351,jRCT2031230279,NCT05862285,,GENENTECH社及び／又はF.HOFFMANN-LA ROCHE LTDが依頼した試験に...,癌,癌,Cancer,"本継続投与試験の目的は,親治験から移行する時点でまだ治験治療を受けており,その地域でその治療...",3,,...,treatment purpose,- Eligible for continuing Roche IMP-based ther...,- Meet any of the study treatment discontinuat...,18age old over,No limit,Both,,,Ipatasertib: Ipatasertib will be administered ...,[Cancer]
388,jRCT2031230109,NCT05514054,,EMBER-4：2～5年間の術後内分泌療法による前治療歴を有する再発高リスクのER+、HER...,乳癌,乳癌,Breast Neoplasms,早期乳癌患者を対象としたimlunestrantと標準的な内分泌療法の比較試験,3,,...,treatment purpose,"-Have a diagnosis of ER+, HER2- early-stage, r...",-Have any evidence of metastatic disease (incl...,18age old over,No limit,Both,,,-Drug: Imlunestrant Administered orally. Other...,[Breast Neoplasms]
391,jRCT2031230090,,,MELK阻害剤OTS167POにおける転移性・進行性乳がん患者を対象とした安全性、忍容性およ...,再発・難治性の局所進行性・転移性乳がん及びトリプルネガティブ乳がん,再発・難治性の局所進行性・転移性乳がん及びトリプルネガティブ乳がん,Relapsed/Refractory Locally Advanced or Metast...,再発・難治性の局所進行性または転移性乳がん患者に対して、OTS167を経口カプセルで投与する...,1,,...,treatment purpose,Dose Escalation and Dose Expansion Cohorts 1. ...,Dose Escalation and Expansion Cohorts 1. Women...,18age old over,No limit,Female,,,This is a Phase I dose escalation/expansion mu...,"[Relapsed/Refract, Y Locally Advanced, Metasta..."
392,jRCT2031230096,NCT05774951,,根治的局所領域療法（化学療法の併用または非併用）および標準補助内分泌療法（ET）を少なくとも...,乳がん、早期乳がん,乳がん、早期乳がん,"Breast Cancer, Early Breast Cancer",Treatment,3,,...,treatment purpose,"- Women and Men, greater than or equal to 18 y...",- Inoperable locally advanced or metastatic br...,18age old over,No limit,Both,,,arm A: continue with SoC ET as directed by inv...,"[Breast Cancer, Early Breast Cancer]"


In [92]:
cosine_scores_list [matched_indices]

TypeError: list indices must be integers or slices, not list

In [86]:
# ターゲットリスト全体を処理
matched_indices = []
target_vecs_list = []
cosine_scores_list = []
for idx, target_words in enumerate(basedf['TargetWord']):
    # ターゲット内の各単語をベクトル化
    target_vecs = model.encode(target_words, convert_to_tensor=True)
    # コサイン類似度を計算
    cosine_scores = util.cos_sim(query_vec, target_vecs).squeeze()
    target_vecs_list.append(target_vecs)
    cosine_scores_list.append(cosine_scores)
    # 閾値を超えるか確認
    if (cosine_scores >= threshold).any():  # いずれかが閾値を超えている場合
        matched_indices.append(idx)

# 抽出
matched_df = basedf.iloc[matched_indices]
matched_df

Unnamed: 0,JRCT ID,NCT No,JapicCTI No,Title,TargetJ,Target,TargetEnglish,研究・治験の目的,試験等のフェーズ,試験の種類,...,purpose,Inclusion Criteria,Exclusion Criteria,Age Minimum,Age Maximum,Gender,Discontinuation Criteria,Keyword,Intervention(s),TargetWord
31,jRCT2061240081,NCT06393374,,術前薬物療法後の手術時に病理学的完全奏効を達成していないトリプルネガティブ乳癌患者を対象に、...,トリプルネガティブ乳癌,トリプルネガティブ乳癌,Triple negative breast cancer,術前薬物療法後の手術時に病理学的完全奏効を達成していないトリプルネガティブ乳癌患者を対象に、...,3,,...,treatment purpose,"-Has centrally confirmed TNBC, as defined by t...",-Has a known germline breast cancer gene (BRCA...,18age old over,No limit,Both,,,-Arm 1: MK-2870 4mg/kg intravenous (IV) every ...,"[Triple Negative, Breast Cancer]"
157,jRCT2052240059,,,遺伝子HSD17B4高メチル化を有するHER2陽性ER陰性乳癌における非手術療法の有用性を評...,乳がん,乳がん,Breast cancer,HSD17B4高メチル化(HSD17B4 hypermethylation:HH)を有するH...,2,,...,diagnostic purpose,1. Histologically confirmed invasive breast ca...,1. History of other malignancy within the last...,20age old over,No limit,Female,,,Omitting breast surgery after preoperative che...,[Breast Cancer]
215,jRCT2031230723,NCT06112379,,未治療のトリプルネガティブ又はホルモン受容体低発現／HER2陰性乳癌の成人患者を対象として、...,乳癌,乳癌,Breast Cancer,未治療のトリプルネガティブ又はホルモン受容体低発現/HER2陰性乳癌の成人患者を対象として、...,3,,...,treatment purpose,1. Participant must be >= 18 years at the time...,"1. As judged by the investigator, any evidence...",18age old over,No limit,Both,,,- Experimental arm: Dato-DXd plus durvalumab n...,[Breast Cancer]
240,jRCT2061230102,NCT06103864,,Programmed death-ligand（PD-L1）陽性の局所再発手術不能又は転移性...,乳癌,乳癌,Breast Cancer,PD-L1陽性の局所再発手術不能または転移性TNBC患者を対象に、デュルバルマブ併用または非...,3,,...,treatment purpose,Histologically or cytologically documented loc...,"As judged by investigator, severe or uncontrol...",18age old over,No limit,Both,,,Arm 1: Dato-DXd + durvalumab Arm 2: Investigat...,[Breast Cancer]
307,jRCT2061230074,NCT05952557,,根治的局所治療（化学療法の併用または非併用）を受けて疾患の兆候のない、再発リスクが中間～高リ...,乳がん、早期乳がん,乳がん、早期乳がん,"Breast Cancer, Early Breast Cancer",Treatment,3,,...,treatment purpose,- Women and Men; 18 years or more at the time ...,- Inoperable locally advanced or metastatic br...,18age old over,No limit,,,,arm A: continue with SoC ET as directed by inv...,"[Breast Cancer, Early Breast Cancer]"
388,jRCT2031230109,NCT05514054,,EMBER-4：2～5年間の術後内分泌療法による前治療歴を有する再発高リスクのER+、HER...,乳癌,乳癌,Breast Neoplasms,早期乳癌患者を対象としたimlunestrantと標準的な内分泌療法の比較試験,3,,...,treatment purpose,"-Have a diagnosis of ER+, HER2- early-stage, r...",-Have any evidence of metastatic disease (incl...,18age old over,No limit,Both,,,-Drug: Imlunestrant Administered orally. Other...,[Breast Neoplasms]
391,jRCT2031230090,,,MELK阻害剤OTS167POにおける転移性・進行性乳がん患者を対象とした安全性、忍容性およ...,再発・難治性の局所進行性・転移性乳がん及びトリプルネガティブ乳がん,再発・難治性の局所進行性・転移性乳がん及びトリプルネガティブ乳がん,Relapsed/Refractory Locally Advanced or Metast...,再発・難治性の局所進行性または転移性乳がん患者に対して、OTS167を経口カプセルで投与する...,1,,...,treatment purpose,Dose Escalation and Dose Expansion Cohorts 1. ...,Dose Escalation and Expansion Cohorts 1. Women...,18age old over,No limit,Female,,,This is a Phase I dose escalation/expansion mu...,"[Relapsed/Refract, Y Locally Advanced, Metasta..."
392,jRCT2031230096,NCT05774951,,根治的局所領域療法（化学療法の併用または非併用）および標準補助内分泌療法（ET）を少なくとも...,乳がん、早期乳がん,乳がん、早期乳がん,"Breast Cancer, Early Breast Cancer",Treatment,3,,...,treatment purpose,"- Women and Men, greater than or equal to 18 y...",- Inoperable locally advanced or metastatic br...,18age old over,No limit,Both,,,arm A: continue with SoC ET as directed by inv...,"[Breast Cancer, Early Breast Cancer]"
402,jRCT2061230009,NCT05485766,,gBRCA1/2遺伝子変異を有するトリプルネガティブ原発乳がんに対するプラチナ製剤、PARP...,トリプルネガティブ乳がん,トリプルネガティブ乳がん,Triple Negative Breast Neoplasms,術前療法としてgBRCA変異陽性手術可能または局所進行TNBCに対してペムブロリズマブ+パク...,2,,...,treatment purpose,1)Male/female subjects who are at least 18 yea...,1) Subjects who has a positive urine pregnancy...,18age old exceed,No limit,Both,,"Triple Negative Breast Cancer, Breast Neoplasm...","Drug: Pembrolizumab 200 mg fixed dose, IV, eve...","[Triple Negative, Breast Neoplasms]"
463,jRCT2061220087,NCT05629585,,術前薬物療法後の外科的切除時に乳房及び／又は腋窩リンパ節に浸潤性残存病変を有するステージI～...,乳癌,乳癌,Breast Cancer,術前薬物療法後の外科的切除時に乳房および/または腋窩リンパ節に浸潤性残存病変を有するI~II...,3,,...,treatment purpose,Participant must be >= 18 years at the time of...,Stage IV (metastatic) TNBC. History of prior i...,18age old over,130age old under,Both,,,Arm 1: Dato-DXd 6 mg/kg IV Q3W x 8 cycles + Du...,[Breast Cancer]


In [14]:
# 全データのターゲット列をベクトル化
target_list = basedf['TargetEnglish'].tolist()
target_vecs = model.encode(target_list, convert_to_tensor=True)
# コサイン類似度を計算
cosine_scores = util.cos_sim(query_vec, target_vecs).squeeze()

In [15]:
matched_indices_d = (cosine_scores >= threshold).nonzero().tolist()
# 入れ子リストをフラットなリストに変換
flat_indices_d = [idx[0] for idx in matched_indices_d]

# 抽出
matched_df_d = basedf.iloc[flat_indices_d]
matched_df_d


Unnamed: 0,JRCT ID,NCT No,JapicCTI No,Title,TargetJ,Target,TargetEnglish,研究・治験の目的,試験等のフェーズ,試験の種類,...,purpose,Inclusion Criteria,Exclusion Criteria,Age Minimum,Age Maximum,Gender,Discontinuation Criteria,Keyword,Intervention(s),TargetWord
157,jRCT2052240059,,,遺伝子HSD17B4高メチル化を有するHER2陽性ER陰性乳癌における非手術療法の有用性を評...,乳がん,乳がん,Breast cancer,HSD17B4高メチル化(HSD17B4 hypermethylation:HH)を有するH...,2,,...,diagnostic purpose,1. Histologically confirmed invasive breast ca...,1. History of other malignancy within the last...,20age old over,No limit,Female,,,Omitting breast surgery after preoperative che...,[Breast cancer]
170,jRCT2031240096,NCT06380751,,本治験の主要目的は、BRCA1、BRCA2 又は PALB2 変異を有し、HR 陽性、HER...,進行乳癌,進行乳癌,Advanced Breast Cancer,Treatment,3,,...,treatment purpose,"- Adult females, pre/peri-menopausal and/or po...",- Participants with history of MDS/AML or with...,18age old over,No limit,Both,,,Experimental: Arm 1: saruparib (AZD5305) plus ...,[Advanced Breast Cancer]
215,jRCT2031230723,NCT06112379,,未治療のトリプルネガティブ又はホルモン受容体低発現／HER2陰性乳癌の成人患者を対象として、...,乳癌,乳癌,Breast Cancer,未治療のトリプルネガティブ又はホルモン受容体低発現/HER2陰性乳癌の成人患者を対象として、...,3,,...,treatment purpose,1. Participant must be >= 18 years at the time...,"1. As judged by the investigator, any evidence...",18age old over,No limit,Both,,,- Experimental arm: Dato-DXd plus durvalumab n...,[Breast Cancer]
240,jRCT2061230102,NCT06103864,,Programmed death-ligand（PD-L1）陽性の局所再発手術不能又は転移性...,乳癌,乳癌,Breast Cancer,PD-L1陽性の局所再発手術不能または転移性TNBC患者を対象に、デュルバルマブ併用または非...,3,,...,treatment purpose,Histologically or cytologically documented loc...,"As judged by investigator, severe or uncontrol...",18age old over,No limit,Both,,,Arm 1: Dato-DXd + durvalumab Arm 2: Investigat...,[Breast Cancer]
307,jRCT2061230074,NCT05952557,,根治的局所治療（化学療法の併用または非併用）を受けて疾患の兆候のない、再発リスクが中間～高リ...,乳がん、早期乳がん,乳がん、早期乳がん,"Breast Cancer, Early Breast Cancer",Treatment,3,,...,treatment purpose,- Women and Men; 18 years or more at the time ...,- Inoperable locally advanced or metastatic br...,18age old over,No limit,,,,arm A: continue with SoC ET as directed by inv...,"[Breast Cancer, Early Breast Cancer]"
342,jRCT2051230094,NCT05794906,,前立腺癌の高リスク生化学的再発（BCR）患者を対象としたアンドロゲン遮断療法（ADT）を併用...,前立腺癌,前立腺癌,prostate cancer,BAY 1841788(darolutamide)とADTの24ヵ月間併用投与によって、プラ...,3,,...,treatment purpose,- Histologically or cytologically confirmed ad...,- Pathological finding consistent with small c...,18age old over,No limit,Male,,,"Drug: Darolutamide (BAY1841788, Nubeqa) Coated...",[prostate cancer]
351,jRCT2031230279,NCT05862285,,GENENTECH社及び／又はF.HOFFMANN-LA ROCHE LTDが依頼した試験に...,癌,癌,Cancer,"本継続投与試験の目的は,親治験から移行する時点でまだ治験治療を受けており,その地域でその治療...",3,,...,treatment purpose,- Eligible for continuing Roche IMP-based ther...,- Meet any of the study treatment discontinuat...,18age old over,No limit,Both,,,Ipatasertib: Ipatasertib will be administered ...,[Cancer]
388,jRCT2031230109,NCT05514054,,EMBER-4：2～5年間の術後内分泌療法による前治療歴を有する再発高リスクのER+、HER...,乳癌,乳癌,Breast Neoplasms,早期乳癌患者を対象としたimlunestrantと標準的な内分泌療法の比較試験,3,,...,treatment purpose,"-Have a diagnosis of ER+, HER2- early-stage, r...",-Have any evidence of metastatic disease (incl...,18age old over,No limit,Both,,,-Drug: Imlunestrant Administered orally. Other...,[Breast Neoplasms]
392,jRCT2031230096,NCT05774951,,根治的局所領域療法（化学療法の併用または非併用）および標準補助内分泌療法（ET）を少なくとも...,乳がん、早期乳がん,乳がん、早期乳がん,"Breast Cancer, Early Breast Cancer",Treatment,3,,...,treatment purpose,"- Women and Men, greater than or equal to 18 y...",- Inoperable locally advanced or metastatic br...,18age old over,No limit,Both,,,arm A: continue with SoC ET as directed by inv...,"[Breast Cancer, Early Breast Cancer]"
463,jRCT2061220087,NCT05629585,,術前薬物療法後の外科的切除時に乳房及び／又は腋窩リンパ節に浸潤性残存病変を有するステージI～...,乳癌,乳癌,Breast Cancer,術前薬物療法後の外科的切除時に乳房および/または腋窩リンパ節に浸潤性残存病変を有するI~II...,3,,...,treatment purpose,Participant must be >= 18 years at the time of...,Stage IV (metastatic) TNBC. History of prior i...,18age old over,130age old under,Both,,,Arm 1: Dato-DXd 6 mg/kg IV Q3W x 8 cycles + Du...,[Breast Cancer]
