{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from selenium import webdriver\n", "from selenium.webdriver.common.by import By\n", "from selenium.webdriver.support.ui import WebDriverWait\n", "from selenium.webdriver.support import expected_conditions as EC\n", "import csv\n", "\n", "\n", "from selenium import webdriver\n", "from selenium.webdriver.common.by import By\n", "from selenium.webdriver.support.ui import WebDriverWait\n", "from selenium.webdriver.support import expected_conditions as EC\n", "import csv\n", "\n", "from selenium.common.exceptions import ElementClickInterceptedException, TimeoutException\n", "\n", "import pandas as pd\n", "import requests\n", "from bs4 import BeautifulSoup\n", "import time" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def scrape_jrct_all_details(url):\n", " \"\"\"\n", " 指定されたjRCT URLから必要なすべての情報を抽出します。\n", " \"\"\"\n", " # リクエストを送信\n", " headers = {\n", " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\"\n", " }\n", " try:\n", " response = requests.get(url, headers=headers, timeout=10)\n", " response.raise_for_status()\n", " except requests.RequestException as e:\n", " print(f\"URLリクエストに失敗しました: {url} - エラー: {e}\")\n", " return {\"URL\": url, \"エラー\": \"リクエスト失敗\"}\n", "\n", " soup = BeautifulSoup(response.text, 'html.parser')\n", "\n", " # データ格納用辞書\n", " data = {\"URL\": url}\n", "\n", " def extract_label_data(label_text, label_en=None):\n", " \"\"\"\n", " 特定のラベルに対応するデータを抽出するヘルパー関数\n", " \"\"\"\n", " label = soup.find('label', string=lambda text: text and label_text in text)\n", " if label:\n", " td_jp = label.find_parent('th').find_next_sibling('td')\n", " td_en = td_jp.find_next_sibling('td') if td_jp and label_en else None\n", " return (\n", " td_jp.text.strip() if td_jp else None,\n", " td_en.text.strip() if td_en else None\n", " )\n", " return None, None\n", "\n", " # \"研究・治験の目的\" を抽出\n", " data[\"研究・治験の目的\"], _ = extract_label_data(\"研究・治験の目的\")\n", "\n", " # 試験デザイン情報(日本語と英語)を抽出\n", " design_labels = [\n", " ('試験等のフェーズ', 'Phase'),\n", " ('試験の種類', 'Study Type'),\n", " ('無作為化', 'allocation'),\n", " ('盲検化', 'masking'),\n", " ('対照', 'control'),\n", " ('割付け', 'assignment'),\n", " ('研究目的', 'purpose')\n", " ]\n", " for label_jp, label_en in design_labels:\n", " jp, en = extract_label_data(label_jp, label_en)\n", " data[label_jp] = jp\n", " data[label_en] = en\n", "\n", " # その他の情報を抽出\n", " details_labels = [\n", " ('主たる選択基準', 'Inclusion Criteria'),\n", " ('主たる除外基準', 'Exclusion Criteria'),\n", " ('年齢下限', 'Age Minimum'),\n", " ('年齢上限', 'Age Maximum'),\n", " ('性別', 'Gender'),\n", " ('中止基準', 'Discontinuation Criteria'),\n", " ('対象疾患キーワード', 'Keyword'),\n", " ('介入の内容', 'Intervention(s)')\n", " ]\n", " for label_jp, label_en in details_labels:\n", " jp, en = extract_label_data(label_jp, label_en)\n", " data[label_jp] = jp\n", " data[label_en] = en\n", "\n", " # \"他の臨床研究登録機関への登録\" を探索\n", " other_registries_section = soup.find(\"div\", id=\"area-toggle-07-02\")\n", " japic_no_list = []\n", " nct_no_list = []\n", "\n", " if other_registries_section:\n", " rows = other_registries_section.find_all(\"tr\")\n", " for row in rows:\n", " label = row.find(\"label\")\n", " if label and (\"ID番号\" in label.text or \"研究番号\" in label.text):\n", " value_td = row.find(\"td\")\n", " if value_td:\n", " id_number = value_td.text.strip()\n", " if id_number.startswith(\"JapicCTI\"):\n", " japic_no_list.append(id_number)\n", " elif id_number.startswith(\"NCT\"):\n", " nct_no_list.append(id_number)\n", "\n", " # JapicCTI No と NCT No を格納(複数あればカンマ区切り)\n", " data[\"JapicCTI No\"] = \", \".join(japic_no_list) if japic_no_list else None\n", " data[\"NCT No\"] = \", \".join(nct_no_list) if nct_no_list else None\n", "\n", " # サーバーへの負荷を避けるためのスリープ\n", " time.sleep(1) # 必要に応じて調整\n", "\n", " return data\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "url = \"https://jrct.niph.go.jp/latest-detail/jRCT2031200246\"\n", "data = scrape_jrct_all_details(url)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'URL': 'https://jrct.niph.go.jp/latest-detail/jRCT2031200246',\n", " '研究・治験の目的': '本治験は,非盲検,多施設共同,第1 相試験であり,PF-07248144の安全性,忍容性,PKおよびPDを評価するとともに,PF-07248144を単剤投与またはフルベストラント,レトロゾール+パルボシクリブもしくはPF-07220060+フルベストラントと併用投与したときの臨床的有効性の早期徴候を評価することを目的とする。',\n", " '試験等のフェーズ': '1',\n", " 'Phase': None,\n", " '試験の種類': None,\n", " 'Study Type': None,\n", " '無作為化': '単一群',\n", " 'allocation': 'single arm study',\n", " '盲検化': '非盲検',\n", " 'masking': 'open(masking not used)',\n", " '対照': '非対照',\n", " 'control': 'uncontrolled control',\n", " '割付け': '単群比較',\n", " 'assignment': 'single assignment',\n", " '研究目的': '治療',\n", " 'purpose': 'treatment purpose',\n", " '主たる選択基準': '対象疾患 - 乳癌, 前立腺癌, 肺癌\\r\\n◦パート1A(単剤投与用量漸増パート):組織学的または細胞学的に局所進行または転移性ER 陽性HER2 陰性乳癌,局所進行または転移性CRPC,あるいは局所進行または転移性NSCLC と診断され,標準療法に不耐容もしくは抵抗性,または標準療法が適応とならないない治験参加者\\r\\n◦パート1B,1Cおよび1D(併用投与用量漸増パート):組織学的または細胞学的に局所進行または転移性ER 陽性HER2 陰性乳癌と診断された治験参加者。進行または転移性疾患に対する内分泌療法およびCDK 4/6 阻害薬による前治療を1 種類以上受けた後に疾患進行が認められた治験参加者\\r\\n◦パート2A(ER 陽性HER2 陰性乳癌3L+,単剤療法):組織学的または細胞学的に局所進行または転移性ER 陽性HER2 陰性乳癌と診断された治験参加者。少なくとも1 種類のCDK 4/6 阻害薬による前治療および1種類の内分泌療法による前治療後に疾患進行が認められた治験参加者\\r\\n◦パート2B(ER 陽性HER2 陰性乳癌2~4L,併用療法):組織学的または細胞学的に局所進行または転移性ER 陽性HER2 陰性乳癌と診断された治験参加者。少なくとも1 種類のCDK 4/6 阻害薬による前治療および1種類以上の内分泌療法による前治療後に疾患進行が認められた治験参加者\\r\\n◦パート2D(ER 陽性HER2 陰性乳癌2~4L,併用療法):組織学的または細胞学的に局所進行または転移性ER 陽性HER2 陰性乳癌と診断された治験参加者。少なくとも1 種類以上のCDK 4/6 阻害薬および内分泌療法による前治療後に疾患進行が認められた治験参加者\\r\\n◦各国の基準に従った検査法を用いて直近の腫瘍生検に基づき,ER 陽性腫瘍(染色細胞の1%以上陽性)が確認されている進行または転移性ER 陽性HER2 陰性乳癌を有する治験参加者\\r\\n◦免疫組織化学染色法(IHC)スコア0/1 である場合,またはin situ ハイブリダイゼーション(ISH)法(FISH/CISH/SISH/DISH)によりHER2/CEP17 比が2 未満,かつHER2 コピー数が4 未満である進行または転移性ER 陽性HER2 陰性乳癌を有する治験参加者\\r\\n◦妊娠可能または卵管結紮のみを有する進行または転移性ER 陽性HER2 陰性乳癌を有する女性治験参加者は,承認されたLHRH アゴニスト[ゴセレリン,リュープロリド(本邦においてはリュープロレリン)または同等の薬剤など]治療により,薬剤誘発性の閉経状態にする必要がある\\r\\n◦RECIST 第1.1 版の定義に基づく,放射線照射を受けていない測定可能病変を少なくとも1 つ有する治験参加者\\r\\n◦Eastern Cooperative Oncology Group(ECOG)パフォーマンスステータス(PS)が0 または1 の治験参加者\\r\\n◦18 歳以上の男女。日本では20 歳以上。\\r\\n◦適切な腎機能,肝機能,骨髄昨日を有する治験参加者\\r\\n◦前治療による急性の影響がベースラインまたはCTCAE グレード1 以下にまで回復している治験参加者。ただし,安全性上のリスクとならないと治験責任医師が判断したAE を除く。',\n", " 'Inclusion Criteria': 'Disease Characteristics - Breast, Prostate, and Lung Cancer\\r\\n*Part 1A (Monotherapy Dose Escalation) Histological or cytological diagnosis of locally advanced or metastatic ER+HER2- breast cancer, locally advanced or metastatic CRPC, or locally advanced or metastatic NSCLC that is intolerant or resistant to standard therapy or for which no standard therapy is available.\\r\\n*Part 1B, Part 1C and Part 1D (Combination Dose Escalation) Histological or cytological diagnosis of locally advanced or metastatic ER+HER2- breast cancer. Participants must have progressed after at least 1 prior line of treatment with an endocrine therapy and CDK4/6 inhibitor in the advanced or metastatic setting.\\r\\n*Part 2A (ER+HER2- breast cancer 3L+, monotherapy) Histological or cytological diagnosis of locally advanced or metastatic ER+HER2- breast cancer. Participants must have progressed after at least 1 prior line of CDK4/6 inhibitor and 1 lines of endocrine therapy.\\r\\n*Part 2B (ER+HER2- breast cancer 2L, combination) Histological or cytological diagnosis of locally advanced or metastatic ER+HER2- breast cancer. Participants must have progressed after 1 prior line of CDK4/6 inhibitor and at least 1 line of endocrine therapy.\\r\\n*Part 2D (ER+HER2- breast cancer 2L, combination) Histological or cytological diagnosis of locally advanced or metastatic ER+HER2- breast cancer. Participants must have progressed after at least 1 prior line of CDK4/6 inhibitor and at least 1 line of endocrine therapy.\\r\\n*Participants with ER+HER2- advanced or metastatic breast cancer must have documentation of ER-positive tumor (>=1% positive stained cells) based on most recent tumor biopsy utilizing an assay consistent with local standards.\\r\\n*Participants with ER+HER2- advanced or metastatic breast cancer must have documentation of HER2-negative tumor: HER2-negative tumor is determined as immunohistochemistry score 0/1+ or negative by in situ hybridization (FISH/CISH/SISH/DISH) defined as a HER2/CEP17 ratio <2 or for single probe assessment a HER2 copy number <4.\\r\\n*Female participants with ER+HER2- advanced or metastatic breast cancer considered to be of childbearing potential (or have tubal ligations only) must be willing to undergo medically induced menopause by treatment with the approved LHRH agonist such as goserelin, leuprolide or equivalent agents to induce chemical menopause.\\r\\n*Participants must have at least 1 measurable lesion as defined by RECIST version 1.1 that has not been previously irradiated.\\r\\n*Eastern Cooperative Oncology Group (ECOG) Performance Status PS 0 or 1\\r\\n*Female or male patients aged >= 18 years (Japan >= 20 years).\\r\\n*Adequate renal, liver, and bone marrow function.\\r\\n*Resolved acute effects of any prior therapy to baseline severity or CTCAE Grade 1 except for adverse events (AEs) not constituting a safety risk by investigator judgment.I136',\n", " '主たる除外基準': '・管理不能な腹水が認められる方(腹水をコントロールするための治療は限定的であるが,腹水が認められる治験参加者はすべて治験依頼者のメディカルモニターによる確認が必要である)\\r\\n・組み入れ前3 年以内にその他の活動性の悪性腫瘍を発症した方。ただし,適切に治療された皮膚の基底細胞癌もしくは扁平上皮癌または上皮内癌は除く\\r\\n・本治験組み入れ前3 週間以内に大きな外科的処置,放射線治療または全身療法による抗がん治療を受けた方\\r\\n・骨髄の25%を超える領域に放射線照射を受けた経験がある方\\r\\n・ベースライン時の12 誘導ECG が,安全性または治験結果の解釈に影響を及ぼす可能性[ベースライン時の補正QT(QTc)間隔が470 msec を超える,完全左脚ブロック,急性または発症時期不明の心筋梗塞の徴候,活動性の心筋虚血を示唆するST-T 間隔の変化,2 度または3 度の房室ブロック,重篤な徐脈性不整脈または頻脈性不整脈]がある臨床的に意味のある異常を示す方\\r\\n・抗凝固療法は許容されない。ただし,低分子ヘパリン投与による抗凝固療法は許容される。ビタミンK 拮抗薬または第Xa 因子阻害薬は,治験依頼者との協議により許容される場合もある。\\r\\n・PF-07248144 の有効成分/添加物に対して過敏症または重度のアレルギーの既往または疑いがある方\\r\\n・活動性の炎症性消化管疾患,難治性および未回復の慢性下痢,または胃切除の既往歴やラップバンドの外科手術,またはPF-07248144 錠の消化管吸収を著しく変化させる可能性のある他の消化管疾患を有している方。治療中の胃食道逆流性疾患は許容される。\\r\\n・妊娠中または授乳中の女性',\n", " 'Exclusion Criteria': \"*Unmanageable ascites (limited medical treatment to control ascites is permitted, but all participants with ascites require review by sponsor's medical monitor).\\r\\n*Participants with any other active malignancy within 3 years prior to enrollment, except for adequately treated basal cell or squamous cell skin cancer, or carcinoma in situ.\\r\\n*Major surgery, radiation therapy, or systemic anti-cancer therapy within 3 weeks prior to study entry.\\r\\n*Prior irradiation to >25% of the bone marrow.\\r\\n*ECG clinically relevant abnormalities (eg, QTc >470 msec, complete LBBB, second/third degree AV block, ST elevation or EKG changes suggesting myocardial infarction or active myocardia ischemia).\\r\\n*Therapeutic anticoagulation. However, low molecular weight heparin is allowed. Vitamin K antagonists or factor Xa inhibitors may be allowed following discussion with the Sponsor.\\r\\n*Known or suspected hypersensitivity or severe allergy to active ingredient/excipients of PF-07248144.\\r\\n*Active inflammatory GI disease, refractory and unresolved chronic diarrhea or previous gastric resection, lap band surgery or other GI conditions and surgeries that may significantly alter the absorption of PF-07248144 tablets. Gastroesophageal reflux disease under treatment is allowed.\\r\\n*Pregnant or breastfeeding female participants.\",\n", " '年齢下限': '20歳 以上',\n", " 'Age Minimum': '20age old over',\n", " '年齢上限': '上限なし',\n", " 'Age Maximum': 'No limit',\n", " '性別': '男性・女性',\n", " 'Gender': 'Both',\n", " '中止基準': '',\n", " 'Discontinuation Criteria': None,\n", " '対象疾患キーワード': '',\n", " 'Keyword': '',\n", " '介入の内容': 'Part 1A: PF-07248144 (KAT6阻害剤)\\r\\nPart 1B: PF-07248144 (KAT6阻害剤), フルベストラント (内分泌療法: フェソロデックス)\\r\\nPart 1C: PF-07248144 (KAT6阻害剤), レトロゾール (内分泌療法: フェマーラ), パルボシクリブ(CDK4/6阻害剤: イブランス)\\r\\nPart 1D: PF-07248144 (KAT6阻害剤), PF-07220060 (CDK4阻害剤), フルベストラント (内分泌療法: フェソロデックス)\\r\\nPart 2A: PF-07248144 (KAT6阻害剤)\\r\\nPart 2B: PF-07248144 (KAT6阻害剤), フルベストラント (内分泌療法: フェソロデックス)\\r\\nPart 2D: PF-07248144 (KAT6阻害剤), PF-07220060 (CDK4阻害剤), フルベストラント (内分泌療法: フェソロデックス)',\n", " 'Intervention(s)': 'Part 1A: PF-07248144 (KAT6 Inhibitor)\\r\\nPart 1B: PF-07248144 (KAT6 Inhibitor), Fulvestrant (Endocrine Therapy: Faslodex)\\r\\nPart 1C: PF-07248144 (KAT6 Inhibitor), Letrozole (Endocrine Therapy: Femara), Palbociclib (CDK4/6 Inhibitor: Ibrance)\\r\\nPart 1D: PF-07248144 (KAT6 Inhibitor), PF-07220060 (CDK4 Inhibitor), Fulvestrant (Endocrine Therapy: Faslodex)\\r\\nPart 2A: PF-07248144 (KAT6 Inhibitor)\\r\\nPart 2B: PF-07248144 (KAT6 Inhibitor), Fulvestrant (Endocrine Therapy: Faslodex)\\r\\nPart 2D: PF-07248144 (KAT6 Inhibitor), PF-07220060 (CDK4 Inhibitor), Fulvestrant (Endocrine Therapy: Faslodex)',\n", " 'JapicCTI No': None,\n", " 'NCT No': 'NCT04606446'}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "url = \"https://jrct.niph.go.jp/latest-detail/jRCT2080223999\"\n", "data = scrape_jrct_all_details(url)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'URL': 'https://jrct.niph.go.jp/latest-detail/jRCT2080223999',\n", " '研究・治験の目的': None,\n", " '試験等のフェーズ': None,\n", " 'Phase': None,\n", " '試験の種類': '介入研究',\n", " 'Study Type': 'Interventional',\n", " '無作為化': None,\n", " 'allocation': None,\n", " '盲検化': None,\n", " 'masking': None,\n", " '対照': None,\n", " 'control': None,\n", " '割付け': None,\n", " 'assignment': None,\n", " '研究目的': None,\n", " 'purpose': None,\n", " '主たる選択基準': None,\n", " 'Inclusion Criteria': None,\n", " '主たる除外基準': None,\n", " 'Exclusion Criteria': None,\n", " '年齢下限': None,\n", " 'Age Minimum': None,\n", " '年齢上限': None,\n", " 'Age Maximum': None,\n", " '性別': '男性・女性',\n", " 'Gender': 'Both',\n", " '中止基準': None,\n", " 'Discontinuation Criteria': None,\n", " '対象疾患キーワード': '',\n", " 'Keyword': '',\n", " '介入の内容': '試験対象薬剤等\\r\\n一般的名称等:BI 907828\\r\\n薬剤・試験薬剤:\\r\\n薬効分類コード:42- 腫瘍用薬\\r\\n用法・用量、使用方法:経口投与',\n", " 'Intervention(s)': 'investigational material(s)\\r\\nGeneric name etc : BI 907828\\r\\nINN of investigational material : -\\r\\nTherapeutic category code : 42 - antitumor agents\\r\\nDosage and Administration for Investigational material : p.o',\n", " 'JapicCTI No': 'JapicCTI-184058',\n", " 'NCT No': 'NCT03449381'}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "gradio", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 2 }