{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import unicodedata\n",
    "\n",
    "def normalize_text(text):\n",
    "    \"\"\"Return ``text`` NFKC-normalized with whitespace runs collapsed.\n",
    "\n",
    "    Args:\n",
    "        text: String to clean, or None.\n",
    "\n",
    "    Returns:\n",
    "        The cleaned string: leading/trailing whitespace dropped and each\n",
    "        internal whitespace run replaced by a single space. None yields\n",
    "        an empty string.\n",
    "    \"\"\"\n",
    "    if text is None:\n",
    "        return \"\"\n",
    "    text = unicodedata.normalize('NFKC', text)\n",
    "    return \" \".join(text.split())\n",
    "\n",
    "url = \"https://jrct.niph.go.jp/latest-detail/jRCT2051240150\"\n",
    "headers = {\n",
    "    \"User-Agent\": \"Mozilla/5.0\"\n",
    "}\n",
    "\n",
    "try:\n",
    "    response = requests.get(url, headers=headers, timeout=10)\n",
    "    response.raise_for_status()\n",
    "except requests.RequestException as e:\n",
    "    print(f\"URLリクエストに失敗しました: {url} - エラー: {e}\")\n",
    "    # Re-raise so this cell fails and \"Run All\" halts here. exit() is the\n",
    "    # interactive-shell helper; inside a notebook kernel it asks the kernel\n",
    "    # to shut down instead of simply stopping the run.\n",
    "    raise\n",
    "\n",
    "# requests falls back to ISO-8859-1 when the Content-Type header carries no\n",
    "# charset, which would garble Japanese text; in that case detect the\n",
    "# encoding from the response body instead.\n",
    "if response.encoding is None or response.encoding.lower() == \"iso-8859-1\":\n",
    "    response.encoding = response.apparent_encoding\n",
    "\n",
    "# Optionally save response.text to a file to inspect the fetched HTML\n",
    "# with open(\"debug_html.html\", \"w\", encoding=\"utf-8\") as f:\n",
    "#     f.write(response.text)\n",
    "\n",
    "soup = BeautifulSoup(response.text, 'html.parser')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----\n",
      "RAW: '研究の種別'\n",
      "NORMALIZED: '研究の種別'\n",
      "-----\n",
      "RAW: '治験の区分'\n",
      "NORMALIZED: '治験の区分'\n",
      "-----\n",
      "RAW: '初回公表日'\n",
      "NORMALIZED: '初回公表日'\n",
      "-----\n",
      "RAW: '最終公表日'\n",
      "NORMALIZED: '最終公表日'\n",
      "-----\n",
      "RAW: '中止年月日'\n",
      "NORMALIZED: '中止年月日'\n",
      "-----\n",
      "RAW: '観察期間終了日'\n",
      "NORMALIZED: '観察期間終了日'\n",
      "-----\n",
      "RAW: '研究名称'\n",
      "NORMALIZED: '研究名称'\n",
      "-----\n",
      "RAW: '平易な研究名称'\n",
      "NORMALIZED: '平易な研究名称'\n",
      "-----\n",
      "RAW: '研究責任(代表)医師の氏名'\n",
      "NORMALIZED: '研究責任(代表)医師の氏名'\n",
      "-----\n",
      "RAW: '研究責任(代表)医師の所属機関'\n",
      "NORMALIZED: '研究責任(代表)医師の所属機関'\n",
      "-----\n",
      "RAW: '研究・治験の目的'\n",
      "NORMALIZED: '研究・治験の目的'\n",
      "-----\n",
      "RAW: '試験のフェーズ'\n",
      "NORMALIZED: '試験のフェーズ'\n",
      "-----\n",
      "RAW: '対象疾患名'\n",
      "NORMALIZED: '対象疾患名'\n",
      "-----\n",
      "RAW: '進捗状況'\n",
      "NORMALIZED: '進捗状況'\n",
      "-----\n",
      "RAW: '医薬品等の一般名称'\n",
      "NORMALIZED: '医薬品等の一般名称'\n",
      "-----\n",
      "RAW: '販売名'\n",
      "NORMALIZED: '販売名'\n",
"-----\n", "RAW: '認定委員会の名称'\n", "NORMALIZED: '認定委員会の名称'\n", "-----\n", "RAW: '認定番号'\n", "NORMALIZED: '認定番号'\n", "-----\n", "RAW: ' 試験等の名称 / Scientific Title(Acronym) '\n", "NORMALIZED: '試験等の名称 / Scientific Title(Acronym)'\n", "-----\n", "RAW: ' 平易な試験等の名称 / Public Title(Acronym) '\n", "NORMALIZED: '平易な試験等の名称 / Public Title(Acronym)'\n", "-----\n", "RAW: ' 科学的な内容の問合せ先 Contact for Scientific Queries '\n", "NORMALIZED: '科学的な内容の問合せ先 Contact for Scientific Queries'\n", "-----\n", "RAW: '氏名 / Name'\n", "NORMALIZED: '氏名 / Name'\n", "-----\n", "RAW: ' e-Rad番号 '\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: ' 所属機関(実施医療機関) '\n", "NORMALIZED: '所属機関(実施医療機関)'\n", "-----\n", "RAW: ' Affiliation '\n", "NORMALIZED: 'Affiliation'\n", "-----\n", "RAW: ' 所属部署 '\n", "NORMALIZED: '所属部署'\n", "-----\n", "RAW: ' 所属機関の郵便番号 '\n", "NORMALIZED: '所属機関の郵便番号'\n", "-----\n", "RAW: ' 所属機関の住所'\n", "NORMALIZED: '所属機関の住所'\n", "-----\n", "RAW: ' Address '\n", "NORMALIZED: 'Address'\n", "-----\n", "RAW: ' 電話番号 '\n", "NORMALIZED: '電話番号'\n", "-----\n", "RAW: ' 電子メールアドレス '\n", "NORMALIZED: '電子メールアドレス'\n", "-----\n", "RAW: ' 試験に関する問い合わせ先 Contact for Public Queries '\n", "NORMALIZED: '試験に関する問い合わせ先 Contact for Public Queries'\n", "-----\n", "RAW: ' 担当者氏名 /\\n Name '\n", "NORMALIZED: '担当者氏名 / Name'\n", "-----\n", "RAW: ' 担当者所属機関 / Affiliation '\n", "NORMALIZED: '担当者所属機関 / Affiliation'\n", "-----\n", "RAW: ' 担当者所属部署 '\n", "NORMALIZED: '担当者所属部署'\n", "-----\n", "RAW: ' 担当者所属機関の郵便番号 '\n", "NORMALIZED: '担当者所属機関の郵便番号'\n", "-----\n", "RAW: '\\n 担当者所属機関の住所 /\\n Address '\n", "NORMALIZED: '担当者所属機関の住所 / Address'\n", "-----\n", "RAW: ' 電話番号 '\n", "NORMALIZED: '電話番号'\n", "-----\n", "RAW: ' FAX番号 '\n", "NORMALIZED: 'FAX番号'\n", "-----\n", "RAW: ' 電子メールアドレス '\n", "NORMALIZED: '電子メールアドレス'\n", "-----\n", "RAW: '\\n 実施医療機関の長の氏名 '\n", "NORMALIZED: '実施医療機関の長の氏名'\n", "-----\n", "RAW: '\\n 当該試験等に対する管理者の許可の有無 '\n", "NORMALIZED: '当該試験等に対する管理者の許可の有無'\n", "-----\n", "RAW: '\\n IRBの承認日 '\n", "NORMALIZED: 'IRBの承認日'\n", "-----\n", "RAW: 
'\\n 救急医療に必要な施設又は設備 '\n", "NORMALIZED: '救急医療に必要な施設又は設備'\n", "-----\n", "RAW: 'データマネジメント担当機関'\n", "NORMALIZED: 'データマネジメント担当機関'\n", "-----\n", "RAW: 'データマネジメント担当責任者'\n", "NORMALIZED: 'データマネジメント担当責任者'\n", "-----\n", "RAW: '氏名'\n", "NORMALIZED: '氏名'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属'\n", "NORMALIZED: '所属'\n", "-----\n", "RAW: '役職'\n", "NORMALIZED: '役職'\n", "-----\n", "RAW: 'モニタリング担当機関'\n", "NORMALIZED: 'モニタリング担当機関'\n", "-----\n", "RAW: 'モニタリング担当責任者'\n", "NORMALIZED: 'モニタリング担当責任者'\n", "-----\n", "RAW: '氏名'\n", "NORMALIZED: '氏名'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属'\n", "NORMALIZED: '所属'\n", "-----\n", "RAW: '役職'\n", "NORMALIZED: '役職'\n", "-----\n", "RAW: '監査担当機関'\n", "NORMALIZED: '監査担当機関'\n", "-----\n", "RAW: '監査担当責任者'\n", "NORMALIZED: '監査担当責任者'\n", "-----\n", "RAW: '氏名'\n", "NORMALIZED: '氏名'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属'\n", "NORMALIZED: '所属'\n", "-----\n", "RAW: '役職'\n", "NORMALIZED: '役職'\n", "-----\n", "RAW: '統計解析担当機関'\n", "NORMALIZED: '統計解析担当機関'\n", "-----\n", "RAW: '統計解析担当責任者'\n", "NORMALIZED: '統計解析担当責任者'\n", "-----\n", "RAW: '氏名'\n", "NORMALIZED: '氏名'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属'\n", "NORMALIZED: '所属'\n", "-----\n", "RAW: '役職'\n", "NORMALIZED: '役職'\n", "-----\n", "RAW: '研究・開発計画支援担当機関'\n", "NORMALIZED: '研究・開発計画支援担当機関'\n", "-----\n", "RAW: '研究・開発計画支援担当責任者'\n", "NORMALIZED: '研究・開発計画支援担当責任者'\n", "-----\n", "RAW: '氏名'\n", "NORMALIZED: '氏名'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属'\n", "NORMALIZED: '所属'\n", "-----\n", "RAW: '役職'\n", "NORMALIZED: '役職'\n", "-----\n", "RAW: '調整・管理実務担当機関'\n", "NORMALIZED: '調整・管理実務担当機関'\n", "-----\n", "RAW: '調整・管理実務担当責任者'\n", "NORMALIZED: '調整・管理実務担当責任者'\n", "-----\n", "RAW: '氏名'\n", "NORMALIZED: '氏名'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属'\n", "NORMALIZED: 
'所属'\n", "-----\n", "RAW: '役職'\n", "NORMALIZED: '役職'\n", "-----\n", "RAW: 'その他の研究を総括する者'\n", "NORMALIZED: 'その他の研究を総括する者'\n", "-----\n", "RAW: '氏名 / Name'\n", "NORMALIZED: '氏名 / Name'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属 / Affiliation'\n", "NORMALIZED: '所属 / Affiliation'\n", "-----\n", "RAW: '役職'\n", "NORMALIZED: '役職'\n", "-----\n", "RAW: 'Secondary Sponsor の該当性'\n", "NORMALIZED: 'Secondary Sponsor の該当性'\n", "-----\n", "RAW: '治験責任医師等の連絡先'\n", "NORMALIZED: '治験責任医師等の連絡先'\n", "-----\n", "RAW: '氏名'\n", "NORMALIZED: '氏名'\n", "-----\n", "RAW: 'Name'\n", "NORMALIZED: 'Name'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属機関(実施医療機関)'\n", "NORMALIZED: '所属機関(実施医療機関)'\n", "-----\n", "RAW: 'Affiliation'\n", "NORMALIZED: 'Affiliation'\n", "-----\n", "RAW: '所属部署'\n", "NORMALIZED: '所属部署'\n", "-----\n", "RAW: '所属部署の郵便番号'\n", "NORMALIZED: '所属部署の郵便番号'\n", "-----\n", "RAW: '所属機関の住所'\n", "NORMALIZED: '所属機関の住所'\n", "-----\n", "RAW: '電話番号'\n", "NORMALIZED: '電話番号'\n", "-----\n", "RAW: '電子メールアドレス'\n", "NORMALIZED: '電子メールアドレス'\n", "-----\n", "RAW: '研究に関する問合わせ先'\n", "NORMALIZED: '研究に関する問合わせ先'\n", "-----\n", "RAW: '担当者氏名'\n", "NORMALIZED: '担当者氏名'\n", "-----\n", "RAW: '担当者所属機関'\n", "NORMALIZED: '担当者所属機関'\n", "-----\n", "RAW: '担当者所属部署'\n", "NORMALIZED: '担当者所属部署'\n", "-----\n", "RAW: '担当者所属機関の郵便番号'\n", "NORMALIZED: '担当者所属機関の郵便番号'\n", "-----\n", "RAW: '担当者所属機関の住所'\n", "NORMALIZED: '担当者所属機関の住所'\n", "-----\n", "RAW: '電話番号'\n", "NORMALIZED: '電話番号'\n", "-----\n", "RAW: 'FAX番号'\n", "NORMALIZED: 'FAX番号'\n", "-----\n", "RAW: '電子メールアドレス'\n", "NORMALIZED: '電子メールアドレス'\n", "-----\n", "RAW: '実施医療機関の長の氏名'\n", "NORMALIZED: '実施医療機関の長の氏名'\n", "-----\n", "RAW: '管理者の許可の有無'\n", "NORMALIZED: '管理者の許可の有無'\n", "-----\n", "RAW: 'IRBの承認日'\n", "NORMALIZED: 'IRBの承認日'\n", "-----\n", "RAW: '救急医療に必要な施設又は設備'\n", "NORMALIZED: '救急医療に必要な施設又は設備'\n", "-----\n", "RAW: '治験責任医師等の連絡先'\n", "NORMALIZED: '治験責任医師等の連絡先'\n", "-----\n", "RAW: '氏名'\n", "NORMALIZED: 
'氏名'\n", "-----\n", "RAW: 'Name'\n", "NORMALIZED: 'Name'\n", "-----\n", "RAW: 'e-Rad番号'\n", "NORMALIZED: 'e-Rad番号'\n", "-----\n", "RAW: '所属機関(実施医療機関)'\n", "NORMALIZED: '所属機関(実施医療機関)'\n", "-----\n", "RAW: 'Affiliation'\n", "NORMALIZED: 'Affiliation'\n", "-----\n", "RAW: '所属部署'\n", "NORMALIZED: '所属部署'\n", "-----\n", "RAW: '所属部署の郵便番号'\n", "NORMALIZED: '所属部署の郵便番号'\n", "-----\n", "RAW: '所属機関の住所'\n", "NORMALIZED: '所属機関の住所'\n", "-----\n", "RAW: '電話番号'\n", "NORMALIZED: '電話番号'\n", "-----\n", "RAW: '電子メールアドレス'\n", "NORMALIZED: '電子メールアドレス'\n", "-----\n", "RAW: '研究に関する問合わせ先'\n", "NORMALIZED: '研究に関する問合わせ先'\n", "-----\n", "RAW: '担当者氏名'\n", "NORMALIZED: '担当者氏名'\n", "-----\n", "RAW: '担当者所属機関'\n", "NORMALIZED: '担当者所属機関'\n", "-----\n", "RAW: '担当者所属部署'\n", "NORMALIZED: '担当者所属部署'\n", "-----\n", "RAW: '担当者所属機関の郵便番号'\n", "NORMALIZED: '担当者所属機関の郵便番号'\n", "-----\n", "RAW: '担当者所属機関の住所'\n", "NORMALIZED: '担当者所属機関の住所'\n", "-----\n", "RAW: '電話番号'\n", "NORMALIZED: '電話番号'\n", "-----\n", "RAW: 'FAX番号'\n", "NORMALIZED: 'FAX番号'\n", "-----\n", "RAW: '電子メールアドレス'\n", "NORMALIZED: '電子メールアドレス'\n", "-----\n", "RAW: '実施医療機関の長の氏名'\n", "NORMALIZED: '実施医療機関の長の氏名'\n", "-----\n", "RAW: '管理者の許可の有無'\n", "NORMALIZED: '管理者の許可の有無'\n", "-----\n", "RAW: 'IRBの承認日'\n", "NORMALIZED: 'IRBの承認日'\n", "-----\n", "RAW: '救急医療に必要な施設又は設備'\n", "NORMALIZED: '救急医療に必要な施設又は設備'\n", "-----\n", "RAW: 'ご参考資料 多施設共同研究機関情報'\n", "NORMALIZED: 'ご参考資料 多施設共同研究機関情報'\n", "-----\n", "RAW: '\\n 試験等の目的 '\n", "NORMALIZED: '試験等の目的'\n", "-----\n", "RAW: '\\n 試験等のフェーズ /\\n Phase '\n", "NORMALIZED: '試験等のフェーズ / Phase'\n", "-----\n", "RAW: '\\n 症例登録開始予定日 / Date of First Enrollment '\n", "NORMALIZED: '症例登録開始予定日 / Date of First Enrollment'\n", "-----\n", "RAW: '\\n 第1症例登録日 / Date of First Enrollment '\n", "NORMALIZED: '第1症例登録日 / Date of First Enrollment'\n", "-----\n", "RAW: '\\n 実施期間(開始日) '\n", "NORMALIZED: '実施期間(開始日)'\n", "-----\n", "RAW: '\\n\\n 実施期間(終了日) \\n'\n", "NORMALIZED: '実施期間(終了日)'\n", "-----\n", "RAW: '\\n 実施期間(終了日) '\n", "NORMALIZED: 
'実施期間(終了日)'\n", "-----\n", "RAW: '\\n 実施予定被験者数 / Sample Size '\n", "NORMALIZED: '実施予定被験者数 / Sample Size'\n", "-----\n", "RAW: '\\n 試験等の種類 /\\n Study Type '\n", "NORMALIZED: '試験等の種類 / Study Type'\n", "-----\n", "RAW: '\\n 試験等のデザイン \\n Study Design '\n", "NORMALIZED: '試験等のデザイン Study Design'\n", "-----\n", "RAW: '\\n 無作為化 / allocation '\n", "NORMALIZED: '無作為化 / allocation'\n", "-----\n", "RAW: '\\n 盲検化 /masking '\n", "NORMALIZED: '盲検化 /masking'\n", "-----\n", "RAW: '\\n 対照 / control '\n", "NORMALIZED: '対照 / control'\n", "-----\n", "RAW: '\\n 割付け / assignment '\n", "NORMALIZED: '割付け / assignment'\n", "-----\n", "RAW: '\\n 研究目的 / purpose '\n", "NORMALIZED: '研究目的 / purpose'\n", "-----\n", "RAW: '\\n プラセボの有無 '\n", "NORMALIZED: 'プラセボの有無'\n", "-----\n", "RAW: '\\n 盲検の有無 '\n", "NORMALIZED: '盲検の有無'\n", "-----\n", "RAW: '\\n 無作為化の有無 '\n", "NORMALIZED: '無作為化の有無'\n", "-----\n", "RAW: '\\n 保険外併用療養費の有無 '\n", "NORMALIZED: '保険外併用療養費の有無'\n", "-----\n", "RAW: '\\n 実施国(日本以外) /\\n Countries of Recruitment(Except Japan) '\n", "NORMALIZED: '実施国(日本以外) / Countries of Recruitment(Except Japan)'\n", "-----\n", "RAW: '\\n 研究対象者の適格基準 / Key inclusion & exclusion criteria '\n", "NORMALIZED: '研究対象者の適格基準 / Key inclusion & exclusion criteria'\n", "-----\n", "RAW: '\\n 主たる選択基準 / Inclusion Criteria '\n", "NORMALIZED: '主たる選択基準 / Inclusion Criteria'\n", "-----\n", "RAW: '\\n 主たる除外基準 / Exclusion Criteria '\n", "NORMALIZED: '主たる除外基準 / Exclusion Criteria'\n", "-----\n", "RAW: '\\n 年齢下限 / Age Minimum '\n", "NORMALIZED: '年齢下限 / Age Minimum'\n", "-----\n", "RAW: '\\n 年齢上限 / Age Maximum '\n", "NORMALIZED: '年齢上限 / Age Maximum'\n", "-----\n", "RAW: '\\n 性別 / Gender '\n", "NORMALIZED: '性別 / Gender'\n", "-----\n", "RAW: '\\n 中止基準 '\n", "NORMALIZED: '中止基準'\n", "-----\n", "RAW: '\\n 対象疾患名 / Health Condition(s) or Problem(s) Studied '\n", "NORMALIZED: '対象疾患名 / Health Condition(s) or Problem(s) Studied'\n", "-----\n", "RAW: '\\n 対象疾患コード / Code '\n", "NORMALIZED: '対象疾患コード / Code'\n", "-----\n", "RAW: '\\n 対象疾患キーワード / 
Keyword '\n", "NORMALIZED: '対象疾患キーワード / Keyword'\n", "-----\n", "RAW: '\\n 介入の有無 '\n", "NORMALIZED: '介入の有無'\n", "-----\n", "RAW: '\\n 介入の内容 / Intervention(s) '\n", "NORMALIZED: '介入の内容 / Intervention(s)'\n", "-----\n", "RAW: '\\n 介入コード / Code '\n", "NORMALIZED: '介入コード / Code'\n", "-----\n", "RAW: '\\n 介入キーワード /Keyword '\n", "NORMALIZED: '介入キーワード /Keyword'\n", "-----\n", "RAW: '\\n 主たる評価項目 / Primary Outcome(s) '\n", "NORMALIZED: '主たる評価項目 / Primary Outcome(s)'\n", "-----\n", "RAW: '\\n 副次的な評価項目 / Secondary Outcome(s) '\n", "NORMALIZED: '副次的な評価項目 / Secondary Outcome(s)'\n", "-----\n", "RAW: '医薬品、医療機器、再生医療等製品の別'\n", "NORMALIZED: '医薬品、医療機器、再生医療等製品の別'\n", "-----\n", "RAW: '医薬品医療機器等法における未承認、適応外、承認内の別'\n", "NORMALIZED: '医薬品医療機器等法における未承認、適応外、承認内の別'\n", "-----\n", "RAW: '一般名称等'\n", "NORMALIZED: '一般名称等'\n", "-----\n", "RAW: '医薬品'\n", "NORMALIZED: '医薬品'\n", "-----\n", "RAW: '一般名称'\n", "NORMALIZED: '一般名称'\n", "-----\n", "RAW: '販売名(海外製品の場合は国名も記載すること)'\n", "NORMALIZED: '販売名(海外製品の場合は国名も記載すること)'\n", "-----\n", "RAW: '承認番号'\n", "NORMALIZED: '承認番号'\n", "-----\n", "RAW: '被験薬等提供者'\n", "NORMALIZED: '被験薬等提供者'\n", "-----\n", "RAW: '名称'\n", "NORMALIZED: '名称'\n", "-----\n", "RAW: '所在地'\n", "NORMALIZED: '所在地'\n", "-----\n", "RAW: '監査の実施予定の有無'\n", "NORMALIZED: '監査の実施予定の有無'\n", "-----\n", "RAW: '試験等の進捗状況'\n", "NORMALIZED: '試験等の進捗状況'\n", "-----\n", "RAW: '進捗状況'\n", "NORMALIZED: '進捗状況'\n", "-----\n", "RAW: 'Recruitment status'\n", "NORMALIZED: 'Recruitment status'\n", "-----\n", "RAW: '主たる評価項目に係る研究成果'\n", "NORMALIZED: '主たる評価項目に係る研究成果'\n", "-----\n", "RAW: 'Summary Results (Primary Outcome Results)'\n", "NORMALIZED: 'Summary Results (Primary Outcome Results)'\n", "-----\n", "RAW: '\\n 研究対象者への補償の有無 '\n", "NORMALIZED: '研究対象者への補償の有無'\n", "-----\n", "RAW: '\\n 補償の内容 '\n", "NORMALIZED: '補償の内容'\n", "-----\n", "RAW: '\\n 保険への加入の有無 '\n", "NORMALIZED: '保険への加入の有無'\n", "-----\n", "RAW: '\\n 保険の補償内容 '\n", "NORMALIZED: '保険の補償内容'\n", "-----\n", "RAW: '\\n 保険以外の補償の内容 '\n", "NORMALIZED: '保険以外の補償の内容'\n", 
"-----\n", "RAW: '依頼者等の名称'\n", "NORMALIZED: '依頼者等の名称'\n", "-----\n", "RAW: 'Primary Sponsor'\n", "NORMALIZED: 'Primary Sponsor'\n", "-----\n", "RAW: '研究資金提供の有無'\n", "NORMALIZED: '研究資金提供の有無'\n", "-----\n", "RAW: 'Source of Monetary Support'\n", "NORMALIZED: 'Source of Monetary Support'\n", "-----\n", "RAW: '契約締結の有無'\n", "NORMALIZED: '契約締結の有無'\n", "-----\n", "RAW: '契約締結日'\n", "NORMALIZED: '契約締結日'\n", "-----\n", "RAW: '物品提供の有無'\n", "NORMALIZED: '物品提供の有無'\n", "-----\n", "RAW: '物品提供の内容'\n", "NORMALIZED: '物品提供の内容'\n", "-----\n", "RAW: '役務提供の有無'\n", "NORMALIZED: '役務提供の有無'\n", "-----\n", "RAW: '役務提供の内容'\n", "NORMALIZED: '役務提供の内容'\n", "-----\n", "RAW: '研究資金等の提供組織の有無'\n", "NORMALIZED: '研究資金等の提供組織の有無'\n", "-----\n", "RAW: '研究資金等の提供組織名称 / Source of Monetary Support'\n", "NORMALIZED: '研究資金等の提供組織名称 / Source of Monetary Support'\n", "-----\n", "RAW: 'Secondary Sponsorの該当性'\n", "NORMALIZED: 'Secondary Sponsorの該当性'\n", "-----\n", "RAW: '\\n IRBの名称 / Name of IRB '\n", "NORMALIZED: 'IRBの名称 / Name of IRB'\n", "-----\n", "RAW: '\\n 住所 / Address '\n", "NORMALIZED: '住所 / Address'\n", "-----\n", "RAW: '\\n 電話番号 '\n", "NORMALIZED: '電話番号'\n", "-----\n", "RAW: '\\n 電子メールアドレス '\n", "NORMALIZED: '電子メールアドレス'\n", "-----\n", "RAW: '\\n 審査受付番号\\n '\n", "NORMALIZED: '審査受付番号'\n", "-----\n", "RAW: '\\n 当該試験等に対する審査結果 '\n", "NORMALIZED: '当該試験等に対する審査結果'\n", "-----\n", "RAW: '\\n 他の臨床研究登録機関発行の研究番号 '\n", "NORMALIZED: '他の臨床研究登録機関発行の研究番号'\n", "-----\n", "RAW: '\\n 他の臨床研究登録機関の名称 '\n", "NORMALIZED: '他の臨床研究登録機関の名称'\n", "-----\n", "RAW: '\\n Issuing Authority '\n", "NORMALIZED: 'Issuing Authority'\n", "-----\n", "RAW: '\\n 備考 '\n", "NORMALIZED: '備考'\n", "-----\n", "RAW: '\\n 国際共同研究 '\n", "NORMALIZED: '国際共同研究'\n", "-----\n", "RAW: '\\n 遺伝子治療等臨床研究に関する指針(平成27年厚生労働省令第344号)の対象となる試験等 '\n", "NORMALIZED: '遺伝子治療等臨床研究に関する指針(平成27年厚生労働省令第344号)の対象となる試験等'\n", "-----\n", "RAW: '\\n 遺伝子組換え生物等の使用等の規制による生物の多様性の確保に関する法律(平成15年法律第97号)の対象となる薬物を用いる試験等 '\n", "NORMALIZED: 
'遺伝子組換え生物等の使用等の規制による生物の多様性の確保に関する法律(平成15年法律第97号)の対象となる薬物を用いる試験等'\n", "-----\n", "RAW: '\\n 生物由来製品に指定が見込まれる薬物を用いる試験等 '\n", "NORMALIZED: '生物由来製品に指定が見込まれる薬物を用いる試験等'\n", "-----\n", "RAW: '\\n IPDデータを共有する計画 / Plan to share IPD\\n '\n", "NORMALIZED: 'IPDデータを共有する計画 / Plan to share IPD'\n", "-----\n", "RAW: '\\n 計画の説明 / Plan description\\n '\n", "NORMALIZED: '計画の説明 / Plan description'\n", "-----\n", "RAW: '\\n その他1 '\n", "NORMALIZED: 'その他1'\n", "-----\n", "RAW: '\\n その他2 '\n", "NORMALIZED: 'その他2'\n", "-----\n", "RAW: '\\n その他3 '\n", "NORMALIZED: 'その他3'\n", "-----\n", "RAW: '2-1 その他の添付資料1'\n", "NORMALIZED: '2-1 その他の添付資料1'\n", "-----\n", "RAW: '2-2 その他の添付資料2'\n", "NORMALIZED: '2-2 その他の添付資料2'\n" ] } ], "source": [ "for l in soup.find_all('label'):\n", " raw_text = l.get_text()\n", " normalized = normalize_text(raw_text)\n", " print(\"-----\")\n", " print(\"RAW:\", repr(raw_text))\n", " print(\"NORMALIZED:\", repr(normalized))\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FOUND LABEL:\n", "NORMALIZED: 対象疾患名\n" ] } ], "source": [ "target_jp = \"対象疾患名\"\n", "found_label = None\n", "for l in soup.find_all('label'):\n", " normalized = normalize_text(l.get_text())\n", " if target_jp in normalized:\n", " found_label = l\n", " print(\"FOUND LABEL:\")\n", " print(\"NORMALIZED:\", normalized)\n", " break\n", "\n", "if not found_label:\n", " print(\"対象疾患名 を含むラベルが見つかりませんでした。\")\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of TDs: 1\n", "TD[0]: 'ラブドイド腫瘍'\n" ] } ], "source": [ "if found_label:\n", " th = found_label.find_parent('th')\n", " if th:\n", " tr = th.find_parent('tr')\n", " if tr:\n", " tds = tr.find_all('td')\n", " print(\"Number of TDs:\", len(tds))\n", " for i, td in enumerate(tds):\n", " print(f\"TD[{i}]:\", repr(normalize_text(td.get_text())))\n", " else:\n", " print(\"Parent