diff --git "a/dev_JRCT_api/dev_web_scraber.ipynb" "b/dev_JRCT_api/dev_web_scraber.ipynb"
new file mode 100644--- /dev/null
+++ "b/dev_JRCT_api/dev_web_scraber.ipynb"
@@ -0,0 +1,3007 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import httpx\n",
+ "from bs4 import BeautifulSoup\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "BASE_URL = \"https://jrct.niph.go.jp\"\n",
+ "\n",
+ "def search_trials(query_params: dict) -> list:\n",
+ "    \"\"\"Query the jRCT search page with GET parameters.\n",
+ "\n",
+ "    Result extraction is not implemented yet, so the returned list is\n",
+ "    always empty.\n",
+ "\n",
+ "    Args:\n",
+ "        query_params: Mapping sent as the URL query string.\n",
+ "\n",
+ "    Returns:\n",
+ "        An empty list (placeholder for the parsed trial details).\n",
+ "\n",
+ "    Raises:\n",
+ "        httpx.HTTPStatusError: If the response status is 4xx or 5xx.\n",
+ "    \"\"\"\n",
+ "    response = httpx.get(f\"{BASE_URL}/search\", params=query_params)\n",
+ "    # Fail loudly on HTTP errors instead of reporting them as \"no trials\".\n",
+ "    response.raise_for_status()\n",
+ "    soup = BeautifulSoup(response.text, \"html.parser\")  # not consumed yet\n",
+ "    # TODO: extract the trial search results from `soup`.\n",
+ "    trials = []\n",
+ "    return trials"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_params = dict(\n",
+ "    condition=\"乳がん\",  # target disease\n",
+ "    gender=\"both\",  # both men and women\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search_trials(query_params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def search_trials(query_params):\n",
+ "    \"\"\"POST a search form to jRCT and return the trials in the result table.\n",
+ "\n",
+ "    Args:\n",
+ "        query_params: Form fields sent as the POST body.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts with the keys \"title\", \"id\" and \"link\", one per\n",
+ "        table row that has both cells and a link in the second cell.\n",
+ "\n",
+ "    Raises:\n",
+ "        httpx.HTTPStatusError: If the response status is 4xx or 5xx.\n",
+ "    \"\"\"\n",
+ "    from urllib.parse import urljoin\n",
+ "\n",
+ "    # Send the POST request and stop on HTTP errors.\n",
+ "    response = httpx.post(f\"{BASE_URL}/search\", data=query_params)\n",
+ "    response.raise_for_status()\n",
+ "\n",
+ "    # Parse the HTML.\n",
+ "    soup = BeautifulSoup(response.text, \"html.parser\")\n",
+ "\n",
+ "    # Extract the search results, one table row at a time.\n",
+ "    trials = []\n",
+ "    for row in soup.select(\"table tbody tr\"):\n",
+ "        id_cell = row.select_one(\"td:nth-child(1)\")\n",
+ "        title_cell = row.select_one(\"td:nth-child(2)\")\n",
+ "        anchor = row.select_one(\"td:nth-child(2) a\")\n",
+ "        href = anchor.get(\"href\") if anchor is not None else None\n",
+ "        # select_one() returns None when nothing matches, so rows without\n",
+ "        # the expected cells or link are skipped instead of raising.\n",
+ "        if id_cell is None or title_cell is None or href is None:\n",
+ "            continue\n",
+ "        trials.append({\n",
+ "            \"title\": title_cell.text.strip(),\n",
+ "            \"id\": id_cell.text.strip(),\n",
+ "            # urljoin also handles absolute hrefs and hrefs without a leading slash.\n",
+ "            \"link\": urljoin(BASE_URL, href.strip()),\n",
+ "        })\n",
+ "\n",
+ "    return trials"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Study name: not specified.\n",
+ "# Selected categories: specified clinical research, company-sponsored trials.\n",
+ "query_params = {\"reg_is_specific[]\": [\"1\", \"3\"]}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search_trials(query_params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " 臨床研究等提出・公開システム\n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 臨床研究検索
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 【重要】データ利用に関するお願い
\n",
+ " jRCTでは、公開されているデータの適正な利用をお願いしています。個人利用の範囲を超えた大量データ収集はお控えください。プログラムを利用した自動操作等による意図的な大量データ収集は個人利用の範囲を超えた利用とみなされます。皆様に快適にご利用いただくために、ご理解をよろしくお願いいたします。\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Form fields for the search request.\n",
+ "query_params = {\n",
+ "    \"reg_plobrem_1\": \"神経膠腫\",  # target disease name\n",
+ "    \"reg_recruitment[]\": [\"2\"],  # recruiting\n",
+ "    \"reg_is_specific[]\": [\"1\", \"3\"],  # specified clinical research, company-sponsored trials\n",
+ "}\n",
+ "\n",
+ "# NOTE(review): no status check is made here, so an error page would be\n",
+ "# printed like a normal result.\n",
+ "response = httpx.post(BASE_URL + \"/search\", data=query_params)\n",
+ "print(response.text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "response.raise_for_status() # エラーチェック"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " 臨床研究等提出・公開システム\n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 臨床研究検索
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 【重要】データ利用に関するお願い
\n",
+ " jRCTでは、公開されているデータの適正な利用をお願いしています。個人利用の範囲を超えた大量データ収集はお控えください。プログラムを利用した自動操作等による意図的な大量データ収集は個人利用の範囲を超えた利用とみなされます。皆様に快適にご利用いただくために、ご理解をよろしくお願いいたします。\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# レスポンス内容を確認\n",
+ "print(response.text) # レスポンスのHTML内容を出力"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " 臨床研究等提出・公開システム\n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 臨床研究検索
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 【重要】データ利用に関するお願い
\n",
+ " jRCTでは、公開されているデータの適正な利用をお願いしています。個人利用の範囲を超えた大量データ収集はお控えください。プログラムを利用した自動操作等による意図的な大量データ収集は個人利用の範囲を超えた利用とみなされます。皆様に快適にご利用いただくために、ご理解をよろしくお願いいたします。\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Post the current query_params with an explicit form-encoded Content-Type header.\n",
+ "headers = {\"Content-Type\": \"application/x-www-form-urlencoded\"}\n",
+ "\n",
+ "response = httpx.post(BASE_URL + \"/search\", headers=headers, data=query_params)\n",
+ "print(response.text)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " 臨床研究等提出・公開システム\n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 臨床研究検索
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 【重要】データ利用に関するお願い
\n",
+ " jRCTでは、公開されているデータの適正な利用をお願いしています。個人利用の範囲を超えた大量データ収集はお控えください。プログラムを利用した自動操作等による意図的な大量データ収集は個人利用の範囲を超えた利用とみなされます。皆様に快適にご利用いただくために、ご理解をよろしくお願いいたします。\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Fetch the search page, read its `_Token[fields]` form token, and send the\n",
+ "# token back together with the search fields in the same client session.\n",
+ "with httpx.Client() as client:\n",
+ "    response = client.get(f\"{BASE_URL}/search\")\n",
+ "    response.raise_for_status()\n",
+ "\n",
+ "    # Read the token from the form with BeautifulSoup.\n",
+ "    soup = BeautifulSoup(response.text, \"html.parser\")\n",
+ "    token_field = soup.find(\"input\", {\"name\": \"_Token[fields]\"})\n",
+ "    # .get() yields None when the input has no value attribute.\n",
+ "    token_value = token_field.get(\"value\") if token_field is not None else None\n",
+ "\n",
+ "    if not token_value:\n",
+ "        # Raise instead of print() + exit(): in a notebook exit() shuts the\n",
+ "        # kernel down, and the POST below must not run without a token.\n",
+ "        raise RuntimeError(\"トークンを取得できませんでした\")\n",
+ "\n",
+ "    # Search fields plus the token.\n",
+ "    query_params = {\n",
+ "        \"_Token[fields]\": token_value,\n",
+ "        \"reg_plobrem_1\": \"神経膠腫\",  # target disease name\n",
+ "        \"reg_recruitment[]\": [\"2\"],  # recruiting\n",
+ "        \"reg_is_specific[]\": [\"1\", \"3\"],  # specified clinical research, company-sponsored trials\n",
+ "    }\n",
+ "\n",
+ "    # Send the POST request.\n",
+ "    search_response = client.post(f\"{BASE_URL}/search\", data=query_params)\n",
+ "    search_response.raise_for_status()\n",
+ "\n",
+ "    print(search_response.text)  # inspect the response\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "トークンを取得できませんでした\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " 臨床研究等提出・公開システム\n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 臨床研究検索
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " 【重要】データ利用に関するお願い
\n",
+ " jRCTでは、公開されているデータの適正な利用をお願いしています。個人利用の範囲を超えた大量データ収集はお控えください。プログラムを利用した自動操作等による意図的な大量データ収集は個人利用の範囲を超えた利用とみなされます。皆様に快適にご利用いただくために、ご理解をよろしくお願いいたします。\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "ename": "",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
+ "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
+ "\u001b[1;31mClick here for more info. \n",
+ "\u001b[1;31mView Jupyter log for further details."
+ ]
+ }
+ ],
+ "source": [
+ "import httpx\n",
+ "from bs4 import BeautifulSoup\n",
+ "\n",
+ "BASE_URL = \"https://jrct.niph.go.jp\"\n",
+ "\n",
+ "# Fetch the search page, read its `_Token[fields]` and `_Token[unlocked]` form\n",
+ "# tokens, and send them back together with the search fields in the same\n",
+ "# client session.\n",
+ "with httpx.Client() as client:\n",
+ "    response = client.get(f\"{BASE_URL}/search\")\n",
+ "    response.raise_for_status()\n",
+ "\n",
+ "    # Locate both token inputs with BeautifulSoup.\n",
+ "    soup = BeautifulSoup(response.text, \"html.parser\")\n",
+ "    token_field = soup.find(\"input\", {\"name\": \"_Token[fields]\"})\n",
+ "    token_unlocked = soup.find(\"input\", {\"name\": \"_Token[unlocked]\"})\n",
+ "\n",
+ "    # .get() yields None when an input has no value attribute.\n",
+ "    token_value = token_field.get(\"value\") if token_field is not None else None\n",
+ "    token_unlocked_value = (\n",
+ "        token_unlocked.get(\"value\") if token_unlocked is not None else None\n",
+ "    )\n",
+ "\n",
+ "    # NOTE(review): an empty value attribute is treated as missing too --\n",
+ "    # confirm that is intended for `_Token[unlocked]`.\n",
+ "    if not token_value or not token_unlocked_value:\n",
+ "        # Raise instead of print() + exit(): in a notebook exit() shuts the\n",
+ "        # kernel down, and the POST below must not run without the tokens.\n",
+ "        raise RuntimeError(\"トークンを取得できませんでした\")\n",
+ "\n",
+ "    # Search fields plus both tokens.\n",
+ "    query_params = {\n",
+ "        \"_Token[fields]\": token_value,\n",
+ "        \"_Token[unlocked]\": token_unlocked_value,\n",
+ "        \"reg_plobrem_1\": \"神経膠腫\",  # target disease name\n",
+ "        \"reg_recruitment[]\": [\"2\"],  # recruiting\n",
+ "        \"reg_is_specific[]\": [\"1\", \"3\"],  # specified clinical research, company-sponsored trials\n",
+ "    }\n",
+ "\n",
+ "    # Send the POST request.\n",
+ "    search_response = client.post(f\"{BASE_URL}/search\", data=query_params)\n",
+ "    search_response.raise_for_status()\n",
+ "\n",
+ "    # Inspect the response body.\n",
+ "    print(search_response.text)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "gradio",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}