{ "cells": [
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
  "from urllib.parse import urljoin\n",
  "\n",
  "from bs4 import BeautifulSoup\n",
  "import httpx\n",
  "import pandas as pd"
 ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
  "BASE_URL = \"https://jrct.niph.go.jp\"\n",
  "\n",
  "\n",
  "def search_trials(query_params: dict) -> list:\n",
  "    \"\"\"First-draft placeholder: GET the search page and return an empty list.\n",
  "\n",
  "    The response is parsed but nothing is extracted from it yet, so the\n",
  "    result is always ``[]``. A later cell redefines ``search_trials`` with a\n",
  "    POST-based implementation that reads the result table.\n",
  "\n",
  "    Args:\n",
  "        query_params: Sent as URL query parameters to ``{BASE_URL}/search``.\n",
  "\n",
  "    Returns:\n",
  "        An empty list.\n",
  "    \"\"\"\n",
  "    response = httpx.get(f\"{BASE_URL}/search\", params=query_params)\n",
  "    soup = BeautifulSoup(response.text, \"html.parser\")  # parsed, not used yet\n",
  "    # Extract trial search results\n",
  "    trials = []  # Populate this list with trial details\n",
  "    return trials"
 ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
  "query_params = {\n",
  "    \"condition\": \"乳がん\",  # target condition (breast cancer)\n",
  "    \"gender\": \"both\",  # both sexes\n",
  "}\n"
 ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "search_trials(query_params)"
 ] },
 { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [
  "def search_trials(query_params):\n",
  "    \"\"\"POST a search to ``{BASE_URL}/search`` and return one dict per result row.\n",
  "\n",
  "    Args:\n",
  "        query_params: Form fields sent as the POST body.\n",
  "\n",
  "    Returns:\n",
  "        A list of dicts with the keys ``\"title\"``, ``\"id\"`` and ``\"link\"``.\n",
  "        ``\"link\"`` is ``None`` when the title cell holds no ``<a>`` with an\n",
  "        ``href``. Rows lacking a first or second ``<td>`` are skipped.\n",
  "\n",
  "    Raises:\n",
  "        httpx.HTTPStatusError: If the response has a 4xx/5xx status.\n",
  "    \"\"\"\n",
  "    # Send the POST request\n",
  "    response = httpx.post(f\"{BASE_URL}/search\", data=query_params)\n",
  "    response.raise_for_status()  # error check\n",
  "\n",
  "    # Parse the HTML\n",
  "    soup = BeautifulSoup(response.text, \"html.parser\")\n",
  "\n",
  "    # Extract the search results\n",
  "    trials = []\n",
  "    for row in soup.select(\"table tbody tr\"):  # loop over the table rows\n",
  "        id_cell = row.select_one(\"td:nth-child(1)\")\n",
  "        title_cell = row.select_one(\"td:nth-child(2)\")\n",
  "        if id_cell is None or title_cell is None:\n",
  "            # select_one() returns None when nothing matches; skip such rows\n",
  "            # instead of failing on ``None.text``.\n",
  "            continue\n",
  "\n",
  "        anchor = row.select_one(\"td:nth-child(2) a\")\n",
  "        href = anchor.get(\"href\") if anchor is not None else None\n",
  "        trials.append({\n",
  "            \"title\": title_cell.text.strip(),\n",
  "            \"id\": id_cell.text.strip(),\n",
  "            # urljoin handles root-relative, relative and absolute hrefs alike,\n",
  "            # where plain string concatenation only worked for \"/...\" paths.\n",
  "            \"link\": urljoin(BASE_URL, href.strip()) if href else None,\n",
  "        })\n",
  "\n",
  "    return trials"
 ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [
  "query_params = {  # study title (not specified)\n",
  "    \"reg_is_specific[]\": [\"1\", \"3\"],  # specified clinical research, industry-sponsored trials\n",
  "}"
 ] },
 { "cell_type":
"code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search_trials(query_params)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "\n", "
\n", " \n", " \n", "