File size: 7,791 Bytes
ce4a4e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import os
import pandas as pd
#from llama_index.llms.replicate import Replicate
import requests
import re


def extract_japan_cities(text):
    """Return a sorted, comma-separated string of unique Japanese city names.

    Scans *text* for location entries of the form "<city> - Japan", as
    produced by the "<city> - <country>" joining logic in the fetch
    functions below, and collects each distinct city.

    Args:
        text: Comma-separated "<city> - <country>" location string.

    Returns:
        Unique city names, sorted alphabetically and joined by ", ".
        Empty string when no Japanese locations are present.
    """
    # Non-greedy, comma-excluded capture handles hyphenated and
    # multi-word city names (e.g. "Chuo-ku", "Bunkyo Ku") that the old
    # two-word \w pattern truncated (it captured only "ku" from
    # "Chuo-ku - Japan"); \b keeps "Japan" from matching "Japanese".
    pattern = r'([^,]+?) - Japan\b'
    cities = re.findall(pattern, text)
    # Deduplicate, strip the leading space left over from the ", "
    # separator, and sort for deterministic output.
    unique_cities = sorted({city.strip() for city in cities})
    return ', '.join(unique_cities)

def fetch_clinical_trials(cancer_name):
    """Query ClinicalTrials.gov (API v2) for recruiting trials in Japan.

    Searches study titles for *cancer_name*, restricted to sites in
    Japan whose status is Recruiting, and follows pagination until all
    matching studies are collected.

    Args:
        cancer_name: Disease/condition keyword searched in study titles.

    Returns:
        pandas.DataFrame with columns: NCTID, Title, Primary Completion
        Date, Cancer, Summary, Japanes Locations, Eligibility Criteria.
        Empty DataFrame when nothing matched or a request failed.
    """
    search_expr = "%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (cancer_name)
    # Initial URL for the first API call
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }

    # Accumulate one dict per study; turned into a DataFrame at the end.
    data_list = []
    # Follow the API's nextPageToken-based pagination until exhausted.
    while True:
        # Debug trace of the effective request URL.
        print("Fetching data from:", base_url + '?' + '&'.join([f"{k}={v}" for k, v in params.items()]))

        response = requests.get(base_url, params=params)

        # Guard clause: stop paginating on any non-200 response.
        if response.status_code != 200:
            print("Failed to fetch data. Status code:", response.status_code)
            break

        data = response.json()  # Parse JSON response
        studies = data.get('studies', [])  # Extract the list of studies

        for study in studies:
            # The v2 API omits optional modules entirely, so every
            # module is fetched with .get(..., {}) — the previous direct
            # indexing raised KeyError for studies lacking, e.g., a
            # descriptionModule.
            protocol = study.get('protocolSection', {})
            identification = protocol.get('identificationModule', {})
            status_module = protocol.get('statusModule', {})

            nctId = identification.get('nctId', 'Unknown')
            title = identification.get('briefTitle', 'no title')
            conditions = ', '.join(protocol.get('conditionsModule', {}).get('conditions', ['No conditions listed']))
            summary = protocol.get('descriptionModule', {}).get('briefSummary', 'no summary')

            # Join every site as "<city> - <country>" so that
            # extract_japan_cities can pick out the Japanese ones.
            locations_list = protocol.get('contactsLocationsModule', {}).get('locations', [])
            locations = ', '.join([f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}" for location in locations_list]) if locations_list else "No locations listed"
            JapanesLocations = extract_japan_cities(locations)

            primaryCompletionDate = status_module.get('primaryCompletionDateStruct', {}).get('date', 'Unknown Date')
            eligibilityCriteria = protocol.get('eligibilityModule', {}).get('eligibilityCriteria', 'Unknown')

            # Append the data to the list as a dictionary
            data_list.append({
                "NCTID": nctId,
                "Title": title,
                "Primary Completion Date": primaryCompletionDate,
                "Cancer": conditions,
                "Summary": summary,
                "Japanes Locations": JapanesLocations,
                "Eligibility Criteria": eligibilityCriteria
            })

        # Request the next page if the server supplied a token;
        # otherwise we have everything.
        nextPageToken = data.get('nextPageToken')
        if nextPageToken:
            params['pageToken'] = nextPageToken
        else:
            break

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_list)
    return df

def fetch_clinical_trials_jp(cancer_name):
    """Query ClinicalTrials.gov (API v2) for recruiting trials in Japan,
    returning a DataFrame with Japanese-labelled columns.

    Same search and pagination as fetch_clinical_trials; only the output
    column names differ (Japanese labels for display).

    Args:
        cancer_name: Disease/condition keyword searched in study titles.

    Returns:
        pandas.DataFrame with columns: NCTID, タイトル, 対象となる癌,
        サマリー, 場所, クライテリア. Empty DataFrame when nothing
        matched or a request failed.
    """
    search_expr = "%s SEARCH[Location](AREA[LocationCountry]Japan AND AREA[LocationStatus]Recruiting)" % (cancer_name)
    # Initial URL for the first API call
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": search_expr,
        "pageSize": 100
    }

    # Accumulate one dict per study; turned into a DataFrame at the end.
    data_list = []
    # Follow the API's nextPageToken-based pagination until exhausted.
    while True:
        # Debug trace of the effective request URL.
        print("Fetching data from:", base_url + '?' + '&'.join([f"{k}={v}" for k, v in params.items()]))

        response = requests.get(base_url, params=params)

        # Guard clause: stop paginating on any non-200 response.
        if response.status_code != 200:
            print("Failed to fetch data. Status code:", response.status_code)
            break

        data = response.json()  # Parse JSON response
        studies = data.get('studies', [])  # Extract the list of studies

        for study in studies:
            # The v2 API omits optional modules entirely, so every
            # module is fetched with .get(..., {}) — the previous direct
            # indexing raised KeyError for studies lacking, e.g., a
            # descriptionModule.
            protocol = study.get('protocolSection', {})
            identification = protocol.get('identificationModule', {})

            nctId = identification.get('nctId', 'Unknown')
            title = identification.get('briefTitle', 'no title')
            conditions = ', '.join(protocol.get('conditionsModule', {}).get('conditions', ['No conditions listed']))
            summary = protocol.get('descriptionModule', {}).get('briefSummary', 'no summary')

            # Join every site as "<city> - <country>" so that
            # extract_japan_cities can pick out the Japanese ones.
            locations_list = protocol.get('contactsLocationsModule', {}).get('locations', [])
            locations = ', '.join([f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}" for location in locations_list]) if locations_list else "No locations listed"
            JapanesLocations = extract_japan_cities(locations)

            eligibilityCriteria = protocol.get('eligibilityModule', {}).get('eligibilityCriteria', 'Unknown')

            # Append the data to the list as a dictionary
            # (Japanese column labels — callers depend on these keys).
            data_list.append({
                "NCTID": nctId,
                "タイトル": title,
                "対象となる癌": conditions,
                "サマリー": summary,
                "場所": JapanesLocations,
                "クライテリア": eligibilityCriteria
            })

        # Request the next page if the server supplied a token;
        # otherwise we have everything.
        nextPageToken = data.get('nextPageToken')
        if nextPageToken:
            params['pageToken'] = nextPageToken
        else:
            break

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_list)
    return df