Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# IMPORTS & CONFIGURATION
|
2 |
# -----------------------------
|
3 |
import streamlit as st
|
@@ -7,30 +22,54 @@ from rdkit.Chem import Draw
|
|
7 |
import pandas as pd
|
8 |
import matplotlib.pyplot as plt
|
9 |
import seaborn as sns
|
|
|
|
|
10 |
import logging
|
11 |
-
import
|
12 |
-
|
|
|
|
|
13 |
from openai import OpenAI
|
14 |
|
15 |
-
#
|
16 |
logging.basicConfig(
|
17 |
level=logging.INFO,
|
18 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
19 |
-
handlers=[
|
|
|
|
|
|
|
20 |
)
|
21 |
logger = logging.getLogger("PRIS")
|
22 |
|
23 |
# -----------------------------
|
24 |
# GLOBAL CONSTANTS
|
25 |
# -----------------------------
|
26 |
-
API_ENDPOINTS = {
|
|
|
27 |
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
|
28 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
|
|
|
|
|
|
29 |
"pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
}
|
32 |
|
33 |
-
DEFAULT_HEADERS = {
|
34 |
"User-Agent": "PharmaResearchIntelligenceSuite/1.0 (Professional Use)",
|
35 |
"Accept": "application/json"
|
36 |
}
|
@@ -39,193 +78,230 @@ DEFAULT_HEADERS = {
|
|
39 |
# SECRETS MANAGEMENT
|
40 |
# -----------------------------
|
41 |
class APIConfigurationError(Exception):
|
42 |
-
"""Custom exception for missing API
|
43 |
pass
|
44 |
|
45 |
try:
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
51 |
if not all([OPENAI_API_KEY, BIOPORTAL_API_KEY, PUB_EMAIL, OPENFDA_KEY]):
|
52 |
raise APIConfigurationError("One or more required API credentials are missing.")
|
|
|
53 |
except (KeyError, APIConfigurationError) as e:
|
54 |
st.error(f"Critical configuration error: {str(e)}")
|
|
|
55 |
st.stop()
|
56 |
|
57 |
# -----------------------------
|
58 |
# CORE INFRASTRUCTURE
|
59 |
# -----------------------------
|
60 |
class PharmaResearchEngine:
|
61 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
def __init__(self):
|
|
|
64 |
self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
65 |
-
|
|
|
66 |
@staticmethod
|
67 |
def api_request(endpoint: str,
|
68 |
-
params: Optional[Dict] = None,
|
69 |
-
headers: Optional[Dict] = None) -> Optional[Dict]:
|
70 |
"""
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
"""
|
73 |
try:
|
|
|
74 |
response = requests.get(
|
75 |
endpoint,
|
76 |
params=params,
|
77 |
headers={**DEFAULT_HEADERS, **(headers or {})},
|
78 |
timeout=(3.05, 15)
|
79 |
)
|
80 |
-
response.raise_for_status()
|
|
|
81 |
return response.json()
|
82 |
-
except requests.exceptions.HTTPError as
|
83 |
-
logger.error(f"HTTP Error {
|
84 |
-
st.error(f"API Error: {
|
85 |
except Exception as e:
|
86 |
-
logger.error(f"Network error
|
87 |
st.error(f"Network error: {str(e)}")
|
88 |
return None
|
89 |
|
90 |
-
def get_compound_profile(self, identifier: str) -> Optional[Dict]:
|
91 |
"""
|
92 |
-
Retrieve comprehensive chemical profile
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
"""
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
st.error(msg)
|
100 |
-
return None
|
101 |
-
|
102 |
-
pubchem_url = API_ENDPOINTS["pubchem"].format(identifier)
|
103 |
-
pubchem_data = self.api_request(pubchem_url)
|
104 |
if not pubchem_data or not pubchem_data.get("PC_Compounds"):
|
105 |
-
logger.warning(
|
106 |
-
st.error("No compound data found. Please verify your input (e.g., check for typos or use a recognized compound name).")
|
107 |
return None
|
108 |
|
109 |
compound = pubchem_data["PC_Compounds"][0]
|
110 |
-
|
111 |
'molecular_formula': self._extract_property(compound, 'Molecular Formula'),
|
112 |
'iupac_name': self._extract_property(compound, 'IUPAC Name'),
|
113 |
'canonical_smiles': self._extract_property(compound, 'Canonical SMILES'),
|
114 |
'molecular_weight': self._extract_property(compound, 'Molecular Weight'),
|
115 |
'logp': self._extract_property(compound, 'LogP')
|
116 |
}
|
|
|
|
|
117 |
|
118 |
-
def _extract_property(self, compound: Dict, prop_name: str) -> str:
|
119 |
-
"""Helper to extract a specific property from PubChem compound data."""
|
120 |
-
for prop in compound.get("props", []):
|
121 |
-
if prop.get("urn", {}).get("label") == prop_name:
|
122 |
-
return prop["value"].get("sval", "N/A")
|
123 |
-
return "N/A"
|
124 |
-
|
125 |
-
@staticmethod
|
126 |
-
def _is_valid_compound_input(user_input: str) -> bool:
|
127 |
"""
|
128 |
-
|
129 |
-
Accepts both conventional compound names and SMILES strings.
|
130 |
-
Rejects inputs containing known disease terms.
|
131 |
-
"""
|
132 |
-
input_lower = user_input.lower().strip()
|
133 |
-
# Known disease terms that should not be processed as compounds
|
134 |
-
disease_terms = ['diabetes', 'cancer', 'hypertension', 'asthma']
|
135 |
-
if any(term in input_lower for term in disease_terms):
|
136 |
-
return False
|
137 |
-
|
138 |
-
# If the input contains characters common in SMILES (e.g., '=', '(', ')', '#'), treat as SMILES.
|
139 |
-
if re.search(r"[=\(\)#]", user_input):
|
140 |
-
return True
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
|
146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
|
148 |
# -----------------------------
|
149 |
# INTELLIGENCE MODULES
|
150 |
# -----------------------------
|
151 |
class ClinicalIntelligence:
|
152 |
"""
|
153 |
-
Module for clinical trial and regulatory
|
154 |
-
|
|
|
|
|
155 |
"""
|
156 |
|
157 |
-
def __init__(self):
|
158 |
self.engine = PharmaResearchEngine()
|
|
|
159 |
|
160 |
-
def get_trial_landscape(self, query: str) -> List[Dict]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
params = {"query.term": query, "retmax": 10} if not query.startswith("NCT") else {"id": query}
|
|
|
162 |
trials = self.engine.api_request(API_ENDPOINTS["clinical_trials"], params=params)
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
return
|
168 |
|
169 |
-
def get_fda_approval(self, drug_name: str) -> Optional[Dict]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
if not OPENFDA_KEY:
|
171 |
st.error("OpenFDA API key not configured.")
|
|
|
172 |
return None
|
173 |
|
174 |
-
params = {
|
175 |
"api_key": OPENFDA_KEY,
|
176 |
"search": f'openfda.brand_name:"{drug_name}"',
|
177 |
"limit": 1
|
178 |
}
|
|
|
179 |
data = self.engine.api_request(API_ENDPOINTS["fda_drug_approval"], params=params)
|
|
|
180 |
if data and data.get("results"):
|
|
|
181 |
return data["results"][0]
|
182 |
-
logger.warning(f"No FDA data found for drug: {drug_name}")
|
183 |
-
st.error("No FDA regulatory data found for the specified drug.")
|
184 |
return None
|
185 |
|
186 |
class AIDrugInnovator:
|
187 |
"""
|
188 |
-
|
|
|
|
|
|
|
189 |
"""
|
190 |
|
191 |
-
def __init__(self):
|
192 |
self.engine = PharmaResearchEngine()
|
|
|
193 |
|
194 |
def generate_strategy(self, target: str, strategy: str) -> str:
|
195 |
-
|
|
|
196 |
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
-
|
213 |
-
-
|
214 |
-
-
|
215 |
-
|
216 |
-
**
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
**Commercial Potential Assessment**
|
223 |
-
- Conduct detailed market research to understand the competitive landscape and unmet needs.
|
224 |
-
- Segment patient populations to tailor therapeutic approaches.
|
225 |
-
- Devise dynamic pricing and reimbursement strategies aligned with payer requirements.
|
226 |
-
- Formulate a comprehensive go-to-market plan leveraging multi-channel marketing strategies.
|
227 |
-
|
228 |
-
Please format your response in Markdown with clear section headers."""
|
229 |
try:
|
230 |
response = self.engine.openai_client.chat.completions.create(
|
231 |
model="gpt-4",
|
@@ -233,45 +309,61 @@ Please format your response in Markdown with clear section headers."""
|
|
233 |
temperature=0.7,
|
234 |
max_tokens=1500
|
235 |
)
|
236 |
-
|
|
|
|
|
237 |
except Exception as e:
|
238 |
-
logger.error(f"AI
|
239 |
-
|
240 |
-
return "Strategy generation failed due to an internal error."
|
241 |
|
242 |
# -----------------------------
|
243 |
# STREAMLIT INTERFACE
|
244 |
# -----------------------------
|
245 |
class PharmaResearchInterface:
|
246 |
"""
|
247 |
-
|
248 |
-
|
|
|
|
|
|
|
249 |
"""
|
250 |
|
251 |
-
def __init__(self):
|
252 |
self.clinical_intel = ClinicalIntelligence()
|
253 |
self.ai_innovator = AIDrugInnovator()
|
254 |
self._configure_page()
|
|
|
255 |
|
256 |
-
def _configure_page(self):
|
|
|
|
|
|
|
257 |
st.set_page_config(
|
258 |
-
page_title="PRIS -
|
259 |
layout="wide",
|
260 |
initial_sidebar_state="expanded"
|
261 |
)
|
262 |
st.markdown("""
|
263 |
<style>
|
264 |
-
.main {background-color: #
|
265 |
-
.stAlert {padding: 20px;}
|
266 |
-
.reportview-container .markdown-text-container {font-family: '
|
267 |
</style>
|
268 |
""", unsafe_allow_html=True)
|
|
|
269 |
|
270 |
-
def render(self):
|
271 |
-
|
|
|
|
|
|
|
272 |
self._render_navigation()
|
|
|
273 |
|
274 |
-
def _render_navigation(self):
|
|
|
|
|
|
|
275 |
tabs = st.tabs([
|
276 |
"π Drug Innovation",
|
277 |
"π Trial Analytics",
|
@@ -279,6 +371,7 @@ class PharmaResearchInterface:
|
|
279 |
"π Regulatory Hub",
|
280 |
"π€ AI Strategist"
|
281 |
])
|
|
|
282 |
with tabs[0]:
|
283 |
self._drug_innovation()
|
284 |
with tabs[1]:
|
@@ -290,9 +383,13 @@ class PharmaResearchInterface:
|
|
290 |
with tabs[4]:
|
291 |
self._ai_strategist()
|
292 |
|
293 |
-
def _drug_innovation(self):
|
|
|
|
|
|
|
294 |
st.header("AI-Powered Drug Innovation Engine")
|
295 |
col1, col2 = st.columns([1, 3])
|
|
|
296 |
with col1:
|
297 |
target = st.text_input("Target Pathobiology:", placeholder="e.g., EGFR mutant NSCLC")
|
298 |
strategy = st.selectbox("Development Paradigm:",
|
@@ -301,29 +398,35 @@ class PharmaResearchInterface:
|
|
301 |
with st.spinner("Formulating strategic plan..."):
|
302 |
blueprint = self.ai_innovator.generate_strategy(target, strategy)
|
303 |
st.markdown(blueprint, unsafe_allow_html=True)
|
|
|
304 |
|
305 |
-
def _trial_analytics(self):
|
|
|
|
|
|
|
306 |
st.header("Clinical Trial Landscape Analysis")
|
307 |
trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
|
|
|
308 |
if st.button("Analyze Trial Landscape"):
|
309 |
with st.spinner("Fetching trial data..."):
|
310 |
trials = self.clinical_intel.get_trial_landscape(trial_query)
|
|
|
311 |
if trials:
|
312 |
st.subheader("Top 5 Clinical Trials")
|
313 |
-
trial_data = []
|
314 |
for study in trials:
|
315 |
-
title = study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A")
|
316 |
-
status = study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A")
|
317 |
-
phase = study.get("protocolSection", {}).get("designModule", {}).get("phases", ["N/A"])[0]
|
318 |
-
enrollment = study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "N/A")
|
319 |
trial_data.append({
|
320 |
-
"Title":
|
321 |
-
"Status":
|
322 |
-
"Phase":
|
323 |
-
"Enrollment":
|
324 |
})
|
|
|
|
|
325 |
df = pd.DataFrame(trial_data)
|
326 |
st.dataframe(df)
|
|
|
|
|
327 |
st.subheader("Trial Phase Distribution")
|
328 |
phase_counts = df["Phase"].value_counts()
|
329 |
fig, ax = plt.subplots()
|
@@ -331,60 +434,87 @@ class PharmaResearchInterface:
|
|
331 |
ax.set_xlabel("Trial Phase")
|
332 |
ax.set_ylabel("Number of Trials")
|
333 |
st.pyplot(fig)
|
|
|
334 |
else:
|
335 |
-
st.warning("No clinical trials found for the
|
|
|
336 |
|
337 |
-
def _compound_profiler(self):
|
338 |
-
|
339 |
-
|
340 |
-
|
|
|
|
|
|
|
|
|
341 |
with st.spinner("Decoding molecular profile..."):
|
342 |
profile = PharmaResearchEngine().get_compound_profile(compound)
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
362 |
|
363 |
-
def _regulatory_hub(self):
|
|
|
|
|
|
|
364 |
st.header("Regulatory Intelligence Hub")
|
365 |
-
st.write("
|
366 |
-
drug_name = st.text_input("Enter Drug Name for Regulatory Analysis:", placeholder="e.g.,
|
|
|
367 |
if st.button("Fetch Regulatory Data"):
|
368 |
with st.spinner("Retrieving regulatory information..."):
|
369 |
fda_data = self.clinical_intel.get_fda_approval(drug_name)
|
370 |
if fda_data:
|
371 |
st.subheader("FDA Approval Details")
|
372 |
st.json(fda_data)
|
|
|
373 |
else:
|
374 |
-
st.warning("No FDA
|
|
|
375 |
|
376 |
-
def _ai_strategist(self):
|
|
|
|
|
|
|
377 |
st.header("AI Drug Development Strategist")
|
378 |
-
st.write("
|
379 |
target = st.text_input("Enter Target Disease or Pathway:", placeholder="e.g., KRAS G12C mutation")
|
|
|
380 |
if st.button("Generate AI Strategy"):
|
381 |
with st.spinner("Generating AI-driven strategy..."):
|
382 |
strategy = self.ai_innovator.generate_strategy(target, "First-in-class")
|
383 |
st.markdown(strategy, unsafe_allow_html=True)
|
|
|
384 |
|
385 |
# -----------------------------
|
386 |
# MAIN EXECUTION
|
387 |
# -----------------------------
|
388 |
if __name__ == "__main__":
|
389 |
-
|
390 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Pharma Research Intelligence Suite (PRIS)
|
3 |
+
==========================================
|
4 |
+
A Next-Generation, AI-Integrated Platform for Accelerating Drug Discovery and Development
|
5 |
+
|
6 |
+
This module integrates clinical, chemical, and regulatory data streams with advanced AI analysis
|
7 |
+
to provide a comprehensive toolset for pharmaceutical research. The platform is built on a robust
|
8 |
+
architecture to support multi-omics compound profiling, clinical trial analytics, regulatory intelligence,
|
9 |
+
and AI-driven drug innovation strategies.
|
10 |
+
|
11 |
+
Author: [Your Name]
|
12 |
+
Date: 2025-01-31
|
13 |
+
"""
|
14 |
+
|
15 |
+
# -----------------------------
|
16 |
# IMPORTS & CONFIGURATION
|
17 |
# -----------------------------
|
18 |
import streamlit as st
|
|
|
22 |
import pandas as pd
|
23 |
import matplotlib.pyplot as plt
|
24 |
import seaborn as sns
|
25 |
+
from fpdf import FPDF
|
26 |
+
import tempfile
|
27 |
import logging
|
28 |
+
import os
|
29 |
+
import plotly.graph_objects as go
|
30 |
+
import networkx as nx
|
31 |
+
from typing import Optional, Dict, List, Any, Tuple
|
32 |
from openai import OpenAI
|
33 |
|
34 |
+
# Logging configuration: operational logs go to both a file and the console.
#
# Streamlit re-executes this script on every user interaction. Although
# logging.basicConfig() ignores repeat calls once the root logger has handlers,
# its arguments are still evaluated first, so an unconditional
# FileHandler(..., mode='w') would reopen and truncate "pris_debug.log" (and
# leak a file handle) on every rerun. The handlers are therefore only built
# while the root logger is still unconfigured.
if not logging.getLogger().handlers:
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler("pris_debug.log", mode='w'),
            logging.StreamHandler(),
        ],
    )
logger = logging.getLogger("PRIS")
|
44 |
|
45 |
# -----------------------------
|
46 |
# GLOBAL CONSTANTS
|
47 |
# -----------------------------
|
48 |
+
API_ENDPOINTS: Dict[str, str] = {
|
49 |
+
# Clinical Data Services
|
50 |
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
|
51 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
52 |
+
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
53 |
+
|
54 |
+
# Chemical & Biological Data
|
55 |
"pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
|
56 |
+
"pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
|
57 |
+
|
58 |
+
# Pharmacogenomics Resources
|
59 |
+
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
60 |
+
"pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
|
61 |
+
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
62 |
+
|
63 |
+
# Semantic Medical Resources
|
64 |
+
"bioportal_search": "https://data.bioontology.org/search",
|
65 |
+
|
66 |
+
# Drug Classification Systems
|
67 |
+
"rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
|
68 |
+
"rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
|
69 |
+
"rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
|
70 |
}
|
71 |
|
72 |
+
DEFAULT_HEADERS: Dict[str, str] = {
|
73 |
"User-Agent": "PharmaResearchIntelligenceSuite/1.0 (Professional Use)",
|
74 |
"Accept": "application/json"
|
75 |
}
|
|
|
78 |
# SECRETS MANAGEMENT
|
79 |
# -----------------------------
|
80 |
class APIConfigurationError(Exception):
|
81 |
+
"""Custom exception for missing or misconfigured API credentials."""
|
82 |
pass
|
83 |
|
84 |
try:
|
85 |
+
# Retrieve API credentials from the secure Streamlit secrets store.
|
86 |
+
OPENAI_API_KEY: str = st.secrets["OPENAI_API_KEY"]
|
87 |
+
BIOPORTAL_API_KEY: str = st.secrets["BIOPORTAL_API_KEY"]
|
88 |
+
PUB_EMAIL: str = st.secrets["PUB_EMAIL"]
|
89 |
+
OPENFDA_KEY: str = st.secrets["OPENFDA_KEY"]
|
90 |
+
|
91 |
+
# Ensure that all essential API keys are present.
|
92 |
if not all([OPENAI_API_KEY, BIOPORTAL_API_KEY, PUB_EMAIL, OPENFDA_KEY]):
|
93 |
raise APIConfigurationError("One or more required API credentials are missing.")
|
94 |
+
|
95 |
except (KeyError, APIConfigurationError) as e:
|
96 |
st.error(f"Critical configuration error: {str(e)}")
|
97 |
+
logger.critical(f"Configuration error: {str(e)}")
|
98 |
st.stop()
|
99 |
|
100 |
# -----------------------------
|
101 |
# CORE INFRASTRUCTURE
|
102 |
# -----------------------------
|
103 |
class PharmaResearchEngine:
    """
    Core engine for integrating and analyzing pharmaceutical data.

    Provides a shared HTTP GET helper, PubChem compound-profile extraction,
    and owns the OpenAI client used by the AI modules.
    """

    def __init__(self) -> None:
        # Initialize the OpenAI client with the configured API key.
        self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
        logger.info("PharmaResearchEngine initialized with OpenAI client.")

    @staticmethod
    def api_request(endpoint: str,
                    params: Optional[Dict[str, Any]] = None,
                    headers: Optional[Dict[str, str]] = None) -> Optional[Dict[str, Any]]:
        """
        Perform an API GET request and return the decoded JSON body.

        Errors are logged and surfaced to the user through ``st.error``; they
        are never raised to the caller.

        Args:
            endpoint (str): The URL endpoint for the API.
            params (Optional[Dict[str, Any]]): Query parameters to be included in the request.
            headers (Optional[Dict[str, str]]): Additional headers, merged over DEFAULT_HEADERS.

        Returns:
            Optional[Dict[str, Any]]: JSON response from the API, or None if an error occurs.
        """
        try:
            # Never write credentials to the log file: mask the API key.
            loggable_params = None if params is None else {
                key: ("***" if key == "api_key" else value)
                for key, value in params.items()
            }
            logger.debug("Requesting data from %s with params: %s", endpoint, loggable_params)
            response = requests.get(
                endpoint,
                params=params,
                headers={**DEFAULT_HEADERS, **(headers or {})},
                timeout=(3.05, 15)  # (connect, read) timeouts in seconds
            )
            response.raise_for_status()
            logger.info("Successful API request to %s", endpoint)
            return response.json()
        except requests.exceptions.HTTPError as http_err:
            logger.error(f"HTTP Error {http_err.response.status_code} for {endpoint}: {http_err}")
            st.error(f"API HTTP Error: {http_err.response.status_code} - {http_err.response.reason}")
        except Exception as e:
            # UI boundary: any other failure is reported to the user instead of crashing the app.
            logger.error(f"Network error during API request to {endpoint}: {str(e)}")
            st.error(f"Network error: {str(e)}")
        return None

    def get_compound_profile(self, identifier: str) -> Optional[Dict[str, str]]:
        """
        Retrieve a chemical profile for a given compound from PubChem.

        Args:
            identifier (str): The compound identifier typed by the user.
                It is substituted into the PubChem "compound/name" URL path.

        Returns:
            Optional[Dict[str, str]]: A dictionary with the keys 'molecular_formula',
            'iupac_name', 'canonical_smiles', 'molecular_weight' and 'logp'.
            Returns None if the identifier is blank or no data is available.
        """
        from urllib.parse import quote  # local import keeps this block self-contained

        identifier = identifier.strip()
        if not identifier:
            # A blank identifier would produce a malformed ".../name//JSON" URL.
            return None

        # Percent-encode the user input so characters such as '#', '?' or '/'
        # cannot truncate or restructure the request URL.
        formatted_endpoint = API_ENDPOINTS["pubchem"].format(quote(identifier, safe=""))
        logger.info(f"Fetching compound profile from PubChem for identifier: {identifier}")
        pubchem_data = self.api_request(formatted_endpoint)

        if not pubchem_data or not pubchem_data.get("PC_Compounds"):
            logger.warning("No compound data found in PubChem response.")
            return None

        compound = pubchem_data["PC_Compounds"][0]
        profile = {
            'molecular_formula': self._extract_property(compound, 'Molecular Formula'),
            'iupac_name': self._extract_property(compound, 'IUPAC Name'),
            'canonical_smiles': self._extract_property(compound, 'Canonical SMILES'),
            'molecular_weight': self._extract_property(compound, 'Molecular Weight'),
            'logp': self._extract_property(compound, 'LogP')
        }
        logger.debug(f"Extracted compound profile: {profile}")
        return profile

    def _extract_property(self, compound: Dict[str, Any], prop_name: str) -> str:
        """
        Helper function to extract a specific property from PubChem compound data.

        The first entry of ``compound["props"]`` whose ``urn.label`` equals
        ``prop_name`` is used.

        Args:
            compound (Dict[str, Any]): The compound data dictionary from PubChem.
            prop_name (str): The ``urn.label`` of the property to extract.

        Returns:
            str: The extracted property value as a string, or "N/A" if not found.
        """
        for prop in compound.get("props", []):
            if prop.get("urn", {}).get("label") == prop_name:
                value = prop.get("value") or {}
                # PubChem stores the value under a type-specific key; numeric
                # properties use "fval"/"ival" rather than "sval".
                for typed_key in ("sval", "fval", "ival"):
                    if typed_key in value:
                        return str(value[typed_key])
                return "N/A"
        logger.debug("Property '%s' not found for compound.", prop_name)
        return "N/A"
|
200 |
|
201 |
# -----------------------------
|
202 |
# INTELLIGENCE MODULES
|
203 |
# -----------------------------
|
204 |
class ClinicalIntelligence:
    """
    Module for analyzing clinical trial landscapes and regulatory data.

    Wraps the ClinicalTrials.gov studies endpoint and the openFDA drug-label
    endpoint, both reached through PharmaResearchEngine.api_request.
    """

    def __init__(self) -> None:
        self.engine = PharmaResearchEngine()
        logger.info("ClinicalIntelligence module initialized.")

    def get_trial_landscape(self, query: str) -> List[Dict[str, Any]]:
        """
        Fetch clinical trials matching a search query.

        Args:
            query (str): A search term (condition, intervention, or NCT number).

        Returns:
            List[Dict[str, Any]]: Up to five study records from the response's
            "studies" list; empty if the query is blank or the request fails.
        """
        query = query.strip()
        if not query:
            # Nothing to search for; avoid an unfiltered request.
            return []

        # An NCT number is "NCT" followed by eight digits; anything else is
        # treated as free text.
        is_nct_id = (
            len(query) == 11
            and query[:3].upper() == "NCT"
            and query[3:].isdigit()
        )
        # ClinicalTrials.gov API v2 parameter names: `filter.ids` selects
        # studies by NCT number and `pageSize` caps the page length.
        # (`retmax` is an NCBI E-utilities parameter, and a bare `id` is not
        # part of the v2 studies endpoint.)
        if is_nct_id:
            params: Dict[str, Any] = {"filter.ids": query.upper()}
        else:
            params = {"query.term": query, "pageSize": 10}

        logger.info(f"Fetching clinical trials with query: {query}")
        trials = self.engine.api_request(API_ENDPOINTS["clinical_trials"], params=params)

        # Safely extract and return up to the first 5 trials.
        trial_list = trials.get("studies", [])[:5] if trials else []
        logger.debug(f"Retrieved {len(trial_list)} clinical trials for query '{query}'")
        return trial_list

    def get_fda_approval(self, drug_name: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve the first openFDA drug-label record for a brand name.

        Args:
            drug_name (str): The brand name of the drug to query.

        Returns:
            Optional[Dict[str, Any]]: The first entry of the response's
            "results" list, or None if the name is blank, the API key is
            missing, or nothing was found.
        """
        # The name is embedded in a quoted phrase below, so a stray double
        # quote would break (or rewrite) the search expression.
        drug_name = drug_name.replace('"', '').strip()
        if not drug_name:
            return None

        if not OPENFDA_KEY:
            st.error("OpenFDA API key not configured.")
            logger.error("Missing OpenFDA API key.")
            return None

        params: Dict[str, Any] = {
            "api_key": OPENFDA_KEY,
            "search": f'openfda.brand_name:"{drug_name}"',
            "limit": 1
        }
        logger.info(f"Fetching FDA approval data for drug: {drug_name}")
        data = self.engine.api_request(API_ENDPOINTS["fda_drug_approval"], params=params)

        if data and data.get("results"):
            logger.debug(f"FDA approval data retrieved for drug: {drug_name}")
            return data["results"][0]
        logger.warning(f"No FDA approval data found for drug: {drug_name}")
        return None
|
264 |
|
265 |
class AIDrugInnovator:
|
266 |
"""
|
267 |
+
AI-Driven Drug Development Strategist powered by GPT-4.
|
268 |
+
|
269 |
+
This module leverages advanced language models to generate innovative drug development
|
270 |
+
strategies tailored to specific targets and therapeutic paradigms.
|
271 |
"""
|
272 |
|
273 |
+
def __init__(self) -> None:
|
274 |
self.engine = PharmaResearchEngine()
|
275 |
+
logger.info("AIDrugInnovator module initialized with GPT-4 integration.")
|
276 |
|
277 |
def generate_strategy(self, target: str, strategy: str) -> str:
|
278 |
+
"""
|
279 |
+
Generate an AI-driven development strategy.
|
280 |
|
281 |
+
Constructs a detailed prompt for GPT-4 to generate a strategic plan including
|
282 |
+
target validation, lead optimization, clinical trial design, regulatory analysis,
|
283 |
+
and commercial potential assessment.
|
284 |
+
|
285 |
+
Args:
|
286 |
+
target (str): The target disease, pathway, or biological entity.
|
287 |
+
strategy (str): The desired development paradigm (e.g., "First-in-class").
|
288 |
+
|
289 |
+
Returns:
|
290 |
+
str: A formatted strategic blueprint in Markdown.
|
291 |
+
"""
|
292 |
+
prompt: str = (
|
293 |
+
f"As Chief Scientific Officer at a leading pharmaceutical company, "
|
294 |
+
f"develop a {strategy} development strategy for the target: {target}.\n\n"
|
295 |
+
"Include the following sections:\n"
|
296 |
+
"- **Target Validation Approach:** Describe methods to confirm the target's role in the disease.\n"
|
297 |
+
"- **Lead Optimization Tactics:** Outline strategies for refining lead compounds.\n"
|
298 |
+
"- **Clinical Trial Design:** Propose innovative trial designs and endpoints.\n"
|
299 |
+
"- **Regulatory Pathway Analysis:** Evaluate the regulatory strategy and compliance roadmap.\n"
|
300 |
+
"- **Commercial Potential Assessment:** Analyze market opportunity and competitive landscape.\n\n"
|
301 |
+
"Please format your response in Markdown with clear, well-defined sections."
|
302 |
+
)
|
303 |
+
|
304 |
+
logger.info(f"Generating AI strategy for target: {target} using paradigm: {strategy}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
try:
|
306 |
response = self.engine.openai_client.chat.completions.create(
|
307 |
model="gpt-4",
|
|
|
309 |
temperature=0.7,
|
310 |
max_tokens=1500
|
311 |
)
|
312 |
+
generated_strategy = response.choices[0].message.content
|
313 |
+
logger.debug("AI strategy generation successful.")
|
314 |
+
return generated_strategy
|
315 |
except Exception as e:
|
316 |
+
logger.error(f"Error during AI strategy generation: {str(e)}")
|
317 |
+
return "Strategy generation failed. Please check API configuration and try again."
|
|
|
318 |
|
319 |
# -----------------------------
|
320 |
# STREAMLIT INTERFACE
|
321 |
# -----------------------------
|
322 |
class PharmaResearchInterface:
|
323 |
"""
|
324 |
+
User Interface for the Pharma Research Intelligence Suite.
|
325 |
+
|
326 |
+
This class configures and renders the Streamlit application, providing an interactive
|
327 |
+
environment for exploring drug innovation, clinical trial analytics, compound profiling,
|
328 |
+
regulatory insights, and AI-driven strategy generation.
|
329 |
"""
|
330 |
|
331 |
+
def __init__(self) -> None:
|
332 |
self.clinical_intel = ClinicalIntelligence()
|
333 |
self.ai_innovator = AIDrugInnovator()
|
334 |
self._configure_page()
|
335 |
+
logger.info("PharmaResearchInterface initialized and page configured.")
|
336 |
|
337 |
+
def _configure_page(self) -> None:
|
338 |
+
"""
|
339 |
+
Configure the Streamlit page settings and apply custom CSS styles.
|
340 |
+
"""
|
341 |
st.set_page_config(
|
342 |
+
page_title="PRIS - Pharma Research Intelligence Suite",
|
343 |
layout="wide",
|
344 |
initial_sidebar_state="expanded"
|
345 |
)
|
346 |
st.markdown("""
|
347 |
<style>
|
348 |
+
.main {background-color: #f9f9f9; padding: 20px;}
|
349 |
+
.stAlert {padding: 20px; border: 1px solid #e0e0e0; border-radius: 5px; background-color: #fff;}
|
350 |
+
.reportview-container .markdown-text-container {font-family: 'Arial', sans-serif; line-height: 1.6;}
|
351 |
</style>
|
352 |
""", unsafe_allow_html=True)
|
353 |
+
logger.info("Streamlit page configuration completed.")
|
354 |
|
355 |
+
def render(self) -> None:
|
356 |
+
"""
|
357 |
+
Render the complete Streamlit user interface with multiple functional tabs.
|
358 |
+
"""
|
359 |
+
st.title("Pharma Research Intelligence Suite")
|
360 |
self._render_navigation()
|
361 |
+
logger.info("User interface rendered successfully.")
|
362 |
|
363 |
+
def _render_navigation(self) -> None:
|
364 |
+
"""
|
365 |
+
Create a dynamic, tab-based navigation layout for different modules.
|
366 |
+
"""
|
367 |
tabs = st.tabs([
|
368 |
"π Drug Innovation",
|
369 |
"π Trial Analytics",
|
|
|
371 |
"π Regulatory Hub",
|
372 |
"π€ AI Strategist"
|
373 |
])
|
374 |
+
|
375 |
with tabs[0]:
|
376 |
self._drug_innovation()
|
377 |
with tabs[1]:
|
|
|
383 |
with tabs[4]:
|
384 |
self._ai_strategist()
|
385 |
|
386 |
+
def _drug_innovation(self) -> None:
|
387 |
+
"""
|
388 |
+
Render the drug innovation module that generates AI-powered development strategies.
|
389 |
+
"""
|
390 |
st.header("AI-Powered Drug Innovation Engine")
|
391 |
col1, col2 = st.columns([1, 3])
|
392 |
+
|
393 |
with col1:
|
394 |
target = st.text_input("Target Pathobiology:", placeholder="e.g., EGFR mutant NSCLC")
|
395 |
strategy = st.selectbox("Development Paradigm:",
|
|
|
398 |
with st.spinner("Formulating strategic plan..."):
|
399 |
blueprint = self.ai_innovator.generate_strategy(target, strategy)
|
400 |
st.markdown(blueprint, unsafe_allow_html=True)
|
401 |
+
logger.info("Drug innovation strategy generated and displayed.")
|
402 |
|
403 |
+
def _trial_analytics(self) -> None:
|
404 |
+
"""
|
405 |
+
Render the clinical trial analytics module to explore current trial landscapes.
|
406 |
+
"""
|
407 |
st.header("Clinical Trial Landscape Analysis")
|
408 |
trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
|
409 |
+
|
410 |
if st.button("Analyze Trial Landscape"):
|
411 |
with st.spinner("Fetching trial data..."):
|
412 |
trials = self.clinical_intel.get_trial_landscape(trial_query)
|
413 |
+
|
414 |
if trials:
|
415 |
st.subheader("Top 5 Clinical Trials")
|
416 |
+
trial_data: List[Dict[str, Any]] = []
|
417 |
for study in trials:
|
|
|
|
|
|
|
|
|
418 |
trial_data.append({
|
419 |
+
"Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
|
420 |
+
"Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
|
421 |
+
"Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["N/A"])[0],
|
422 |
+
"Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "N/A")
|
423 |
})
|
424 |
+
|
425 |
+
# Display the clinical trial data in a table.
|
426 |
df = pd.DataFrame(trial_data)
|
427 |
st.dataframe(df)
|
428 |
+
|
429 |
+
# Generate and display a bar chart of trial phase distribution.
|
430 |
st.subheader("Trial Phase Distribution")
|
431 |
phase_counts = df["Phase"].value_counts()
|
432 |
fig, ax = plt.subplots()
|
|
|
434 |
ax.set_xlabel("Trial Phase")
|
435 |
ax.set_ylabel("Number of Trials")
|
436 |
st.pyplot(fig)
|
437 |
+
logger.info("Clinical trial analytics displayed successfully.")
|
438 |
else:
|
439 |
+
st.warning("No clinical trials found for the query.")
|
440 |
+
logger.warning("No clinical trial data returned from API.")
|
441 |
|
442 |
+
def _compound_profiler(self) -> None:
    """
    Render the multi-omics compound profiler module for in-depth chemical analysis.

    Takes a compound name or SMILES string from the user, fetches its profile
    via ``PharmaResearchEngine().get_compound_profile`` and shows the 2D
    structure next to a handful of physicochemical values.
    """
    st.header("Multi-Omics Compound Profiler")
    compound = st.text_input("Analyze Compound:", placeholder="Enter drug name or SMILES")

    # Stay idle until something has been typed into the input box.
    if not compound:
        return

    with st.spinner("Decoding molecular profile..."):
        profile = PharmaResearchEngine().get_compound_profile(compound)

    if not profile:
        st.warning("No compound data available. Please verify the input.")
        logger.warning("Compound profiler did not return any data.")
        return

    structure_col, properties_col = st.columns(2)

    with structure_col:
        st.subheader("Structural Insights")
        molecule = Chem.MolFromSmiles(profile['canonical_smiles'])
        if not molecule:
            st.warning("Unable to render molecular structure from SMILES.")
            logger.warning("RDKit failed to create molecule from SMILES.")
        else:
            # Generate and display the 2D molecular structure image.
            structure_image = Draw.MolToImage(molecule, size=(400, 300))
            st.image(structure_image, caption="2D Molecular Structure")

    with properties_col:
        st.subheader("Physicochemical Profile")
        # (label shown to the user, key in the profile mapping), in display order.
        metric_fields = (
            ("Molecular Weight", 'molecular_weight'),
            ("LogP", 'logp'),
            ("IUPAC Name", 'iupac_name'),
        )
        for label, field in metric_fields:
            st.metric(label, profile[field])
        st.code(f"SMILES: {profile['canonical_smiles']}")

    logger.info("Compound profile details rendered.")
|
476 |
|
477 |
+
def _regulatory_hub(self) -> None:
    """
    Render the regulatory intelligence hub module for accessing FDA and regulatory data.

    On button press, the entered drug name is handed to
    ``self.clinical_intel.get_fda_approval`` and any result is shown as JSON;
    an empty result produces a warning instead.
    """
    st.header("Regulatory Intelligence Hub")
    st.write("Access detailed insights into FDA approvals and regulatory pathways.")
    drug_name = st.text_input(
        "Enter Drug Name for Regulatory Analysis:",
        placeholder="e.g., aspirin",
    )

    # Only hit the regulatory backend when the user explicitly requests it.
    if not st.button("Fetch Regulatory Data"):
        return

    with st.spinner("Retrieving regulatory information..."):
        approval_record = self.clinical_intel.get_fda_approval(drug_name)

    if not approval_record:
        st.warning("No FDA data found for the specified drug.")
        logger.warning("FDA regulatory data retrieval returned no results.")
        return

    st.subheader("FDA Approval Details")
    st.json(approval_record)
    logger.info("FDA regulatory data displayed.")
|
495 |
|
496 |
+
def _ai_strategist(self) -> None:
    """
    Render the AI strategist module for generating innovative drug development strategies.

    On button press, the entered target is sent to
    ``self.ai_innovator.generate_strategy`` with the fixed paradigm
    "First-in-class", and the returned text is rendered as markdown.
    """
    st.header("AI Drug Development Strategist")
    st.write("Utilize GPT-4 to craft cutting-edge drug development strategies.")
    target = st.text_input(
        "Enter Target Disease or Pathway:",
        placeholder="e.g., KRAS G12C mutation",
    )

    # No generation happens until the button is pressed.
    if not st.button("Generate AI Strategy"):
        return

    with st.spinner("Generating AI-driven strategy..."):
        generated_plan = self.ai_innovator.generate_strategy(target, "First-in-class")

    # The strategy text is rendered with raw HTML enabled, as in the original.
    st.markdown(generated_plan, unsafe_allow_html=True)
    logger.info("AI-driven strategy generated and displayed.")
|
509 |
|
510 |
# -----------------------------
|
511 |
# MAIN EXECUTION
|
512 |
# -----------------------------
|
513 |
if __name__ == "__main__":
    # Top-level boundary: build the interface and render it, reporting any
    # unexpected failure both to the log and to the Streamlit page.
    try:
        interface = PharmaResearchInterface()
        interface.render()
        logger.info("PRIS application launched successfully.")
    except Exception as e:
        # exc_info=True keeps the traceback in the log; the message text alone
        # is rarely enough to diagnose a launch failure.
        logger.critical(
            "Unexpected error during application launch: %s", e, exc_info=True
        )
        st.error(f"Application failed to start due to an unexpected error: {str(e)}")
|