Update app.py
Browse files
app.py
CHANGED
@@ -16,8 +16,6 @@ from scipy.stats import ttest_ind, f_oneway
|
|
16 |
from sklearn.model_selection import train_test_split
|
17 |
from sklearn.linear_model import LogisticRegression
|
18 |
from sklearn.metrics import accuracy_score
|
19 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
20 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
21 |
|
22 |
from statsmodels.tsa.seasonal import seasonal_decompose
|
23 |
from statsmodels.tsa.stattools import adfuller
|
@@ -27,29 +25,36 @@ from Bio import Entrez # Ensure BioPython is installed
|
|
27 |
|
28 |
from dotenv import load_dotenv
|
29 |
import requests
|
|
|
30 |
import ast
|
|
|
31 |
|
32 |
# ---------------------- Load Environment Variables ---------------------------
|
33 |
load_dotenv()
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
# ---------------------- Streamlit Page Configuration ---------------------------
|
36 |
# This must be the first Streamlit command in the script
|
37 |
st.set_page_config(page_title="AI Clinical Intelligence Hub", layout="wide")
|
38 |
|
39 |
# ---------------------- Initialize External Clients ---------------------------
|
40 |
-
# Initialize
|
41 |
-
|
42 |
-
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
43 |
PUB_EMAIL = os.getenv("PUB_EMAIL", "")
|
44 |
|
45 |
-
if not
|
46 |
-
st.error("
|
47 |
st.stop()
|
48 |
|
49 |
-
|
50 |
-
"Authorization": f"Bearer {GROQ_API_KEY}",
|
51 |
-
"Content-Type": "application/json"
|
52 |
-
}
|
53 |
|
54 |
# Load spaCy model with error handling
|
55 |
try:
|
@@ -119,6 +124,7 @@ class AdvancedEDA(DataAnalyzer):
|
|
119 |
}
|
120 |
return analysis
|
121 |
except Exception as e:
|
|
|
122 |
return {"error": f"EDA Failed: {str(e)}"}
|
123 |
|
124 |
class DistributionVisualizer(DataAnalyzer):
|
@@ -139,6 +145,7 @@ class DistributionVisualizer(DataAnalyzer):
|
|
139 |
plt.close()
|
140 |
return base64.b64encode(buf.getvalue()).decode()
|
141 |
except Exception as e:
|
|
|
142 |
return f"Visualization Error: {str(e)}"
|
143 |
|
144 |
class TemporalAnalyzer(DataAnalyzer):
|
@@ -167,6 +174,7 @@ class TemporalAnalyzer(DataAnalyzer):
|
|
167 |
"visualization": plot_data
|
168 |
}
|
169 |
except Exception as e:
|
|
|
170 |
return {"error": f"Temporal Analysis Failed: {str(e)}"}
|
171 |
|
172 |
class HypothesisTester(DataAnalyzer):
|
@@ -197,6 +205,7 @@ class HypothesisTester(DataAnalyzer):
|
|
197 |
"interpretation": self.interpret_p_value(p)
|
198 |
}
|
199 |
except Exception as e:
|
|
|
200 |
return {"error": f"Hypothesis Testing Failed: {str(e)}"}
|
201 |
|
202 |
@staticmethod
|
@@ -206,7 +215,8 @@ class HypothesisTester(DataAnalyzer):
|
|
206 |
mean_diff = abs(x.mean() - y.mean())
|
207 |
pooled_std = np.sqrt((x.var() + y.var()) / 2)
|
208 |
return mean_diff / pooled_std
|
209 |
-
except Exception:
|
|
|
210 |
return None
|
211 |
|
212 |
@staticmethod
|
@@ -242,6 +252,7 @@ class LogisticRegressionTrainer(DataAnalyzer):
|
|
242 |
"model_params": model.get_params()
|
243 |
}
|
244 |
except Exception as e:
|
|
|
245 |
return {"error": f"Logistic Regression Model Error: {str(e)}"}
|
246 |
|
247 |
# ---------------------- Business Logic Layer ---------------------------
|
@@ -273,6 +284,7 @@ class ClinicalRulesEngine:
|
|
273 |
"severity": rule.severity if rule_matched else None
|
274 |
}
|
275 |
except Exception as e:
|
|
|
276 |
results[rule_name] = {
|
277 |
"rule_matched": False,
|
278 |
"error": str(e),
|
@@ -298,6 +310,7 @@ class ClinicalRulesEngine:
|
|
298 |
raise ValueError(f"Unsupported expression: {expr}")
|
299 |
return eval(compile(node, '<string>', mode='eval'), {"__builtins__": None}, variables)
|
300 |
except Exception as e:
|
|
|
301 |
raise ValueError(f"Invalid expression: {e}")
|
302 |
|
303 |
class ClinicalKPI(BaseModel):
|
@@ -327,6 +340,7 @@ class ClinicalKPIMonitoring:
|
|
327 |
"status": status
|
328 |
}
|
329 |
except Exception as e:
|
|
|
330 |
results[kpi_name] = {"error": str(e)}
|
331 |
return results
|
332 |
|
@@ -357,6 +371,7 @@ class ClinicalKPIMonitoring:
|
|
357 |
raise ValueError(f"Unsupported expression: {expr}")
|
358 |
return eval(compile(node, '<string>', mode='eval'), {"__builtins__": None}, variables)
|
359 |
except Exception as e:
|
|
|
360 |
raise ValueError(f"Invalid expression: {e}")
|
361 |
|
362 |
class DiagnosisSupport(ABC):
|
@@ -397,6 +412,7 @@ class SimpleDiagnosis(DiagnosisSupport):
|
|
397 |
diagnosis_key: [f"Diagnosis failed: {result.get('error', 'Unknown error')}"]
|
398 |
})
|
399 |
except Exception as e:
|
|
|
400 |
return pd.DataFrame({
|
401 |
diagnosis_key: [f"Error during diagnosis: {e}"]
|
402 |
})
|
@@ -425,6 +441,7 @@ class BasicTreatmentRecommendation(TreatmentRecommendation):
|
|
425 |
**kwargs
|
426 |
) -> pd.DataFrame:
|
427 |
if condition_col not in data.columns or treatment_col not in data.columns:
|
|
|
428 |
return pd.DataFrame({
|
429 |
recommendation_key: ["Condition or Treatment columns not found!"]
|
430 |
})
|
@@ -448,61 +465,61 @@ class MedicalKnowledgeBase(ABC):
|
|
448 |
pass
|
449 |
|
450 |
class SimpleMedicalKnowledge(MedicalKnowledgeBase):
|
451 |
-
"""Enhanced Medical Knowledge Class using
|
452 |
def __init__(self):
|
453 |
-
self.
|
454 |
-
self.api_key = GROQ_API_KEY
|
455 |
-
self.pub_email = PUB_EMAIL
|
456 |
-
|
457 |
-
self.headers = {
|
458 |
-
"Authorization": f"Bearer {self.api_key}",
|
459 |
-
"Content-Type": "application/json"
|
460 |
-
}
|
461 |
-
|
462 |
-
# Initialize spaCy model for entity recognition if needed
|
463 |
-
try:
|
464 |
-
self.nlp = spacy.load("en_core_web_sm")
|
465 |
-
except OSError:
|
466 |
-
import subprocess
|
467 |
-
import sys
|
468 |
-
subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
|
469 |
-
self.nlp = spacy.load("en_core_web_sm")
|
470 |
|
471 |
def search_medical_info(self, query: str, pub_email: str = "") -> str:
|
472 |
"""
|
473 |
-
Uses
|
474 |
"""
|
|
|
475 |
try:
|
476 |
-
# Preprocess the query
|
477 |
doc = self.nlp(query.lower())
|
478 |
entities = [ent.text for ent in doc.ents]
|
479 |
-
if entities
|
480 |
-
|
481 |
-
|
482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
483 |
|
484 |
-
#
|
485 |
-
|
486 |
-
"query": processed_query,
|
487 |
-
"context": "medical" # Assuming the API can handle context specification
|
488 |
-
}
|
489 |
|
490 |
-
|
491 |
-
response = requests.post(
|
492 |
-
self.api_endpoint,
|
493 |
-
headers=self.headers,
|
494 |
-
data=json.dumps(payload)
|
495 |
-
)
|
496 |
|
497 |
-
|
498 |
-
|
499 |
-
answer = data.get("answer", "I'm sorry, I couldn't find relevant information.")
|
500 |
-
pubmed_abstract = self.fetch_pubmed_abstract(processed_query, pub_email)
|
501 |
-
return f"**Based on your query:** {answer}\n\n**PubMed Abstract:**\n\n{pubmed_abstract}"
|
502 |
-
else:
|
503 |
-
return f"Error: Received status code {response.status_code} from Groq API."
|
504 |
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
except Exception as e:
|
|
|
506 |
return f"Medical Knowledge Search Failed: {str(e)}"
|
507 |
|
508 |
def fetch_pubmed_abstract(self, query: str, email: str) -> str:
|
@@ -510,18 +527,27 @@ class SimpleMedicalKnowledge(MedicalKnowledgeBase):
|
|
510 |
Searches PubMed for abstracts related to the query.
|
511 |
"""
|
512 |
try:
|
|
|
|
|
|
|
|
|
513 |
Entrez.email = email
|
514 |
handle = Entrez.esearch(db="pubmed", term=query, retmax=1, sort='relevance')
|
515 |
record = Entrez.read(handle)
|
516 |
handle.close()
|
|
|
|
|
517 |
if record["IdList"]:
|
518 |
handle = Entrez.efetch(db="pubmed", id=record["IdList"][0], rettype="abstract", retmode="text")
|
519 |
abstract = handle.read()
|
520 |
handle.close()
|
|
|
521 |
return abstract
|
522 |
else:
|
|
|
523 |
return "No abstracts found for this query on PubMed."
|
524 |
except Exception as e:
|
|
|
525 |
return f"Error searching PubMed: {e}"
|
526 |
|
527 |
# ---------------------- Forecasting Engine ---------------------------
|
@@ -559,8 +585,10 @@ class AutomatedInsights:
|
|
559 |
try:
|
560 |
results[name] = analyzer.invoke(data=data, **kwargs)
|
561 |
except Exception as e:
|
|
|
562 |
results[name] = {"error": str(e)}
|
563 |
else:
|
|
|
564 |
results[name] = {"error": "Analysis not found"}
|
565 |
return results
|
566 |
|
|
|
16 |
from sklearn.model_selection import train_test_split
|
17 |
from sklearn.linear_model import LogisticRegression
|
18 |
from sklearn.metrics import accuracy_score
|
|
|
|
|
19 |
|
20 |
from statsmodels.tsa.seasonal import seasonal_decompose
|
21 |
from statsmodels.tsa.stattools import adfuller
|
|
|
25 |
|
26 |
from dotenv import load_dotenv
|
27 |
import requests
|
28 |
+
import openai # Added for OpenAI GPT-4 integration
|
29 |
import ast
|
30 |
+
import logging
|
31 |
|
32 |
# ---------------------- Load Environment Variables ---------------------------
|
33 |
load_dotenv()
|
34 |
|
35 |
+
# ---------------------- Logging Configuration ---------------------------
|
36 |
+
# Send application log records to app.log (opened in append mode) at INFO
# level and above, using a timestamp / level / message layout.
logging.basicConfig(
    filename='app.log',
    filemode='a',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
# Use a named module-level logger instead of the root logger itself. It has no
# handlers of its own, so its records propagate to the root configuration
# set up by basicConfig() above.
logger = logging.getLogger(__name__)
|
43 |
+
|
44 |
# ---------------------- Streamlit Page Configuration ---------------------------
|
45 |
# This must be the first Streamlit command in the script
|
46 |
st.set_page_config(page_title="AI Clinical Intelligence Hub", layout="wide")
|
47 |
|
48 |
# ---------------------- Initialize External Clients ---------------------------
|
49 |
+
# Initialize OpenAI API details from environment variables
|
50 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
51 |
PUB_EMAIL = os.getenv("PUB_EMAIL", "")
|
52 |
|
53 |
+
if not OPENAI_API_KEY:
|
54 |
+
st.error("OpenAI API key must be set as an environment variable (OPENAI_API_KEY).")
|
55 |
st.stop()
|
56 |
|
57 |
+
openai.api_key = OPENAI_API_KEY
|
|
|
|
|
|
|
58 |
|
59 |
# Load spaCy model with error handling
|
60 |
try:
|
|
|
124 |
}
|
125 |
return analysis
|
126 |
except Exception as e:
|
127 |
+
logger.error(f"EDA Failed: {str(e)}")
|
128 |
return {"error": f"EDA Failed: {str(e)}"}
|
129 |
|
130 |
class DistributionVisualizer(DataAnalyzer):
|
|
|
145 |
plt.close()
|
146 |
return base64.b64encode(buf.getvalue()).decode()
|
147 |
except Exception as e:
|
148 |
+
logger.error(f"Visualization Error: {str(e)}")
|
149 |
return f"Visualization Error: {str(e)}"
|
150 |
|
151 |
class TemporalAnalyzer(DataAnalyzer):
|
|
|
174 |
"visualization": plot_data
|
175 |
}
|
176 |
except Exception as e:
|
177 |
+
logger.error(f"Temporal Analysis Failed: {str(e)}")
|
178 |
return {"error": f"Temporal Analysis Failed: {str(e)}"}
|
179 |
|
180 |
class HypothesisTester(DataAnalyzer):
|
|
|
205 |
"interpretation": self.interpret_p_value(p)
|
206 |
}
|
207 |
except Exception as e:
|
208 |
+
logger.error(f"Hypothesis Testing Failed: {str(e)}")
|
209 |
return {"error": f"Hypothesis Testing Failed: {str(e)}"}
|
210 |
|
211 |
@staticmethod
|
|
|
215 |
mean_diff = abs(x.mean() - y.mean())
|
216 |
pooled_std = np.sqrt((x.var() + y.var()) / 2)
|
217 |
return mean_diff / pooled_std
|
218 |
+
except Exception as e:
|
219 |
+
logger.error(f"Error calculating Cohen's d: {str(e)}")
|
220 |
return None
|
221 |
|
222 |
@staticmethod
|
|
|
252 |
"model_params": model.get_params()
|
253 |
}
|
254 |
except Exception as e:
|
255 |
+
logger.error(f"Logistic Regression Model Error: {str(e)}")
|
256 |
return {"error": f"Logistic Regression Model Error: {str(e)}"}
|
257 |
|
258 |
# ---------------------- Business Logic Layer ---------------------------
|
|
|
284 |
"severity": rule.severity if rule_matched else None
|
285 |
}
|
286 |
except Exception as e:
|
287 |
+
logger.error(f"Error executing rule '{rule_name}': {str(e)}")
|
288 |
results[rule_name] = {
|
289 |
"rule_matched": False,
|
290 |
"error": str(e),
|
|
|
310 |
raise ValueError(f"Unsupported expression: {expr}")
|
311 |
return eval(compile(node, '<string>', mode='eval'), {"__builtins__": None}, variables)
|
312 |
except Exception as e:
|
313 |
+
logger.error(f"safe_eval error: {str(e)}")
|
314 |
raise ValueError(f"Invalid expression: {e}")
|
315 |
|
316 |
class ClinicalKPI(BaseModel):
|
|
|
340 |
"status": status
|
341 |
}
|
342 |
except Exception as e:
|
343 |
+
logger.error(f"Error calculating KPI '{kpi_name}': {str(e)}")
|
344 |
results[kpi_name] = {"error": str(e)}
|
345 |
return results
|
346 |
|
|
|
371 |
raise ValueError(f"Unsupported expression: {expr}")
|
372 |
return eval(compile(node, '<string>', mode='eval'), {"__builtins__": None}, variables)
|
373 |
except Exception as e:
|
374 |
+
logger.error(f"safe_eval error: {str(e)}")
|
375 |
raise ValueError(f"Invalid expression: {e}")
|
376 |
|
377 |
class DiagnosisSupport(ABC):
|
|
|
412 |
diagnosis_key: [f"Diagnosis failed: {result.get('error', 'Unknown error')}"]
|
413 |
})
|
414 |
except Exception as e:
|
415 |
+
logger.error(f"Error during diagnosis: {str(e)}")
|
416 |
return pd.DataFrame({
|
417 |
diagnosis_key: [f"Error during diagnosis: {e}"]
|
418 |
})
|
|
|
441 |
**kwargs
|
442 |
) -> pd.DataFrame:
|
443 |
if condition_col not in data.columns or treatment_col not in data.columns:
|
444 |
+
logger.warning(f"Condition or Treatment columns not found: {condition_col}, {treatment_col}")
|
445 |
return pd.DataFrame({
|
446 |
recommendation_key: ["Condition or Treatment columns not found!"]
|
447 |
})
|
|
|
465 |
pass
|
466 |
|
467 |
class SimpleMedicalKnowledge(MedicalKnowledgeBase):
|
468 |
+
"""Enhanced Medical Knowledge Class using OpenAI GPT-4."""
|
469 |
def __init__(self):
|
470 |
+
self.nlp = nlp # Using the loaded spaCy model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
|
472 |
def search_medical_info(self, query: str, pub_email: str = "") -> str:
    """
    Answer a medical question with OpenAI's GPT-4 and append a PubMed abstract.

    Args:
        query: Free-text question entered by the user.
        pub_email: Contact e-mail passed through to ``fetch_pubmed_abstract``.

    Returns:
        A Markdown-formatted string with the model's answer followed by the
        PubMed lookup result. Exceptions raised during the lookup are caught
        and reported as a human-readable message in the returned string.
    """
    # NOTE(review): the raw query text is written to the application log;
    # confirm this is acceptable if queries can contain patient details.
    logger.info("Received medical query: %s", query)
    try:
        # Nothing to ask: avoid a pointless (and billable) API round-trip.
        if not query or not query.strip():
            logger.warning("Medical query skipped: empty query.")
            return "Please enter a medical query."

        # Entity extraction on the lower-cased text yields the PubMed search
        # term; fall back to the whole lower-cased query when no entity is found.
        doc = self.nlp(query.lower())
        entities = [ent.text for ent in doc.ents]
        processed_query = " ".join(entities) if entities else query.lower()

        logger.info("Processed query: %s", processed_query)

        # The model receives the user's full question rather than the
        # entity-reduced form, which can drop the intent of the question.
        # The prompt is assembled from explicit parts so that its text does
        # not depend on how this method happens to be indented.
        prompt = "\n\n".join((
            "You are a medical assistant. Provide a comprehensive and accurate "
            "response to the following medical query:",
            f"Query: {query.strip()}",
            "Please ensure the information is clear, concise, and evidence-based.",
        ))

        # NOTE(review): openai.ChatCompletion and openai.error are the
        # pre-1.0 SDK interface — confirm the pinned openai version.
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful medical assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
            n=1,
            stop=None,
            temperature=0.7,
        )

        # The message content can be missing or blank; never call .strip()
        # on None, and give the user a readable fallback instead.
        content = response.choices[0].message['content']
        answer = (content or "").strip() or "I'm sorry, I couldn't find relevant information."

        logger.info("Successfully retrieved data from OpenAI GPT-4.")

        # Fetch a PubMed abstract related to the query.
        pubmed_abstract = self.fetch_pubmed_abstract(processed_query, pub_email)

        return f"**Based on your query:** {answer}\n\n**PubMed Abstract:**\n\n{pubmed_abstract}"

    except openai.error.OpenAIError as e:
        logger.error("OpenAI API Error: %s", e)
        return f"OpenAI API Error: {str(e)}"
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Medical Knowledge Search Failed: %s", e)
        return f"Medical Knowledge Search Failed: {str(e)}"
|
524 |
|
525 |
def fetch_pubmed_abstract(self, query: str, email: str) -> str:
|
|
|
527 |
Searches PubMed for abstracts related to the query.
|
528 |
"""
|
529 |
try:
|
530 |
+
if not email:
|
531 |
+
logger.warning("PubMed abstract retrieval skipped: Email not provided.")
|
532 |
+
return "No PubMed abstract available: Email not provided."
|
533 |
+
|
534 |
Entrez.email = email
|
535 |
handle = Entrez.esearch(db="pubmed", term=query, retmax=1, sort='relevance')
|
536 |
record = Entrez.read(handle)
|
537 |
handle.close()
|
538 |
+
logger.info(f"PubMed search for query '{query}' returned IDs: {record['IdList']}")
|
539 |
+
|
540 |
if record["IdList"]:
|
541 |
handle = Entrez.efetch(db="pubmed", id=record["IdList"][0], rettype="abstract", retmode="text")
|
542 |
abstract = handle.read()
|
543 |
handle.close()
|
544 |
+
logger.info(f"Fetched PubMed abstract for ID {record['IdList'][0]}")
|
545 |
return abstract
|
546 |
else:
|
547 |
+
logger.info(f"No PubMed abstracts found for query '{query}'.")
|
548 |
return "No abstracts found for this query on PubMed."
|
549 |
except Exception as e:
|
550 |
+
logger.error(f"Error searching PubMed: {e}")
|
551 |
return f"Error searching PubMed: {e}"
|
552 |
|
553 |
# ---------------------- Forecasting Engine ---------------------------
|
|
|
585 |
try:
|
586 |
results[name] = analyzer.invoke(data=data, **kwargs)
|
587 |
except Exception as e:
|
588 |
+
logger.error(f"Error in analysis '{name}': {str(e)}")
|
589 |
results[name] = {"error": str(e)}
|
590 |
else:
|
591 |
+
logger.warning(f"Analysis '{name}' not found.")
|
592 |
results[name] = {"error": "Analysis not found"}
|
593 |
return results
|
594 |
|