mgbam committed on
Commit
0824d84
·
verified ·
1 Parent(s): bdbf8f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -54
app.py CHANGED
@@ -16,8 +16,6 @@ from scipy.stats import ttest_ind, f_oneway
16
  from sklearn.model_selection import train_test_split
17
  from sklearn.linear_model import LogisticRegression
18
  from sklearn.metrics import accuracy_score
19
- from sklearn.feature_extraction.text import TfidfVectorizer
20
- from sklearn.metrics.pairwise import cosine_similarity
21
 
22
  from statsmodels.tsa.seasonal import seasonal_decompose
23
  from statsmodels.tsa.stattools import adfuller
@@ -27,29 +25,36 @@ from Bio import Entrez # Ensure BioPython is installed
27
 
28
  from dotenv import load_dotenv
29
  import requests
 
30
  import ast
 
31
 
32
  # ---------------------- Load Environment Variables ---------------------------
33
  load_dotenv()
34
 
 
 
 
 
 
 
 
 
 
35
  # ---------------------- Streamlit Page Configuration ---------------------------
36
  # This must be the first Streamlit command in the script
37
  st.set_page_config(page_title="AI Clinical Intelligence Hub", layout="wide")
38
 
39
  # ---------------------- Initialize External Clients ---------------------------
40
- # Initialize Groq Client with API Key from environment variables
41
- GROQ_API_ENDPOINT = os.getenv("GROQ_API_ENDPOINT")
42
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
43
  PUB_EMAIL = os.getenv("PUB_EMAIL", "")
44
 
45
- if not GROQ_API_ENDPOINT or not GROQ_API_KEY:
46
- st.error("Groq API endpoint and key must be set as environment variables.")
47
  st.stop()
48
 
49
- headers = {
50
- "Authorization": f"Bearer {GROQ_API_KEY}",
51
- "Content-Type": "application/json"
52
- }
53
 
54
  # Load spaCy model with error handling
55
  try:
@@ -119,6 +124,7 @@ class AdvancedEDA(DataAnalyzer):
119
  }
120
  return analysis
121
  except Exception as e:
 
122
  return {"error": f"EDA Failed: {str(e)}"}
123
 
124
  class DistributionVisualizer(DataAnalyzer):
@@ -139,6 +145,7 @@ class DistributionVisualizer(DataAnalyzer):
139
  plt.close()
140
  return base64.b64encode(buf.getvalue()).decode()
141
  except Exception as e:
 
142
  return f"Visualization Error: {str(e)}"
143
 
144
  class TemporalAnalyzer(DataAnalyzer):
@@ -167,6 +174,7 @@ class TemporalAnalyzer(DataAnalyzer):
167
  "visualization": plot_data
168
  }
169
  except Exception as e:
 
170
  return {"error": f"Temporal Analysis Failed: {str(e)}"}
171
 
172
  class HypothesisTester(DataAnalyzer):
@@ -197,6 +205,7 @@ class HypothesisTester(DataAnalyzer):
197
  "interpretation": self.interpret_p_value(p)
198
  }
199
  except Exception as e:
 
200
  return {"error": f"Hypothesis Testing Failed: {str(e)}"}
201
 
202
  @staticmethod
@@ -206,7 +215,8 @@ class HypothesisTester(DataAnalyzer):
206
  mean_diff = abs(x.mean() - y.mean())
207
  pooled_std = np.sqrt((x.var() + y.var()) / 2)
208
  return mean_diff / pooled_std
209
- except Exception:
 
210
  return None
211
 
212
  @staticmethod
@@ -242,6 +252,7 @@ class LogisticRegressionTrainer(DataAnalyzer):
242
  "model_params": model.get_params()
243
  }
244
  except Exception as e:
 
245
  return {"error": f"Logistic Regression Model Error: {str(e)}"}
246
 
247
  # ---------------------- Business Logic Layer ---------------------------
@@ -273,6 +284,7 @@ class ClinicalRulesEngine:
273
  "severity": rule.severity if rule_matched else None
274
  }
275
  except Exception as e:
 
276
  results[rule_name] = {
277
  "rule_matched": False,
278
  "error": str(e),
@@ -298,6 +310,7 @@ class ClinicalRulesEngine:
298
  raise ValueError(f"Unsupported expression: {expr}")
299
  return eval(compile(node, '<string>', mode='eval'), {"__builtins__": None}, variables)
300
  except Exception as e:
 
301
  raise ValueError(f"Invalid expression: {e}")
302
 
303
  class ClinicalKPI(BaseModel):
@@ -327,6 +340,7 @@ class ClinicalKPIMonitoring:
327
  "status": status
328
  }
329
  except Exception as e:
 
330
  results[kpi_name] = {"error": str(e)}
331
  return results
332
 
@@ -357,6 +371,7 @@ class ClinicalKPIMonitoring:
357
  raise ValueError(f"Unsupported expression: {expr}")
358
  return eval(compile(node, '<string>', mode='eval'), {"__builtins__": None}, variables)
359
  except Exception as e:
 
360
  raise ValueError(f"Invalid expression: {e}")
361
 
362
  class DiagnosisSupport(ABC):
@@ -397,6 +412,7 @@ class SimpleDiagnosis(DiagnosisSupport):
397
  diagnosis_key: [f"Diagnosis failed: {result.get('error', 'Unknown error')}"]
398
  })
399
  except Exception as e:
 
400
  return pd.DataFrame({
401
  diagnosis_key: [f"Error during diagnosis: {e}"]
402
  })
@@ -425,6 +441,7 @@ class BasicTreatmentRecommendation(TreatmentRecommendation):
425
  **kwargs
426
  ) -> pd.DataFrame:
427
  if condition_col not in data.columns or treatment_col not in data.columns:
 
428
  return pd.DataFrame({
429
  recommendation_key: ["Condition or Treatment columns not found!"]
430
  })
@@ -448,61 +465,61 @@ class MedicalKnowledgeBase(ABC):
448
  pass
449
 
450
  class SimpleMedicalKnowledge(MedicalKnowledgeBase):
451
- """Enhanced Medical Knowledge Class using Groq API."""
452
  def __init__(self):
453
- self.api_endpoint = GROQ_API_ENDPOINT
454
- self.api_key = GROQ_API_KEY
455
- self.pub_email = PUB_EMAIL
456
-
457
- self.headers = {
458
- "Authorization": f"Bearer {self.api_key}",
459
- "Content-Type": "application/json"
460
- }
461
-
462
- # Initialize spaCy model for entity recognition if needed
463
- try:
464
- self.nlp = spacy.load("en_core_web_sm")
465
- except OSError:
466
- import subprocess
467
- import sys
468
- subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
469
- self.nlp = spacy.load("en_core_web_sm")
470
 
471
  def search_medical_info(self, query: str, pub_email: str = "") -> str:
472
  """
473
- Uses the Groq API to fetch medical information based on the user's query.
474
  """
 
475
  try:
476
- # Preprocess the query if necessary (e.g., entity recognition)
477
  doc = self.nlp(query.lower())
478
  entities = [ent.text for ent in doc.ents]
479
- if entities:
480
- processed_query = " ".join(entities)
481
- else:
482
- processed_query = query.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
 
484
- # Prepare the payload for the Groq API
485
- payload = {
486
- "query": processed_query,
487
- "context": "medical" # Assuming the API can handle context specification
488
- }
489
 
490
- # Make the API request
491
- response = requests.post(
492
- self.api_endpoint,
493
- headers=self.headers,
494
- data=json.dumps(payload)
495
- )
496
 
497
- if response.status_code == 200:
498
- data = response.json()
499
- answer = data.get("answer", "I'm sorry, I couldn't find relevant information.")
500
- pubmed_abstract = self.fetch_pubmed_abstract(processed_query, pub_email)
501
- return f"**Based on your query:** {answer}\n\n**PubMed Abstract:**\n\n{pubmed_abstract}"
502
- else:
503
- return f"Error: Received status code {response.status_code} from Groq API."
504
 
 
 
 
 
 
 
505
  except Exception as e:
 
506
  return f"Medical Knowledge Search Failed: {str(e)}"
507
 
508
  def fetch_pubmed_abstract(self, query: str, email: str) -> str:
@@ -510,18 +527,27 @@ class SimpleMedicalKnowledge(MedicalKnowledgeBase):
510
  Searches PubMed for abstracts related to the query.
511
  """
512
  try:
 
 
 
 
513
  Entrez.email = email
514
  handle = Entrez.esearch(db="pubmed", term=query, retmax=1, sort='relevance')
515
  record = Entrez.read(handle)
516
  handle.close()
 
 
517
  if record["IdList"]:
518
  handle = Entrez.efetch(db="pubmed", id=record["IdList"][0], rettype="abstract", retmode="text")
519
  abstract = handle.read()
520
  handle.close()
 
521
  return abstract
522
  else:
 
523
  return "No abstracts found for this query on PubMed."
524
  except Exception as e:
 
525
  return f"Error searching PubMed: {e}"
526
 
527
  # ---------------------- Forecasting Engine ---------------------------
@@ -559,8 +585,10 @@ class AutomatedInsights:
559
  try:
560
  results[name] = analyzer.invoke(data=data, **kwargs)
561
  except Exception as e:
 
562
  results[name] = {"error": str(e)}
563
  else:
 
564
  results[name] = {"error": "Analysis not found"}
565
  return results
566
 
 
16
  from sklearn.model_selection import train_test_split
17
  from sklearn.linear_model import LogisticRegression
18
  from sklearn.metrics import accuracy_score
 
 
19
 
20
  from statsmodels.tsa.seasonal import seasonal_decompose
21
  from statsmodels.tsa.stattools import adfuller
 
25
 
26
  from dotenv import load_dotenv
27
  import requests
28
+ import openai # Added for OpenAI GPT-4 integration
29
  import ast
30
+ import logging
31
 
32
  # ---------------------- Load Environment Variables ---------------------------
33
  load_dotenv()
34
 
35
+ # ---------------------- Logging Configuration ---------------------------
36
+ logging.basicConfig(
37
+ filename='app.log',
38
+ filemode='a',
39
+ format='%(asctime)s - %(levelname)s - %(message)s',
40
+ level=logging.INFO
41
+ )
42
+ logger = logging.getLogger()
43
+
44
  # ---------------------- Streamlit Page Configuration ---------------------------
45
  # This must be the first Streamlit command in the script
46
  st.set_page_config(page_title="AI Clinical Intelligence Hub", layout="wide")
47
 
48
  # ---------------------- Initialize External Clients ---------------------------
49
+ # Initialize OpenAI API details from environment variables
50
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
51
  PUB_EMAIL = os.getenv("PUB_EMAIL", "")
52
 
53
+ if not OPENAI_API_KEY:
54
+ st.error("OpenAI API key must be set as an environment variable (OPENAI_API_KEY).")
55
  st.stop()
56
 
57
+ openai.api_key = OPENAI_API_KEY
 
 
 
58
 
59
  # Load spaCy model with error handling
60
  try:
 
124
  }
125
  return analysis
126
  except Exception as e:
127
+ logger.error(f"EDA Failed: {str(e)}")
128
  return {"error": f"EDA Failed: {str(e)}"}
129
 
130
  class DistributionVisualizer(DataAnalyzer):
 
145
  plt.close()
146
  return base64.b64encode(buf.getvalue()).decode()
147
  except Exception as e:
148
+ logger.error(f"Visualization Error: {str(e)}")
149
  return f"Visualization Error: {str(e)}"
150
 
151
  class TemporalAnalyzer(DataAnalyzer):
 
174
  "visualization": plot_data
175
  }
176
  except Exception as e:
177
+ logger.error(f"Temporal Analysis Failed: {str(e)}")
178
  return {"error": f"Temporal Analysis Failed: {str(e)}"}
179
 
180
  class HypothesisTester(DataAnalyzer):
 
205
  "interpretation": self.interpret_p_value(p)
206
  }
207
  except Exception as e:
208
+ logger.error(f"Hypothesis Testing Failed: {str(e)}")
209
  return {"error": f"Hypothesis Testing Failed: {str(e)}"}
210
 
211
  @staticmethod
 
215
  mean_diff = abs(x.mean() - y.mean())
216
  pooled_std = np.sqrt((x.var() + y.var()) / 2)
217
  return mean_diff / pooled_std
218
+ except Exception as e:
219
+ logger.error(f"Error calculating Cohen's d: {str(e)}")
220
  return None
221
 
222
  @staticmethod
 
252
  "model_params": model.get_params()
253
  }
254
  except Exception as e:
255
+ logger.error(f"Logistic Regression Model Error: {str(e)}")
256
  return {"error": f"Logistic Regression Model Error: {str(e)}"}
257
 
258
  # ---------------------- Business Logic Layer ---------------------------
 
284
  "severity": rule.severity if rule_matched else None
285
  }
286
  except Exception as e:
287
+ logger.error(f"Error executing rule '{rule_name}': {str(e)}")
288
  results[rule_name] = {
289
  "rule_matched": False,
290
  "error": str(e),
 
310
  raise ValueError(f"Unsupported expression: {expr}")
311
  return eval(compile(node, '<string>', mode='eval'), {"__builtins__": None}, variables)
312
  except Exception as e:
313
+ logger.error(f"safe_eval error: {str(e)}")
314
  raise ValueError(f"Invalid expression: {e}")
315
 
316
  class ClinicalKPI(BaseModel):
 
340
  "status": status
341
  }
342
  except Exception as e:
343
+ logger.error(f"Error calculating KPI '{kpi_name}': {str(e)}")
344
  results[kpi_name] = {"error": str(e)}
345
  return results
346
 
 
371
  raise ValueError(f"Unsupported expression: {expr}")
372
  return eval(compile(node, '<string>', mode='eval'), {"__builtins__": None}, variables)
373
  except Exception as e:
374
+ logger.error(f"safe_eval error: {str(e)}")
375
  raise ValueError(f"Invalid expression: {e}")
376
 
377
  class DiagnosisSupport(ABC):
 
412
  diagnosis_key: [f"Diagnosis failed: {result.get('error', 'Unknown error')}"]
413
  })
414
  except Exception as e:
415
+ logger.error(f"Error during diagnosis: {str(e)}")
416
  return pd.DataFrame({
417
  diagnosis_key: [f"Error during diagnosis: {e}"]
418
  })
 
441
  **kwargs
442
  ) -> pd.DataFrame:
443
  if condition_col not in data.columns or treatment_col not in data.columns:
444
+ logger.warning(f"Condition or Treatment columns not found: {condition_col}, {treatment_col}")
445
  return pd.DataFrame({
446
  recommendation_key: ["Condition or Treatment columns not found!"]
447
  })
 
465
  pass
466
 
467
  class SimpleMedicalKnowledge(MedicalKnowledgeBase):
468
+ """Enhanced Medical Knowledge Class using OpenAI GPT-4."""
469
  def __init__(self):
470
+ self.nlp = nlp # Using the loaded spaCy model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
 
472
  def search_medical_info(self, query: str, pub_email: str = "") -> str:
473
  """
474
+ Uses OpenAI's GPT-4 to fetch medical information based on the user's query.
475
  """
476
+ logger.info(f"Received medical query: {query}")
477
  try:
478
+ # Preprocess the query (e.g., entity recognition)
479
  doc = self.nlp(query.lower())
480
  entities = [ent.text for ent in doc.ents]
481
+ processed_query = " ".join(entities) if entities else query.lower()
482
+
483
+ logger.info(f"Processed query: {processed_query}")
484
+
485
+ # Create a prompt for GPT-4
486
+ prompt = f"""
487
+ You are a medical assistant. Provide a comprehensive and accurate response to the following medical query:
488
+
489
+ Query: {processed_query}
490
+
491
+ Please ensure the information is clear, concise, and evidence-based.
492
+ """
493
+
494
+ # Make the API request to OpenAI GPT-4
495
+ response = openai.ChatCompletion.create(
496
+ model="gpt-4",
497
+ messages=[
498
+ {"role": "system", "content": "You are a helpful medical assistant."},
499
+ {"role": "user", "content": prompt}
500
+ ],
501
+ max_tokens=500,
502
+ n=1,
503
+ stop=None,
504
+ temperature=0.7,
505
+ )
506
 
507
+ # Extract the answer from the response
508
+ answer = response.choices[0].message['content'].strip()
 
 
 
509
 
510
+ logger.info("Successfully retrieved data from OpenAI GPT-4.")
 
 
 
 
 
511
 
512
+ # Fetch PubMed abstract related to the query
513
+ pubmed_abstract = self.fetch_pubmed_abstract(processed_query, pub_email)
 
 
 
 
 
514
 
515
+ # Format the response
516
+ return f"**Based on your query:** {answer}\n\n**PubMed Abstract:**\n\n{pubmed_abstract}"
517
+
518
+ except openai.error.OpenAIError as e:
519
+ logger.error(f"OpenAI API Error: {str(e)}")
520
+ return f"OpenAI API Error: {str(e)}"
521
  except Exception as e:
522
+ logger.error(f"Medical Knowledge Search Failed: {str(e)}")
523
  return f"Medical Knowledge Search Failed: {str(e)}"
524
 
525
  def fetch_pubmed_abstract(self, query: str, email: str) -> str:
 
527
  Searches PubMed for abstracts related to the query.
528
  """
529
  try:
530
+ if not email:
531
+ logger.warning("PubMed abstract retrieval skipped: Email not provided.")
532
+ return "No PubMed abstract available: Email not provided."
533
+
534
  Entrez.email = email
535
  handle = Entrez.esearch(db="pubmed", term=query, retmax=1, sort='relevance')
536
  record = Entrez.read(handle)
537
  handle.close()
538
+ logger.info(f"PubMed search for query '{query}' returned IDs: {record['IdList']}")
539
+
540
  if record["IdList"]:
541
  handle = Entrez.efetch(db="pubmed", id=record["IdList"][0], rettype="abstract", retmode="text")
542
  abstract = handle.read()
543
  handle.close()
544
+ logger.info(f"Fetched PubMed abstract for ID {record['IdList'][0]}")
545
  return abstract
546
  else:
547
+ logger.info(f"No PubMed abstracts found for query '{query}'.")
548
  return "No abstracts found for this query on PubMed."
549
  except Exception as e:
550
+ logger.error(f"Error searching PubMed: {e}")
551
  return f"Error searching PubMed: {e}"
552
 
553
  # ---------------------- Forecasting Engine ---------------------------
 
585
  try:
586
  results[name] = analyzer.invoke(data=data, **kwargs)
587
  except Exception as e:
588
+ logger.error(f"Error in analysis '{name}': {str(e)}")
589
  results[name] = {"error": str(e)}
590
  else:
591
+ logger.warning(f"Analysis '{name}' not found.")
592
  results[name] = {"error": "Analysis not found"}
593
  return results
594