"""Rule-based "virtual doctor" chatbot.

The module loads a disease/symptom/precaution table from CSV, builds a TF-IDF
+ FAISS nearest-neighbour index over each disease's symptom list, loads a
medical NER pipeline for symptom extraction, and exposes ``ChatbotSession``,
a small state machine that drives one conversation.

NOTE: importing this module reads the CSV and loads the NER model.
"""

import json
import random
import re

import faiss
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline


# -------------------------------
# Load disease data and preprocess
# -------------------------------
def _split_field(raw, lowercase=False):
    """Split a comma-separated cell into trimmed, non-empty items."""
    items = (part.strip() for part in str(raw).split(","))
    if lowercase:
        return [item.lower() for item in items if item]
    return [item for item in items if item]


def load_disease_data(csv_path):
    """Read the disease table and return two dicts keyed by disease name.

    The CSV must provide ``disease``, ``symptoms`` and ``precautions`` columns
    (header matching ignores case and surrounding whitespace). ``symptoms`` and
    ``precautions`` are comma-separated lists inside a single cell.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        ``(disease_symptoms, disease_precautions)``: disease name -> list of
        lower-cased symptoms, and disease name -> list of precautions. Rows
        with a blank disease name are skipped; if a disease appears more than
        once, the last row wins.
    """
    # dtype=str keeps purely numeric cells as text so the string handling
    # below cannot fail on them.
    df = pd.read_csv(csv_path, dtype=str)
    df.columns = df.columns.str.strip().str.lower()
    df = df.fillna("")

    symptoms_by_disease = {}
    precautions_by_disease = {}
    for name, symptoms, precautions in zip(
        df["disease"], df["symptoms"], df["precautions"]
    ):
        name = name.strip()
        if not name:
            # A nameless row cannot be reported back to the user.
            continue
        symptoms_by_disease[name] = _split_field(symptoms, lowercase=True)
        precautions_by_disease[name] = _split_field(precautions)
    return symptoms_by_disease, precautions_by_disease


# Load CSV data (ensure this CSV file is in the repository root)
disease_symptoms, disease_precautions = load_disease_data("disease_sympts_prec_full.csv")

known_symptoms = set()
for syms in disease_symptoms.values():
    known_symptoms.update(syms)

# -------------------------------
# Build symptom vectorizer and FAISS index
# -------------------------------
vectorizer = TfidfVectorizer()
symptom_texts = [" ".join(symptoms) for symptoms in disease_symptoms.values()]
tfidf_matrix = vectorizer.fit_transform(symptom_texts).toarray()
index = faiss.IndexFlatL2(tfidf_matrix.shape[1])
index.add(np.array(tfidf_matrix, dtype=np.float32))
# Row i of the index corresponds to disease_list[i] (dicts keep insertion order).
disease_list = list(disease_symptoms.keys())


def find_closest_disease(user_symptoms):
    """Return the disease whose symptom text is nearest to *user_symptoms*.

    Args:
        user_symptoms: Iterable of symptom strings reported by the user.

    Returns:
        The disease name, or ``None`` when no prediction can be made: no
        symptoms were given, none of their words are in the TF-IDF vocabulary
        (an all-zero query carries no signal, so any "nearest" row would be
        arbitrary), or the index returned no hit.
    """
    if not user_symptoms or not disease_list:
        return None
    query = " ".join(user_symptoms)
    user_vector = vectorizer.transform([query]).toarray().astype("float32")
    if not user_vector.any():
        return None
    _distances, indices = index.search(user_vector, k=1)
    best = int(indices[0][0])
    if best < 0:
        # FAISS uses -1 as the label when it has no neighbour to return.
        return None
    return disease_list[best]


# -------------------------------
# Load Medical NER model for symptom extraction
# -------------------------------
medical_ner = pipeline(
    "ner",
    model="blaze999/Medical-NER",
    tokenizer="blaze999/Medical-NER",
    aggregation_strategy="simple",
)


def extract_symptoms_ner(text):
    """Return the distinct symptom mentions the NER model finds in *text*.

    Only entities whose group contains ``SIGN_SYMPTOM`` are kept. Mentions are
    lower-cased, trimmed and de-duplicated in order of first appearance, so
    the result is deterministic for a given model output.
    """
    if not text or not text.strip():
        return []
    mentions = (
        entity["word"].strip().lower()
        for entity in medical_ner(text)
        if "SIGN_SYMPTOM" in entity["entity_group"]
    )
    # dict.fromkeys de-duplicates while preserving order (a set would not).
    return list(dict.fromkeys(m for m in mentions if m))


# Whole-word match so that e.g. "eyes" or "yesterday" is not read as "yes".
_AFFIRMATIVE_RE = re.compile(r"\b(?:yes|yeah|yep|certainly|sometimes|a little)\b")


def is_affirmative(answer):
    """Return True when *answer* contains an affirmative word or phrase."""
    return _AFFIRMATIVE_RE.search(answer.lower()) is not None


# -------------------------------
# Chatbot session class
# -------------------------------
class ChatbotSession:
    """State machine for one doctor/patient conversation.

    States, in order: ``symptom_collection`` (optionally looping through
    ``symptom_detail``), then ``pain_check``, then ``medications``, after
    which the session is marked finished and a summary is returned.
    """

    _DETAIL_FIELDS = ("duration", "severity", "location")
    _END_COMMANDS = ("exit", "quit")

    def __init__(self):
        self.conversation_history = []
        self.reported_symptoms = set()
        self.symptom_details = {}  # Details per symptom, plus 'pain'/'medications'
        self.asked_missing = set()  # Follow-up symptoms already asked about
        self.awaiting_followup = None  # Symptom of the pending yes/no question
        self.awaiting_detail = None
        self.current_detail_symptom = None
        self.state = "symptom_collection"
        self.finished = False
        self.predicted_disease = None
        greeting = "Doctor: Hello, I am your virtual doctor. What symptoms are you experiencing today?"
        self.conversation_history.append(greeting)

    def process_message(self, message: str) -> str:
        """Route *message* to the handler of the current state and return the reply."""
        if self.finished:
            return "Doctor: Thank you. Our session has ended."
        handlers = {
            "symptom_collection": self._handle_symptom_collection,
            "symptom_detail": self._handle_symptom_detail,
            "pain_check": self._handle_pain_check,
            "medications": self._handle_medications,
        }
        handler = handlers.get(self.state)
        if handler is None:
            return "Doctor: Could you please clarify?"
        return handler(message)

    def _resolve_followup(self, answer):
        """Consume the pending "are you also experiencing X?" question."""
        symptom = self.awaiting_followup
        self.awaiting_followup = None
        # Remember the question regardless of the answer so it is not repeated.
        self.asked_missing.add(symptom)
        if is_affirmative(answer):
            self.reported_symptoms.add(symptom)
            self.symptom_details.setdefault(symptom, {})

    def _next_followup_symptom(self):
        """Return the next symptom of the predicted disease to ask about, or None."""
        if not self.predicted_disease:
            return None
        # Walk the disease's own list so the choice is deterministic.
        for symptom in disease_symptoms.get(self.predicted_disease, []):
            if symptom not in self.reported_symptoms and symptom not in self.asked_missing:
                return symptom
        return None

    def _needs_details(self, symptom):
        """Return True while any detail field of *symptom* is still missing."""
        recorded = self.symptom_details.get(symptom, {})
        return any(field not in recorded for field in self._DETAIL_FIELDS)

    def _handle_symptom_collection(self, message):
        """Collect symptoms, ask follow-up questions, then hand over to details.

        ``exit``/``quit`` always ends collection. ``no`` ends it too, unless it
        answers a pending follow-up question, in which case it only means
        "I do not have that symptom".
        """
        text = message.strip()
        lowered = text.lower()

        answering_followup = self.awaiting_followup is not None
        if answering_followup:
            self._resolve_followup(text)

        if lowered in self._END_COMMANDS or (lowered == "no" and not answering_followup):
            if not self.reported_symptoms:
                self.finished = True
                return "Doctor: It seems no symptoms were reported. Ending the session."
            self.predicted_disease = find_closest_disease(sorted(self.reported_symptoms))
            self.state = "pain_check"
            return "Doctor: Before proceeding, are you experiencing any pain? If yes, please rate it 1-10 or type 'no'."

        # Extract symptoms
        new_symptoms = extract_symptoms_ner(text)
        for sym in new_symptoms:
            if sym not in self.reported_symptoms:
                self.reported_symptoms.add(sym)
                self.symptom_details[sym] = {}

        if not self.reported_symptoms or not (new_symptoms or answering_followup):
            return "Doctor: I couldn't detect any medical symptoms. Could you describe it differently?"

        # Update disease prediction
        self.predicted_disease = find_closest_disease(sorted(self.reported_symptoms))

        follow_up = self._next_followup_symptom()
        if follow_up is not None:
            self.awaiting_followup = follow_up
            return f"Doctor: Are you also experiencing {follow_up}?"

        # If all covered, ask symptom details -- only for symptoms that still
        # lack them, otherwise the detail state would have nothing to ask.
        pending = [s for s in sorted(self.reported_symptoms) if self._needs_details(s)]
        if pending:
            symptom = random.choice(pending)
            self.current_detail_symptom = symptom
            self.state = "symptom_detail"
            return f"Doctor: About your '{symptom}', when did it start? (duration)"

        return "Doctor: Do you have any more symptoms to share?"

    def _handle_symptom_detail(self, message):
        """Store duration, severity and location (in that order) for the current symptom."""
        symptom = self.current_detail_symptom
        if symptom is not None:
            details = self.symptom_details.setdefault(symptom, {})
            if 'duration' not in details:
                details['duration'] = message
                return f"Doctor: How severe is the '{symptom}'? (mild/moderate/severe)"
            if 'severity' not in details:
                details['severity'] = message
                return f"Doctor: Where exactly do you feel the '{symptom}' (body part/location)?"
            if 'location' not in details:
                details['location'] = message
                self.current_detail_symptom = None
                self.state = "symptom_collection"
                return "Doctor: Thank you. Any other symptoms you'd like to mention?"
        # Nothing left to ask here: go back to collection instead of looping
        # on a clarification request with no way out.
        self.current_detail_symptom = None
        self.state = "symptom_collection"
        return "Doctor: Thank you. Any other symptoms you'd like to mention?"

    def _handle_pain_check(self, message):
        """Record the pain rating (int when numeric, raw text otherwise)."""
        try:
            severity = int(message)
        except ValueError:
            severity = message
        # Merge rather than overwrite: 'pain' may already hold details if it
        # was reported as a symptom earlier in the conversation.
        existing = self.symptom_details.get('pain')
        entry = dict(existing) if isinstance(existing, dict) else {}
        entry['severity'] = severity
        self.symptom_details['pain'] = entry
        self.state = "medications"
        return "Doctor: Have you taken any medications recently? Please mention them or type 'no'."

    def _handle_medications(self, message):
        """Record medications, finish the session and return the summary."""
        declined = message.strip().lower() in ("no", "none")
        self.symptom_details['medications'] = "None" if declined else message
        self.finished = True
        return self._generate_summary()

    def _generate_summary(self):
        """Build the closing message: details per entry, prediction, disclaimer."""
        report = "\n".join(
            f"- {sym.title()}: {details}"
            for sym, details in self.symptom_details.items()
        )
        disease_part = (
            f"\n\nDoctor: Based on your symptoms, the most likely condition could be *{self.predicted_disease}*."
            if self.predicted_disease
            else ""
        )
        advice = "\n\nDoctor: Please note this is a preliminary assessment. Kindly consult a physician for a detailed diagnosis."
        return f"Doctor: Thank you for the information! Here's a summary of your case:\n{report}{disease_part}{advice}"

    def get_data(self):
        """Return the session state as a plain dict (symptoms as a list)."""
        return {
            "conversation": self.conversation_history,
            "symptoms": list(self.reported_symptoms),
            "symptom_details": self.symptom_details,
            "predicted_disease": self.predicted_disease,
        }