"""symptom_assessment.py — match reported symptoms to the closest known disease.

Uses a TF-IDF vector space over each disease's symptom list; a query is the
user's symptoms joined into one text and compared by cosine similarity.
(Original upstream: Uspark / symptom_assessment.py, commit 8dcd1f3.)
"""
from sklearn.feature_extraction.text import TfidfVectorizer
class SymptomAssessment:
    """Match reported symptoms to the closest known disease via TF-IDF cosine similarity."""

    def __init__(self):
        # Example disease-symptom mapping dictionary.
        # In practice, replace this with a robust dataset.
        self.disease_symptoms = {
            "Flu": ["fever", "cough", "sore throat", "fatigue"],
            "Migraine": ["headache", "nausea", "sensitivity to light"],
            "COVID-19": ["fever", "cough", "shortness of breath", "loss of taste"]
        }
        # Build a TF-IDF vector space over each disease's joined symptom text so
        # one query can be compared against every disease in a single product.
        self.vectorizer = TfidfVectorizer()
        self.diseases = list(self.disease_symptoms.keys())
        symptom_texts = [" ".join(self.disease_symptoms[d]) for d in self.diseases]
        self.vectors = self.vectorizer.fit_transform(symptom_texts)

    def assess(self, symptoms_list):
        """
        Given a list of reported symptoms, determine the best matching disease
        and identify which expected symptoms are missing.

        Parameters
        ----------
        symptoms_list : list[str]
            Free-text symptom phrases, e.g. ["fever", "cough"]. Case-insensitive.

        Returns
        -------
        tuple[list[str], str]
            (missing_symptoms, assessment). missing_symptoms is sorted for
            deterministic output. When no reported symptom overlaps any known
            disease, missing_symptoms is empty and the assessment reports that
            no match was found instead of guessing.
        """
        # Normalize case so the set comparison below agrees with the
        # vectorizer, which lowercases tokens internally.
        normalized = [s.strip().lower() for s in symptoms_list]
        input_text = " ".join(normalized)
        input_vector = self.vectorizer.transform([input_text])
        # TfidfVectorizer L2-normalizes rows by default, so this sparse dot
        # product is exactly cosine similarity.
        similarities = (self.vectors * input_vector.T).toarray().flatten()
        best_match_index = similarities.argmax()
        # Guard: with no vocabulary overlap (or empty input) every similarity
        # is 0 and argmax would arbitrarily pick the first disease ("Flu").
        if similarities[best_match_index] == 0:
            return [], ("The reported symptoms do not match any known disease "
                        "in the database.")
        best_disease = self.diseases[best_match_index]
        # sorted() rather than list(set(...)): set iteration order varies.
        missing_symptoms = sorted(
            set(self.disease_symptoms[best_disease]) - set(normalized))
        assessment = (f"Based on the input symptoms, {best_disease} is suspected. "
                      f"Missing symptoms for improved diagnosis: {missing_symptoms}")
        return missing_symptoms, assessment