shukdevdatta123 committed on
Commit
1f6b8ca
·
verified ·
1 Parent(s): cdd48ba

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import streamlit as st
4
+ import faiss
5
+ from sentence_transformers import SentenceTransformer
6
+ from symspellpy import SymSpell, Verbosity
7
+
8
+ # ----------------------
9
+ # Data Preparation
10
+ # ----------------------
11
# (derived column, prefix of the source columns that feed it)
_MULTI_VALUE_COLUMNS = (
    ('uses', 'use'),
    ('substitutes', 'substitute'),
    ('side_effects', 'sideEffect'),
)

# Columns that are lower-cased and stripped of punctuation.
_TEXT_COLUMNS = ('name', 'uses', 'Chemical Class', 'Therapeutic Class')


def _combine_prefixed_columns(df, prefix):
    """Join, per row, the non-missing values of every column starting with *prefix*.

    Values are converted with ``str`` and joined with ``', '`` in column
    order. Rows with no values (or a frame with no matching column) get ``''``.
    """
    matching = [col for col in df.columns if col.startswith(prefix)]
    if not matching:
        return pd.Series('', index=df.index, dtype=object)

    # Iterating plain tuples of only the matching columns avoids rescanning
    # every column name for every row.
    combined = [
        ', '.join(str(value) for value in row if pd.notna(value))
        for row in df[matching].itertuples(index=False, name=None)
    ]
    return pd.Series(combined, index=df.index, dtype=object)


def preprocess_data(file_path):
    """Load the medicine CSV and derive the columns used by the app.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of
            the dataset.

    Returns:
        A DataFrame with the columns ``id``, ``name``, ``uses``,
        ``substitutes``, ``side_effects``, ``Habit Forming``,
        ``Therapeutic Class`` and ``Action Class``.

    Raises:
        KeyError: If one of the expected columns is missing from the CSV.
    """
    df = pd.read_csv(file_path)

    # Resolve every group against the columns as loaded, before any derived
    # column exists, so a derived name can never be picked up by a prefix.
    derived = {
        target: _combine_prefixed_columns(df, prefix)
        for target, prefix in _MULTI_VALUE_COLUMNS
    }
    for target, values in derived.items():
        df[target] = values

    # Normalise free text: lower-case, then drop everything that is neither
    # a word character nor whitespace (raw string keeps ``\w`` a regex escape).
    for col in _TEXT_COLUMNS:
        df[col] = df[col].str.lower().str.replace(r'[^\w\s]', '', regex=True)

    return df[['id', 'name', 'uses', 'substitutes', 'side_effects',
               'Habit Forming', 'Therapeutic Class', 'Action Class']]
31
+
32
+ # ----------------------
33
+ # Embedding & FAISS Setup
34
+ # ----------------------
35
def setup_faiss(df):
    """Embed the ``uses`` text of every row and index the vectors with FAISS.

    Args:
        df: DataFrame with a ``uses`` column; row order defines the ids
            stored in the index (position ``i`` in ``df`` is vector ``i``).

    Returns:
        A ``(model, index)`` tuple: the ``SentenceTransformer`` used for
        encoding and a ``faiss.IndexFlatL2`` holding one vector per row.

    Raises:
        ValueError: If ``df`` has no rows, since no index dimension can be
            derived from an empty embedding matrix.
    """
    if df.empty:
        raise ValueError("Cannot build a FAISS index from an empty DataFrame.")

    model = SentenceTransformer('all-MiniLM-L6-v2')

    # The encoder expects strings; a missing value would otherwise arrive
    # as a float NaN.
    texts = df['uses'].fillna('').astype(str).tolist()
    embeddings = model.encode(texts, show_progress_bar=True)

    # FAISS only accepts C-contiguous float32 matrices.
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

    # Create FAISS index
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return model, index
44
+
45
+ # ----------------------
46
+ # Spelling Correction
47
+ # ----------------------
48
def setup_spell_checker():
    """Create a SymSpell instance loaded with the English frequency dictionary.

    The dictionary is first looked up relative to the current working
    directory (the original behaviour). If it is not there, the copy bundled
    with the ``symspellpy`` package is used instead.

    Returns:
        The ``SymSpell`` instance. If no dictionary could be loaded a
        ``RuntimeWarning`` is emitted and the instance is returned empty,
        which means lookups will yield no suggestions.
    """
    import importlib.resources
    import warnings

    dictionary_name = 'frequency_dictionary_en_82_765.txt'
    sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

    # load_dictionary reports a missing file by returning False rather than
    # raising, so the result must be checked explicitly.
    if sym_spell.load_dictionary(dictionary_name,
                                 term_index=0, count_index=1):
        return sym_spell

    packaged_path = importlib.resources.files('symspellpy') / dictionary_name
    if sym_spell.load_dictionary(str(packaged_path),
                                 term_index=0, count_index=1):
        return sym_spell

    warnings.warn(
        f"Spelling dictionary '{dictionary_name}' could not be loaded; "
        "spelling correction is disabled.",
        RuntimeWarning,
        stacklevel=2,
    )
    return sym_spell
53
+
54
+ # ----------------------
55
+ # Streamlit App
56
+ # ----------------------
57
# Number of medicines shown per query.
_TOP_K = 5


@st.cache_resource
def _load_resources(data_path):
    """Load the dataset, embedding model, FAISS index and spell checker once.

    Streamlit re-executes the script on every widget interaction; caching
    keeps the CSV parsing, model load and embedding pass out of each rerun.
    """
    df = preprocess_data(data_path)
    model, faiss_index = setup_faiss(df)
    sym_spell = setup_spell_checker()
    return df, model, faiss_index, sym_spell


def _correct_query(sym_spell, query):
    """Spell-correct *query* one whitespace-separated word at a time.

    ``SymSpell.lookup`` works on single terms, so the phrase is split first.
    Words with no suggestion, and tokens containing digits (e.g. dosages),
    are kept unchanged.
    """
    corrected = []
    for word in query.split():
        if any(ch.isdigit() for ch in word):
            corrected.append(word)
            continue
        suggestions = sym_spell.lookup(word, Verbosity.CLOSEST,
                                       max_edit_distance=2)
        corrected.append(suggestions[0].term if suggestions else word)
    return ' '.join(corrected)


def main():
    """Render the Streamlit UI: take a query, search, filter and show results."""
    st.title("🧬 MedSearch NLP: Medicine Recommender System")

    # Load data and models (cached across reruns)
    df, model, faiss_index, sym_spell = _load_resources('medicine_dataset.csv')

    # User input
    query = st.text_input("Describe your symptoms or medical need:")
    therapeutic_class = st.selectbox(
        "Filter by Therapeutic Class (optional):",
        ['All'] + sorted(df['Therapeutic Class'].dropna().unique().tolist())
    )

    query = query.strip()
    if not query:
        return

    # Spelling correction; only announce it when something actually changed.
    corrected = _correct_query(sym_spell, query)
    if corrected.lower() != query.lower():
        st.info(f"Did you mean: '{corrected}'?")
    query = corrected

    # Semantic search. With a class filter active the whole index is ranked,
    # so the filter cannot discard every hit while matching rows exist.
    filtering = therapeutic_class != 'All'
    total = faiss_index.ntotal
    k = total if filtering else min(_TOP_K, total)

    st.subheader("Recommended Medicines")
    if k == 0:
        st.warning("No matching medicines found.")
        return

    query_embedding = np.ascontiguousarray(model.encode([query]),
                                           dtype=np.float32)
    _, neighbors = faiss_index.search(query_embedding, k)

    # FAISS pads missing neighbours with -1, which iloc would read as
    # "last row"; drop them before indexing.
    ranked = neighbors[0]
    ranked = ranked[ranked >= 0]

    # Filter results (ranking order is preserved)
    if filtering:
        in_class = df['Therapeutic Class'].eq(therapeutic_class).to_numpy()
        ranked = ranked[in_class[ranked]]

    results = df.iloc[ranked[:_TOP_K]]
    if results.empty:
        st.warning("No matching medicines found.")
        return

    # Display results
    for _, row in results.iterrows():
        with st.expander(f"💊 {row['name']}"):
            cols = st.columns(3)
            cols[0].write(f"**Uses:** {row['uses']}")
            cols[1].write(f"**Substitutes:** {row['substitutes']}")
            cols[2].write(f"**Side Effects:** {row['side_effects']}")

            cols2 = st.columns(2)
            cols2[0].write(f"Therapeutic Class: {row['Therapeutic Class']}")
            cols2[1].write(f"Habit Forming: {row['Habit Forming']}")
100
+
101
# Script entry point: start the app only when this file is executed directly
# (e.g. via `streamlit run app.py`), not when it is imported.
if __name__ == "__main__":
    main()