Spaces:

ceejaytheanalyst
/

Insurance_code_mapping

Sleeping

App Files Files Community

ceejaytheanalyst committed on Apr 4, 2024

Commit

0c98cfe

verified ·

1 Parent(s): 0974b80

Update app.py

Browse files

Improved App version 2

Files changed (1) hide show

app.py +35 -40

app.py CHANGED Viewed

@@ -1,70 +1,65 @@
 import streamlit as st
 import torch
-from sentence_transformers import SentenceTransformer,util
-#from transformers import pipeline
-import pandas as pd
-import numpy as np
 import pickle
 # Load the pre-trained SentenceTransformer model
-#pipeline = pipeline(task="Sentence Similarity", model="all-MiniLM-L6-v2")
 model = SentenceTransformer('neuml/pubmedbert-base-embeddings')
-#sentence_embed = pd.read_csv('Reference_file.csv')
 # Load the pickled reference data. The code below reads its "SBS_code",
 # "Description" and "embeddings" entries.
 # NOTE(review): pickle.load can execute arbitrary code from the file; only
 # use an embeddings file from a trusted source.
 with open("embeddings_1.pkl", "rb") as fIn:
     stored_data = pickle.load(fIn)
-    stored_code = stored_data["SBS_code"]
-    stored_sentences = stored_data["Description"]
     stored_embeddings = stored_data["embeddings"]
-import streamlit as st
 # Define the function for mapping code
 def mapping_code(user_input):
     # Return up to five reference entries, best match first, as dicts with
     # the keys "Code", "Description" and "Similarity Score".
     # The query is lower-cased before it is embedded.
     emb1 = model.encode(user_input.lower())
-    similarities = []
     # One util.cos_sim call per stored embedding; each call appends its
     # result to the list as returned (no conversion to float here).
-    for sentence in stored_embeddings:
-        similarity = util.cos_sim(sentence, emb1)
-        similarities.append(similarity)
     # Combine similarity scores with 'code' and 'description'
     # NOTE(review): zip assumes the three sequences are aligned row for row.
-    result = list(zip(stored_data["SBS_code"],stored_data["Description"], similarities))
     # Sort results by similarity scores
     result.sort(key=lambda x: x[2], reverse=True)
-    num_results = min(5, len(result))
     # Return top 5 entries with 'code', 'description', and 'similarity_score'
-    top_5_results = []
-    if num_results > 0:
-        for i in range(num_results):
-            code, description, similarity_score = result[i]
-            top_5_results.append({"Code": code, "Description": description, "Similarity Score": similarity_score})
     # Only reached when there are no rows to rank: return one placeholder row.
-    else:
-        top_5_results.append({"Code": "", "Description": "No similar sentences found", "Similarity Score": 0.0})
     return top_5_results
 # Streamlit frontend interface
 def main():
     # Render the page: a title, one text box, and a "Map" button that runs
     # mapping_code on the entered text and lists the returned matches.
     st.title("CPT Description Mapping")
     # Input text box for user input
-    user_input = st.text_input("Enter CPT description:")
     # Button to trigger mapping
     if st.button("Map"):
         # An empty text box is ignored silently (no message is shown).
-        if user_input:
             st.write("Please wait for a moment .... ")
             # Call backend function to get mapping results
-            mapping_results = mapping_code(user_input)
-            # Display top 5 similar sentences
-            st.write("Top 5 similar sentences:")
             # float(...) converts the stored score before the 4-decimal format.
-            for i, result in enumerate(mapping_results, 1):
-                st.write(f"{i}. Code: {result['Code']}, Description: {result['Description']}, Similarity Score: {float(result['Similarity Score']):.4f}")
 # Build the UI only when this file is executed as the main script.
 if __name__ == "__main__":
     main()

 import streamlit as st
 import torch
+from sentence_transformers import SentenceTransformer, util
+from textblob import SpellChecker
 import pickle
 # Load the pre-trained SentenceTransformer model
 @st.cache_resource
 def _load_model():
     """Load the sentence-embedding model once and reuse it across reruns."""
     return SentenceTransformer('neuml/pubmedbert-base-embeddings')


 @st.cache_resource
 def _load_stored_data():
     """Read the precomputed reference data from ``embeddings_1.pkl``."""
     # SECURITY: pickle.load can execute arbitrary code contained in the file.
     # Only deploy an embeddings file that comes from a trusted source.
     with open("embeddings_1.pkl", "rb") as fIn:
         return pickle.load(fIn)


 # Streamlit re-executes this script on every widget interaction. Without the
 # cached loaders above, the model and the pickle would be reloaded on each
 # button press. The module-level names are unchanged for the code below.
 model = _load_model()
 # Load stored data
 stored_data = _load_stored_data()
 stored_embeddings = stored_data["embeddings"]
def check_misspelled_words(user_input):
    """Return the words of *user_input* that the spell checker does not know.

    The input is split on whitespace. Punctuation attached to a word is
    stripped before the lookup, and tokens without any letter (numbers, lone
    dashes) are skipped, so "fracture," or "2" are not reported as typos.

    Args:
        user_input: Free-text description typed by the user.

    Returns:
        Whatever ``SpellChecker.unknown`` returns for the cleaned words; the
        caller only tests it for truthiness and displays it.
    """
    # NOTE(review): ``SpellChecker().unknown(...)`` is the pyspellchecker API
    # (``from spellchecker import SpellChecker``), but this file imports the
    # name from ``textblob`` - confirm that import actually resolves.
    import string  # local import keeps this block self-contained

    spell = SpellChecker()
    # Tokenize the input into words, dropping surrounding punctuation
    stripped = (token.strip(string.punctuation) for token in user_input.split())
    # Keep only tokens that still contain at least one letter
    words = [word for word in stripped if any(ch.isalpha() for ch in word)]
    # Get the misspelled words
    return spell.unknown(words)
 # Define the function for mapping code
 def mapping_code(user_input, top_k=5):
     """Return the reference entries most similar to *user_input*.

     Args:
         user_input: Free-text description; must contain at least five
             whitespace-separated words.
         top_k: Maximum number of matches to return. Defaults to 5, which is
             what the original hard-coded limit was.

     Returns:
         A list of dicts with the keys "Code", "Description" and
         "Similarity Score" (a float), ordered best match first.

     Raises:
         ValueError: If *user_input* has fewer than five words.
     """
     import heapq  # local import keeps this block self-contained

     if len(user_input.split()) < 5:  # Check if sentence has less than 5 words
         raise ValueError("Input sentence should be at least 5 words long.")
     # The query is lower-cased before it is embedded.
     query_embedding = model.encode(user_input.lower())
     # Single batched cosine-similarity call; row 0 holds the query's scores.
     scores = util.pytorch_cos_sim(query_embedding, stored_embeddings)[0].tolist()
     # Combine similarity scores with 'code' and 'description'
     # NOTE(review): assumes the three sequences are aligned row for row.
     rows = zip(stored_data["SBS_code"], stored_data["Description"], scores)
     # nlargest is documented as equivalent to sorted(..., reverse=True)[:n],
     # so ordering and tie-breaking match a full descending sort without
     # sorting every row.
     best = heapq.nlargest(top_k, rows, key=lambda row: row[2])
     return [
         {"Code": code, "Description": description, "Similarity Score": score}
         for code, description, score in best
     ]
 # Streamlit frontend interface
 def main():
     """Render the Streamlit page and run a lookup when "Map" is pressed.

     Empty input is rejected with an error. Input containing words the spell
     checker does not know is echoed back instead of being searched.
     Otherwise the matches from ``mapping_code`` are listed. A ``ValueError``
     raised during the check or the lookup is shown as an error message.
     """
     st.title("CPT Description Mapping")
     # Input text box for user input
     user_input = st.text_input(
         "Enter CPT description:",
         placeholder="Please enter a full description for better search results.",
     )
     # Nothing more to do until the button is pressed
     if not st.button("Map"):
         return
     # Reject empty or whitespace-only input
     if not user_input.strip():
         st.error("Input box cannot be empty.")
         return
     st.write("Please wait for a moment .... ")
     try:
         unknown_words = check_misspelled_words(user_input)
         if unknown_words:
             # Show the offending words instead of running the search
             st.write("Please enter a detailed correct full description")
             st.write(unknown_words)
             return
         # Call backend function to get mapping results
         matches = mapping_code(user_input)
         # Display top 5 similar sentences
         st.write("Top 5 similar sentences:")
         for i, result in enumerate(matches, 1):
             st.write(f"{i}. Code: {result['Code']}, Description: {result['Description']}, Similarity Score: {result['Similarity Score']:.4f}")
     except ValueError as e:
         st.error(str(e))
 # Build the UI only when this file is executed as the main script.
 if __name__ == "__main__":
     main()