ceejaytheanalyst's picture
Update app.py
e1b1664 verified
raw
history blame
2.92 kB
import pickle
import string

import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util
from spellchecker import SpellChecker
# Sentence-embedding model used to encode every user query.
model = SentenceTransformer('neuml/pubmedbert-base-embeddings')

# Reference data read from a local pickle. The rest of the file reads the
# "embeddings", "SBS_code" and "Description" entries from it.
# NOTE: unpickling can execute arbitrary code, so embeddings_1.pkl must come
# from a trusted source.
with open("embeddings_1.pkl", "rb") as pickle_file:
    stored_data = pickle.load(pickle_file)

# Kept as its own name because it is compared against on every query.
stored_embeddings = stored_data["embeddings"]
def check_misspelled_words(user_input):
    """Return the set of words in ``user_input`` unknown to the spell checker.

    The input is split on whitespace, and punctuation attached to the edges
    of each token is removed before the lookup. Without that step, ordinary
    text such as "knee," or "(left)" is reported as misspelled. Punctuation
    inside a token (hyphens, apostrophes) is left untouched.

    Args:
        user_input: Free-text description typed by the user.

    Returns:
        A set of the words the spell checker does not recognise; an empty
        set when there is nothing to check or every word is known.
    """
    trimmed_tokens = (
        token.strip(string.punctuation) for token in user_input.split()
    )
    # Tokens made only of punctuation collapse to "" and carry no word.
    words = [token for token in trimmed_tokens if token]
    if not words:
        # Nothing to look up, so skip building the checker.
        return set()

    spell = SpellChecker()
    return spell.unknown(words)
def mapping_code(user_input):
    """Return the stored entries most similar to ``user_input``.

    The input is lower-cased, encoded with the module-level ``model`` and
    scored against ``stored_embeddings``. Each score is paired with the
    matching ``SBS_code`` and ``Description`` entries of ``stored_data``.

    Args:
        user_input: Description text; must contain at least 5
            whitespace-separated words.

    Returns:
        Up to 5 dicts with keys "Code", "Description" and
        "Similarity Score", ordered from highest to lowest score.

    Raises:
        ValueError: If ``user_input`` has fewer than 5 words.
    """
    if len(user_input.split()) < 5:
        raise ValueError("Input sentence should be at least 5 words long.")

    query_embedding = model.encode(user_input.lower())
    scores = util.pytorch_cos_sim(query_embedding, stored_embeddings)[0]

    # Pair every stored code/description with its score for this query.
    scored_rows = zip(stored_data["SBS_code"], stored_data["Description"], scores)
    ranked = sorted(
        ((code, description, float(score)) for code, description, score in scored_rows),
        key=lambda row: row[2],
        reverse=True,
    )

    # Slicing copes with fewer than 5 stored entries.
    return [
        {"Code": code, "Description": description, "Similarity Score": score}
        for code, description, score in ranked[:5]
    ]
def main():
    """Render the Streamlit page and handle one press of the "Map" button.

    The page shows a title and a text box. When the button is pressed, the
    input is rejected if blank, then spell-checked. If unknown words are
    found they are shown instead of results; otherwise the matches from
    ``mapping_code`` are listed. A ``ValueError`` raised along the way is
    shown as an error message.
    """
    st.title("CPT Description Mapping")
    user_input = st.text_input(
        "Enter CPT description:",
        placeholder="Please enter a full description for better search results.",
    )

    # Nothing further to render until the button is pressed.
    if not st.button("Map"):
        return

    # Blank or whitespace-only input is rejected up front.
    if not user_input.strip():
        st.error("Input box cannot be empty.")
        return

    st.write("Please wait for a moment .... ")
    try:
        unknown_words = check_misspelled_words(user_input)
        if unknown_words:
            # Show the offending words instead of running the search.
            st.write("Please enter a detailed correct full description")
            st.write(unknown_words)
            return

        mapping_results = mapping_code(user_input)
        st.write("Top 5 similar sentences:")
        for i, result in enumerate(mapping_results, 1):
            st.write(f"{i}. Code: {result['Code']}, Description: {result['Description']}, Similarity Score: {result['Similarity Score']:.4f}")
    except ValueError as err:
        st.error(str(err))


if __name__ == "__main__":
    main()