import streamlit as st import torch from sentence_transformers import SentenceTransformer, util from spellchecker import SpellChecker import pickle # Load the pre-trained SentenceTransformer model model = SentenceTransformer('neuml/pubmedbert-base-embeddings') # Load stored data with open("embeddings_1.pkl", "rb") as fIn: stored_data = pickle.load(fIn) stored_embeddings = stored_data["embeddings"] def check_misspelled_words(user_input): spell = SpellChecker() # Tokenize the input into words words = user_input.split() # Get a list of misspelled words misspelled = spell.unknown(words) return misspelled # Define the function for mapping code def mapping_code(user_input): if len(user_input.split()) < 5: # Check if sentence has less than 5 words raise ValueError("Input sentence should be at least 5 words long.") emb1 = model.encode(user_input.lower()) similarities = util.pytorch_cos_sim(emb1, stored_embeddings)[0] # Combine similarity scores with 'code' and 'description' result = [(code, description, float(sim)) for code, description, sim in zip(stored_data["SBS_code"], stored_data["Description"], similarities)] # Sort results by similarity scores result.sort(key=lambda x: x[2], reverse=True) # Return top 5 entries with 'code', 'description', and 'similarity_score' num_results = min(5, len(result)) top_5_results = [{"Code": code, "Description": description, "Similarity Score": sim} for code, description, sim in result[:num_results]] return top_5_results # Streamlit frontend interface def main(): st.title("CPT Description Mapping") # Input text box for user input user_input = st.text_input("Enter CPT description:", placeholder="Please enter a full description for better search results.") # Button to trigger mapping if st.button("Map"): if not user_input.strip(): # Check if input is empty or contains only whitespace st.error("Input box cannot be empty.") else: st.write("Please wait for a moment .... ") # Call backend function to get mapping results try: misspelled_words = check_misspelled_words(user_input) if misspelled_words: st.write("Please enter a detailed correct full description") st.write(misspelled_words) else: mapping_results = mapping_code(user_input) # Display top 5 similar sentences st.write("Top 5 similar sentences:") for i, result in enumerate(mapping_results, 1): st.write(f"{i}. Code: {result['Code']}, Description: {result['Description']}, Similarity Score: {result['Similarity Score']:.4f}") except ValueError as e: st.error(str(e)) if __name__ == "__main__": main()