Spaces:

ceejaytheanalyst
/

Insurance_code_mapping

Running

File size: 2,918 Bytes

0974b80
 
0c98cfe
e1b1664
0974b80
 
 
 
0c98cfe
 
0974b80
 
 
 
0c98cfe
 
 
 
 
 
 
0974b80
 
 
0c98cfe
 
0974b80
0c98cfe
0974b80
0c98cfe
0974b80
 
 
0c98cfe
 
0974b80
0c98cfe
0974b80
 
 
 
0c98cfe
0974b80
 
0c98cfe
 
 
0974b80
 
0c98cfe
 
 
 
 
 
 
 
 
 
 
 
 
0974b80

import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util
from spellchecker import SpellChecker
import pickle

# Streamlit re-executes this whole script on every widget interaction, so the
# expensive resources are loaded through ``st.cache_resource``: the first run
# pays the loading cost and later reruns reuse the same objects.
@st.cache_resource
def _load_model():
    """Load the pre-trained SentenceTransformer model used to embed queries."""
    return SentenceTransformer('neuml/pubmedbert-base-embeddings')


@st.cache_resource
def _load_stored_data():
    """Read the pickled reference data from ``embeddings_1.pkl``."""
    # SECURITY: unpickling can execute arbitrary code, so this file must only
    # ever come from a trusted source.
    with open("embeddings_1.pkl", "rb") as fIn:
        return pickle.load(fIn)


# Load the pre-trained SentenceTransformer model
model = _load_model()

# Load stored data; mapping_code() reads the "embeddings", "SBS_code" and
# "Description" entries.
stored_data = _load_stored_data()
stored_embeddings = stored_data["embeddings"]

def check_misspelled_words(user_input):
    """Return the words in *user_input* that the spell checker does not know.

    Args:
        user_input: Free-text description typed by the user.

    Returns:
        The collection of unrecognized words produced by
        ``SpellChecker.unknown``; it is empty (falsy) when every word is known.
    """
    spell = SpellChecker()
    # Tokenize with the spell checker's own word splitter instead of
    # ``str.split()``: splitting on whitespace alone leaves punctuation glued
    # to the words ("knee," or "(left)"), and those tokens would be reported
    # as misspelled even though the word itself is fine.
    words = spell.split_words(user_input)
    # Get the words that are not in the dictionary
    return spell.unknown(words)

# Define the function for mapping code
def mapping_code(user_input, top_k=5):
    """Return the stored codes whose descriptions best match *user_input*.

    The input is lower-cased, embedded with the module-level ``model`` and
    compared by cosine similarity against ``stored_embeddings``.

    Args:
        user_input: Free-text description; must contain at least 5
            whitespace-separated words.
        top_k: Maximum number of matches to return. Defaults to 5.

    Returns:
        A list of at most ``top_k`` dicts with the keys ``"Code"``,
        ``"Description"`` and ``"Similarity Score"`` (a float), ordered from
        most to least similar.

    Raises:
        ValueError: If *user_input* contains fewer than 5 words.
    """
    import heapq

    if len(user_input.split()) < 5:  # Check if sentence has less than 5 words
        raise ValueError("Input sentence should be at least 5 words long.")
    emb1 = model.encode(user_input.lower())
    # Convert all scores to Python floats in one bulk call instead of one
    # ``float(tensor_element)`` call per stored row.
    similarities = util.pytorch_cos_sim(emb1, stored_embeddings)[0].tolist()
    # Combine similarity scores with 'code' and 'description'
    scored_rows = zip(stored_data["SBS_code"], stored_data["Description"], similarities)
    # heapq.nlargest is documented as equivalent to
    # sorted(rows, key=..., reverse=True)[:top_k], but it avoids fully sorting
    # rows that cannot make the cut.
    best_rows = heapq.nlargest(top_k, scored_rows, key=lambda row: row[2])
    return [
        {"Code": code, "Description": description, "Similarity Score": sim}
        for code, description, sim in best_rows
    ]

# Streamlit frontend interface
def main():
    """Render the Streamlit page and run a lookup when "Map" is clicked.

    Shows a title and a text box. Once the button is pressed, an empty input
    produces an error; otherwise the text is spell-checked and, if no
    misspelled words are reported, the top matches from ``mapping_code`` are
    listed. A ``ValueError`` raised along the way is shown as an error.
    """
    st.title("CPT Description Mapping")
    # Text box for the description to look up
    user_input = st.text_input("Enter CPT description:", placeholder="Please enter a full description for better search results.")

    # Nothing more to do until the user presses the button
    if not st.button("Map"):
        return

    # Reject empty or whitespace-only input
    if not user_input.strip():
        st.error("Input box cannot be empty.")
        return

    st.write("Please wait for a moment .... ")
    try:
        unknown_words = check_misspelled_words(user_input)
        if unknown_words:
            # Ask for a corrected description and show the offending words
            st.write("Please enter a detailed correct full description")
            st.write(unknown_words)
            return

        # Input looks clean: fetch and list the best matches
        matches = mapping_code(user_input)
        st.write("Top 5 similar sentences:")
        for i, result in enumerate(matches, 1):
            st.write(f"{i}. Code: {result['Code']}, Description: {result['Description']}, Similarity Score: {result['Similarity Score']:.4f}")
    except ValueError as e:
        st.error(str(e))

# Build the UI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()