File size: 2,918 Bytes
0974b80
 
0c98cfe
e1b1664
0974b80
 
 
 
0c98cfe
 
0974b80
 
 
 
0c98cfe
 
 
 
 
 
 
0974b80
 
 
0c98cfe
 
0974b80
0c98cfe
0974b80
0c98cfe
0974b80
 
 
0c98cfe
 
0974b80
0c98cfe
0974b80
 
 
 
0c98cfe
0974b80
 
0c98cfe
 
 
0974b80
 
0c98cfe
 
 
 
 
 
 
 
 
 
 
 
 
0974b80
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util
from spellchecker import SpellChecker
import pickle

# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the expensive loads below are wrapped in st.cache_resource
# to run once per server process instead of once per rerun.
@st.cache_resource
def _load_model():
    """Load the pre-trained SentenceTransformer model (cached across reruns)."""
    return SentenceTransformer('neuml/pubmedbert-base-embeddings')


@st.cache_resource
def _load_stored_data():
    """Read the pickled data stored in embeddings_1.pkl (cached across reruns)."""
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing; only ship an embeddings file from a trusted source.
    with open("embeddings_1.pkl", "rb") as fIn:
        return pickle.load(fIn)


# Module-level names used by mapping_code()
model = _load_model()
stored_data = _load_stored_data()
stored_embeddings = stored_data["embeddings"]

def check_misspelled_words(user_input):
    """Return the words of ``user_input`` that the spell checker does not know.

    The text is tokenized with ``SpellChecker.split_words`` rather than a
    plain ``str.split()`` so that punctuation attached to a word (for
    example "knee," or "(left)") is not reported as a spelling mistake.

    Args:
        user_input: Free-text description typed by the user.

    Returns:
        The set returned by ``SpellChecker.unknown`` for the tokenized
        words; empty when every word is recognized.
    """
    spell = SpellChecker()
    # Tokenize with the checker's own word splitter (drops punctuation)
    words = spell.split_words(user_input)
    # Collect the words missing from the checker's dictionary
    return spell.unknown(words)

# Define the function for mapping code
def mapping_code(user_input):
    """Return the stored entries most similar to ``user_input``.

    The input is lower-cased, encoded with the module-level ``model`` and
    compared by cosine similarity against ``stored_embeddings``.

    Args:
        user_input: Free-text description; must contain at least 5
            whitespace-separated words.

    Returns:
        A list of at most 5 dicts with the keys "Code", "Description" and
        "Similarity Score", ordered from highest to lowest score.

    Raises:
        ValueError: If ``user_input`` has fewer than 5 words.
    """
    word_count = len(user_input.split())
    if word_count < 5:
        raise ValueError("Input sentence should be at least 5 words long.")

    query_embedding = model.encode(user_input.lower())
    scores = util.pytorch_cos_sim(query_embedding, stored_embeddings)[0]

    # Pair each stored code/description with its similarity score
    scored_rows = zip(stored_data["SBS_code"], stored_data["Description"], scores)
    # Highest score first; the sort is stable, so ties keep stored order
    ranked = sorted(
        ((code, description, float(score)) for code, description, score in scored_rows),
        key=lambda row: row[2],
        reverse=True,
    )

    # Slicing already copes with fewer than 5 stored entries
    return [
        {"Code": code, "Description": description, "Similarity Score": score}
        for code, description, score in ranked[:5]
    ]

# Streamlit frontend interface
def main():
    """Render the Streamlit page and handle a click on the "Map" button.

    Shows a title and a text box. When "Map" is pressed, an empty input is
    rejected with an error; otherwise the input is spell-checked and, if no
    unknown words are found, the matches from ``mapping_code`` are listed.
    A ``ValueError`` raised during that processing is shown as an error.
    """
    st.title("CPT Description Mapping")
    user_input = st.text_input(
        "Enter CPT description:",
        placeholder="Please enter a full description for better search results.",
    )

    # Nothing to do until the user presses the button
    if not st.button("Map"):
        return

    # Reject empty or whitespace-only input
    if not user_input.strip():
        st.error("Input box cannot be empty.")
        return

    st.write("Please wait for a moment .... ")
    try:
        typos = check_misspelled_words(user_input)
        if typos:
            # Ask for a corrected description and show the unknown words
            st.write("Please enter a detailed correct full description")
            st.write(typos)
            return

        matches = mapping_code(user_input)
        st.write("Top 5 similar sentences:")
        for rank, match in enumerate(matches, 1):
            st.write(f"{rank}. Code: {match['Code']}, Description: {match['Description']}, Similarity Score: {match['Similarity Score']:.4f}")
    except ValueError as err:
        st.error(str(err))

# Start the Streamlit interface only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()