File size: 3,136 Bytes
0974b80
 
0c98cfe
bce1f5b
0974b80
 
 
 
0c98cfe
 
0974b80
 
 
 
272a9aa
0974b80
90da2fd
0974b80
56fcccb
bce1f5b
0974b80
c8e8d6b
e2a08b5
 
 
7fca223
90da2fd
56fcccb
90da2fd
0974b80
 
90da2fd
0c98cfe
90da2fd
 
 
 
 
 
 
 
 
 
0974b80
0c98cfe
0974b80
 
 
bce1f5b
7107507
bce1f5b
 
7107507
56fcccb
 
0974b80
0c98cfe
0974b80
 
0c98cfe
 
 
0974b80
 
0c98cfe
bce1f5b
 
 
 
 
0c98cfe
 
0974b80
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util
#from spellchecker import SpellChecker
import pickle

# Streamlit re-executes this script from the top on every widget interaction,
# so the expensive loads below are wrapped in cached loaders and run once per
# server process instead of once per click. Older Streamlit releases do not
# provide ``st.cache_resource``; there we fall back to a pass-through
# decorator, which simply reproduces the uncached behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_model():
    """Return the sentence-embedding model used to encode user queries."""
    return SentenceTransformer('neuml/pubmedbert-base-embeddings')


@_cache_resource
def _load_stored_data():
    """Return the unpickled contents of ``embeddings_1.pkl``.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserialising. The file must come from a trusted build step; never point
    this loader at user-supplied data.
    """
    with open("embeddings_1.pkl", "rb") as fIn:
        return pickle.load(fIn)


# Load the pre-trained SentenceTransformer model
model = _load_model()

# Load stored data. Besides "embeddings", the mapping code also reads the
# "SBS_code" and "Description" entries of this object.
stored_data = _load_stored_data()
stored_embeddings = stored_data["embeddings"]



# Define the function for mapping code
def mapping_code(user_input, user_slider_input_number, top_k=5):
    """Return the stored codes whose descriptions best match ``user_input``.

    The query is lower-cased, embedded with the module-level ``model`` and
    compared by cosine similarity against every row of ``stored_embeddings``.
    Rows scoring strictly above the threshold are ranked best-first.

    Args:
        user_input: Free-text description to look up (str).
        user_slider_input_number: Similarity threshold (float); only entries
            scoring strictly above it are kept.
        top_k: Maximum number of matches to return. Defaults to 5.

    Returns:
        A list of dicts with the keys ``"Code"``, ``"Description"`` and
        ``"Similarity Score"`` (always a plain ``float``), ordered by
        descending score. When nothing clears the threshold, a single
        placeholder entry with description ``"No match"`` and score ``0.0``
        is returned instead.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1.")

    no_match = [{"Code": "", "Description": "No match", "Similarity Score": 0.0}]

    # len() rather than truthiness: arrays/tensors have no unambiguous bool().
    if len(stored_embeddings) == 0:
        return no_match

    query_embedding = model.encode(user_input.lower())

    corpus = stored_embeddings
    if not isinstance(corpus, torch.Tensor):
        # Accept a 2-D array as well as any sequence of per-row vectors.
        corpus = torch.stack([torch.as_tensor(row) for row in corpus])

    # One batched call instead of one cos_sim call per stored row. The result
    # has one row per stored embedding; flatten it to plain Python floats so
    # callers never have to deal with single-element tensors.
    scores = util.cos_sim(corpus, query_embedding).flatten().tolist()

    # Keep only entries scoring strictly above the caller-supplied threshold.
    matches = [
        (code, description, score)
        for code, description, score in zip(
            stored_data["SBS_code"], stored_data["Description"], scores
        )
        if score > user_slider_input_number
    ]
    if not matches:
        return no_match

    # Best match first; the sort is stable, so ties keep their stored order.
    matches.sort(key=lambda match: match[2], reverse=True)

    return [
        {"Code": code, "Description": description, "Similarity Score": score}
        for code, description, score in matches[:top_k]
    ]

# Streamlit frontend interface
def main():
    """Render the page and, when "Map" is pressed, list the closest matches.

    Draws the title, two advisory banners, a sidebar threshold slider and a
    text box. On a button press with non-blank input it calls
    ``mapping_code`` and writes one numbered line per returned entry; a
    ``ValueError`` raised while doing so is shown as an error message.
    """
    st.title("CPT Description Mapping")

    # Advisory banners (raw HTML is needed for the coloured text).
    spelling_warning = "<font color='red'>**⚠️ Ensure that you input the accurate spellings.**</font>"
    score_disclaimer = "<font color='blue'>**💡 Note:** Similarity scores are not absolute and should be further confirmed manually for accuracy.</font>"
    st.markdown(spelling_warning, unsafe_allow_html=True)
    st.markdown(score_disclaimer, unsafe_allow_html=True)

    # Threshold selector lives in the sidebar: range 0.0-1.0, default 0.7.
    user_slider_input_number = st.sidebar.slider(
        "Select similarity threshold",
        min_value=0.0,
        max_value=1.0,
        value=0.7,
        step=0.01,
        key="slider1",
        help="Adjust the similarity threshold",
    )

    # Free-text query box
    user_input = st.text_input(
        "Enter CPT description:",
        placeholder="Please enter a full description for better search results.",
    )

    # Nothing more to do until the user presses the button.
    if not st.button("Map"):
        return

    # Reject empty or whitespace-only queries.
    if not user_input.strip():
        st.error("Input box cannot be empty.")
        return

    st.write("Please wait for a moment .... ")
    try:
        # Ask the backend for the ranked matches, then print them one per line.
        matches = mapping_code(user_input, user_slider_input_number)
        st.write("Top 5 similar sentences:")
        for rank, entry in enumerate(matches, start=1):
            code = entry["Code"]
            description = entry["Description"]
            score = float(entry["Similarity Score"])
            st.write(f"{rank}. Code: {code}, Description: {description}, Similarity Score: {score:.4f}")
    except ValueError as err:
        st.error(str(err))

# Build the UI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()