File size: 2,480 Bytes
c086c81
 
d33ebe2
18f7759
0e9a048
 
e30bda8
0e9a048
c086c81
 
18f7759
33d7cef
 
31e5aac
33d7cef
 
 
 
c086c81
f1f67b1
290d982
f1f67b1
c086c81
f1f67b1
c086c81
33d7cef
f4165c6
c086c81
 
 
33d7cef
c086c81
 
 
 
10dd01c
 
c086c81
 
669590f
 
 
 
 
 
c086c81
669590f
c086c81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4174702
 
c086c81
 
f1f67b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer,util
#from transformers import pipeline
import pandas as pd 
import numpy as np 
import pickle


# Streamlit re-executes this script from the top on every widget interaction.
# Loading through cached factories means the model is built and the pickle is
# read once per server process instead of once per click.
@st.cache_resource
def _load_model():
    """Instantiate the SentenceTransformer used to embed user queries."""
    return SentenceTransformer('neuml/pubmedbert-base-embeddings')


@st.cache_resource
def _load_stored_data(path="embeddings_1.pkl"):
    """Load the pickled reference table from ``path``.

    The unpickled object is expected to be a mapping with the keys
    ``"SBS_code"``, ``"sentences"`` and ``"embeddings"``.

    NOTE(security): ``pickle.load`` can execute arbitrary code, so ``path``
    must only ever point at a trusted, locally produced file.
    """
    with open(path, "rb") as f_in:
        return pickle.load(f_in)


model = _load_model()
stored_data = _load_stored_data()
stored_code = stored_data["SBS_code"]
stored_sentences = stored_data["sentences"]
stored_embeddings = stored_data["embeddings"]

import streamlit as st

# Define the function for mapping code
def mapping_code(user_input, top_k=5):
    """Return the stored entries most similar to ``user_input``.

    The input is lower-cased and embedded with the module-level ``model``.
    Its cosine similarity to every vector in ``stored_embeddings`` is computed
    with ``util.cos_sim`` and the highest-scoring entries are returned.

    Args:
        user_input: Free-text description to look up.
        top_k: Maximum number of matches to return. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        A list of dicts with the keys ``"Code"``, ``"Description"`` and
        ``"Similarity Score"``, ordered from most to least similar. Scores
        are plain Python floats. When nothing can be ranked (no stored rows,
        or ``top_k`` < 1) the list holds a single placeholder row with an
        empty code and a score of ``0.0``.
    """
    import heapq  # local import so the file's import block stays untouched

    query_embedding = model.encode(user_input.lower())

    # util.cos_sim yields a single-element tensor per pair; float() collapses
    # it so every row (including the placeholder below) carries the same
    # score type and the ranking compares ordinary numbers.
    scores = [
        float(util.cos_sim(embedding, query_embedding))
        for embedding in stored_embeddings
    ]

    # Pair each score with its code and description.
    candidates = zip(stored_data["SBS_code"], stored_data["sentences"], scores)

    # nlargest is equivalent to sorted(..., reverse=True)[:top_k] but does not
    # need to order the whole table just to keep a handful of rows.
    best = heapq.nlargest(top_k, candidates, key=lambda row: row[2])

    if not best:
        return [
            {
                "Code": "",
                "Description": "No similar sentences found",
                "Similarity Score": 0.0,
            }
        ]

    return [
        {"Code": code, "Description": description, "Similarity Score": score}
        for code, description, score in best
    ]
# Streamlit frontend interface
def main():
    """Render the Streamlit page: a text box, a "Map" button and the results.

    When the button is pressed with a non-blank description, the text is
    passed to ``mapping_code`` and every returned match is written as one
    numbered line showing its code, description and similarity score.
    A blank description produces a warning instead of being silently ignored.
    """
    st.title("CPT Description Mapping")

    # Input text box for user input
    user_input = st.text_input("Enter CPT description:")

    # Nothing to do until the user presses the button.
    if not st.button("Map"):
        return

    # Whitespace-only text carries no description to match against.
    query = user_input.strip()
    if not query:
        st.warning("Please enter a CPT description first.")
        return

    # The spinner shows the wait message only while the lookup is running,
    # rather than leaving it on the page next to the finished results.
    with st.spinner("Please wait for a moment .... "):
        mapping_results = mapping_code(query)

    # Display the matches, best first.
    st.write("Top 5 similar sentences:")
    for rank, match in enumerate(mapping_results, 1):
        # float() accepts both plain numbers and single-element tensors.
        score = float(match["Similarity Score"])
        st.write(
            f"{rank}. Code: {match['Code']}, "
            f"Description: {match['Description']}, "
            f"Similarity Score: {score:.4f}"
        )


# Build the UI only when this file is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()