File size: 3,722 Bytes
0974b80 0c98cfe bce1f5b 0974b80 791c8de 0974b80 0c98cfe 0974b80 364d7af 8a61041 8441d3c c4db698 272a9aa 0974b80 5955187 bce1f5b 0974b80 c8e8d6b e2a08b5 7fca223 90da2fd 5955187 90da2fd 0974b80 90da2fd 0c98cfe 90da2fd 0974b80 0c98cfe 0974b80 2dc6bf4 0974b80 86e20d7 7107507 8441d3c 2dc6bf4 8441d3c 2dc6bf4 d529f5a 2dc6bf4 0974b80 2dc6bf4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util
#from spellchecker import SpellChecker
import pickle
import re
# Streamlit re-executes this script from the top on every widget interaction,
# so the expensive loads below are wrapped in ``st.cache_resource`` loaders:
# the model and the pickled data are then created once per server process
# instead of once per button click.


@st.cache_resource
def _load_model():
    """Return the pre-trained SentenceTransformer model used for encoding."""
    return SentenceTransformer('neuml/pubmedbert-base-embeddings')


@st.cache_resource
def _load_stored_data():
    """Return the object unpickled from ``embeddings_1.pkl``.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserialising, so ``embeddings_1.pkl`` must come from a trusted source.
    """
    with open("embeddings_1.pkl", "rb") as fIn:
        return pickle.load(fIn)


# Load the pre-trained SentenceTransformer model
model = _load_model()
# Load stored data; this file reads the keys "embeddings", "SBS_code" and
# "Description" from it.
stored_data = _load_stored_data()
stored_embeddings = stored_data["embeddings"]
def validate_input(input_string):
    """Report whether ``input_string`` is a single bare token.

    Despite the name, a ``True`` result marks input that the caller in this
    file rejects: text made up solely of ASCII letters and/or digits with no
    spaces or punctuation, or any string for which ``str.isdigit()`` is true.

    Args:
        input_string: Raw text entered by the user.

    Returns:
        ``True`` if the text is one alphanumeric token or is all digits,
        ``False`` otherwise (including for the empty string).
    """
    # The second alternative of the pattern is a subset of the first; the
    # pattern is kept exactly as it was.
    is_single_token = re.match(r'^[a-zA-Z0-9]+$|^[a-zA-Z]+$', input_string) is not None
    return is_single_token or input_string.isdigit()
# Map a free-text description to the closest stored codes
def mapping_code(user_input):
    """Rank the stored entries by similarity to ``user_input``.

    The input is lower-cased and encoded with the module-level ``model``,
    then compared with every item of ``stored_embeddings`` through
    ``util.cos_sim``. No similarity threshold is applied: every stored entry
    is a candidate and only the ordering decides what is returned.

    Args:
        user_input: Description text entered by the user.

    Returns:
        A list of at most five dicts with the keys ``"Code"``,
        ``"Description"`` and ``"Similarity Score"``, ordered from highest to
        lowest score. Each score is the value returned by ``util.cos_sim``
        for that entry, passed through unchanged. When there are no
        candidates the list holds one placeholder entry with an empty code,
        the description ``"No match"`` and a score of ``0.0``.
    """
    query_embedding = model.encode(user_input.lower())
    scores = [util.cos_sim(stored, query_embedding) for stored in stored_embeddings]

    # Pair each code/description with its score and order best-first.
    ranked = sorted(
        zip(stored_data["SBS_code"], stored_data["Description"], scores),
        key=lambda row: row[2],
        reverse=True,
    )

    best = ranked[:5]
    if not best:
        return [{"Code": "", "Description": "No match", "Similarity Score": 0.0}]
    return [
        {"Code": code, "Description": description, "Similarity Score": score}
        for code, description, score in best
    ]
# Streamlit frontend interface
import streamlit as st
def main():
    """Render the Streamlit page and handle one press of the "Map" button.

    The page shows a title, two hint banners and a text box. When "Map" is
    pressed the input is checked: blank input produces an error, input for
    which ``validate_input`` returns ``True`` produces a warning, and anything
    else is passed to ``mapping_code`` and the returned entries are listed
    with their scores formatted to four decimals. Only ``ValueError`` raised
    while mapping or rendering is caught and shown as an error message.
    """
    st.title("CPT Description Mapping")
    st.markdown("<font color='blue'>**💡 Please enter the input CPT description with specific available details in correct spelling for best results.**</font>", unsafe_allow_html=True)
    st.markdown("<font color='blue'>**💡 Note:** Please note that the similarity scores of each code are the calculated based on language module matching and the top 5 codes descriptions results should be verified with CPT description by the user.</font>", unsafe_allow_html=True)
    # NOTE: a sidebar similarity-threshold slider used to be sketched here; it is disabled.
    # user_slider_input_number = st.sidebar.slider('Select similarity threshold', 0.0, 1.0, 0.7, 0.01, key='slider1', help='Adjust the similarity threshold')

    # Free-text description supplied by the user
    user_input = st.text_input("Enter CPT description:", placeholder="Please enter the input CPT description with specific available details for best results.")

    # Nothing more to do until the button is pressed
    if not st.button("Map"):
        return

    # Reject empty or whitespace-only input
    if not user_input.strip():
        st.error("Input box cannot be empty.")
        return

    # validate_input() returning True means the text is a bare token, which is rejected
    if validate_input(user_input):
        st.warning("Please input correct description .")
        return

    st.write("Please wait for a moment .... ")
    try:
        # Ask the backend for the best-matching entries
        mapping_results = mapping_code(user_input)  # user_slider_input_number
        st.write("Top 5 similar sentences:")
        for i, result in enumerate(mapping_results, 1):
            st.write(f"{i}. Code: {result['Code']}, Description: {result['Description']}, Similarity Score: {float(result['Similarity Score']):.4f}")
    except ValueError as e:
        st.error(str(e))
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|