File size: 5,044 Bytes
0974b80 0c98cfe 0974b80 791c8de 0974b80 0c98cfe 0974b80 46210e1 364d7af 8a61041 8441d3c c4db698 272a9aa acea3cc 0974b80 c8e8d6b e2a08b5 7fca223 90da2fd acea3cc 90da2fd 0974b80 90da2fd 0c98cfe 90da2fd 0974b80 0c98cfe acea3cc e053b60 acea3cc 2dc6bf4 acea3cc 0974b80 46210e1 7107507 c96a1af 46210e1 2dc6bf4 46210e1 2dc6bf4 46210e1 2dc6bf4 46210e1 2dc6bf4 46210e1 2dc6bf4 46210e1 2dc6bf4 46210e1 2dc6bf4 0974b80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util
import pickle
import re
# Sentence-embedding model used to encode the text typed by the user.
model = SentenceTransformer('neuml/pubmedbert-base-embeddings')

# Precomputed lookup tables, unpickled once when the module is imported.
# NOTE(review): pickle.load can execute arbitrary code while loading, so both
# files must come from a trusted source.

# Table read by cpt_code(): uses the "SBS_code", "Description" and
# "embeddings" entries.
with open("embeddings_1.pkl", "rb") as sbs_table_file:
    stored_data = pickle.load(sbs_table_file)
stored_embeddings = stored_data["embeddings"]

# Table read by sbs_code(): uses the "CPT_CODE", "Description" and
# "embeddings" entries.
with open("embeddings_2.pkl", "rb") as cpt_table_file:
    stored_data_cpt = pickle.load(cpt_table_file)
stored_embeddings_cpt = stored_data_cpt["embeddings"]
def validate_input(input_string):
    """Report whether the input looks like a bare code rather than a description.

    Returns True when ``input_string`` is a single run of ASCII letters and/or
    digits (no spaces or punctuation), or when ``str.isdigit()`` holds for it.
    Returns False otherwise, including for the empty string.

    Note that a True result means the input is *rejected*: ``main`` shows a
    warning when this function returns True.
    """
    single_token = r'^[a-zA-Z0-9]+$|^[a-zA-Z]+$'
    if re.match(single_token, input_string) is not None:
        return True
    # isdigit() also accepts non-ASCII digit characters, which the ASCII-only
    # pattern above does not cover.
    return input_string.isdigit()
def _top_matches(user_input, embeddings, codes, descriptions, limit=5):
    """Rank stored entries by cosine similarity to ``user_input``.

    Args:
        user_input: Free-text description; it is lower-cased before encoding.
        embeddings: Iterable of stored embedding vectors, one per entry.
        codes: Codes aligned position-by-position with ``embeddings``.
        descriptions: Descriptions aligned position-by-position with
            ``embeddings``.
        limit: Maximum number of entries to return.

    Returns:
        A list of at most ``limit`` dicts with the keys ``"Code"``,
        ``"Description"`` and ``"Similarity Score"``, ordered from most to
        least similar. No minimum-similarity threshold is applied. The score
        is always a plain ``float``. When there is nothing to rank, the list
        holds a single ``"No match"`` placeholder with score ``0.0``.
    """
    query_embedding = model.encode(user_input.lower())

    # NOTE(review): one cos_sim call per stored row is kept on purpose because
    # the container type of the pickled embeddings is not visible here. If it
    # is a 2-D array/tensor, a single batched util.cos_sim call would be much
    # faster -- confirm before changing.
    scores = [float(util.cos_sim(row, query_embedding)) for row in embeddings]

    # sorted() is stable, so entries with equal scores keep their stored order.
    ranked = sorted(
        zip(codes, descriptions, scores),
        key=lambda entry: entry[2],
        reverse=True,
    )
    if not ranked:
        return [{"Code": "", "Description": "No match", "Similarity Score": 0.0}]

    return [
        {"Code": code, "Description": description, "Similarity Score": score}
        for code, description, score in ranked[:limit]
    ]


def cpt_code(user_input):
    """Return the top 5 SBS entries most similar to a CPT description.

    Args:
        user_input: CPT description text entered by the user.

    Returns:
        A list of dicts as described in ``_top_matches``; ``"Code"`` values
        come from ``stored_data["SBS_code"]``.
    """
    return _top_matches(
        user_input,
        stored_embeddings,
        stored_data["SBS_code"],
        stored_data["Description"],
    )


def sbs_code(user_input):
    """Return the top 5 CPT entries most similar to an SBS description.

    Args:
        user_input: SBS description text entered by the user.

    Returns:
        A list of dicts as described in ``_top_matches``; ``"Code"`` values
        come from ``stored_data_cpt["CPT_CODE"]``.
    """
    return _top_matches(
        user_input,
        stored_embeddings_cpt,
        stored_data_cpt["CPT_CODE"],
        stored_data_cpt["Description"],
    )
def mapping_code(user_input, mode):
    """Dispatch a description lookup according to the mapping direction.

    Args:
        user_input: Description text to look up.
        mode: ``"CPT_to_SBS"`` routes to ``cpt_code``; ``"SBS_to_CPT"`` routes
            to ``sbs_code``. The comparison is case-sensitive.

    Returns:
        The list produced by the selected lookup function, or ``None`` when
        ``mode`` is neither of the two recognised values.
    """
    if mode == "CPT_to_SBS":
        return cpt_code(user_input)
    if mode == "SBS_to_CPT":
        return sbs_code(user_input)
    # Unrecognised direction: nothing is looked up.
    return None
# Streamlit frontend interface
def main():
    """Render the Streamlit page and run a lookup when "Map" is pressed.

    The page shows two notices, a direction selector and a text box. When the
    button is pressed, the input is rejected if it is blank or if
    ``validate_input`` returns True; otherwise ``mapping_code`` is called and
    each returned entry is written out with its score to four decimals. A
    ``ValueError`` raised during the lookup or display is shown as an error.
    """
    st.title("CPT-SBS Code Mapping")
    st.markdown("<font color='red'>**⚠️ Please enter the input CPT/SBS description with specific available details for best results.**</font>", unsafe_allow_html=True)
    st.markdown("<font color='blue'>**💡 Note:** Please note that the similarity scores provided are not indicative of accuracy. Top 5 code descriptions provided should be verified with CPT/SBS descriptions by the user.</font>", unsafe_allow_html=True)

    # Mapping direction chosen by the user; anything other than the first
    # option is treated as SBS -> CPT.
    direction = st.selectbox("Choose mapping direction:", ("CPT description to SBS code", "SBS description to CPT code"))
    cpt_to_sbs = direction == "CPT description to SBS code"
    user_input_label = "Enter CPT description:" if cpt_to_sbs else "Enter SBS description:"
    mode = "CPT_to_SBS" if cpt_to_sbs else "SBS_to_CPT"

    user_input = st.text_input(user_input_label, placeholder="Enter description here...")

    # Nothing more to do until the button is pressed.
    if not st.button("Map"):
        return

    # Blank or whitespace-only input.
    if not user_input.strip():
        st.error("Input box cannot be empty.")
        return

    # validate_input() returns True for code-like input, which is rejected.
    if validate_input(user_input):
        st.warning("Please input correct description.")
        return

    st.write("Please wait for a moment ...")
    try:
        mapping_results = mapping_code(user_input, mode)
        st.write("Top 5 similar entries:")
        for rank, entry in enumerate(mapping_results, start=1):
            st.write(f"{rank}. Code: {entry['Code']}, Description: {entry['Description']}, Similarity Score: {float(entry['Similarity Score']):.4f}")
    except ValueError as e:
        st.error(str(e))


if __name__ == "__main__":
    main()
|