File size: 3,449 Bytes
0974b80 0c98cfe e1b1664 0974b80 0c98cfe 0974b80 272a9aa 0c98cfe 272a9aa b513d7d 272a9aa 0c98cfe 0974b80 b513d7d 0974b80 7fca223 e2a08b5 7fca223 0974b80 0c98cfe 0974b80 0c98cfe 0974b80 0c98cfe 0974b80 7107507 0974b80 0c98cfe 0974b80 0c98cfe 0974b80 0c98cfe 7fb1853 0c98cfe 0974b80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util
from spellchecker import SpellChecker
import pickle
# Streamlit re-executes this script from the top on every user interaction.
# The loaders below are wrapped in st.cache_resource so the model, the stored
# embeddings and the spell-checker dictionary are built once per server
# process instead of on every button click.


@st.cache_resource
def _load_model():
    """Return the pre-trained SentenceTransformer used to embed user input."""
    return SentenceTransformer('neuml/pubmedbert-base-embeddings')


@st.cache_resource
def _load_stored_data():
    """Return the object unpickled from ``embeddings_1.pkl``.

    The rest of the file indexes the result with the keys ``"embeddings"``,
    ``"SBS_code"`` and ``"Description"``.
    """
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only deploy an embeddings_1.pkl produced by a trusted pipeline.
    with open("embeddings_1.pkl", "rb") as fIn:
        return pickle.load(fIn)


@st.cache_resource
def _load_spell_checker():
    """Return a SpellChecker built with its default settings."""
    return SpellChecker()


# Module-level names used by the functions defined further down in this file.
model = _load_model()
stored_data = _load_stored_data()
stored_embeddings = stored_data["embeddings"]
spell = _load_spell_checker()
# Define a function to check for misspelled words
def check_misspelled_words(user_input):
    """Collect the alphabetic words of ``user_input`` that look misspelled.

    The input is split on whitespace. A token is reported when it consists
    only of alphabetic characters and the module-level ``spell`` checker's
    correction for its lower-cased form differs from that lower-cased form.

    Args:
        user_input: Text to check.

    Returns:
        The flagged tokens, as typed, in their original order.
    """
    flagged = []
    for token in user_input.split():
        # Tokens that are not purely alphabetic (numbers, punctuation,
        # hyphenated terms, ...) are never passed to the spell checker.
        if not token.isalpha() or token.isdigit():
            continue
        lowered = token.lower()
        if spell.correction(lowered) != lowered:
            flagged.append(token)
    return flagged
# Define the function for mapping code
def mapping_code(user_input):
    """Return the stored entries most similar to ``user_input``.

    The input is lower-cased and embedded with the module-level ``model``.
    That embedding is compared by cosine similarity with each item of
    ``stored_embeddings``, and the scores are paired positionally with
    ``stored_data["SBS_code"]`` and ``stored_data["Description"]``.

    Args:
        user_input: Free-text description; must contain at least two
            whitespace-separated words.

    Returns:
        A list of at most five dicts with the keys ``"Code"``,
        ``"Description"`` and ``"Similarity Score"`` (a float), ordered from
        the highest to the lowest similarity.

    Raises:
        ValueError: If ``user_input`` contains fewer than two words.
    """
    if len(user_input.split()) <= 1:  # Reject empty and single-word input
        raise ValueError("Input sentence should be more than 1 word long.Please provide the Full description")
    emb1 = model.encode(user_input.lower())
    # Build the score list explicitly so it always exists before it is zipped
    # below. Each stored embedding is scored individually.
    # NOTE(review): assumes each item of stored_embeddings is a single vector,
    # so every score is a one-element tensor that float() can convert - confirm.
    similarities = [util.cos_sim(sentence, emb1) for sentence in stored_embeddings]
    # Combine similarity scores with 'code' and 'description'
    result = [
        (code, description, float(sim))
        for code, description, sim in zip(stored_data["SBS_code"], stored_data["Description"], similarities)
    ]
    # Best match first
    result.sort(key=lambda x: x[2], reverse=True)
    # Slicing already copes with fewer than five available entries
    return [
        {"Code": code, "Description": description, "Similarity Score": sim}
        for code, description, sim in result[:5]
    ]
# Streamlit frontend interface
def main():
    """Render the Streamlit page and run a mapping when "Map" is pressed.

    Empty input is rejected with an error. Otherwise the input is
    spell-checked: flagged words are reported back to the user, and only
    input with no flagged words is passed to ``mapping_code``, whose results
    are listed one per line. A ``ValueError`` raised along the way is shown
    as an error message.
    """
    st.title("CPT Description Mapping")
    st.markdown("**Note:** Similarity scores are not absolute and should be further confirmed manually for accuracy.")
    user_input = st.text_input(
        "Enter CPT description:",
        placeholder="Please enter a full description for better search results.",
    )

    # Nothing more to render until the button is pressed.
    if not st.button("Map"):
        return

    # Whitespace-only input counts as empty.
    if not user_input.strip():
        st.error("Input box cannot be empty.")
        return

    st.write("Please wait for a moment .... ")
    try:
        misspelled_words = check_misspelled_words(user_input)
        if misspelled_words:
            # Ask for a corrected description instead of mapping.
            st.write("Please enter a detailed correct full description")
            st.write(f"Kindly check if these words are spelt correctly :{misspelled_words}")
            return

        mapping_results = mapping_code(user_input)
        st.write("Top 5 similar sentences:")
        for rank, entry in enumerate(mapping_results, start=1):
            st.write(f"{rank}. Code: {entry['Code']}, Description: {entry['Description']}, Similarity Score: {entry['Similarity Score']:.4f}")
    except ValueError as err:
        st.error(str(err))


if __name__ == "__main__":
    main()
|