ceejaytheanalyst committed on
Commit
0c98cfe
·
verified ·
1 Parent(s): 0974b80

Update app.py

Browse files

Improved App version 2

Files changed (1) hide show
  1. app.py +35 -40
app.py CHANGED
@@ -1,70 +1,65 @@
1
  import streamlit as st
2
  import torch
3
- from sentence_transformers import SentenceTransformer,util
4
- #from transformers import pipeline
5
- import pandas as pd
6
- import numpy as np
7
  import pickle
8
 
9
-
10
# Streamlit re-executes this script from the top on every widget interaction,
# so the expensive artefacts are loaded through st.cache_resource and reused
# across reruns instead of being rebuilt on each button click.
@st.cache_resource
def _load_model():
    """Return the SentenceTransformer model used to embed user queries."""
    return SentenceTransformer('neuml/pubmedbert-base-embeddings')


@st.cache_resource
def _load_stored_data():
    """Return the unpickled reference data read from ``embeddings_1.pkl``.

    NOTE(security): unpickling can execute arbitrary code, so this file must
    only ever come from a trusted source.
    """
    with open("embeddings_1.pkl", "rb") as fIn:
        return pickle.load(fIn)


model = _load_model()
stored_data = _load_stored_data()
# Module-level views of the reference data, kept under their original names.
stored_code = stored_data["SBS_code"]
stored_sentences = stored_data["Description"]
stored_embeddings = stored_data["embeddings"]
19
 
20
- import streamlit as st
 
 
 
 
 
 
21
 
22
  # Define the function for mapping code
23
def mapping_code(user_input):
    """Return the stored entries most similar to *user_input*.

    The input is lower-cased, embedded with the module-level ``model`` and
    compared against ``stored_embeddings`` by cosine similarity.

    Args:
        user_input: Free-text description entered by the user.

    Returns:
        A list of at most five dicts with the keys ``"Code"``,
        ``"Description"`` and ``"Similarity Score"`` (a float), ordered from
        most to least similar. When there is nothing to rank, a single
        placeholder entry with an empty code and a score of ``0.0`` is
        returned instead.
    """
    query_embedding = model.encode(user_input.lower())

    # One batched call instead of one cos_sim call per stored row.
    # Assumes stored_embeddings is a 2-D array/tensor of row vectors -- TODO confirm.
    scores = util.cos_sim(query_embedding, stored_embeddings)[0].tolist()

    # Combine similarity scores with 'code' and 'description'
    rows = zip(stored_data["SBS_code"], stored_data["Description"], scores)

    # Sort by similarity score (highest first) and keep the top 5 entries
    best = sorted(rows, key=lambda row: row[2], reverse=True)[:5]

    if not best:
        return [{"Code": "", "Description": "No similar sentences found", "Similarity Score": 0.0}]

    return [
        {"Code": code, "Description": description, "Similarity Score": score}
        for code, description, score in best
    ]
 
48
  # Streamlit frontend interface
49
def main():
    """Render the Streamlit page and show the mapping results.

    Draws a title, a text input and a "Map" button. When the button is
    pressed with a non-blank description, the description is passed to
    ``mapping_code`` and each returned entry is written to the page. Blank
    input produces a warning instead of being silently ignored.
    """
    st.title("CPT Description Mapping")

    # Input text box for user input
    user_input = st.text_input("Enter CPT description:")

    # Nothing to do until the user presses the button
    if not st.button("Map"):
        return

    # Give feedback on empty / whitespace-only input rather than doing nothing
    if not user_input.strip():
        st.warning("Please enter a description before mapping.")
        return

    st.write("Please wait for a moment .... ")

    # Call backend function to get mapping results
    mapping_results = mapping_code(user_input)

    # Display top 5 similar sentences
    st.write("Top 5 similar sentences:")
    for i, result in enumerate(mapping_results, 1):
        st.write(f"{i}. Code: {result['Code']}, Description: {result['Description']}, Similarity Score: {float(result['Similarity Score']):.4f}")


if __name__ == "__main__":
    main()
 
1
  import streamlit as st
2
  import torch
3
+ from sentence_transformers import SentenceTransformer, util
4
+ from textblob import SpellChecker
 
 
5
  import pickle
6
 
 
7
# Streamlit re-executes this script from the top on every widget interaction,
# so the expensive artefacts are loaded through st.cache_resource and reused
# across reruns instead of being rebuilt on each button click.
@st.cache_resource
def _load_model():
    """Return the SentenceTransformer model used to embed user queries."""
    return SentenceTransformer('neuml/pubmedbert-base-embeddings')


@st.cache_resource
def _load_stored_data():
    """Return the unpickled reference data read from ``embeddings_1.pkl``.

    NOTE(security): unpickling can execute arbitrary code, so this file must
    only ever come from a trusted source.
    """
    with open("embeddings_1.pkl", "rb") as fIn:
        return pickle.load(fIn)


model = _load_model()

# Load stored data
stored_data = _load_stored_data()
stored_embeddings = stored_data["embeddings"]
14
 
15
def check_misspelled_words(user_input):
    """Return the words of *user_input* that the spell checker does not know.

    The input is split on whitespace and punctuation attached to either end
    of a token is removed first, so that a word such as ``"fracture,"`` is
    checked as ``"fracture"`` rather than being reported as misspelled.

    Args:
        user_input: Free-text description entered by the user.

    Returns:
        The set returned by ``SpellChecker.unknown`` for the cleaned tokens;
        an empty set when the input contains no checkable tokens.
    """
    import string  # local import keeps this block self-contained

    # Tokenize the input into words, dropping surrounding punctuation
    stripped = (word.strip(string.punctuation) for word in user_input.split())
    words = [word for word in stripped if word]
    if not words:
        # Nothing to check: skip loading the spell-check dictionary entirely.
        return set()

    # NOTE(review): SpellChecker is normally provided by the pyspellchecker
    # package (``from spellchecker import SpellChecker``); confirm that the
    # file-level import from textblob actually resolves.
    spell = SpellChecker()

    # Get the misspelled words
    return spell.unknown(words)
22
 
23
  # Define the function for mapping code
24
def mapping_code(user_input, top_k=5, min_words=5):
    """Return the stored entries most similar to *user_input*.

    The input is lower-cased, embedded with the module-level ``model`` and
    compared against ``stored_embeddings`` by cosine similarity.

    Args:
        user_input: Free-text description entered by the user.
        top_k: Maximum number of entries to return (default 5). Values below
            1 yield an empty list.
        min_words: Minimum number of whitespace-separated words the input
            must contain (default 5).

    Returns:
        A list of at most ``top_k`` dicts with the keys ``"Code"``,
        ``"Description"`` and ``"Similarity Score"`` (a float), ordered from
        most to least similar.

    Raises:
        ValueError: If the input has fewer than ``min_words`` words.
    """
    import heapq  # local import keeps this block self-contained

    if len(user_input.split()) < min_words:
        raise ValueError(f"Input sentence should be at least {min_words} words long.")

    query_embedding = model.encode(user_input.lower())

    # Convert the whole score row to Python floats in one call rather than
    # calling float() on every tensor element.
    scores = util.pytorch_cos_sim(query_embedding, stored_embeddings)[0].tolist()

    # Combine similarity scores with 'code' and 'description'
    rows = zip(stored_data["SBS_code"], stored_data["Description"], scores)

    # nlargest is documented as equivalent to sorted(..., reverse=True)[:top_k]
    # but does not need to order the entire list.
    best = heapq.nlargest(top_k, rows, key=lambda row: row[2])

    return [
        {"Code": code, "Description": description, "Similarity Score": score}
        for code, description, score in best
    ]
37
+
38
  # Streamlit frontend interface
39
def main():
    """Render the Streamlit page and handle a press of the "Map" button.

    Draws the title, the description text box and the button. On a button
    press the input is rejected if blank, then spell-checked with
    ``check_misspelled_words``; if nothing is flagged it is passed to
    ``mapping_code`` and each returned entry is written to the page. A
    ``ValueError`` raised along the way is shown as an error message.
    """
    st.title("CPT Description Mapping")

    # Text box for the description to look up
    user_input = st.text_input(
        "Enter CPT description:",
        placeholder="Please enter a full description for better search results.",
    )

    # Nothing further to do until the button is pressed
    if not st.button("Map"):
        return

    # Reject empty or whitespace-only input
    if not user_input.strip():
        st.error("Input box cannot be empty.")
        return

    st.write("Please wait for a moment .... ")

    try:
        flagged = check_misspelled_words(user_input)
        if flagged:
            # Ask for a corrected description and show the flagged words
            st.write("Please enter a detailed correct full description")
            st.write(flagged)
            return

        # Query the backend and list the ranked matches
        matches = mapping_code(user_input)
        st.write("Top 5 similar sentences:")
        for rank, match in enumerate(matches, 1):
            st.write(f"{rank}. Code: {match['Code']}, Description: {match['Description']}, Similarity Score: {match['Similarity Score']:.4f}")
    except ValueError as e:
        st.error(str(e))


if __name__ == "__main__":
    main()