ceejaytheanalyst committed on
Commit
90da2fd
·
verified ·
1 Parent(s): c8e8d6b

Update app.py

Browse files

Added a similarity threshold of 70%: only matches scoring above 0.70 are returned.

Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -24,24 +24,34 @@ def check_misspelled_words(user_input):
24
 
25
  return misspelled
26
 
 
27
  # Define the function for mapping code
28
  def mapping_code(user_input):
29
- if len(user_input.split()) <= 1: # Check if sentence has less than 5 words
30
- raise ValueError("Input sentence should be more than 1 word long.Please provide the Full description")
31
  emb1 = model.encode(user_input.lower())
32
- #similarities = util.pytorch_cos_sim(emb1, stored_embeddings)[0]
33
  similarities = []
34
  for sentence in stored_embeddings:
35
  similarity = util.cos_sim(sentence, emb1)
36
  similarities.append(similarity)
37
 
38
- # Combine similarity scores with 'code' and 'description'
39
- result = [(code, description, float(sim)) for code, description, sim in zip(stored_data["SBS_code"], stored_data["Description"], similarities)]
 
40
  # Sort results by similarity scores
41
  result.sort(key=lambda x: x[2], reverse=True)
42
- # Return top 5 entries with 'code', 'description', and 'similarity_score'
43
  num_results = min(5, len(result))
44
- top_5_results = [{"Code": code, "Description": description, "Similarity Score": sim} for code, description, sim in result[:num_results]]
 
 
 
 
 
 
 
 
 
45
  return top_5_results
46
 
47
  # Streamlit frontend interface
 
24
 
25
  return misspelled
26
 
27
+ # Define the function for mapping code
28
  # Define the function for mapping code
29
  def mapping_code(user_input):
30
+ if len(user_input.split()) < 5: # Check if sentence has less than 5 words
31
+ raise ValueError("Input sentence should be at least 5 words long.")
32
  emb1 = model.encode(user_input.lower())
 
33
  similarities = []
34
  for sentence in stored_embeddings:
35
  similarity = util.cos_sim(sentence, emb1)
36
  similarities.append(similarity)
37
 
38
+ # Filter results with similarity scores above 0.70
39
+ result = [(code, desc, sim) for (code, desc, sim) in zip(stored_data["SBS_code"], stored_data["Description"], similarities) if sim > 0.70]
40
+
41
  # Sort results by similarity scores
42
  result.sort(key=lambda x: x[2], reverse=True)
43
+
44
  num_results = min(5, len(result))
45
+
46
+ # Return top 5 entries with 'code', 'description', and 'similarity_score'
47
+ top_5_results = []
48
+ if num_results > 0:
49
+ for i in range(num_results):
50
+ code, description, similarity_score = result[i]
51
+ top_5_results.append({"Code": code, "Description": description, "Similarity Score": similarity_score})
52
+ else:
53
+ top_5_results.append({"Code": "", "Description": "No match", "Similarity Score": 0.0})
54
+
55
  return top_5_results
56
 
57
  # Streamlit frontend interface