Spaces:

YouNameIt
/

YouNameIt_chatbot

Sleeping

schlenker committed on Sep 25, 2023

Commit

971a4bf

2 Parent(s): a4def93 2a20515

Merge branch 'main' of https://huggingface.co/spaces/YouNameIt/YouNameIt_chatbot into main

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
 from transformers import AutoTokenizer
 import numpy as np
 import time
 # JS
 import nltk
@@ -37,6 +38,15 @@ def get_models(llama=False):
 model, tokenizer = get_models()
 def return_top_k(sentence, k=10):
   if sentence[-1] != ".":
@@ -64,9 +74,10 @@ def return_top_k(sentence, k=10):
     #all word predictions
     predictions = [tokenizer.decode(tokens, skip_special_tokens=True) for tokens in output_sequences['sequences']]
     probabilities = [round(float(prob), 2) for prob in decoded_probabilities]
     for pred in predictions:
-      if (len(pred) < 2) | (pred in sentence.split()):
         predictions.pop(predictions.index(pred))
   return predictions[:10]

 from transformers import AutoTokenizer
 import numpy as np
 import time
+import string
 # JS
 import nltk
 model, tokenizer = get_models()
+def remove_punctuation(word):
+    # Return `word` with every character listed in string.punctuation removed. The table maps each of those characters to None (i.e. deletes it)
+    translator = str.maketrans('', '', string.punctuation)
+    # translate() applies the table in one pass; characters not in the table are left unchanged
+    word_without_punctuation = word.translate(translator)
+    return word_without_punctuation
 def return_top_k(sentence, k=10):
   if sentence[-1] != ".":
     #all word predictions
     predictions = [tokenizer.decode(tokens, skip_special_tokens=True) for tokens in output_sequences['sequences']]
     probabilities = [round(float(prob), 2) for prob in decoded_probabilities]
+    stripped_sent = [remove_punctuation(word.lower()) for word in sentence.split()]
     for pred in predictions:
+      if (len(pred) < 2) | (pred in stripped_sent):
         predictions.pop(predictions.index(pred))
   return predictions[:10]