Spaces:

StKirill
/

chatbot

Sleeping

StKirill committed on Feb 16, 2024

Commit

93c49fb

verified ·

1 Parent(s): 6c51481

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -76,7 +76,7 @@ def removeStopWords(text):
 question_norm_and_stop = df['Normalized question'].apply(removeStopWords)
 df.insert(3, 'Normalized and StopWords question', question_norm_and_stop, True)
-tfidf = TfidfVectorizer()  # initializing tf-idf
 x_tfidf = tfidf.fit_transform(df['Normalized and StopWords question']).toarray()  # oversimplifying this converts words to vectors
 features_tfidf = tfidf.get_feature_names_out()  # use function to get all the normalized words
 df_tfidf = pd.DataFrame(x_tfidf, columns = features_tfidf)  # create dataframe to show the 0, 1 value for each word
@@ -279,7 +279,7 @@ def chat_bert_context(question, history):
   else:
     memory_weights = np.array([0.3, 1.0])
-  history_sentence = np.zeros(shape=(len_history+1, 384))
   for ind, h in enumerate(history):
@@ -299,19 +299,6 @@ def chat_bert_context(question, history):
 #------------------------------------------------------------------------------------------------#
 # gradio part
 def echo(message, history, model):
-  # print(model)
-  # print(history)
-  # if model=="TF-IDF":
-  #   answer = chat_tfidf(message)
-  #   return answer
-  # elif model=="W2V":
-  #   answer = chat_word2vec(message)
-  #   return answer
-  # elif model=="BERT":
-  #   answer = chat_bert(message)
-  #   return answer
   if model=="TF-IDF":
     # answer = chat_tfidf(message)

 question_norm_and_stop = df['Normalized question'].apply(removeStopWords)
 df.insert(3, 'Normalized and StopWords question', question_norm_and_stop, True)
+tfidf = TfidfVectorizer(ngram_range=(1,3), max_features=5024)  # initializing tf-idf
 x_tfidf = tfidf.fit_transform(df['Normalized and StopWords question']).toarray()  # oversimplifying this converts words to vectors
 features_tfidf = tfidf.get_feature_names_out()  # use function to get all the normalized words
 df_tfidf = pd.DataFrame(x_tfidf, columns = features_tfidf)  # create dataframe to show the 0, 1 value for each word
   else:
     memory_weights = np.array([0.3, 1.0])
+  history_sentence = np.zeros(shape=(len_history+1, 768))
   for ind, h in enumerate(history):
 #------------------------------------------------------------------------------------------------#
 # gradio part
 def echo(message, history, model):
   if model=="TF-IDF":
     # answer = chat_tfidf(message)