Vipul-Chauhan committed on
Commit
b1979b2
1 Parent(s): b89f0ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -1,6 +1,6 @@
1
  #loading tfidf dataset
2
  import pandas as pd
3
- newsdf_sample = pd.read_excel("200_sample_each_20newsgroup_4k_tfidf.xlsx",engine="openpyxl")
4
 
5
  print("file size",len(newsdf_sample))
6
 
@@ -63,8 +63,8 @@ def process_row(row):
63
  return row
64
 
65
  import pickle
66
- kmeans_tfidf = pickle.load( open( "kmeans_tfidf_20.p", "rb" ) )
67
- vectorizer = pickle.load(open("tfidf_vectorizer.p","rb"))
68
 
69
  import matplotlib.pyplot as plt
70
  from wordcloud import WordCloud
@@ -74,11 +74,11 @@ dictt_cluster_words={}
74
 
75
  for i in range(0,20):
76
  # print(i)
77
- temp_df = newsdf_sample[newsdf_sample.exp1==i]
78
  text_list= temp_df["tfidf_cleaned"].values
79
  text_list = [element for element in text_list if str(element) != "nan"]
80
  single_text = " ".join(text_list)
81
- wordcloud = WordCloud(width = 1000, height = 500).generate(single_text)
82
  dictt_cluster_words[i] = wordcloud.words_
83
 
84
 
@@ -155,7 +155,7 @@ def get_summary_answer(Question):
155
  print("question: ", Question)
156
  cluster_selected = return_selected_cluster(Question)
157
 
158
- temp_df = newsdf_sample[newsdf_sample.exp1==cluster_selected]
159
  tfidf_ques = vectorizer.transform([process_row(Question)]).todense()
160
  cosine_score = []
161
  for sent in temp_df["tfidf_cleaned"].values:
@@ -177,7 +177,10 @@ def get_summary_answer(Question):
177
  summary = return_summary(relevant_text)
178
  squad_answer = return_squad_answer(Question, relevant_text)
179
 
180
- return summary, squad_answer
 
 
 
181
 
182
 
183
  import gradio as gr
@@ -185,8 +188,9 @@ iface = gr.Interface(fn = get_summary_answer,
185
  inputs = gr.Textbox(type="text", label="Type your question"),
186
  # outputs = ["text", "text"],
187
  outputs = [
188
- gr.Textbox(type="text", value=1, label="Answer from Generative Model"),
189
- gr.Textbox(type="text", value=2, label="Answer from SQuAD model"),
 
190
  ],
191
  title = "20NewsGroup_QA",
192
  description ="Returns answer from 20NewsGroup dataset")
 
1
  #loading tfidf dataset
2
  import pandas as pd
3
+ newsdf_sample = pd.read_excel("complete_tfidf_25.xlsx",engine="openpyxl")
4
 
5
  print("file size",len(newsdf_sample))
6
 
 
63
  return row
64
 
65
  import pickle
66
+ kmeans_tfidf = pickle.load( open( "kmeans_tfidf_25_complete.p", "rb" ) )
67
+ vectorizer = pickle.load(open("tfidf_vectorizer_complete.p","rb"))
68
 
69
  import matplotlib.pyplot as plt
70
  from wordcloud import WordCloud
 
74
 
75
  for i in range(0,20):
76
  # print(i)
77
+ temp_df = newsdf_sample[newsdf_sample.exp25==i]
78
  text_list= temp_df["tfidf_cleaned"].values
79
  text_list = [element for element in text_list if str(element) != "nan"]
80
  single_text = " ".join(text_list)
81
+ wordcloud = WordCloud(width = 1000, height = 500, max_words=1000).generate(single_text)
82
  dictt_cluster_words[i] = wordcloud.words_
83
 
84
 
 
155
  print("question: ", Question)
156
  cluster_selected = return_selected_cluster(Question)
157
 
158
+ temp_df = newsdf_sample[newsdf_sample.exp25==cluster_selected]
159
  tfidf_ques = vectorizer.transform([process_row(Question)]).todense()
160
  cosine_score = []
161
  for sent in temp_df["tfidf_cleaned"].values:
 
177
  summary = return_summary(relevant_text)
178
  squad_answer = return_squad_answer(Question, relevant_text)
179
 
180
+ relevant_text = " ".join(relevant_text.split()[:min(250,len(relevant_text.split()))])
181
+
182
+
183
+ return relevant_text, summary, squad_answer
184
 
185
 
186
  import gradio as gr
 
188
  inputs = gr.Textbox(type="text", label="Type your question"),
189
  # outputs = ["text", "text"],
190
  outputs = [
191
+ gr.Textbox(type="text", value=1, label="Relevant text"),
192
+ gr.Textbox(type="text", value=2, label="Answer from Generative Model"),
193
+ gr.Textbox(type="text", value=3, label="Answer from SQuAD model"),
194
  ],
195
  title = "20NewsGroup_QA",
196
  description ="Returns answer from 20NewsGroup dataset")