Vipul-Chauhan committed • Commit b1979b2 • Parent(s): b89f0ab
Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 #loading tfidf dataset
 import pandas as pd
-newsdf_sample = pd.read_excel("
+newsdf_sample = pd.read_excel("complete_tfidf_25.xlsx",engine="openpyxl")
 
 print("file size",len(newsdf_sample))
 
@@ -63,8 +63,8 @@ def process_row(row):
     return row
 
 import pickle
-kmeans_tfidf = pickle.load( open( "
-vectorizer = pickle.load(open("
+kmeans_tfidf = pickle.load( open( "kmeans_tfidf_25_complete.p", "rb" ) )
+vectorizer = pickle.load(open("tfidf_vectorizer_complete.p","rb"))
 
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
@@ -74,11 +74,11 @@ dictt_cluster_words={}
 
 for i in range(0,20):
     # print(i)
-    temp_df = newsdf_sample[newsdf_sample.
+    temp_df = newsdf_sample[newsdf_sample.exp25==i]
     text_list= temp_df["tfidf_cleaned"].values
     text_list = [element for element in text_list if str(element) != "nan"]
     single_text = " ".join(text_list)
-    wordcloud = WordCloud(width = 1000, height = 500).generate(single_text)
+    wordcloud = WordCloud(width = 1000, height = 500, max_words=1000).generate(single_text)
     dictt_cluster_words[i] = wordcloud.words_
 
 
@@ -155,7 +155,7 @@ def get_summary_answer(Question):
     print("question: ", Question)
     cluster_selected = return_selected_cluster(Question)
 
-    temp_df = newsdf_sample[newsdf_sample.
+    temp_df = newsdf_sample[newsdf_sample.exp25==cluster_selected]
     tfidf_ques = vectorizer.transform([process_row(Question)]).todense()
     cosine_score = []
     for sent in temp_df["tfidf_cleaned"].values:
@@ -177,7 +177,10 @@ def get_summary_answer(Question):
     summary = return_summary(relevant_text)
     squad_answer = return_squad_answer(Question, relevant_text)
 
-
+    relevant_text = " ".join(relevant_text.split()[:min(250,len(relevant_text.split()))])
+
+
+    return relevant_text, summary, squad_answer
 
 
 import gradio as gr
@@ -185,8 +188,9 @@ iface = gr.Interface(fn = get_summary_answer,
     inputs = gr.Textbox(type="text", label="Type your question"),
     # outputs = ["text", "text"],
     outputs = [
-        gr.Textbox(type="text", value=1, label="
-        gr.Textbox(type="text", value=2, label="Answer from
+        gr.Textbox(type="text", value=1, label="Relevant text"),
+        gr.Textbox(type="text", value=2, label="Answer from Generative Model"),
+        gr.Textbox(type="text", value=3, label="Answer from SQuAD model"),
     ],
     title = "20NewsGroup_QA",
     description ="Returns answer from 20NewsGroup dataset")
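Taken together, the hunks above wire up a TF-IDF retrieval path: the cleaned 20NewsGroup dump in complete_tfidf_25.xlsx, a pickled KMeans model and TF-IDF vectorizer, and a per-question filter on the exp25 cluster column. The following is a minimal sketch of that path, assuming those three artifacts are available locally; select_cluster and top_documents are illustrative stand-ins for the app's return_selected_cluster and its cosine-scoring loop, which live in parts of app.py not shown in this diff.

# Minimal sketch of the retrieval path touched by this commit (not the app's
# exact code): load the artifacts named in the diff, pick a cluster for the
# question, then rank that cluster's cleaned documents by cosine similarity.
import pickle
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

newsdf_sample = pd.read_excel("complete_tfidf_25.xlsx", engine="openpyxl")
kmeans_tfidf = pickle.load(open("kmeans_tfidf_25_complete.p", "rb"))
vectorizer = pickle.load(open("tfidf_vectorizer_complete.p", "rb"))

def select_cluster(question: str) -> int:
    # Illustrative stand-in for return_selected_cluster(): map the question
    # into TF-IDF space and take the KMeans cluster id it lands in.
    return int(kmeans_tfidf.predict(vectorizer.transform([question]))[0])

def top_documents(question: str, k: int = 5) -> list[str]:
    # Filter to the selected cluster (the exp25 column added in this commit),
    # drop NaN rows, and rank the remaining documents by cosine similarity.
    cluster = select_cluster(question)
    temp_df = newsdf_sample[newsdf_sample.exp25 == cluster]
    docs = [d for d in temp_df["tfidf_cleaned"].values if str(d) != "nan"]
    scores = cosine_similarity(vectorizer.transform([question]),
                               vectorizer.transform(docs))[0]
    ranked = sorted(zip(scores, docs), key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in ranked[:k]]

In app.py itself the question is first passed through process_row() before vectorizing, and the top-scoring text is then summarized and fed to the SQuAD model; the sketch stops at ranking.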
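One small aside on the added truncation line (new line 180): Python slicing already clamps to the length of the list, so the min(250, len(...)) guard is redundant. The following one-liner is equivalent.

# Equivalent to the added truncation: keep at most the first 250
# whitespace-separated tokens of relevant_text.
def truncate_words(text: str, limit: int = 250) -> str:
    return " ".join(text.split()[:limit])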
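Because get_summary_answer now returns three values (relevant text, summary, SQuAD answer), the interface in the last hunk needs three output components to match. Below is a runnable sketch of that wiring, with a dummy answer_fn standing in for get_summary_answer and its models.

# Sketch of the three-output Gradio wiring added in this commit; answer_fn is
# a placeholder that returns three strings in the same order as the app does.
import gradio as gr

def answer_fn(question: str):
    return "relevant text ...", "generated summary ...", "extracted span ..."

iface = gr.Interface(
    fn=answer_fn,
    inputs=gr.Textbox(label="Type your question"),
    outputs=[
        gr.Textbox(label="Relevant text"),
        gr.Textbox(label="Answer from Generative Model"),
        gr.Textbox(label="Answer from SQuAD model"),
    ],
    title="20NewsGroup_QA",
    description="Returns answer from 20NewsGroup dataset",
)

if __name__ == "__main__":
    iface.launch()

Gradio matches each element of the returned tuple to an output component by position, so the order of the three Textboxes must mirror the order of the return statement; the value=1/2/3 defaults seen in the diff only pre-fill the boxes and do not affect that mapping.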