Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import tensorflow as tf
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
# Restore the trained Keras classifier (its weights and the optimizer are
# stored in the same file).
model = tf.keras.models.load_model('core4.h5')

# Restore the fitted tokenizer that greet() uses to vectorise questions.
# NOTE(review): unpickling runs arbitrary code; only ship a tokenizer.pickle
# that comes from a trusted source.
with open('tokenizer.pickle', 'rb') as tokenizer_file:
    tokenize = pickle.load(tokenizer_file)

# Class names; greet() indexes this list with the argmax of the model output.
text_labels = ['How to apply', 'how much can I get', 'who can apply']
|
11 |
+
# model.summary() # model architecture
|
12 |
+
def greet(string):
    """Classify a question into one of ``text_labels`` and log the result.

    The text is vectorised with the module-level ``tokenize`` object and
    scored by the module-level ``model``; the label at the position of the
    highest score is chosen.  The question, the label and a timestamp are
    then written to the logging spreadsheet.

    Args:
        string: The user's question as free text.

    Returns:
        The predicted label, one of the entries of ``text_labels``.
    """
    tokenized_text = tokenize.texts_to_matrix([string])
    prediction = model.predict(np.array([tokenized_text[0]]))

    # Find the winning class once and reuse it for both the score and label.
    best_index = np.argmax(prediction)
    predicted_label = text_labels[best_index]
    print(prediction[0][best_index])
    print("Predicted label: " + predicted_label + "\n")

    _log_prediction(string, predicted_label)
    return predicted_label


def _log_prediction(question, predicted_label, timeout=30):
    """Write one (question, label, timestamp) row to the Google Sheet log.

    Args:
        question: The text that was classified (written to column A).
        predicted_label: The label chosen for it (written to column B).
        timeout: Seconds to wait on each HTTP call before giving up, so a
            stalled request cannot block the app indefinitely.
    """
    # Imported locally so this block does not rely on other parts of the file.
    import datetime
    import json

    import pandas as pd
    import requests

    spreadsheet_id = '1vjWnYsnGc0J6snT67NVbA-NWSGZ5b0eDBVHmg9lbf9s'  # Please set the Spreadsheet ID.
    csv_url = 'https://docs.google.com/spreadsheets/d/' + spreadsheet_id + '/export?format=csv&id=' + spreadsheet_id + '&gid=0'
    url = 'https://script.google.com/macros/s/AKfycbwXP5fsDgOXJ9biZQC293o6bTBL3kDOJ07PlmxKjabzdTej6WYdC8Yos6NpDVqAJeVM/exec?spreadsheetId=' + spreadsheet_id

    # Download the current sheet to learn how many rows it already holds.
    res = requests.get(url=csv_url, timeout=timeout)
    # Close the file before pandas reads it back instead of relying on the
    # garbage collector to flush the handle.
    with open('google.csv', 'wb') as csv_file:
        csv_file.write(res.content)
    df = pd.read_csv('google.csv')

    # presumably +2 skips the header row and moves past the last data row --
    # TODO confirm against the sheet layout
    target_row = str(len(df) + 2)

    cells = (
        ("A", question),
        ("B", predicted_label),
        ("C", str(datetime.datetime.now())),
    )
    # One request per cell, in column order.
    for column, value in cells:
        body = {
            "arguments": {"range": "Sheet1!" + column + target_row, "valueInputOption": "USER_ENTERED"},
            "body": {"values": [[value]]}
        }
        requests.post(url, json.dumps(body), headers={'Content-Type': 'application/json'}, timeout=timeout)
|
57 |
+
#One testing case
|
58 |
+
|
59 |
+
###################################################
|
60 |
+
import gradio as gr
|
61 |
+
from transformers import pipeline
|
62 |
+
from datetime import datetime
|
63 |
+
import pandas as pd
|
64 |
+
import requests
|
65 |
+
from bs4 import BeautifulSoup
|
66 |
+
import re
|
67 |
+
# Universal Credit guidance pages, one entry per question type.  A "link"
# value may hold several URLs separated by commas.
benefits = [
    {"benefitName": "Universal Credit", "coreName": core_name, "link": page_links}
    for core_name, page_links in (
        ("what is this benefit", "https://www.gov.uk/universal-credit/"),
        ("who can apply", "https://www.gov.uk/universal-credit/eligibility"),
        ("how much can I get", "https://www.gov.uk/universal-credit/what-youll-get,https://www.gov.uk/universal-credit/how-youre-paid"),
        ("How to apply", "https://www.gov.uk/universal-credit/how-to-claim"),
    )
]
|
73 |
+
def requestPage(link, timeout=30):
    """Download *link* and return its body parsed with BeautifulSoup.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout a stalled server would block the caller indefinitely.

    Returns:
        The response content parsed with the "html.parser" backend.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    page = requests.get(link, timeout=timeout)
    # Fail here with the real HTTP error rather than parsing an error page.
    page.raise_for_status()
    return BeautifulSoup(page.content, "html.parser")
|
78 |
+
|
79 |
+
def scrapeTable(table):
    """Flatten a parsed HTML table into text, one line per body row.

    Each cell is rendered as ``"<column header>: <cell text>"``; the cells of
    a row are joined with single spaces and every row ends with a newline.

    Args:
        table: A parsed table element exposing ``thead.tr``, ``tbody`` and
            ``find_all`` in the BeautifulSoup style.

    Returns:
        The flattened rows as a single string ("" when the body has no rows).
    """
    # Only the direct children of the header row are column headers.  A
    # recursive search would also return markup nested inside a header cell
    # and shift every label that follows it.
    columns = [col.text.strip() for col in table.thead.tr.find_all(recursive=False)]

    lines = []
    for row in table.tbody.find_all(recursive=False):
        cells = []
        for index, element in enumerate(row.find_all(recursive=False)):
            value = element.text.strip()
            if index < len(columns):
                cells.append("{}: {}".format(columns[index], value))
            else:
                # A row wider than the header keeps its extra cells,
                # unlabelled, instead of failing with IndexError.
                cells.append(value)
        lines.append(" ".join(cells) + "\n")
    return "".join(lines)
|
90 |
+
def scrapePage(page):
    """Turn one parsed guide page into plain text.

    The text starts with the page's ``h1.part-title`` heading followed by a
    blank line, then has one line per direct child of the
    ``div.gem-c-govspeak`` element inside ``div#guide-contents``:

    * ``ul``: its stripped text prefixed with ``{;}``, with each run of
      newlines replaced by ``{;}``;
    * ``table``: the output of ``scrapeTable``;
    * anything else: its stripped text.

    The heading is also printed.

    Args:
        page: Parsed page exposing BeautifulSoup-style ``find``.

    Returns:
        The assembled text.
    """
    guide = page.find('div', {"id":"guide-contents"})
    heading = guide.find('h1', {"class":"part-title"}).text.strip()
    print(heading)

    pieces = [heading + "\n\n"]
    govspeak = guide.find('div', {"class":"gem-c-govspeak"})
    for node in govspeak.find_all(recursive=False):
        stripped = node.text.strip()
        if node.name == 'table':
            rendered = scrapeTable(node)
        elif node.name == 'ul':
            rendered = "{;}" + re.sub('\n+', "{;}", stripped)
        else:
            rendered = stripped
        # Every fragment, whatever its kind, is followed by a newline.
        pieces.append(rendered + "\n")
    return "".join(pieces)
|
113 |
+
|
114 |
+
|
115 |
+
# Fetch and flatten every guidance page up front, storing the text and its
# length on each benefit entry; testQA() later reads the stored 'context'.
for benefit in benefits:
    page_urls = benefit['link'].split(',')
    print(benefit['benefitName'], benefit['coreName'], len(page_urls))
    # A 'link' may name several pages; their text is concatenated in order.
    benefit['context'] = "".join(scrapePage(requestPage(url)) for url in page_urls)
    benefit['contextLen'] = len(benefit['context'])
    print("--------------------------------")
|
125 |
+
# Distinct benefit names and question types present in `benefits`.  Sorted so
# the order is the same on every run; iterating a set of strings is not.
benefitsClasses = sorted({entry['benefitName'] for entry in benefits})
core4Classes = sorted({entry['coreName'] for entry in benefits})

# Question-answering pipeline called by testQA().
# NOTE(review): no model is named here, so the library's default for this
# task is loaded -- consider pinning one so answers stay reproducible.
question_answerer = pipeline("question-answering")

# Question type whose scraped context testQA() answers from.
coreName = 'how much can I get'
|
131 |
+
def testQA(question):
|
132 |
+
predictedBenefit = "Universal Credit"
|
133 |
+
predictedCore = coreName
|
134 |
+
time = datetime.now()
|
135 |
+
context = list(filter(lambda x: x['benefitName']==predictedBenefit and x['coreName']==predictedCore, benefits))[0]
|
136 |
+
answer = question_answerer(question = question, context = context['context'])['answer']
|
137 |
+
time3 = (datetime.now() - time).total_seconds()
|
138 |
+
return answer
|
139 |
+
|
140 |
+
# Serve both tools from a single app.  launch() blocks the main thread when
# run as a script, so launching the two interfaces one after the other would
# leave the second one unserved for as long as the first is running.
qa_interface = gr.Interface(fn=testQA, inputs="text", outputs="text")
classifier_interface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface = gr.TabbedInterface(
    [qa_interface, classifier_interface],
    ["Question answering", "Question classification"],
)
iface.launch()
|