# NOTE: this file was recovered from a scraped Hugging Face Spaces page
# (the page banner read "Spaces: Runtime error").
# Standard library
import datetime
import json
import pickle
import re

# Third-party
import gradio as gr
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from bs4 import BeautifulSoup
from transformers import pipeline
# Load the intent-classification model (architecture, weights and optimizer
# state are all stored in the HDF5 file).
model = tf.keras.models.load_model('core4.h5')

# Load the fitted Keras tokenizer used to encode questions for the model.
# NOTE(review): unpickling is only safe because this file ships with the app;
# never unpickle untrusted data.
with open('tokenizer.pickle', 'rb') as handle:
    tokenize = pickle.load(handle)

# Output labels, index-aligned with the model's softmax output.
text_labels = ['How to apply', 'how much can I get', 'who can apply']
def greet(string):
    """Classify *string* into one of the core question intents.

    Encodes the question with the fitted tokenizer, runs the Keras model and
    returns the entry of ``text_labels`` with the highest score.
    """
    matrix = tokenize.texts_to_matrix([string])
    prediction = model.predict(np.array([matrix[0]]))
    best = np.argmax(prediction)  # hoisted: argmax was computed twice
    predicted_label = text_labels[best]
    print(prediction[0][best])
    print("Predicted label: " + predicted_label + "\n")
    return predicted_label
################################################### | |
# Benefit pages to scrape: each entry maps a (benefitName, coreName) pair to
# one or more comma-separated GOV.UK URLs whose text becomes the QA context.
benefits = [
    {"benefitName": "Universal Credit", "coreName": "what is this benefit",
     "link": "https://www.gov.uk/universal-credit/"},
    {"benefitName": "Universal Credit", "coreName": "who can apply",
     "link": "https://www.gov.uk/universal-credit/eligibility"},
    {"benefitName": "Universal Credit", "coreName": "how much can I get",
     "link": "https://www.gov.uk/universal-credit/what-youll-get,https://www.gov.uk/universal-credit/how-youre-paid"},
    {"benefitName": "Universal Credit", "coreName": "How to apply",
     "link": "https://www.gov.uk/universal-credit/how-to-claim"},
]
def requestPage(link):
    """Fetch *link* over HTTP and return the parsed BeautifulSoup document.

    A timeout is set so a stalled connection cannot hang the app forever.
    """
    page = requests.get(link, timeout=30)
    return BeautifulSoup(page.content, "html.parser")
def scrapeTable(table):
    """Flatten an HTML ``<table>`` into "Header: cell" text, one line per row.

    Assumes the table has ``<thead>``/``<tbody>`` sections as on GOV.UK rate
    tables — TODO(review): confirm for any other page this is pointed at.
    Returns "" for a table with an empty body.
    """
    headers = [col.text.strip() for col in table.thead.tr.find_all()]
    lines = []
    for row in table.tbody.find_all(recursive=False):
        cells = row.find_all(recursive=False)
        pairs = ["{}: {}".format(headers[i], cell.text.strip())
                 for i, cell in enumerate(cells)]
        lines.append(" ".join(pairs))
    # join instead of repeated += (each original row ended with "\n")
    return "\n".join(lines) + "\n" if lines else ""
def scrapePage(page):
    """Extract the guide text from a parsed GOV.UK guide page.

    Returns the part title followed by the body fragments: ``<ul>`` lists are
    collapsed onto one line with ``{;}`` separators, ``<table>`` elements are
    flattened via ``scrapeTable``, everything else contributes its plain text.
    """
    corpus = ""
    content = page.find('div', {"id": "guide-contents"})
    title = content.find('h1', {"class": "part-title"}).text.strip()
    corpus += title + "\n\n"
    print(title)
    body = content.find('div', {"class": "gem-c-govspeak"})
    for frag in body.find_all(recursive=False):
        text = frag.text.strip()
        if frag.name == 'ul':
            # Collapse list items onto one line, marking breaks with "{;}".
            corpus += "{;}" + re.sub(r'\n+', "{;}", text)
        elif frag.name == 'table':
            corpus += scrapeTable(frag)
        else:
            corpus += text
        corpus += "\n"
    return corpus
# Build the QA context for every benefit entry by scraping its linked pages.
for benefit in benefits:
    links = benefit['link'].split(',')
    print(benefit['benefitName'], benefit['coreName'], len(links))
    context = ""
    for link in links:
        context += scrapePage(requestPage(link))
    benefit['context'] = context
    benefit['contextLen'] = len(context)
    print("--------------------------------")

# Distinct benefit and core-question names (set order is not significant).
benefitsClasses = list({b['benefitName'] for b in benefits})
core4Classes = list({b['coreName'] for b in benefits})

# Extractive question-answering pipeline (transformers' default QA model —
# TODO(review): pin an explicit model name for reproducibility).
question_answerer = pipeline("question-answering")
def _log_to_sheet(question, core_name, answer):
    """Best-effort logging of one Q/A interaction to a shared Google Sheet.

    Downloads the sheet as CSV to count existing rows, then appends the
    question, predicted intent, answer and a timestamp to the next free row
    via an Apps Script endpoint.
    NOTE(review): no error handling — a network failure here fails the whole
    request; consider wrapping the calls in try/except.
    """
    spreadsheet_id = '1vjWnYsnGc0J6snT67NVbA-NWSGZ5b0eDBVHmg9lbf9s'
    csv_url = ('https://docs.google.com/spreadsheets/d/' + spreadsheet_id
               + '/export?format=csv&id=' + spreadsheet_id + '&gid=0')
    res = requests.get(url=csv_url)
    # Close the file handle deterministically (original leaked it).
    with open('google.csv', 'wb') as f:
        f.write(res.content)
    df = pd.read_csv('google.csv')
    row = str(len(df) + 2)  # +1 for the header row, +1 for the next free row
    url = ('https://script.google.com/macros/s/'
           'AKfycbwXP5fsDgOXJ9biZQC293o6bTBL3kDOJ07PlmxKjabzdTej6WYdC8Yos6NpDVqAJeVM'
           '/exec?spreadsheetId=' + spreadsheet_id)
    # One POST per cell, columns A-D in order (dicts preserve insertion order).
    cells = {
        "A": question,
        "B": core_name,
        "C": answer,
        "D": str(datetime.datetime.now()),
    }
    for col, value in cells.items():
        body = {
            "arguments": {"range": "Sheet1!" + col + row,
                          "valueInputOption": "USER_ENTERED"},
            "body": {"values": [[value]]},
        }
        requests.post(url, json.dumps(body),
                      headers={'Content-Type': 'application/json'})


def testQA(question):
    """Answer *question*: classify its intent, then run extractive QA.

    The matching (benefit, intent) context is fed to the QA pipeline; the
    interaction is logged to a Google Sheet before the answer is returned as
    "<intent>: <answer>".
    """
    predictedBenefit = "Universal Credit"  # only benefit scraped so far
    coreName = greet(question)
    entry = next(b for b in benefits
                 if b['benefitName'] == predictedBenefit
                 and b['coreName'] == coreName)
    answer = question_answerer(question=question,
                               context=entry['context'])['answer']
    _log_to_sheet(question, coreName, answer)
    return coreName + ': ' + answer
# Expose the QA function as a simple text-in / text-out Gradio web UI.
iface = gr.Interface(fn=testQA, inputs="text", outputs="text")
iface.launch()