import json
import pickle
import re
from datetime import datetime

import gradio as gr
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from bs4 import BeautifulSoup
from transformers import pipeline

# Load the intent-classification model, including its weights and the optimizer
model = tf.keras.models.load_model('core4.h5')

# Load the Keras tokenizer that was fitted when the model was trained
with open('tokenizer.pickle', 'rb') as handle:
    tokenize = pickle.load(handle)

# Class labels, in the same order as the model's output units
text_labels = ['How to apply', 'how much can I get', 'who can apply']

# model.summary()  # uncomment to inspect the model architecture

spreadsheet_id = '1vjWnYsnGc0J6snT67NVbA-NWSGZ5b0eDBVHmg9lbf9s'  # Please set the Spreadsheet ID.


def greet(string):
    """Classify a question into one of the core intents and log it to Google Sheets."""
    # Vectorise the question with the fitted tokenizer, then predict its intent
    tokenizedText = tokenize.texts_to_matrix([string])
    prediction = model.predict(np.array([tokenizedText[0]]))
    predicted_label = text_labels[np.argmax(prediction)]
    print(prediction[0][np.argmax(prediction)])
    print("Predicted label: " + predicted_label + "\n")

    # Download the logging sheet as CSV to find the next empty row
    csv_url = ('https://docs.google.com/spreadsheets/d/' + spreadsheet_id
               + '/export?format=csv&id=' + spreadsheet_id + '&gid=0')
    res = requests.get(url=csv_url)
    with open('google.csv', 'wb') as f:
        f.write(res.content)
    df = pd.read_csv('google.csv')

    # Append the question, the predicted label, and a timestamp to the sheet
    # via a Google Apps Script web app (one POST per cell)
    url = ('https://script.google.com/macros/s/'
           'AKfycbwXP5fsDgOXJ9biZQC293o6bTBL3kDOJ07PlmxKjabzdTej6WYdC8Yos6NpDVqAJeVM/exec'
           '?spreadsheetId=' + spreadsheet_id)
    next_row = str(len(df) + 2)
    for column, value in [('A', string), ('B', predicted_label), ('C', str(datetime.now()))]:
        body = {
            "arguments": {"range": "Sheet1!" + column + next_row, "valueInputOption": "USER_ENTERED"},
            "body": {"values": [[value]]},
        }
        res = requests.post(url, json.dumps(body), headers={'Content-Type': 'application/json'})
        # print(res.text)

    return predicted_label

# One testing case

###################################################
# Scrape gov.uk pages to build a QA context for each benefit/intent pair

benefits = [
    {"benefitName": "Universal Credit", "coreName": "what is this benefit",
     "link": "https://www.gov.uk/universal-credit/"},
    {"benefitName": "Universal Credit", "coreName": "who can apply",
     "link": "https://www.gov.uk/universal-credit/eligibility"},
    {"benefitName": "Universal Credit", "coreName": "how much can I get",
     "link": "https://www.gov.uk/universal-credit/what-youll-get,https://www.gov.uk/universal-credit/how-youre-paid"},
    {"benefitName": "Universal Credit", "coreName": "How to apply",
     "link": "https://www.gov.uk/universal-credit/how-to-claim"}
]


def requestPage(link):
    """Fetch a page and return it as parsed HTML."""
    page = requests.get(link)
    # print(page.text)
    soup = BeautifulSoup(page.content, "html.parser")
    return soup


def scrapeTable(table):
    """Flatten an HTML table into 'header: cell' lines, one line per row."""
    # Header cells: direct children of the header row only
    columns = [col.text.strip() for col in table.thead.tr.find_all(recursive=False)]
    rows = table.tbody.find_all(recursive=False)
    clean_rows = ""
    for row in rows:
        elements = ["{}: {}".format(columns[index], element.text.strip())
                    for index, element in enumerate(row.find_all(recursive=False))]
        clean_rows += " ".join(elements) + "\n"
    return clean_rows


def scrapePage(page):
    """Scrape the text of a gov.uk guide page into a plain-text corpus."""
    corpus = ""
    # Start from the main guide content
    content = page.find('div', {"id": "guide-contents"})
    title = content.find('h1', {"class": "part-title"}).text.strip()
    corpus += title + "\n\n"
    print(title)
    content = content.find('div', {"class": "gem-c-govspeak"})
    for frag in content.find_all(recursive=False):
        text = frag.text.strip()
        if frag.name == 'ul':
            # Mark list items with a {;} separator instead of newlines
            corpus += "{;}" + re.sub(r'\n+', "{;}", text)
        elif frag.name == 'table':
            corpus += scrapeTable(frag)
        else:
            corpus += text
        corpus += "\n"
    # print(corpus)
    return corpus


# Build the context corpus for every benefit/intent pair; entries with several
# comma-separated links get their pages concatenated
for benefit in benefits:
    links = benefit['link'].split(',')
    print(benefit['benefitName'], benefit['coreName'], len(links))
    context = ""
    for link in links:
        page = requestPage(link)
        context += scrapePage(page)
    benefit['context'] = context
    benefit['contextLen'] = len(context)
    print("--------------------------------")

benefitsClasses = list(set(map(lambda x: x['benefitName'], benefits)))
core4Classes = list(set(map(lambda x: x['coreName'], benefits)))

question_answerer = pipeline("question-answering")


def testQA(question):
    """Classify the question's intent, then answer it from the matching context."""
    predictedBenefit = "Universal Credit"
    predictedCore = greet(question)  # greet() also logs the question to the sheet
    time = datetime.now()
    context = list(filter(lambda x: x['benefitName'] == predictedBenefit
                          and x['coreName'] == predictedCore, benefits))[0]
    answer = question_answerer(question=question, context=context['context'])['answer']
    time3 = (datetime.now() - time).total_seconds()  # QA latency in seconds, currently unused
    return predictedCore + ': ' + answer


iface = gr.Interface(fn=testQA, inputs="text", outputs="text")
iface.launch()