walter1 committed on
Commit
62ab214
·
1 Parent(s): 011010f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -0
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import tensorflow as tf
import numpy as np
import pickle

# Load the trained Keras intent-classification model, including its weights
# and the optimizer state. 'core4.h5' must be present in the working directory.
model = tf.keras.models.load_model('core4.h5')

# Load the tokenizer that was fitted (and pickled) at training time; it turns
# raw question text into the bag-of-words matrix the model expects.
with open('tokenizer.pickle', 'rb') as handle:
    tokenize = pickle.load(handle)

# Intent labels indexed by the model's output units — argmax of the
# prediction selects the label.  # NOTE(review): ordering assumed to match training — confirm
text_labels = ['How to apply', 'how much can I get', 'who can apply']
# model.summary() # model architecture
12
def _append_sheet_cell(post_url, column, row_index, value):
    """Write *value* into a single cell ('Sheet1!<column><row_index>') of the
    logging spreadsheet via the Apps Script web app at *post_url*.

    Best-effort: the HTTP response is returned but not checked, matching the
    original fire-and-forget behaviour.
    """
    import json
    import requests
    body = {
        "arguments": {"range": "Sheet1!" + column + str(row_index),
                      "valueInputOption": "USER_ENTERED"},
        "body": {"values": [[value]]},
    }
    # Timeout so a stalled Apps Script endpoint cannot hang the request forever.
    return requests.post(post_url, json.dumps(body),
                         headers={'Content-Type': 'application/json'},
                         timeout=30)


def greet(string):
    """Classify *string* into one of the three benefit-question intents and
    log (question, predicted label, timestamp) to a Google Sheet.

    Side effects: prints the confidence and label, downloads the current
    sheet to 'google.csv', and issues three POSTs to the Apps Script web app.
    Returns the predicted label text.
    """
    import datetime
    import requests
    import pandas as pd

    # Vectorise the input with the training-time tokenizer and run the
    # classifier; argmax over the output units picks the intent.
    tokenized_text = tokenize.texts_to_matrix([string])
    prediction = model.predict(np.array([tokenized_text[0]]))
    predicted_label = text_labels[np.argmax(prediction)]
    print(prediction[0][np.argmax(prediction)])
    print("Predicted label: " + predicted_label + "\n")

    ################### logging to Google Sheets ###################
    spreadsheet_id = '1vjWnYsnGc0J6snT67NVbA-NWSGZ5b0eDBVHmg9lbf9s'  # Please set the Spreadsheet ID.
    csv_url = ('https://docs.google.com/spreadsheets/d/' + spreadsheet_id +
               '/export?format=csv&id=' + spreadsheet_id + '&gid=0')

    # Download the current sheet so we know the next free row.
    res = requests.get(url=csv_url, timeout=30)
    # Use a context manager — the original leaked the file handle.
    with open('google.csv', 'wb') as f:
        f.write(res.content)
    df = pd.read_csv('google.csv')
    next_row = len(df) + 2  # +1 for the header row, +1 for the next empty row

    post_url = ('https://script.google.com/macros/s/'
                'AKfycbwXP5fsDgOXJ9biZQC293o6bTBL3kDOJ07PlmxKjabzdTej6WYdC8Yos6NpDVqAJeVM/exec'
                '?spreadsheetId=' + spreadsheet_id)

    current_time = datetime.datetime.now()
    # One POST per column: A = question, B = predicted label, C = timestamp.
    # (The original repeated the same request-building code three times.)
    for column, value in (('A', string),
                          ('B', predicted_label),
                          ('C', str(current_time))):
        _append_sheet_cell(post_url, column, next_row, value)
    ################################################################
    return predicted_label
58
+
59
+ ###################################################
60
+ import gradio as gr
61
+ from transformers import pipeline
62
+ from datetime import datetime
63
+ import pandas as pd
64
+ import requests
65
+ from bs4 import BeautifulSoup
66
+ import re
67
# Knowledge base: one entry per (benefit, core question) pair, each pointing
# at the GOV.UK page(s) whose text answers that question. 'link' may hold a
# comma-separated list of URLs; a 'context' field is added at start-up by the
# scraping loop below.
benefits = [
    {"benefitName": "Universal Credit", "coreName": "what is this benefit", "link": "https://www.gov.uk/universal-credit/"},
    {"benefitName": "Universal Credit", "coreName": "who can apply", "link": "https://www.gov.uk/universal-credit/eligibility"},
    {"benefitName": "Universal Credit", "coreName": "how much can I get", "link": "https://www.gov.uk/universal-credit/what-youll-get,https://www.gov.uk/universal-credit/how-youre-paid"},
    {"benefitName": "Universal Credit", "coreName": "How to apply", "link": "https://www.gov.uk/universal-credit/how-to-claim"}
]
73
def requestPage(link):
    """Fetch *link* over HTTP and return the parsed BeautifulSoup document.

    A timeout is set so a hung server cannot block start-up indefinitely
    (requests has no default timeout).
    """
    page = requests.get(link, timeout=30)
    soup = BeautifulSoup(page.content, "html.parser")
    return soup
78
+
79
def scrapeTable(table):
    """Flatten an HTML <table> element into plain text.

    Each header-cell text is paired with the corresponding cell of every
    body row as "Header: value"; a row's pairs are space-joined and each
    row ends with a newline. Returns the concatenated rows as one string.
    """
    columns = [col.text.strip() for col in table.thead.tr.find_all()]
    # (A stray no-op `columns` expression — a notebook leftover — was removed.)
    lines = []
    for row in table.tbody.find_all(recursive=False):
        cells = row.find_all(recursive=False)
        # Pair each cell with its column header, left to right; raises
        # IndexError if a row has more cells than there are headers, as before.
        pairs = ["{}: {}".format(columns[i], cell.text.strip())
                 for i, cell in enumerate(cells)]
        lines.append(" ".join(pairs))
    # Join once instead of quadratic += concatenation.
    return "".join(line + "\n" for line in lines)
90
def scrapePage(page):
    """Extract the guide text from a parsed GOV.UK guide page.

    Builds a single string: the part title, a blank line, then each
    top-level fragment of the govspeak content block — bullet lists are
    flattened with '{;}' separators, tables go through scrapeTable, and
    everything else is appended as plain text, one fragment per line.
    """
    # Scrape the text
    corpus = ""
    # starting from the main page
    content = page.find('div', {"id":"guide-contents"})
    title = content.find('h1', {"class":"part-title"})
    title = title.text.strip()
    corpus += title +"\n\n"
    print(title)
    # The govspeak div holds the actual guide body.
    content = content.find('div', {"class":"gem-c-govspeak"})
    fragments = content.find_all(recursive=False)
    for frag in fragments:
        text= frag.text.strip()
        if frag.name == 'ul':
            # Collapse the list items onto one line, '{;}'-separated.
            clean = re.sub('\n+', "{;}", text)
            corpus += "{;}" + clean
        elif frag.name == 'table':
            corpus += scrapeTable(frag)
        else:
            corpus += text
        corpus += "\n"
    # print(corpus)
    return corpus
113
+
114
+
115
# At start-up, scrape every linked GOV.UK page and attach the combined text
# to its `benefits` entry as the question-answering context.
for benefit in benefits:
    links = benefit['link'].split(',')
    print(benefit['benefitName'], benefit['coreName'], len(links))
    context = ""
    for link in links:
        context += scrapePage(requestPage(link))
    benefit['context'] = context
    benefit['contextLen'] = len(context)
    print("--------------------------------")

# Distinct benefit and core-question names (set comprehension replaces the
# redundant list(set(list(map(lambda ...)))) chain; element order is not
# significant, as before).
benefitsClasses = list({b['benefitName'] for b in benefits})
core4Classes = list({b['coreName'] for b in benefits})
# (A stray no-op `benefitsClasses, core4Classes` expression — a notebook
# leftover — was removed.)

# Extractive question-answering model (default Hugging Face checkpoint).
question_answerer = pipeline("question-answering")

# Core question the demo QA interface is hard-wired to answer.
coreName = 'how much can I get'
131
def testQA(question):
    """Answer *question* from the scraped GOV.UK context for the hard-coded
    pair ("Universal Credit", coreName).

    Returns the answer span extracted by the QA pipeline.
    Raises IndexError if the pair is missing from `benefits` (unchanged).
    """
    predictedBenefit = "Universal Credit"  # NOTE(review): classifier output not wired in here
    predictedCore = coreName
    # List-index lookup (not next()) keeps the original IndexError contract.
    entry = [b for b in benefits
             if b['benefitName'] == predictedBenefit and b['coreName'] == predictedCore][0]
    # (Dead timing code removed: elapsed seconds were computed but never used.)
    return question_answerer(question=question, context=entry['context'])['answer']
139
+
140
# Launch the question-answering demo. In a plain script, launch() blocks
# while the server is running.
iface = gr.Interface(fn=testQA, inputs="text", outputs="text")
iface.launch()

# NOTE(review): this rebinds `iface` and launches the intent-classifier demo,
# but it only runs after the first launch() returns (i.e. after that server
# stops) — confirm whether both UIs were meant to be served simultaneously,
# e.g. via a single combined app.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()