Commit
·
6830ff4
1
Parent(s):
a474698
Create new file
Browse files
app.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
os.system('pip install paddlepaddle')
|
3 |
+
os.system('pip install paddleocr')
|
4 |
+
from paddleocr import PaddleOCR, draw_ocr
|
5 |
+
from PIL import Image
|
6 |
+
import gradio as gr
|
7 |
+
import torch
|
8 |
+
|
9 |
+
torch.hub.download_url_to_file('https://i.imgur.com/aqMBT0i.jpg', 'example.jpg')
|
10 |
+
|
11 |
+
def inference(img, lang):
|
12 |
+
ocr = PaddleOCR(use_angle_cls=True, lang=lang,use_gpu=False)
|
13 |
+
img_path = img.name
|
14 |
+
result = ocr.ocr(img_path, cls=True)
|
15 |
+
image = Image.open(img_path).convert('RGB')
|
16 |
+
boxes = [line[0] for line in result]
|
17 |
+
txts = [line[1][0] for line in result]
|
18 |
+
# scores = [line[1][1] for line in result]
|
19 |
+
im_show = draw_ocr(image, boxes, txts,
|
20 |
+
font_path='simfang.ttf')
|
21 |
+
im_show = Image.fromarray(im_show)
|
22 |
+
im_show.save('result.jpg')
|
23 |
+
return 'result.jpg'
|
24 |
+
|
25 |
+
title = 'A Framework for Data-Driven Document Evaluation and scoring - Image to Text Extraction '
|
26 |
+
description = 'Demo for Optical character recognition(OCR)'
|
27 |
+
article = ""
|
28 |
+
examples = [['example.jpg','en']]
|
29 |
+
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
|
30 |
+
gr.Interface(
|
31 |
+
inference,
|
32 |
+
[gr.inputs.Image(type='file', label='Input'),gr.inputs.Dropdown(choices=['ch', 'en', 'fr', 'german', 'korean', 'japan'], type="value", default='en', label='language')],
|
33 |
+
gr.outputs.Image(type='file', label='Output'),
|
34 |
+
title=title,
|
35 |
+
description=description,
|
36 |
+
article=article,
|
37 |
+
examples=examples,
|
38 |
+
css=css,
|
39 |
+
enable_queue=True
|
40 |
+
).launch(debug=True)
|
41 |
+
|
42 |
+
|
43 |
+
##########################################################################################################
|
44 |
+
|
45 |
+
import os
|
46 |
+
import gradio as gr
|
47 |
+
from huggingface_hub import snapshot_download
|
48 |
+
from prettytable import PrettyTable
|
49 |
+
import pandas as pd
|
50 |
+
import torch
|
51 |
+
import traceback
|
52 |
+
|
53 |
+
config = {
|
54 |
+
"model_type": "roberta",
|
55 |
+
"model_name_or_path": "roberta-large",
|
56 |
+
"logic_lambda": 0.5,
|
57 |
+
"prior": "random",
|
58 |
+
"mask_rate": 0.0,
|
59 |
+
"cand_k": 1,
|
60 |
+
"max_seq1_length": 256,
|
61 |
+
"max_seq2_length": 128,
|
62 |
+
"max_num_questions": 8,
|
63 |
+
"do_lower_case": False,
|
64 |
+
"seed": 42,
|
65 |
+
"n_gpu": torch.cuda.device_count(),
|
66 |
+
}
|
67 |
+
|
68 |
+
os.system('git clone https://github.com/kkpathak91/project_metch/')
|
69 |
+
os.system('rm -r project_metch/data/')
|
70 |
+
os.system('rm -r project_metch/results/')
|
71 |
+
os.system('rm -r project_metch/models/')
|
72 |
+
os.system('mv project_metch/* ./')
|
73 |
+
|
74 |
+
model_dir = snapshot_download('kkpathak91/FVM')
|
75 |
+
config['fc_dir'] = os.path.join(model_dir, 'fact_checking/roberta-large/')
|
76 |
+
config['mrc_dir'] = os.path.join(model_dir, 'mrc_seq2seq/bart-base/')
|
77 |
+
config['er_dir'] = os.path.join(model_dir, 'evidence_retrieval/')
|
78 |
+
|
79 |
+
|
80 |
+
from src.loren import Loren
|
81 |
+
|
82 |
+
|
83 |
+
loren = Loren(config, verbose=False)
|
84 |
+
try:
|
85 |
+
js = loren.check('Donald Trump won the 2020 U.S. presidential election.')
|
86 |
+
except Exception as e:
|
87 |
+
raise ValueError(e)
|
88 |
+
|
89 |
+
|
90 |
+
def highlight_phrase(text, phrase):
|
91 |
+
text = loren.fc_client.tokenizer.clean_up_tokenization(text)
|
92 |
+
return text.replace('<mask>', f'<i><b>{phrase}</b></i>')
|
93 |
+
|
94 |
+
|
95 |
+
def highlight_entity(text, entity):
|
96 |
+
return text.replace(entity, f'<i><b>{entity}</b></i>')
|
97 |
+
|
98 |
+
|
99 |
+
def gradio_formatter(js, output_type):
|
100 |
+
zebra_css = '''
|
101 |
+
tr:nth-child(even) {
|
102 |
+
background: #f1f1f1;
|
103 |
+
}
|
104 |
+
thead{
|
105 |
+
background: #f1f1f1;
|
106 |
+
}'''
|
107 |
+
if output_type == 'e':
|
108 |
+
data = {'Evidence': [highlight_entity(x, e) for x, e in zip(js['evidence'], js['entities'])]}
|
109 |
+
elif output_type == 'z':
|
110 |
+
p_sup, p_ref, p_nei = [], [], []
|
111 |
+
for x in js['phrase_veracity']:
|
112 |
+
max_idx = torch.argmax(torch.tensor(x)).tolist()
|
113 |
+
x = ['%.4f' % xx for xx in x]
|
114 |
+
x[max_idx] = f'<i><b>{x[max_idx]}</b></i>'
|
115 |
+
p_sup.append(x[2])
|
116 |
+
p_ref.append(x[0])
|
117 |
+
p_nei.append(x[1])
|
118 |
+
|
119 |
+
data = {
|
120 |
+
'Claim Phrase': js['claim_phrases'],
|
121 |
+
'Local Premise': [highlight_phrase(q, x[0]) for q, x in zip(js['cloze_qs'], js['evidential'])],
|
122 |
+
'p_SUP': p_sup,
|
123 |
+
'p_REF': p_ref,
|
124 |
+
'p_NEI': p_nei,
|
125 |
+
}
|
126 |
+
else:
|
127 |
+
raise NotImplementedError
|
128 |
+
data = pd.DataFrame(data)
|
129 |
+
pt = PrettyTable(field_names=list(data.columns),
|
130 |
+
align='l', border=True, hrules=1, vrules=1)
|
131 |
+
for v in data.values:
|
132 |
+
pt.add_row(v)
|
133 |
+
html = pt.get_html_string(attributes={
|
134 |
+
'style': 'border-width: 2px; bordercolor: black'
|
135 |
+
}, format=True)
|
136 |
+
html = f'<head> <style type="text/css"> {zebra_css} </style> </head>\n' + html
|
137 |
+
html = html.replace('<', '<').replace('>', '>')
|
138 |
+
return html
|
139 |
+
|
140 |
+
|
141 |
+
def run(claim):
|
142 |
+
try:
|
143 |
+
js = loren.check(claim)
|
144 |
+
except Exception as error_msg:
|
145 |
+
exc = traceback.format_exc()
|
146 |
+
msg = f'[Error]: {error_msg}.\n[Traceback]: {exc}'
|
147 |
+
loren.logger.error(claim)
|
148 |
+
loren.logger.error(msg)
|
149 |
+
return 'Oops, something went wrong.', '', ''
|
150 |
+
label = js['claim_veracity']
|
151 |
+
loren.logger.warning(label + str(js))
|
152 |
+
ev_html = gradio_formatter(js, 'e')
|
153 |
+
z_html = gradio_formatter(js, 'z')
|
154 |
+
return label, z_html, ev_html
|
155 |
+
|
156 |
+
|
157 |
+
iface = gr.Interface(
|
158 |
+
fn=run,
|
159 |
+
inputs="text",
|
160 |
+
outputs=[
|
161 |
+
'text',
|
162 |
+
'html',
|
163 |
+
'html',
|
164 |
+
],
|
165 |
+
examples=['Kanpur is a city in Nepal',
|
166 |
+
'PV Sindhu is an Indian Badminton Player.'],
|
167 |
+
title="A Framework for Data-Driven Document Evaluation and Scoring",
|
168 |
+
layout='horizontal',
|
169 |
+
description="[Student Name: Karan Kumar Pathak] " " [Roll No.: 2020fc04334] ",
|
170 |
+
flagging_dir='results/flagged/',
|
171 |
+
allow_flagging=True,
|
172 |
+
flagging_options=['Interesting!', 'Error: Claim Phrase Parsing', 'Error: Local Premise',
|
173 |
+
'Error: Require Commonsense', 'Error: Evidence Retrieval'],
|
174 |
+
enable_queue=True
|
175 |
+
)
|
176 |
+
iface.launch()
|