Spaces:

kkpathak91
/

Mtech_Project_Image_processing_and_Fact_verification

Runtime error

App Files Files Community

Mtech_Project_Image_processing_and_Fact_verification / app.py

kkpathak91

Create new file

6830ff4 almost 3 years ago

raw

history blame

5.67 kB

	# `os` is imported first because the shell calls right below depend on it.
	import os
	# Install the OCR packages through the shell when the script starts; the exit
	# status of each command is not checked. These calls run before the paddleocr
	# import below (presumably because the package is not preinstalled — confirm).
	os.system('pip install paddlepaddle')
	os.system('pip install paddleocr')
	from paddleocr import PaddleOCR, draw_ocr
	from PIL import Image
	import gradio as gr
	import torch

	# Fetch the sample picture and store it as 'example.jpg', the file name that
	# the `examples` list of the OCR interface refers to.
	torch.hub.download_url_to_file('https://i.imgur.com/aqMBT0i.jpg', 'example.jpg')

	# One PaddleOCR engine per language code. Building an engine loads its models,
	# so engines are created on first use and reused by later requests.
	_OCR_ENGINES = {}


	def inference(img, lang):
	    """Run OCR on an uploaded image and save an annotated copy.

	    Args:
	        img: Uploaded file object; only its ``name`` attribute (the path of
	            the file on disk) is read.
	        lang: Language code handed to ``PaddleOCR`` (one of the dropdown
	            choices of the interface).

	    Returns:
	        The string ``'result.jpg'``, the path the annotated image is
	        written to.
	    """
	    ocr = _OCR_ENGINES.get(lang)
	    if ocr is None:
	        ocr = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=False)
	        _OCR_ENGINES[lang] = ocr

	    img_path = img.name
	    result = ocr.ocr(img_path, cls=True)
	    image = Image.open(img_path).convert('RGB')

	    # Assumes each entry of `result` is [box, (text, score)] — TODO confirm
	    # against the PaddleOCR version that gets installed at start-up.
	    boxes = [line[0] for line in result]
	    txts = [line[1][0] for line in result]

	    im_show = draw_ocr(image, boxes, txts, font_path='simfang.ttf')
	    im_show = Image.fromarray(im_show)
	    im_show.save('result.jpg')
	    return 'result.jpg'

	# Texts, sample input and CSS override shown on the OCR demo page.
	description = 'Demo for Optical character recognition(OCR)'
	title = 'A Framework for Data-Driven Document Evaluation and scoring - Image to Text Extraction '
	article = ""
	css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
	examples = [['example.jpg', 'en']]

	# Wire `inference` to an image upload plus a language dropdown and serve it.
	# NOTE(review): launch(debug=True) is expected to block here, so statements
	# further down the file may not run until this server stops — confirm.
	gr.Interface(
	    fn=inference,
	    inputs=[
	        gr.inputs.Image(type='file', label='Input'),
	        gr.inputs.Dropdown(
	            choices=['ch', 'en', 'fr', 'german', 'korean', 'japan'],
	            type="value",
	            default='en',
	            label='language',
	        ),
	    ],
	    outputs=gr.outputs.Image(type='file', label='Output'),
	    title=title,
	    description=description,
	    article=article,
	    examples=examples,
	    css=css,
	    enable_queue=True,
	).launch(debug=True)


	##########################################################################################################

	import os
	import gradio as gr
	from huggingface_hub import snapshot_download
	from prettytable import PrettyTable
	import pandas as pd
	import torch
	import traceback

	# Settings dictionary later passed to `Loren(config, ...)`; the model
	# directory entries are added to it after the snapshot download.
	config = {
	    # Model selection.
	    "model_type": "roberta",
	    "model_name_or_path": "roberta-large",
	    # Numeric / behavioural options (meanings are defined by the Loren code).
	    "logic_lambda": 0.5,
	    "prior": "random",
	    "mask_rate": 0.0,
	    "cand_k": 1,
	    # Length and count limits.
	    "max_seq1_length": 256,
	    "max_seq2_length": 128,
	    "max_num_questions": 8,
	    "do_lower_case": False,
	    "seed": 42,
	    # Number of CUDA devices torch reports when this line executes.
	    "n_gpu": torch.cuda.device_count(),
	}

	# Clone the project repository, remove its data/results/models folders and
	# move the remaining top-level entries into the working directory. The
	# commands run in this order and their exit statuses are ignored.
	for shell_cmd in (
	    'git clone https://github.com/kkpathak91/project_metch/',
	    'rm -r project_metch/data/',
	    'rm -r project_metch/results/',
	    'rm -r project_metch/models/',
	    'mv project_metch/* ./',
	):
	    os.system(shell_cmd)

	# Download the 'kkpathak91/FVM' snapshot and record the three model
	# sub-directories inside it on the shared config.
	model_dir = snapshot_download('kkpathak91/FVM')
	config.update(
	    fc_dir=os.path.join(model_dir, 'fact_checking/roberta-large/'),
	    mrc_dir=os.path.join(model_dir, 'mrc_seq2seq/bart-base/'),
	    er_dir=os.path.join(model_dir, 'evidence_retrieval/'),
	)


	from src.loren import Loren


	# Build the fact-verification pipeline once at start-up.
	loren = Loren(config, verbose=False)

	# Run one fixed claim through the pipeline right away so a broken setup
	# fails at start-up rather than on the first user request. The result is
	# kept in `js` but not used by the code visible in this file.
	try:
	    js = loren.check('Donald Trump won the 2020 U.S. presidential election.')
	except Exception as e:
	    # Keep the ValueError type, but chain the original exception explicitly
	    # so the traceback shows it as the direct cause.
	    raise ValueError(e) from e


	def highlight_phrase(text, phrase):
	    """Fill the ``<mask>`` slots of *text* with an emphasised *phrase*.

	    The text is first passed through the fact-checking tokenizer's
	    ``clean_up_tokenization``; every ``<mask>`` occurrence in the cleaned
	    text is then replaced by *phrase* wrapped in ``<i><b>…</b></i>``.

	    Args:
	        text: Text containing ``<mask>`` placeholders.
	        phrase: Value substituted for each placeholder.

	    Returns:
	        The cleaned text with the placeholders replaced.
	    """
	    cleaned = loren.fc_client.tokenizer.clean_up_tokenization(text)
	    emphasised = f'<i><b>{phrase}</b></i>'
	    return cleaned.replace('<mask>', emphasised)


	def highlight_entity(text, entity):
	    """Wrap every occurrence of *entity* in *text* with ``<i><b>…</b></i>``.

	    Args:
	        text: Text to search.
	        entity: Substring to emphasise.

	    Returns:
	        *text* with each occurrence of *entity* wrapped in the emphasis
	        markup. When *entity* is empty (or ``None``) *text* is returned
	        unchanged.
	    """
	    # str.replace('', x) inserts x between every character, which would
	    # scatter empty emphasis tags through the text; skip that case.
	    if not entity:
	        return text
	    return text.replace(entity, f'<i><b>{entity}</b></i>')


	def gradio_formatter(js, output_type):
	    """Render one view of a result dictionary as an HTML table.

	    Args:
	        js: Result mapping. For ``'e'`` the keys ``'evidence'`` and
	            ``'entities'`` are read; for ``'z'`` the keys
	            ``'phrase_veracity'``, ``'claim_phrases'``, ``'cloze_qs'`` and
	            ``'evidential'`` are read.
	        output_type: ``'e'`` for the evidence table, ``'z'`` for the
	            per-phrase probability table.

	    Returns:
	        An HTML string: a ``<head><style>`` prefix with the zebra-striping
	        CSS followed by the table markup.

	    Raises:
	        NotImplementedError: If *output_type* is neither ``'e'`` nor ``'z'``.
	    """
	    zebra_css = '''
	tr:nth-child(even) {
	background: #f1f1f1;
	}
	thead{
	background: #f1f1f1;
	}'''
	    if output_type == 'e':
	        data = {'Evidence': [highlight_entity(x, e) for x, e in zip(js['evidence'], js['entities'])]}
	    elif output_type == 'z':
	        p_sup, p_ref, p_nei = [], [], []
	        for x in js['phrase_veracity']:
	            # Emphasise the largest of the three scores in each row.
	            max_idx = torch.argmax(torch.tensor(x)).tolist()
	            x = ['%.4f' % xx for xx in x]
	            x[max_idx] = f'<i><b>{x[max_idx]}</b></i>'
	            # Index 2 feeds the p_SUP column, 0 p_REF and 1 p_NEI.
	            p_sup.append(x[2])
	            p_ref.append(x[0])
	            p_nei.append(x[1])

	        data = {
	            'Claim Phrase': js['claim_phrases'],
	            'Local Premise': [highlight_phrase(q, x[0]) for q, x in zip(js['cloze_qs'], js['evidential'])],
	            'p_SUP': p_sup,
	            'p_REF': p_ref,
	            'p_NEI': p_nei,
	        }
	    else:
	        raise NotImplementedError(f'unknown output_type: {output_type!r}')

	    data = pd.DataFrame(data)
	    pt = PrettyTable(field_names=list(data.columns),
	                     align='l', border=True, hrules=1, vrules=1)
	    for v in data.values:
	        pt.add_row(v)
	    html = pt.get_html_string(attributes={
	        'style': 'border-width: 2px; bordercolor: black'
	    }, format=True)
	    html = f'<head> <style type="text/css"> {zebra_css} </style> </head>\n' + html
	    # PrettyTable HTML-escapes cell contents, which would turn the <i><b>
	    # emphasis markup into literal text; restore the angle brackets. The
	    # previous replace('<', '<') / replace('>', '>') calls were no-ops.
	    html = html.replace('&lt;', '<').replace('&gt;', '>')
	    return html


	def run(claim):
	    """Verify *claim* and return the values shown by the interface.

	    Args:
	        claim: Claim text entered by the user.

	    Returns:
	        A 3-tuple ``(label, z_html, ev_html)``: the ``'claim_veracity'``
	        value of the result plus the ``'z'`` and ``'e'`` tables built by
	        ``gradio_formatter``. If ``loren.check`` raises, the claim and the
	        error with its traceback are logged and
	        ``('Oops, something went wrong.', '', '')`` is returned instead.
	    """
	    try:
	        js = loren.check(claim)
	    except Exception as err:
	        trace = traceback.format_exc()
	        loren.logger.error(claim)
	        loren.logger.error(f'[Error]: {err}.\n[Traceback]: {trace}')
	        return 'Oops, something went wrong.', '', ''
	    else:
	        label = js['claim_veracity']
	        loren.logger.warning(label + str(js))
	        # Evidence table is rendered first, then the phrase table.
	        ev_html = gradio_formatter(js, 'e')
	        z_html = gradio_formatter(js, 'z')
	        return label, z_html, ev_html


	# Fact-verification interface: one text box in; a text label and two HTML
	# panels out (matching the 3-tuple returned by `run`).
	iface = gr.Interface(
	    title="A Framework for Data-Driven Document Evaluation and Scoring",
	    description=(
	        "[Student Name: Karan Kumar Pathak] "
	        " [Roll No.: 2020fc04334] "
	    ),
	    fn=run,
	    inputs="text",
	    outputs=['text', 'html', 'html'],
	    examples=[
	        'Kanpur is a city in Nepal',
	        'PV Sindhu is an Indian Badminton Player.',
	    ],
	    layout='horizontal',
	    # Flagged samples are stored under results/flagged/ with one of these tags.
	    allow_flagging=True,
	    flagging_dir='results/flagged/',
	    flagging_options=[
	        'Interesting!',
	        'Error: Claim Phrase Parsing',
	        'Error: Local Premise',
	        'Error: Require Commonsense',
	        'Error: Evidence Retrieval',
	    ],
	    enable_queue=True,
	)
	iface.launch()