Spaces:

Noureddinesa
/

LayoutLmv3_Space

Running

LayoutLmv3_Space / utilitis.py

ITSAIDI

37ac053 9 months ago

5.62 kB

	import streamlit as st
	from paddleocr import PaddleOCR
	from PIL import ImageDraw, ImageFont
	import torch
	from transformers import AutoProcessor,LayoutLMv3ForTokenClassification
	import numpy as np

	# Hugging Face Hub identifier handed to ``from_pretrained`` for both the
	# processor (in Encode) and the token-classification model (in Run_model).
	model_Hugging_path = "Noureddinesa/Output_LayoutLMv3_v2"


	#############################################################################
	#############################################################################
	def Labels():
	    """Build the lookup tables between class indices and label names.

	    Returns:
	        tuple: ``(id2label, label2id)``. ``id2label`` maps each integer
	        index to its label string; ``label2id`` is the inverse mapping.
	    """
	    names = ('InvNum', 'InvDate', 'Fourni', 'TTC', 'TVA', 'TT', 'Autre')
	    id2label = dict(enumerate(names))
	    # Invert the index -> name table to obtain name -> index.
	    label2id = {name: index for index, name in id2label.items()}
	    return id2label, label2id

	#############################################################################
	#############################################################################
	def Paddle():
	    """Create the PaddleOCR engine used by the preprocessing step.

	    Returns:
	        PaddleOCR: a new engine configured with ``lang='fr'``,
	        ``use_angle_cls=False`` and ``rec=False``.
	    """
	    return PaddleOCR(rec=False, lang='fr', use_angle_cls=False)

	def processbbox(BBOX, width, height):
	    """Convert an OCR quadrilateral into a normalized ``[x1, y1, x2, y2]`` box.

	    Only the first and third corner points (``BBOX[0]`` and ``BBOX[2]``,
	    the two opposite corners) are used. Their pixel coordinates are scaled
	    to a 0-1000 grid relative to the image size and truncated to integers.

	    Args:
	        BBOX: sequence of corner points, each indexable as ``[x, y]``.
	        width: image width in pixels.
	        height: image height in pixels.

	    Returns:
	        list[int]: ``[x1, y1, x2, y2]`` with ``x1 <= x2``, ``y1 <= y2`` and
	        every value clamped to the inclusive range 0-1000.
	    """
	    def _scale(value, size):
	        # Clamp so boxes that spill slightly outside the image cannot
	        # produce coordinates outside the 0-1000 grid expected downstream.
	        return min(1000, max(0, int(1000 * value / size)))

	    xa, ya = BBOX[0][0], BBOX[0][1]
	    xb, yb = BBOX[2][0], BBOX[2][1]
	    # Order the two corners so the result is always (left, top, right,
	    # bottom) even if the detector lists the points starting elsewhere.
	    return [
	        _scale(min(xa, xb), width),
	        _scale(min(ya, yb), height),
	        _scale(max(xa, xb), width),
	        _scale(max(ya, yb), height),
	    ]


	def Preprocess(image):
	    """Run OCR on an image and collect recognized text with normalized boxes.

	    Args:
	        image: image object that ``np.array`` can convert and that exposes
	            ``.size`` as ``(width, height)`` (presumably a PIL image).

	    Returns:
	        dict: ``{'image': image, 'tokens': [...], 'bboxes': [...]}`` where
	        ``tokens`` holds the recognized strings and ``bboxes`` the matching
	        boxes produced by ``processbbox``. Both lists are empty when the
	        OCR engine reports no text.
	    """
	    image_array = np.array(image)
	    ocr = Paddle()
	    width, height = image.size
	    results = ocr.ocr(image_array, cls=True)
	    # Only the first entry is used (a single image is submitted). That entry
	    # can be None when nothing is detected, so fall back to an empty list
	    # instead of failing in the loop below.
	    detections = (results[0] if results else None) or []
	    test_dict = {'image': image, 'tokens': [], "bboxes": []}
	    for item in detections:
	        # item[0] is the corner-point list, item[1][0] the recognized text.
	        bbox = processbbox(item[0], width, height)
	        test_dict['tokens'].append(item[1][0])
	        test_dict['bboxes'].append(bbox)

	    print(test_dict['bboxes'])
	    print(test_dict['tokens'])
	    return test_dict

	#############################################################################
	#############################################################################
	def Encode(image):
	    """Turn an image into model inputs using OCR output and the processor.

	    Args:
	        image: image forwarded unchanged to ``Preprocess``.

	    Returns:
	        tuple: ``(encoding, offset_mapping, words)`` - the processor output
	        with ``offset_mapping`` removed, the removed offset mapping, and
	        the list of OCR tokens.
	    """
	    sample = Preprocess(image)
	    tokens = sample["tokens"]
	    # apply_ocr=False: text and boxes are supplied by Preprocess.
	    processor = AutoProcessor.from_pretrained(model_Hugging_path, apply_ocr=False)
	    encoding = processor(
	        sample["image"],
	        tokens,
	        boxes=sample["bboxes"],
	        return_offsets_mapping=True,
	        truncation=True,
	        max_length=512,
	        padding="max_length",
	        return_tensors="pt",
	    )
	    # The offsets are returned separately, so take them out of the encoding.
	    offsets = encoding.pop('offset_mapping')
	    return encoding, offsets, tokens

	def unnormalize_box(bbox, width, height):
	    """Map a box from the 0-1000 grid back to pixel coordinates.

	    Args:
	        bbox: sequence whose first four items are ``x1, y1, x2, y2`` on the
	            0-1000 grid.
	        width: image width in pixels.
	        height: image height in pixels.

	    Returns:
	        list: ``[x1, y1, x2, y2]`` in pixels (floats).
	    """
	    # Fractions of the image size for each of the four coordinates.
	    left, top, right, bottom = (bbox[i] / 1000 for i in range(4))
	    return [width * left, height * top, width * right, height * bottom]

	def Run_model(image):
	    """Classify the OCR tokens of an image with the fine-tuned model.

	    Args:
	        image: image exposing ``.size`` as ``(width, height)``; it is also
	            forwarded to ``Encode``.

	    Returns:
	        tuple: ``(true_predictions, true_boxes, words)``.
	        ``true_predictions`` holds one label string per kept position,
	        ``true_boxes`` the pixel box for the same positions, and ``words``
	        the OCR tokens returned by ``Encode``. A position is kept when its
	        offset mapping starts at 0.
	        NOTE(review): that filter presumably also keeps special and padding
	        positions, so the two lists can be longer than ``words`` - confirm.
	    """
	    encoding, offset_mapping, words = Encode(image)
	    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	    # load the fine-tuned model from the hub
	    model = LayoutLMv3ForTokenClassification.from_pretrained(model_Hugging_path)
	    model.to(device)
	    # The inputs must live on the same device as the model, otherwise the
	    # forward pass fails as soon as CUDA is available.
	    inputs = {
	        key: (value.to(device) if isinstance(value, torch.Tensor) else value)
	        for key, value in encoding.items()
	    }
	    # forward pass; inference only, so skip gradient tracking
	    with torch.no_grad():
	        outputs = model(**inputs)

	    predictions = outputs.logits.argmax(-1).squeeze().tolist()
	    token_boxes = encoding["bbox"].squeeze().tolist()

	    width, height = image.size

	    id2label, _ = Labels()
	    # A non-zero start offset marks a continuation piece of a word.
	    is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
	    true_predictions = [
	        id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]
	    ]
	    true_boxes = [
	        unnormalize_box(box, width, height)
	        for idx, box in enumerate(token_boxes)
	        if not is_subword[idx]
	    ]
	    return true_predictions, true_boxes, words


	def Get_Json(true_predictions, words):
	    """Collect the word assigned to each invoice field label.

	    The prediction at position ``i`` is paired with ``words[i - 1]``
	    (presumably because the first prediction belongs to a leading special
	    token - confirm against the tokenizer). Positions that have no matching
	    word are skipped. When a label occurs several times, the last
	    occurrence wins.

	    Args:
	        true_predictions: iterable of label strings, one per position.
	        words: list of recognized words.

	    Returns:
	        dict: label -> word, for the labels ``InvNum``, ``Fourni``,
	        ``InvDate``, ``TT``, ``TTC`` and ``TVA`` only.
	    """
	    wanted = ('InvNum', 'Fourni', 'InvDate', 'TT', 'TTC', 'TVA')
	    Results = {}
	    for position, label in enumerate(true_predictions):
	        word_index = position - 1
	        # Guard both ends: index -1 would silently pick the last word, and
	        # an index past the end would raise IndexError.
	        if label in wanted and 0 <= word_index < len(words):
	            Results[label] = words[word_index]
	    return Results


	def Draw(image):
	    """Annotate an image with the predicted fields and return the results.

	    Runs ``Run_model`` on the image, then draws on the SAME image object
	    (it is modified in place): for every prediction other than ``Autre`` an
	    outlined box, a filled label banner shifted by ``label_y_offset``, and
	    the label text in white.

	    Args:
	        image: PIL image to analyse and draw on.

	    Returns:
	        tuple: ``(image, Results)`` - the annotated image and the
	        label -> word dictionary produced by ``Get_Json``.
	    """
	    true_predictions, true_boxes, words = Run_model(image)
	    draw = ImageDraw.Draw(image)

	    label2color = {
	        'InvNum': 'blue',
	        'InvDate': 'green',
	        'Fourni': 'orange',
	        'TTC': 'purple',
	        'TVA': 'magenta',
	        'TT': 'red',
	        'Autre': 'black',
	    }

	    # Thickness of the box outline and position of the label text
	    rectangle_thickness = 4
	    label_x_offset = 20
	    label_y_offset = -40

	    # Custom font size
	    custom_font_size = 25

	    # Load a font with the custom size
	    font_path = "arial.ttf"  # Specify the path to your font file
	    try:
	        custom_font = ImageFont.truetype(font_path, custom_font_size)
	    except OSError:
	        # The font file is not available on every host; keep annotating
	        # with Pillow's built-in font instead of failing the whole request.
	        custom_font = ImageFont.load_default()

	    for predicted_label, box in zip(true_predictions, true_boxes):
	        if predicted_label == "Autre":
	            continue
	        # Unknown labels fall back to black.
	        color = label2color.get(predicted_label, 'black')
	        draw.rectangle(box, outline=color, width=rectangle_thickness)
	        # Filled banner behind the label, then the label text itself
	        draw.rectangle(
	            (box[0], box[1] + label_y_offset, box[2], box[3] + label_y_offset),
	            fill=color,
	        )
	        draw.text(
	            (box[0] + label_x_offset, box[1] + label_y_offset),
	            text=predicted_label,
	            fill='white',
	            font=custom_font,
	        )

	    # Get the Results Json File
	    Results = Get_Json(true_predictions, words)

	    return image, Results


	def Add_Results(data):
	    """Expose every entry of ``data`` as a text input in the sidebar.

	    Each value is replaced in place by whatever ``st.sidebar.text_input``
	    returns for that key (called with the key as label and the current
	    value as second argument). Nothing is returned.

	    Args:
	        data: dict mapping field names to their current values.
	    """
	    # Snapshot the keys; only values are reassigned, the key set is stable.
	    for field in list(data):
	        data[field] = st.sidebar.text_input(field, data[field])

	#############################################################################
	#############################################################################