Spaces:

capitaletech
/

LayoutLMv3-v1-languageLevels

Runtime error

App Files Files Community

LayoutLMv3-v1-languageLevels / app.py

lamiaaEl

Update app.py

687b539 verified 12 months ago

raw

history blame

2.59 kB

	import os
	import numpy as np
	import streamlit as st
	from transformers import AutoModelForTokenClassification, AutoProcessor
	from PIL import Image, ImageDraw, ImageFont

	import pytesseract

	# Absolute path of the Tesseract executable that pytesseract should invoke.
	pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'


	# Streamlit re-executes this script from the top on every widget interaction,
	# so loading the processor and model inline repeats the work on each rerun.
	# st.cache_resource keeps one shared instance per server process; if the
	# installed Streamlit does not provide it, fall back to an uncached call.
	_cache_resource = getattr(st, "cache_resource", lambda func: func)


	@_cache_resource
	def _load_processor_and_model():
	    """Load the LayoutLMv3 processor and the token-classification model.

	    Returns:
	        A ``(processor, model)`` tuple. The processor is created with
	        ``apply_ocr=True``; the model is loaded from the
	        ``capitaletech/language-levels-LayoutLMv3-v4`` checkpoint.
	    """
	    loaded_processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
	    loaded_model = AutoModelForTokenClassification.from_pretrained("capitaletech/language-levels-LayoutLMv3-v4")
	    return loaded_processor, loaded_model


	processor, model = _load_processor_and_model()

	# Class names in model-output order: index 0 is the language name itself,
	# indices 1-10 are the proficiency levels "1" .. "10".
	labels = ["language"] + [str(level) for level in range(1, 11)]

	# Two-way lookup between class index and class name.
	id2label = dict(enumerate(labels))
	label2id = {name: index for index, name in id2label.items()}

	# Drawing colour per class: levels are banded 1-3 / 4-6 / 7-10.
	label2color = {
	    "language": "blue",
	    **dict.fromkeys(("1", "2", "3"), "red"),
	    **dict.fromkeys(("4", "5", "6"), "orange"),
	    **dict.fromkeys(("7", "8", "9", "10"), "green"),
	}

	def unnormalize_box(bbox, width, height):
	    """Scale a box expressed in thousandths of the image size to pixel units.

	    Args:
	        bbox: Indexable with at least four numbers. Items 0 and 2 are scaled
	            by ``width``, items 1 and 3 by ``height``.
	        width: Image width in pixels.
	        height: Image height in pixels.

	    Returns:
	        A list of four floats: each input coordinate divided by 1000 and
	        multiplied by the matching image dimension.
	    """
	    return [
	        width * (bbox[0] / 1000),
	        height * (bbox[1] / 1000),
	        width * (bbox[2] / 1000),
	        height * (bbox[3] / 1000),
	    ]

	def iob_to_label(label):
	    """Return ``label`` unchanged.

	    The label names used in this file carry no prefix to strip, so this is
	    an identity mapping kept as a single hook between a predicted class
	    name and the name used for drawing.
	    """
	    return label

	def process_image(image):
	    """Return a copy of ``image`` annotated with the predicted labels.

	    The module-level ``processor`` encodes the image, the module-level
	    ``model`` predicts one class per token, and one prediction per word
	    (its first sub-token) is drawn as a coloured rectangle with the label
	    text next to it.

	    Args:
	        image: A PIL image. It is not modified; drawing happens on an RGB
	            copy.

	    Returns:
	        A new RGB PIL image with the boxes and label texts drawn on it.
	    """
	    # Local import: the file's import block does not bring torch into scope.
	    import torch

	    # convert() returns a new image, so the caller's object stays untouched,
	    # and RGBA / palette / greyscale PNGs become the 3-channel RGB input that
	    # the LayoutLMv3 examples feed to the processor.
	    image = image.convert("RGB")
	    width, height = image.size

	    encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
	    offset_mapping = encoding.pop('offset_mapping')

	    # Inference only, so skip building the autograd graph.
	    with torch.no_grad():
	        outputs = model(**encoding)

	    # Select the single batch item explicitly; squeeze() would also collapse
	    # the sequence axis if it ever had length 1.
	    predictions = outputs.logits.argmax(-1)[0].tolist()
	    token_boxes = encoding.bbox[0].tolist()
	    token_offsets = offset_mapping[0].tolist()

	    draw = ImageDraw.Draw(image)
	    font = ImageFont.load_default()
	    for prediction, box, offsets in zip(predictions, token_boxes, token_offsets):
	        # A non-zero start offset marks a continuation sub-token; only the
	        # first sub-token of each word is annotated.
	        if offsets[0] != 0:
	            continue
	        # Special tokens also start at offset 0 but carry an all-zero box
	        # (the LayoutLMv3 tokenizer default); drawing them would stamp a
	        # stray label in the top-left corner.
	        if not any(box):
	            continue

	        predicted_label = iob_to_label(id2label[prediction])
	        color = label2color[predicted_label]
	        pixel_box = unnormalize_box(box, width, height)
	        draw.rectangle(pixel_box, outline=color)
	        draw.text((pixel_box[0] + 10, pixel_box[1] - 10), text=predicted_label, fill=color, font=font)

	    return image

	# --- Streamlit page -----------------------------------------------------
	st.title("Language Levels Extraction using LayoutLMv3 Model")
	st.write("Use this application to predict language levels in CVs.")

	# The uploader is restricted to PNG files.
	uploaded_file = st.file_uploader("Choose an image...", type="png")

	if uploaded_file is not None:
	    # Show the upload first so the user sees what will be analysed.
	    image = Image.open(uploaded_file)
	    st.image(image, caption='Uploaded Image', use_column_width=True)

	    # Prediction runs only when the button is clicked.
	    if st.button('Predict'):
	        annotated_image = process_image(image)
	        st.image(annotated_image, caption='Annotated Image', use_column_width=True)