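# Streamlit app that annotates language-proficiency levels in CV images
# using a fine-tuned LayoutLMv3 token-classification model.
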
import os
import numpy as np
import streamlit as st
import torch
from transformers import AutoModelForTokenClassification, AutoProcessor
from PIL import Image, ImageDraw, ImageFont
# Load the LayoutLMv3 model and processor
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
model = AutoModelForTokenClassification.from_pretrained("capitaletech/language-levels-LayoutLMv3-v4")
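
# Token classes predicted by the model: "language" plus proficiency levels 1-10.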
labels = ["language", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
label2id = {label: idx for idx, label in enumerate(labels)}
id2label = {v: k for k, v in label2id.items()}
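
# Colors for the drawn boxes, grouping levels into bands (1-3 red, 4-6 orange, 7-10 green).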
label2color = {
    'language': 'blue', '1': 'red', '2': 'red', '3': 'red',
    '4': 'orange', '5': 'orange', '6': 'orange', '7': 'green',
    '8': 'green', '9': 'green', '10': 'green'
}
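
# LayoutLMv3 bounding boxes are normalized to a 0-1000 scale; convert them back to pixel coordinates.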
def unnormalize_box(bbox, width, height):
    return [
        width * (bbox[0] / 1000),
        height * (bbox[1] / 1000),
        width * (bbox[2] / 1000),
        height * (bbox[3] / 1000),
    ]
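
# The fine-tuned labels carry no IOB prefixes, so they are returned unchanged.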
def iob_to_label(label):
    return label

def process_image(image):
    width, height = image.size

    # Encode the image; the processor runs OCR and returns words, boxes, and input ids
    encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
    offset_mapping = encoding.pop('offset_mapping')

    # Forward pass (inference only, so no gradients are needed)
    with torch.no_grad():
        outputs = model(**encoding)

    # Get predictions
    predictions = outputs.logits.argmax(-1).squeeze().tolist()
    token_boxes = encoding.bbox.squeeze().tolist()

    # Only keep predictions for the first sub-word token of each word
    is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
    true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
    true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]

    # Draw predicted labels and boxes over the image
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for prediction, box in zip(true_predictions, true_boxes):
        predicted_label = iob_to_label(prediction)
        draw.rectangle(box, outline=label2color[predicted_label])
        draw.text((box[0] + 10, box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)

    return image

# Streamlit UI
st.title("Language Levels Extraction using LayoutLMv3 Model")
st.write("Use this application to predict language levels in CVs.")

uploaded_file = st.file_uploader("Choose an image...", type="png")

if uploaded_file is not None:
    # Convert to RGB so OCR and drawing also work on PNGs with alpha or palette modes
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption='Uploaded Image', use_column_width=True)
    if st.button('Predict'):
        annotated_image = process_image(image)
        st.image(annotated_image, caption='Annotated Image', use_column_width=True)

# If the model repository requires authentication, set your Hugging Face token
# (ideally before the model is loaded above).
os.environ["YOUR_TOKEN_ENV_VAR"] = "your_token_here"
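
# Run locally with: streamlit run app.py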