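# Streamlit app that annotates language-proficiency levels in CV images
# using a fine-tuned LayoutLMv3 token-classification model.
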
import os
import numpy as np
import streamlit as st
import torch
from transformers import AutoModelForTokenClassification, AutoProcessor
from PIL import Image, ImageDraw, ImageFont
# Load the LayoutLMv3 model and processor
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
model = AutoModelForTokenClassification.from_pretrained("capitaletech/language-levels-LayoutLMv3-v4")
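
# Token classes predicted by the model: "language" plus proficiency levels 1-10.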
labels = ["language", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
label2id = {label: idx for idx, label in enumerate(labels)}
id2label = {v: k for k, v in label2id.items()}
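
# Colors for the drawn boxes, grouping levels into bands (1-3 red, 4-6 orange, 7-10 green).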
label2color = {
    'language': 'blue', '1': 'red', '2': 'red', '3': 'red',
    '4': 'orange', '5': 'orange', '6': 'orange', '7': 'green',
    '8': 'green', '9': 'green', '10': 'green'
}
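
# LayoutLMv3 bounding boxes are normalized to a 0-1000 scale; convert them back to pixel coordinates.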
def unnormalize_box(bbox, width, height):
    return [
        width * (bbox[0] / 1000),
        height * (bbox[1] / 1000),
        width * (bbox[2] / 1000),
        height * (bbox[3] / 1000),
    ]
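
# The fine-tuned labels carry no IOB prefixes, so they are returned unchanged.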
def iob_to_label(label):
    return label

def process_image(image):
    width, height = image.size

    # Encode the image; the processor runs OCR and returns words, boxes, and input ids
    encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
    offset_mapping = encoding.pop('offset_mapping')

    # Forward pass (inference only, so no gradients are needed)
    with torch.no_grad():
        outputs = model(**encoding)

    # Get predictions
    predictions = outputs.logits.argmax(-1).squeeze().tolist()
    token_boxes = encoding.bbox.squeeze().tolist()

    # Only keep predictions for the first sub-word token of each word
    is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
    true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
    true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]

    # Draw predicted labels and boxes over the image
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for prediction, box in zip(true_predictions, true_boxes):
        predicted_label = iob_to_label(prediction)
        draw.rectangle(box, outline=label2color[predicted_label])
        draw.text((box[0] + 10, box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)

    return image

# Streamlit UI
st.title("Language Levels Extraction using LayoutLMv3 Model")
st.write("Use this application to predict language levels in CVs.")

uploaded_file = st.file_uploader("Choose an image...", type="png")

if uploaded_file is not None:
    # Convert to RGB so OCR and drawing also work on PNGs with alpha or palette modes
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption='Uploaded Image', use_column_width=True)
    if st.button('Predict'):
        annotated_image = process_image(image)
        st.image(annotated_image, caption='Annotated Image', use_column_width=True)

# If the model repository requires authentication, set your Hugging Face token
# (ideally before the model is loaded above).
os.environ["YOUR_TOKEN_ENV_VAR"] = "your_token_here"
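
# Run locally with: streamlit run app.py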