lamiaaEl's picture
Update app.py
687b539 verified
raw
history blame
2.59 kB
import os

import numpy as np
import pytesseract
import streamlit as st
import torch
from PIL import Image, ImageDraw, ImageFont
from transformers import AutoModelForTokenClassification, AutoProcessor
# Tell pytesseract where the Tesseract executable lives on this host.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

# The processor is created with apply_ocr=True, so it is expected to run OCR
# on the page image itself (see the LayoutLMv3 processor documentation).
processor = AutoProcessor.from_pretrained(
    "microsoft/layoutlmv3-base",
    apply_ocr=True,
)

# Token-classification checkpoint that predicts the language-level labels.
model = AutoModelForTokenClassification.from_pretrained(
    "capitaletech/language-levels-LayoutLMv3-v4",
)
# Class names in class-id order: "language" followed by the levels "1".."10".
# NOTE(review): presumably this order must match the checkpoint's own
# id2label mapping -- confirm against model.config.
labels = ["language"] + [str(level) for level in range(1, 11)]

# Forward and reverse lookups between a label string and its integer id.
label2id = dict(zip(labels, range(len(labels))))
id2label = dict(enumerate(labels))

# Drawing colour per label: levels 1-3 red, 4-6 orange, 7-10 green.
label2color = {
    'language': 'blue',
    **{str(level): 'red' for level in range(1, 4)},
    **{str(level): 'orange' for level in range(4, 7)},
    **{str(level): 'green' for level in range(7, 11)},
}
def unnormalize_box(bbox, width, height):
    """Scale a box given on a 0-1000 grid to pixel coordinates.

    Args:
        bbox: Sequence whose first four items are the box coordinates; items
            0 and 2 are scaled by ``width``, items 1 and 3 by ``height``.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        A list of four floats: the scaled coordinates in the same order.
    """
    # Horizontal coordinates sit at even positions, vertical ones at odd.
    axis_scales = (width, height, width, height)
    return [scale * (bbox[i] / 1000) for i, scale in enumerate(axis_scales)]
def iob_to_label(label):
    """Return ``label`` unchanged.

    The label names used in this file carry no prefix, so there is nothing
    to strip; the function is kept as the single place where a predicted
    label is mapped to the name used for drawing.
    """
    return label
def process_image(image):
    """Predict language levels on a document image and draw them on a copy.

    The module-level ``processor`` encodes the page, the module-level
    ``model`` assigns a label to every token, and for each word one
    rectangle plus its label text is drawn in the colour given by
    ``label2color``.

    Args:
        image: A ``PIL.Image.Image`` of the document page, in any mode.

    Returns:
        An RGB copy of ``image`` with the predicted boxes and labels drawn
        on it. The image passed in is not modified.
    """
    # PNG uploads are frequently RGBA, palette or greyscale, while the image
    # pipeline works on three channels. convert() always returns a new image,
    # so the annotations also never leak into the caller's object.
    image = image.convert("RGB")
    width, height = image.size

    encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
    # The offsets are only needed for filtering below; the model does not
    # accept them as an input.
    offset_mapping = encoding.pop('offset_mapping')

    # Inference only: no need to record an autograd graph.
    with torch.no_grad():
        outputs = model(**encoding)

    # Select the single batch element explicitly instead of squeeze(), which
    # would also drop any other dimension that happens to have size one.
    predictions = outputs.logits.argmax(-1)[0].tolist()
    token_boxes = encoding.bbox[0].tolist()
    offsets = offset_mapping[0].tolist()

    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for prediction, box, (start, end) in zip(predictions, token_boxes, offsets):
        # A non-zero start marks a continuation piece of a word (already
        # covered by the word's first token). A (0, 0) span marks a special
        # token with no text, which would otherwise be drawn as a stray
        # box and label in the top-left corner.
        if start != 0 or end == 0:
            continue
        predicted_label = iob_to_label(id2label[prediction])
        color = label2color[predicted_label]
        pixel_box = unnormalize_box(box, width, height)
        draw.rectangle(pixel_box, outline=color)
        draw.text((pixel_box[0] + 10, pixel_box[1] - 10), text=predicted_label, fill=color, font=font)
    return image
# --- Streamlit page -------------------------------------------------------
st.title("Language Levels Extraction using LayoutLMv3 Model")
st.write("Use this application to predict language levels in CVs.")

# Only PNG uploads are offered by the file picker.
uploaded_file = st.file_uploader("Choose an image...", type="png")

if uploaded_file is not None:
    # Show the upload first so the user can check it before predicting.
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # The model runs only when the button is pressed.
    predict_clicked = st.button('Predict')
    if predict_clicked:
        annotated_image = process_image(image)
        st.image(annotated_image, caption='Annotated Image', use_column_width=True)