alphapen_new_large_45000 / inference.py
hadrakey's picture
Training in progress, step 1000
17bd62d verified
raw
history blame
1.4 kB
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import pandas as pd
from PIL import Image
# Run the fine-tuned AlphaPen TrOCR checkpoint and the stock handwritten TrOCR
# baseline over every image listed in the checked test CSV, then write both
# transcriptions back out as two extra columns ("Baseline" and "Finetune").

# Finetuned model
model_finetune = VisionEncoderDecoderModel.from_pretrained("hadrakey/alphapen_trocr")
# Baseline
model_base = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
# A single processor (the baseline's) prepares the pixels and decodes the
# token ids for both models.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")

# Checked label
# The input CSV and the output CSV live in the same directory.
dataset_dir = "/mnt/data1/Datasets/AlphaPen/"
df_path = dataset_dir + "testing_data.csv"
data = pd.read_csv(df_path)
# Rows with any missing value are discarded before inference.
data.dropna(inplace=True)
# NOTE: reset_index() is called without drop=True, so the pre-dropna row labels
# are kept as an "index" column and end up in the output CSV. Left as-is so the
# output schema does not change for downstream consumers.
data.reset_index(inplace=True)

root_dir = "/mnt/data1/Datasets/OCR/Alphapen/clean_data/"
inf_baseline = []
inf_finetune = []
# Iterate the filename column directly instead of indexing by position.
for filename in data.filename:
    # Image files on disk carry a "final_cropped_rotated_" prefix in front of
    # the name stored in the CSV. The context manager closes the file handle
    # as soon as the RGB copy has been made.
    with Image.open(root_dir + "final_cropped_rotated_" + filename) as raw_image:
        image = raw_image.convert("RGB")

    # Preprocess once and feed the same tensor to both models.
    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids_base = model_base.generate(pixel_values)
    generated_ids_fine = model_finetune.generate(pixel_values)

    # One image is processed per call, so only the first decoded string is kept.
    generated_text_base = processor.batch_decode(generated_ids_base, skip_special_tokens=True)[0]
    generated_text_fine = processor.batch_decode(generated_ids_fine, skip_special_tokens=True)[0]

    inf_baseline.append(generated_text_base)
    inf_finetune.append(generated_text_fine)

# The predictions were collected in row order, so they line up with the frame.
data["Baseline"] = inf_baseline
data["Finetune"] = inf_finetune
data.to_csv(dataset_dir + "inference_data.csv")