"""Run baseline and fine-tuned TrOCR checkpoints over the test CSV.

For every row of ``testing_data.csv`` the matching image is loaded, passed
through both models, and the two decoded strings are stored in the
``Baseline`` and ``Finetune`` columns of ``inference_data.csv``.
"""

from pathlib import Path

import pandas as pd
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

DATASET_DIR = Path("/mnt/data1/Datasets/AlphaPen/")
IMAGE_DIR = Path("/mnt/data1/Datasets/OCR/Alphapen/clean_data/")
IMAGE_PREFIX = "final_cropped_rotated_"
BASE_CHECKPOINT = "microsoft/trocr-base-handwritten"

# Finetuned model
model_finetune = VisionEncoderDecoderModel.from_pretrained("hadrakey/alphapen_trocr")
# Baseline
model_base = VisionEncoderDecoderModel.from_pretrained(BASE_CHECKPOINT)
# The baseline checkpoint's processor is used for both models, for image
# preprocessing as well as for decoding the generated token ids.
processor = TrOCRProcessor.from_pretrained(BASE_CHECKPOINT)


def _transcribe(model, pixel_values):
    """Generate token ids with *model* and decode the first sequence to text."""
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


# Checked label
data = pd.read_csv(DATASET_DIR / "testing_data.csv")
data.dropna(inplace=True)
# NOTE(review): without drop=True the previous index is kept as an "index"
# column and ends up in the output CSV; left as-is so the output layout
# does not change.
data.reset_index(inplace=True)

inf_baseline = []
inf_finetune = []
for filename in data["filename"]:
    image_path = IMAGE_DIR / f"{IMAGE_PREFIX}{filename}"
    # Close the file handle as soon as the RGB copy has been made.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    # Preprocess once and feed the same tensor to both models.
    pixel_values = processor(image, return_tensors="pt").pixel_values
    inf_baseline.append(_transcribe(model_base, pixel_values))
    inf_finetune.append(_transcribe(model_finetune, pixel_values))

data["Baseline"] = inf_baseline
data["Finetune"] = inf_finetune
# NOTE(review): the DataFrame index is written as an extra unnamed column
# (pandas default); kept for compatibility with existing consumers.
data.to_csv(DATASET_DIR / "inference_data.csv")