import gradio as gr import subprocess from PIL import Image import tempfile import os import yaml import base64 import evaluate def resize_image(image, base_height): if image.size[1] == base_height: return image # Calculate aspect ratio w_percent = base_height / float(image.size[1]) w_size = int(float(image.size[0]) * float(w_percent)) # Resize the image return image.resize((w_size, base_height), Image.Resampling.LANCZOS) # Get images and respective transcriptions from the examples directory def get_example_data(folder_path="./examples/"): example_data = [] # Get list of all files in the folder all_files = os.listdir(folder_path) # Loop through the file list for file_name in all_files: file_path = os.path.join(folder_path, file_name) # Check if the file is an image (.png) if file_name.endswith(".jpg"): # Construct the corresponding .txt filename (same name) corresponding_text_file_name = file_name.replace(".jpg", ".txt") corresponding_text_file_path = os.path.join(folder_path, corresponding_text_file_name) # Initialize to a default value transcription = "Transcription not found." # Try to read the content from the .txt file try: with open(corresponding_text_file_path, "r") as f: transcription = f.read().strip() except FileNotFoundError: pass # If the corresponding .txt file is not found, leave the default value example_data.append([file_path, transcription]) return example_data def predict(input_image: Image.Image, ground_truth): cer = None try: # Try to resize the image to a fixed height of 128 pixels try: input_image = resize_image(input_image, 128) except Exception as e: print(f"Image resizing failed: {e}") return f"Image resizing failed: {e}" # Used as a context manager. Takes care of cleaning up the directory. # Even if an error is raised within the with block, the directory is removed. # No finally block needed with tempfile.TemporaryDirectory() as temp_dir: temp_image_path = os.path.join(temp_dir, 'temp_image.jpg') temp_list_path = os.path.join(temp_dir, 'temp_img_list.txt') temp_config_path = os.path.join(temp_dir, 'temp_config.yaml') input_image.save(temp_image_path) # Create a temporary img_list file with open(temp_list_path, 'w') as f: f.write(temp_image_path) # Read the original config file and create a temporary one with open('my_decode_config.yaml', 'r') as f: config_data = yaml.safe_load(f) config_data['img_list'] = temp_list_path with open(temp_config_path, 'w') as f: yaml.dump(config_data, f) try: subprocess.run(f"pylaia-htr-decode-ctc --config {temp_config_path} | tee predict.txt", shell=True, check=True) except subprocess.CalledProcessError as e: print(f"Command failed with error {e.returncode}, output:\n{e.output}") # # Write the output to predict.txt # with open('predict.txt', 'wb') as f: # f.write(output) # Read the output from predict.txt if os.path.exists('predict.txt'): with open('predict.txt', 'r') as f: output_line = f.read().strip().split('\n')[-1] # Last line _, prediction = output_line.split(' ', 1) # split only at the first space else: print('predict.txt does not exist') if ground_truth is not None and ground_truth.strip() != "": # Debug: Print lengths before computing metric print("Number of predictions:", len(prediction)) print("Number of references:", len(ground_truth)) # Check if lengths match if len(prediction) != len(ground_truth): print("Mismatch in number of predictions and references.") print("Predictions:", prediction) print("References:", ground_truth) print("\n") cer = cer_metric.compute(predictions=[prediction], references=[ground_truth]) # cer = f"{cer:.3f}" else: cer = "Ground truth not provided" return prediction, cer except subprocess.CalledProcessError as e: return f"Command failed with error {e.returncode}" # Encode images with open("assets/header.png", "rb") as img_file: logo_html = base64.b64encode(img_file.read()).decode('utf-8') with open("assets/teklia_logo.png", "rb") as img_file: footer_html = base64.b64encode(img_file.read()).decode('utf-8') title = """