Spaces:

DeepDiveDev
/

TransformoDocs-Demo

Sleeping

App Files Files Community

DeepDiveDev committed on Feb 26

Commit

0b73000

verified ·

1 Parent(s): 6477a5b

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -34

app.py CHANGED Viewed

@@ -1,48 +1,33 @@
 import gradio as gr
-import torch
-import numpy as np
-from PIL import Image
-from transformers import AutoProcessor, AutoModelForVision2Seq
-# Load the model & processor
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 model_name = "microsoft/trocr-large-handwritten"
 processor = TrOCRProcessor.from_pretrained(model_name)
 model = VisionEncoderDecoderModel.from_pretrained(model_name)
-processor = AutoProcessor.from_pretrained(model_name)
-model = AutoModelForVision2Seq.from_pretrained(model_name)
-# Function to extract handwritten text
-def extract_text(image):
-    """Run handwritten-text OCR on an image and return the decoded string.
-
-    A NumPy array input is turned into a PIL image (a 2-D array is first
-    stacked into three identical channels); any other input is handed to
-    ``Image.open`` and converted to RGB. The image is then passed through
-    the module-level ``processor`` and ``model``.
-
-    Returns:
-        The first decoded sequence, or the string ``"Error: <message>"`` if
-        any step raises -- exceptions are never propagated to the caller.
-    """
-    try:
-        # Convert input to PIL Image
-        if isinstance(image, np.ndarray):
-            if len(image.shape) == 2:  # If grayscale (H, W), add channels
-                image = np.stack([image] * 3, axis=-1)
-            # NOTE(review): arrays that are not 2-D are passed through as-is,
-            # without an explicit RGB conversion -- confirm that is intended.
-            image = Image.fromarray(image)
-        else:
-            image = Image.open(image).convert("RGB")
-        # Process image through model
-        pixel_values = processor(images=image, return_tensors="pt").pixel_values
-        generated_ids = model.generate(pixel_values)
-        # batch_decode returns one string per sequence; only the first is used.
-        extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        return extracted_text
-    except Exception as e:
-        # Failures are reported as the function's text result instead of raising.
-        return f"Error: {str(e)}"
-# Gradio Interface
 iface = gr.Interface(
-    fn=extract_text,
-    inputs="image",
     outputs="text",
-    title="Handwritten Text OCR",
-    description="Upload a handwritten document and extract text using AI.",
 )
-# Run the app
 iface.launch()

 import gradio as gr
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from PIL import Image
+# Load the model and processor
 model_name = "microsoft/trocr-large-handwritten"
 processor = TrOCRProcessor.from_pretrained(model_name)
 model = VisionEncoderDecoderModel.from_pretrained(model_name)
+def ocr_recognition(image):
+    """Return the text recognized in a handwritten image.
+
+    Args:
+        image: A ``PIL.Image.Image`` (what ``gr.Image(type="pil")`` passes
+            to the callback), a file path or file object readable by
+            ``Image.open``, or ``None`` when nothing was uploaded.
+
+    Returns:
+        The first decoded sequence produced by the model, or an empty
+        string when no image was supplied.
+    """
+    # Gradio passes None when the form is submitted without an upload.
+    if image is None:
+        return ""
+    # The interface already delivers a decoded PIL image; Image.open() only
+    # accepts paths/file objects and fails on one, so open only other inputs.
+    if not isinstance(image, Image.Image):
+        image = Image.open(image)
+    image = image.convert("RGB")
+    # Process the image and generate text
+    pixel_values = processor(image, return_tensors="pt").pixel_values
+    generated_ids = model.generate(pixel_values)
+    # Decode the output text; one image in, so take the single decoded string.
+    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return text
+# Create Gradio interface
 iface = gr.Interface(
+    fn=ocr_recognition,
+    inputs=gr.Image(type="pil"),  # Ensures PIL image input
     outputs="text",
+    title="Handwritten OCR Extraction",
+    description="Upload a handwritten image to extract text using TrOCR."
 )
+# Launch the Gradio app
 iface.launch()