DeepDiveDev committed · Commit 429d160 · verified · 1 Parent(s): fca31c6

Update app.py

Files changed (1):
  1. app.py +16 -19
app.py CHANGED
@@ -1,35 +1,32 @@
 import gradio as gr
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel
-from PIL import Image
-import numpy as np
 import torch
+import numpy as np
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForVision2Seq

-# Load TrOCR model and processor
-processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
-model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")
+# Load the model & processor
+model_name = "Murasajo/Llama-3.2-VL-Finetuned-on-HandwrittenText"
+processor = AutoProcessor.from_pretrained(model_name)
+model = AutoModelForVision2Seq.from_pretrained(model_name)

-# Function to extract text from handwritten images
+# Function to extract handwritten text
 def extract_text(image):
     try:
-        # Convert image to RGB if needed
+        # Convert input to PIL Image
         if isinstance(image, np.ndarray):
-            if len(image.shape) == 2:  # If grayscale (H, W), convert to RGB
+            if len(image.shape) == 2:  # If grayscale (H, W), add channels
                 image = np.stack([image] * 3, axis=-1)
             image = Image.fromarray(image)
         else:
             image = Image.open(image).convert("RGB")

-        # Preprocessing (convert to grayscale for better OCR)
-        image = image.convert("L")
-        image = image.resize((640, 640))
-
-        # Process image
+        # Process image through model
         pixel_values = processor(images=image, return_tensors="pt").pixel_values
         generated_ids = model.generate(pixel_values)
         extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

-        return extracted_text if extracted_text.strip() else "No text detected."
-
+        return extracted_text
+
     except Exception as e:
         return f"Error: {str(e)}"

@@ -38,9 +35,9 @@ iface = gr.Interface(
     fn=extract_text,
     inputs="image",
     outputs="text",
-    title="Handwritten OCR Extractor",
-    description="Upload a handwritten image to extract text.",
+    title="Handwritten Text OCR",
+    description="Upload a handwritten document and extract text using AI.",
 )

-# Launch the app
+# Run the app
 iface.launch()
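A minimal standalone sketch of the committed inference path, for sanity-checking the new checkpoint without starting the Gradio UI (importing app.py directly would also run iface.launch()). The test image name sample_handwriting.png is hypothetical; everything else mirrors the code in this commit:

from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

# Same checkpoint as in the commit
model_name = "Murasajo/Llama-3.2-VL-Finetuned-on-HandwrittenText"
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForVision2Seq.from_pretrained(model_name)

# Hypothetical test image; any RGB handwritten sample works
image = Image.open("sample_handwriting.png").convert("RGB")
pixel_values = processor(images=image, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])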