DeepDiveDev committed on
Commit 398e23b · verified · 1 Parent(s): d010bf6

Update app.py

Files changed (1)
app.py +21 -29
app.py CHANGED
@@ -4,51 +4,43 @@ from PIL import Image
 import numpy as np
 import torch
 
-# Load the primary OCR model (DeepDiveDev/transformodocs-ocr)
-processor1 = TrOCRProcessor.from_pretrained("DeepDiveDev/transformodocs-ocr")
-model1 = VisionEncoderDecoderModel.from_pretrained("DeepDiveDev/transformodocs-ocr")
-
-# Load the fallback model (microsoft/trocr-base-handwritten) for handwritten text
-processor2 = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
-model2 = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
+# Load TrOCR model and processor
+processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
+model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")
 
 # Function to extract text from handwritten images
 def extract_text(image):
     try:
-        # Ensure input is a PIL Image
+        # Convert image to RGB if needed
         if isinstance(image, np.ndarray):
-            if len(image.shape) == 2:  # Grayscale (H, W) -> Convert to RGB
+            if len(image.shape) == 2:  # If grayscale (H, W), convert to RGB
                 image = np.stack([image] * 3, axis=-1)
             image = Image.fromarray(image)
-        elif isinstance(image, str):  # If a file path is given, open the image
+        else:
             image = Image.open(image).convert("RGB")
 
-        # Maintain aspect ratio while resizing (better for OCR)
-        image.thumbnail((800, 800))
-
-        # Process image with the first model
-        pixel_values = processor1(images=image, return_tensors="pt").pixel_values.to(torch.float32)
-        generated_ids = model1.generate(pixel_values)
-        extracted_text = processor1.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-        # If output is short or incorrect, use the fallback model
-        if len(extracted_text.strip()) < 2:
-            inputs = processor2(images=image, return_tensors="pt").pixel_values.to(torch.float32)
-            generated_ids = model2.generate(inputs)
-            extracted_text = processor2.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-        return extracted_text if extracted_text else "No text detected."
+        # Preprocessing (convert to grayscale for better OCR)
+        image = image.convert("L")
+        image = image.resize((640, 640))
 
+        # Process image
+        pixel_values = processor(images=image, return_tensors="pt").pixel_values
+        generated_ids = model.generate(pixel_values)
+        extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+        return extracted_text if extracted_text.strip() else "No text detected."
+
     except Exception as e:
         return f"Error: {str(e)}"
 
-# Gradio UI for OCR Extraction
+# Gradio Interface
 iface = gr.Interface(
     fn=extract_text,
-    inputs=gr.Image(type="pil"),  # Ensures input is a PIL image
+    inputs="image",
     outputs="text",
-    title="Handwritten OCR Extraction",
-    description="Upload a handwritten image to extract text using AI OCR.",
+    title="Handwritten OCR Extractor",
+    description="Upload a handwritten image to extract text.",
 )
 
+# Launch the app
 iface.launch()
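
For reference, a minimal smoke test of the updated inference path outside Gradio, a sketch only: it assumes the Space's dependencies (transformers, torch, Pillow) are installed, and "sample.png" is a hypothetical handwritten-image file, not part of this commit.

# Sketch: exercise the new single-model TrOCR path directly (no Gradio).
# Assumes transformers/torch/Pillow are installed; "sample.png" is hypothetical.
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")

image = Image.open("sample.png").convert("RGB")  # three-channel input, as in the old code path
pixel_values = processor(images=image, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])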
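
One caveat on the new preprocessing: `image.convert("L")` hands the processor a single-channel image, while TrOCR checkpoints are typically fed three-channel RGB (the version removed by this commit kept RGB throughout). A variant that keeps the grayscale step but restores the channel count before feature extraction, offered as a suggestion and not as part of the commit:

# Sketch (assumption, not in the commit): apply grayscale as a filter,
# then convert back to RGB so the processor receives 3 channels.
image = image.convert("L").convert("RGB")
image = image.resize((640, 640))
pixel_values = processor(images=image, return_tensors="pt").pixel_values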