DeepDiveDev committed
Commit a3df3f5 · verified · 1 Parent(s): 2653a83

Update app.py

Files changed (1)
  1. app.py +14 -14
app.py CHANGED
@@ -4,51 +4,51 @@ from PIL import Image
 import numpy as np
 import torch
 
-# Load the primary model (DeepDiveDev/transformodocs-ocr)
+# Load the primary OCR model (DeepDiveDev/transformodocs-ocr)
 processor1 = TrOCRProcessor.from_pretrained("DeepDiveDev/transformodocs-ocr")
 model1 = VisionEncoderDecoderModel.from_pretrained("DeepDiveDev/transformodocs-ocr")
 
-# Load the fallback model (microsoft/trocr-base-handwritten)
+# Load the fallback model (microsoft/trocr-base-handwritten) for handwritten text
 processor2 = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 model2 = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
 
-# Function to extract text using both models
+# Function to extract text from handwritten images
 def extract_text(image):
     try:
         # Convert NumPy array to PIL Image if needed
         if isinstance(image, np.ndarray):
-            if len(image.shape) == 2:  # Grayscale (H, W), convert to RGB
+            if len(image.shape) == 2:  # Grayscale (H, W) -> Convert to RGB
                 image = np.stack([image] * 3, axis=-1)
             image = Image.fromarray(image)
         else:
-            image = Image.open(image).convert("RGB")  # Ensure RGB mode
+            image = Image.open(image).convert("RGB")  # Ensure RGB format
 
-        # Maintain aspect ratio while resizing
-        image.thumbnail((640, 640))
+        # Maintain aspect ratio while resizing (better for OCR)
+        image.thumbnail((800, 800))
 
-        # Process with the primary model
+        # Process image with the first model
         pixel_values = processor1(images=image, return_tensors="pt").pixel_values.to(torch.float32)
         generated_ids = model1.generate(pixel_values)
         extracted_text = processor1.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-        # If output seems incorrect, use the fallback model
+        # If output is short or incorrect, use the fallback model
         if len(extracted_text.strip()) < 2:
             inputs = processor2(images=image, return_tensors="pt").pixel_values.to(torch.float32)
             generated_ids = model2.generate(inputs)
             extracted_text = processor2.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-        return extracted_text
+        return extracted_text if extracted_text else "No text detected."
 
     except Exception as e:
         return f"Error: {str(e)}"
 
-# Gradio Interface
+# Gradio UI for OCR Extraction
 iface = gr.Interface(
     fn=extract_text,
-    inputs="image",
+    inputs=gr.Image(type="pil"),
     outputs="text",
-    title="TransformoDocs - AI OCR",
-    description="Upload a handwritten document and get the extracted text.",
+    title="Handwritten OCR Extraction",
+    description="Upload a handwritten image to extract text using AI OCR.",
 )
 
 iface.launch()
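
With the new gr.Image(type="pil") input, Gradio passes extract_text a PIL.Image rather than a NumPy array or a file path, so the ndarray branch no longer fires for uploads. A minimal normalization sketch covering all three input kinds follows; the helper name to_rgb_image is hypothetical and not part of this commit.

from PIL import Image
import numpy as np

def to_rgb_image(image):
    # Hypothetical helper (not in the commit): normalize ndarray, PIL.Image,
    # or file-path inputs to an RGB PIL image, mirroring extract_text's branches.
    if isinstance(image, np.ndarray):
        if image.ndim == 2:  # Grayscale (H, W): replicate into 3 channels
            image = np.stack([image] * 3, axis=-1)
        return Image.fromarray(image).convert("RGB")
    if isinstance(image, Image.Image):  # What gr.Image(type="pil") delivers
        return image.convert("RGB")
    return Image.open(image).convert("RGB")  # Assume a path or file object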
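
A quick local smoke test of the updated function, assuming app.py has been executed in the current session; the sample file path below is hypothetical.

import numpy as np
from PIL import Image

# Synthetic grayscale (H, W) array exercises the channel-replication branch;
# a near-blank page may also trigger the <2-character fallback to model2.
blank = np.full((64, 256), 255, dtype=np.uint8)
print(extract_text(blank))

# A real sample goes through the RGB ndarray path end to end.
sample = np.array(Image.open("sample_handwritten.png").convert("RGB"))
print(extract_text(sample))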