DeepDiveDev committed on
Commit e6b9318 · verified · 1 Parent(s): 3a8de33

Update app.py

Files changed (1): app.py +35 -14
app.py CHANGED
@@ -2,23 +2,44 @@ import gradio as gr
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
  from PIL import Image
  import numpy as np
- import requests
+ import torch

- # Load your model from Hugging Face
- processor = TrOCRProcessor.from_pretrained("DeepDiveDev/transformodocs-ocr")
- model = VisionEncoderDecoderModel.from_pretrained("DeepDiveDev/transformodocs-ocr")
+ # Load the primary model (DeepDiveDev/transformodocs-ocr)
+ processor1 = TrOCRProcessor.from_pretrained("DeepDiveDev/transformodocs-ocr")
+ model1 = VisionEncoderDecoderModel.from_pretrained("DeepDiveDev/transformodocs-ocr")

- # Function to extract text
+ # Load the fallback model (allenai/olmOCR-7B-0225-preview)
+ processor2 = TrOCRProcessor.from_pretrained("allenai/olmOCR-7B-0225-preview")
+ model2 = VisionEncoderDecoderModel.from_pretrained("allenai/olmOCR-7B-0225-preview")
+
+ # Function to extract text using both models
  def extract_text(image):
-     if isinstance(image, np.ndarray):  # Check if input is a NumPy array
-         image = Image.fromarray(image)  # Convert NumPy array to PIL Image
-     else:
-         image = Image.open(image).convert("RGB")  # Open normally if not a NumPy array
-
-     pixel_values = processor(images=image, return_tensors="pt").pixel_values
-     generated_ids = model.generate(pixel_values)
-     extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-     return extracted_text
+     try:
+         # Convert input to PIL Image
+         if isinstance(image, np.ndarray):
+             image = Image.fromarray(image)
+         else:
+             image = Image.open(image).convert("RGB")
+
+         # Preprocessing
+         image = image.convert("L")  # Convert to grayscale for better OCR
+         image = image.resize((640, 640))  # Resize to improve accuracy
+
+         # Process with the primary model
+         pixel_values = processor1(images=image, return_tensors="pt").pixel_values
+         generated_ids = model1.generate(pixel_values)
+         extracted_text = processor1.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+         # If output seems incorrect, use the fallback model
+         if len(extracted_text.strip()) < 2:  # If output is too short, retry with second model
+             pixel_values = processor2(images=image, return_tensors="pt").pixel_values
+             generated_ids = model2.generate(pixel_values)
+             extracted_text = processor2.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+         return extracted_text
+
+     except Exception as e:
+         return f"Error: {str(e)}"

  # Gradio Interface
  iface = gr.Interface(
@@ -29,4 +50,4 @@ iface = gr.Interface(
      description="Upload a handwritten document and get the extracted text.",
  )

- iface.launch()
+ iface.launch()
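
For local testing outside the Gradio Space, the primary-then-fallback flow added in this commit can be exercised on its own. The sketch below is illustrative only: the repo IDs and the length-based retry check come from the diff above, while the run_trocr / ocr_with_fallback helper names, the min_chars parameter, and the sample file name are hypothetical. It also carries over the commit's assumption that allenai/olmOCR-7B-0225-preview loads through TrOCRProcessor and VisionEncoderDecoderModel, which may not hold for that checkpoint.

# Standalone sketch of the primary-then-fallback OCR flow from this commit.
# Assumptions (not part of the commit): the helper names, the min_chars
# parameter, and that the fallback repo loads with the TrOCR classes at all.
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

PRIMARY_REPO = "DeepDiveDev/transformodocs-ocr"
FALLBACK_REPO = "allenai/olmOCR-7B-0225-preview"


def run_trocr(repo_id: str, image: Image.Image) -> str:
    """Run one checkpoint over a PIL image and decode the generated text."""
    processor = TrOCRProcessor.from_pretrained(repo_id)
    model = VisionEncoderDecoderModel.from_pretrained(repo_id)
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def ocr_with_fallback(image: Image.Image, min_chars: int = 2) -> str:
    """Try the primary checkpoint; retry with the fallback if the text is too short."""
    image = image.convert("RGB")  # TrOCR processors expect 3-channel input
    text = run_trocr(PRIMARY_REPO, image)
    if len(text.strip()) < min_chars:  # same short-output heuristic as the commit
        text = run_trocr(FALLBACK_REPO, image)
    return text


if __name__ == "__main__":
    page = Image.open("sample_handwriting.png")  # hypothetical test scan
    print(ocr_with_fallback(page))

Reloading both checkpoints on every call keeps the sketch self-contained; the committed app.py instead instantiates processor1/model1 and processor2/model2 once at import time, which is the better trade-off for a long-lived Space.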