DeepDiveDev committed on
Commit
0b73000
·
verified ·
1 Parent(s): 6477a5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -34
app.py CHANGED
@@ -1,48 +1,33 @@
1
  import gradio as gr
2
- import torch
3
- import numpy as np
4
- from PIL import Image
5
- from transformers import AutoProcessor, AutoModelForVision2Seq
6
-
7
- # Load the model & processor
8
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 
9
 
 
10
# Load the TrOCR processor and model once at startup.
# BUG FIX: the original loaded the same checkpoint twice — first via
# TrOCRProcessor/VisionEncoderDecoderModel, then immediately overwrote both
# with AutoProcessor/AutoModelForVision2Seq. The Auto classes resolve to the
# same concrete classes for this checkpoint, so the second load was a pure
# redundant download/initialization; load once instead.
model_name = "microsoft/trocr-large-handwritten"
processor = TrOCRProcessor.from_pretrained(model_name)
model = VisionEncoderDecoderModel.from_pretrained(model_name)
16
-
17
def extract_text(image):
    """Run TrOCR on an uploaded image and return the recognized text.

    Accepts either a numpy array (grayscale (H, W) arrays are expanded to
    three channels) or a path/file-like object that PIL can open. Any
    failure is reported back to the UI as an error string rather than
    raised, so the Gradio app keeps running.
    """
    try:
        # Normalize the input into a PIL image the processor can consume.
        if isinstance(image, np.ndarray):
            if image.ndim == 2:
                # Replicate the single gray channel into R, G and B.
                image = np.stack([image] * 3, axis=-1)
            pil_image = Image.fromarray(image)
        else:
            pil_image = Image.open(image).convert("RGB")

        # Encode the image, generate token ids, and decode them to text.
        inputs = processor(images=pil_image, return_tensors="pt")
        generated_ids = model.generate(inputs.pixel_values)
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
        return decoded[0]

    except Exception as e:
        return f"Error: {str(e)}"
 
 
 
 
 
 
37
 
38
# Gradio UI: an image-upload widget wired to the OCR function.
_ui_config = {
    "fn": extract_text,
    "inputs": "image",
    "outputs": "text",
    "title": "Handwritten Text OCR",
    "description": "Upload a handwritten document and extract text using AI.",
}
iface = gr.Interface(**_ui_config)

# Serve the demo.
iface.launch()
 
1
  import gradio as gr
 
 
 
 
 
 
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
+ from PIL import Image
4
 
5
# Load the pretrained TrOCR checkpoint once at startup:
# the processor (image preprocessing + tokenizer) and the
# encoder-decoder recognition model.
model_name = "microsoft/trocr-large-handwritten"
processor, model = (
    TrOCRProcessor.from_pretrained(model_name),
    VisionEncoderDecoderModel.from_pretrained(model_name),
)
9
 
10
def ocr_recognition(image):
    """Extract handwritten text from an image using the TrOCR model.

    Args:
        image: Either a PIL.Image.Image (what gr.Image(type="pil")
            delivers) or a path/file-like object that PIL can open.

    Returns:
        The decoded text string produced by the model.
    """
    # BUG FIX: the Gradio interface uses gr.Image(type="pil"), so this
    # function receives a PIL image — Image.open(image) would raise
    # (it expects a path or file object). Only open when we were given
    # something that is not already a PIL image.
    if not isinstance(image, Image.Image):
        image = Image.open(image)
    image = image.convert("RGB")

    # Preprocess the image into model pixel values and generate token ids.
    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)

    # Decode the generated ids back into a text string.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
22
 
23
# Gradio front-end: wire the OCR function to an image-upload widget.
_interface_kwargs = dict(
    fn=ocr_recognition,
    inputs=gr.Image(type="pil"),  # deliver the upload as a PIL image
    outputs="text",
    title="Handwritten OCR Extraction",
    description="Upload a handwritten image to extract text using TrOCR."
)
iface = gr.Interface(**_interface_kwargs)

# Serve the app.
iface.launch()