Vinay15 committed on
Commit
2b3d2ae
·
verified ·
1 Parent(s): 66ae2fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -39
app.py CHANGED
@@ -1,46 +1,38 @@
1
  import gradio as gr
2
- import torch
3
- from transformers import AutoModel, AutoTokenizer
4
  from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- # Check GPU availability
7
- if torch.cuda.is_available():
8
- print("CUDA is available! GPU is present.")
9
- print(f"Number of GPUs: {torch.cuda.device_count()}")
10
- print(f"GPU Name: {torch.cuda.get_device_name(0)}")
11
- else:
12
- print("CUDA is not available. Running on CPU.")
13
-
14
- # Load the tokenizer and model
15
- tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
16
-
17
- # Initialize the model
18
- if torch.cuda.is_available():
19
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
20
- model = model.eval().cuda()
21
- else:
22
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, pad_token_id=tokenizer.eos_token_id)
23
- model = model.eval() # Keep model on CPU
24
-
25
- # Define the OCR function
26
  def perform_ocr(image):
27
- # Convert PIL image to RGB format (if necessary)
28
- if image.mode != "RGB":
29
- image = image.convert("RGB")
30
-
31
- # Perform OCR using the model
32
- res = model.chat(tokenizer, image, ocr_type='ocr')
33
-
34
- return res
35
-
36
- # Define the Gradio interface
37
- interface = gr.Interface(
 
38
  fn=perform_ocr,
39
- inputs=gr.Image(type="pil", label="Upload Image"),
40
- outputs=gr.Textbox(label="Extracted Text"),
41
- title="OCR and Document Search Web Application",
42
- description="Upload an image to extract text using the GOT-OCR2_0 model."
43
  )
44
 
45
- # Launch the Gradio app
46
- interface.launch()
 
1
  import gradio as gr
 
 
2
  from PIL import Image
3
+ # Assuming 'model' and 'tokenizer' are defined elsewhere in your code
4
+ # from your_model_file import model, tokenizer
5
+
6
def load_image(image_file, *, timeout=10.0):
    """Load an image from one of several input kinds and return it in RGB mode.

    Args:
        image_file: A ``PIL.Image.Image`` instance, an ``http://`` or
            ``https://`` URL string, or anything ``Image.open`` accepts
            (for example a filesystem path or a binary file object).
        timeout: Seconds to wait for the remote server when ``image_file``
            is a URL. Ignored for the other input kinds.

    Returns:
        The image converted to "RGB" mode.

    Raises:
        urllib.error.URLError: If the URL cannot be fetched.
        OSError: If the source cannot be opened or decoded as an image.
    """
    # Imported locally so the function is self-contained: the module does not
    # import `requests` (which the previous version called), and the standard
    # library is sufficient for a plain GET.
    import io
    from urllib.request import urlopen

    if isinstance(image_file, Image.Image):
        return image_file.convert("RGB")

    # Require a full scheme prefix so a local file such as "http_scan.png"
    # is not mistaken for a URL.
    if isinstance(image_file, str) and image_file.startswith(("http://", "https://")):
        with urlopen(image_file, timeout=timeout) as response:
            source = io.BytesIO(response.read())
    else:
        source = image_file

    # The context manager releases any file handle Pillow opened itself;
    # convert() returns an independent image that stays valid afterwards.
    with Image.open(source) as opened:
        return opened.convert("RGB")
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def perform_ocr(image):
    """Run OCR on an uploaded image and return the result as text.

    Args:
        image: The value supplied by the Gradio image input. It is ``None``
            when the user submits without uploading anything; otherwise it is
            forwarded to ``load_image``.

    Returns:
        The value returned by ``model.chat(...)`` on success, or a
        human-readable message string when no image was supplied or when
        loading / recognition failed.

    NOTE(review): ``model`` and ``tokenizer`` must exist at module scope; they
    are not defined in the visible part of this file - confirm they are
    imported or created before the interface is launched.
    """
    if image is None:
        # Give a clear message rather than leaking the low-level exception
        # text that opening ``None`` would otherwise produce.
        return "No image provided. Please upload an image."

    try:
        processed_image = load_image(image)
        return model.chat(tokenizer, processed_image, ocr_type='ocr')
    except Exception as e:
        # UI boundary: keep returning the message so the app stays usable,
        # but record the full traceback so the failure can be diagnosed.
        import logging

        logging.getLogger(__name__).exception("OCR failed")
        return str(e)
27
+
28
# Wire the OCR callback into a single-input, single-output Gradio UI.
# Positional order is (fn, inputs, outputs); the image component is asked
# to deliver uploads as PIL images and the result is shown as plain text.
iface = gr.Interface(
    perform_ocr,
    gr.Image(type="pil"),
    "text",
    title="OCR Application",
    description="Upload an image to perform Optical Character Recognition (OCR).",
)


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()