Tonic committed on
Commit
d1d0907
·
unverified ·
1 Parent(s): aebaa46

remove loadimg

Browse files
Files changed (1) hide show
  1. app.py +17 -19
app.py CHANGED
@@ -4,7 +4,7 @@ from transformers import AutoModel, AutoTokenizer, AutoConfig
4
  import os
5
  import base64
6
  import spaces
7
- from loadimg import load_img
8
  from PIL import Image
9
  import numpy as np
10
 
@@ -47,34 +47,32 @@ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
47
  model = model.eval().cuda()
48
  model.config.pad_token_id = tokenizer.eos_token_id
49
 
50
def load_image(image_file):
    """Return ``image_file`` as an RGB image.

    Args:
        image_file: Either a string (an ``http://`` / ``https://`` URL or a
            local file path) or an already-loaded image object exposing
            ``convert`` (e.g. a PIL image).

    Returns:
        The result of calling ``.convert('RGB')`` on the loaded image.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an HTTP error status.
        OSError: The local file or downloaded payload could not be opened
            as an image.
    """
    # Anything that is not a string is assumed to already be an image.
    if not isinstance(image_file, str):
        return image_file.convert('RGB')

    # Match the full scheme: a bare 'http' prefix would also capture local
    # files such as "http_capture.png" and try to download them.
    if image_file.startswith(('http://', 'https://')):
        import io  # local import keeps this block self-contained

        # A timeout stops a stalled server from blocking the request
        # indefinitely, and the context manager releases the connection.
        with requests.get(image_file, timeout=30) as response:
            response.raise_for_status()
            # ``response.content`` is transfer-decoded, unlike ``response.raw``.
            payload = io.BytesIO(response.content)
        return Image.open(payload).convert('RGB')

    return Image.open(image_file).convert('RGB')
58
 
59
  @spaces.GPU
60
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None, render=False):
61
  try:
62
- img = load_image(image)
63
- img_path = "/tmp/temp_image.png"
64
- img.save(img_path)
 
 
65
 
66
  if task == "Plain Text OCR":
67
- res = model.chat(tokenizer, img_path, ocr_type='ocr')
68
  elif task == "Format Text OCR":
69
- res = model.chat(tokenizer, img_path, ocr_type='format')
70
  elif task == "Fine-grained OCR (Box)":
71
- res = model.chat(tokenizer, img_path, ocr_type=ocr_type, ocr_box=ocr_box)
72
  elif task == "Fine-grained OCR (Color)":
73
- res = model.chat(tokenizer, img_path, ocr_type=ocr_type, ocr_color=ocr_color)
74
  elif task == "Multi-crop OCR":
75
- res = model.chat_crop(tokenizer, image_file=img_path)
76
  elif task == "Render Formatted OCR":
77
- res = model.chat(tokenizer, img_path, ocr_type='format', render=True, save_render_file='./results/demo.html')
78
  with open('./results/demo.html', 'r') as f:
79
  html_content = f.read()
80
  return res, html_content
@@ -147,7 +145,7 @@ with gr.Blocks() as demo:
147
  )
148
  render_checkbox = gr.Checkbox(
149
  label="Render Result",
150
- visible=False
151
  )
152
  submit_button = gr.Button("Process")
153
 
 
4
  import os
5
  import base64
6
  import spaces
7
+ import io
8
  from PIL import Image
9
  import numpy as np
10
 
 
47
  model = model.eval().cuda()
48
  model.config.pad_token_id = tokenizer.eos_token_id
49
 
50
def image_to_base64(image):
    """Serialise an image to PNG and return the bytes as base64 text.

    Args:
        image: An object with PIL-style ``save(fp, format=...)`` and, when it
            exposes a ``mode`` attribute, ``convert(mode)`` (e.g. a PIL image).

    Returns:
        str: The base64 encoding of the PNG bytes, without any data-URI
        prefix.
    """
    # Modes Pillow's PNG writer accepts. Anything else (CMYK, YCbCr, HSV,
    # LAB, F, ...) makes ``save`` raise OSError, so normalise it to RGB first.
    png_modes = frozenset(
        {"1", "L", "LA", "I", "I;16", "I;16B", "P", "RGB", "RGBA"}
    )
    mode = getattr(image, "mode", None)
    if mode is not None and mode not in png_modes:
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode()
 
 
 
 
54
 
55
  @spaces.GPU
56
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None, render=False):
57
  try:
58
+ if image is None:
59
+ return "No image provided", None
60
+
61
+ # Convert image to base64
62
+ image_base64 = image_to_base64(image)
63
 
64
  if task == "Plain Text OCR":
65
+ res = model.chat(tokenizer, image_base64, ocr_type='ocr')
66
  elif task == "Format Text OCR":
67
+ res = model.chat(tokenizer, image_base64, ocr_type='format')
68
  elif task == "Fine-grained OCR (Box)":
69
+ res = model.chat(tokenizer, image_base64, ocr_type=ocr_type, ocr_box=ocr_box)
70
  elif task == "Fine-grained OCR (Color)":
71
+ res = model.chat(tokenizer, image_base64, ocr_type=ocr_type, ocr_color=ocr_color)
72
  elif task == "Multi-crop OCR":
73
+ res = model.chat_crop(tokenizer, image_file=image_base64)
74
  elif task == "Render Formatted OCR":
75
+ res = model.chat(tokenizer, image_base64, ocr_type='format', render=True, save_render_file='./results/demo.html')
76
  with open('./results/demo.html', 'r') as f:
77
  html_content = f.read()
78
  return res, html_content
 
145
  )
146
  render_checkbox = gr.Checkbox(
147
  label="Render Result",
148
+ visible=False
149
  )
150
  submit_button = gr.Button("Process")
151