jadechoghari committed
Commit
f26ac1a
1 Parent(s): da21963

Update inference.py

Files changed (1)
  1. inference.py +8 -8
inference.py CHANGED
@@ -1,9 +1,10 @@
 import subprocess
 import os
 import subprocess
-from PIL import Image
+from PIL import Image, ImageDraw
 import re
 import json
+import subprocess
 
 def process_inference_results(results):
     """
@@ -20,23 +21,21 @@ def process_inference_results(results):
     for result in results:
         image_path = result['image_path']
         img = Image.open(image_path).convert("RGB")
+        draw = ImageDraw.Draw(img)
 
-        # this no more than extracts bounding box coordinates from the 'text'
         bbox_str = re.search(r'\[\[([0-9,\s]+)\]\]', result['text'])
         if bbox_str:
             bbox = [int(coord) for coord in bbox_str.group(1).split(',')]
             x1, y1, x2, y2 = bbox
 
-            # Draw the bounding box on the image (optional if needed later)
-            # draw = ImageDraw.Draw(img)
-            # draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
+            draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
 
         extracted_texts.append(result['text'])
 
         processed_images.append(img)
 
-    return processed_images[0], extracted_texts[0]
-
+    return processed_images, extracted_texts
+
 def inference_and_run(image_path, prompt, conv_mode="ferret_gemma_instruct", model_path="jadechoghari/Ferret-UI-Gemma2b", box=None):
     """
     Run the inference and capture the errors for debugging.
@@ -64,13 +63,14 @@ def inference_and_run(image_path, prompt, conv_mode="ferret_gemma_instruct", mod
         "--image_path", ".",
         "--answers_file", "eval_output.jsonl",
         "--num_beam", "1",
-        "--max_new_tokens", "1024",
+        "--max_new_tokens", "32",
         "--conv_mode", conv_mode
     ]
 
     if box:
         cmd.extend(["--region_format", "box", "--add_region_feature"])
 
+    try:
     result = subprocess.run(cmd, check=True, capture_output=True, text=True)
     print(f"Subprocess output:\n{result.stdout}")
     print(f"Subprocess error (if any):\n{result.stderr}")