jadechoghari
committed on
Commit
•
f26ac1a
1
Parent(s):
da21963
Update inference.py
Browse files- inference.py +8 -8
inference.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
import subprocess
|
2 |
import os
|
3 |
import subprocess
|
4 |
-
from PIL import Image
|
5 |
import re
|
6 |
import json
|
|
|
7 |
|
8 |
def process_inference_results(results):
|
9 |
"""
|
@@ -20,23 +21,21 @@ def process_inference_results(results):
|
|
20 |
for result in results:
|
21 |
image_path = result['image_path']
|
22 |
img = Image.open(image_path).convert("RGB")
|
|
|
23 |
|
24 |
-
# this no more than extracts bounding box coordinates from the 'text'
|
25 |
bbox_str = re.search(r'\[\[([0-9,\s]+)\]\]', result['text'])
|
26 |
if bbox_str:
|
27 |
bbox = [int(coord) for coord in bbox_str.group(1).split(',')]
|
28 |
x1, y1, x2, y2 = bbox
|
29 |
|
30 |
-
|
31 |
-
# draw = ImageDraw.Draw(img)
|
32 |
-
# draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
|
33 |
|
34 |
extracted_texts.append(result['text'])
|
35 |
|
36 |
processed_images.append(img)
|
37 |
|
38 |
-
return processed_images
|
39 |
-
|
40 |
def inference_and_run(image_path, prompt, conv_mode="ferret_gemma_instruct", model_path="jadechoghari/Ferret-UI-Gemma2b", box=None):
|
41 |
"""
|
42 |
Run the inference and capture the errors for debugging.
|
@@ -64,13 +63,14 @@ def inference_and_run(image_path, prompt, conv_mode="ferret_gemma_instruct", mod
|
|
64 |
"--image_path", ".",
|
65 |
"--answers_file", "eval_output.jsonl",
|
66 |
"--num_beam", "1",
|
67 |
-
"--max_new_tokens", "
|
68 |
"--conv_mode", conv_mode
|
69 |
]
|
70 |
|
71 |
if box:
|
72 |
cmd.extend(["--region_format", "box", "--add_region_feature"])
|
73 |
|
|
|
74 |
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
75 |
print(f"Subprocess output:\n{result.stdout}")
|
76 |
print(f"Subprocess error (if any):\n{result.stderr}")
|
|
|
1 |
import subprocess
|
2 |
import os
|
3 |
import subprocess
|
4 |
+
from PIL import Image, ImageDraw
|
5 |
import re
|
6 |
import json
|
7 |
+
import subprocess
|
8 |
|
9 |
def process_inference_results(results):
|
10 |
"""
|
|
|
21 |
for result in results:
|
22 |
image_path = result['image_path']
|
23 |
img = Image.open(image_path).convert("RGB")
|
24 |
+
draw = ImageDraw.Draw(img)
|
25 |
|
|
|
26 |
bbox_str = re.search(r'\[\[([0-9,\s]+)\]\]', result['text'])
|
27 |
if bbox_str:
|
28 |
bbox = [int(coord) for coord in bbox_str.group(1).split(',')]
|
29 |
x1, y1, x2, y2 = bbox
|
30 |
|
31 |
+
draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
|
|
|
|
|
32 |
|
33 |
extracted_texts.append(result['text'])
|
34 |
|
35 |
processed_images.append(img)
|
36 |
|
37 |
+
return processed_images, extracted_texts
|
38 |
+
|
39 |
def inference_and_run(image_path, prompt, conv_mode="ferret_gemma_instruct", model_path="jadechoghari/Ferret-UI-Gemma2b", box=None):
|
40 |
"""
|
41 |
Run the inference and capture the errors for debugging.
|
|
|
63 |
"--image_path", ".",
|
64 |
"--answers_file", "eval_output.jsonl",
|
65 |
"--num_beam", "1",
|
66 |
+
"--max_new_tokens", "32",
|
67 |
"--conv_mode", conv_mode
|
68 |
]
|
69 |
|
70 |
if box:
|
71 |
cmd.extend(["--region_format", "box", "--add_region_feature"])
|
72 |
|
73 |
+
try:
|
74 |
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
75 |
print(f"Subprocess output:\n{result.stdout}")
|
76 |
print(f"Subprocess error (if any):\n{result.stderr}")
|