jadechoghari committed
Commit
f26ac1a
1 Parent(s): da21963

Update inference.py

Files changed (1)
  1. inference.py +8 -8
inference.py CHANGED
@@ -1,9 +1,10 @@
 import subprocess
 import os
 import subprocess
-from PIL import Image
+from PIL import Image, ImageDraw
 import re
 import json
+import subprocess
 
 def process_inference_results(results):
     """
@@ -20,23 +21,21 @@ def process_inference_results(results):
     for result in results:
         image_path = result['image_path']
         img = Image.open(image_path).convert("RGB")
+        draw = ImageDraw.Draw(img)
 
-        # this no more than extracts bounding box coordinates from the 'text'
         bbox_str = re.search(r'\[\[([0-9,\s]+)\]\]', result['text'])
         if bbox_str:
             bbox = [int(coord) for coord in bbox_str.group(1).split(',')]
             x1, y1, x2, y2 = bbox
 
-            # Draw the bounding box on the image (optional if needed later)
-            # draw = ImageDraw.Draw(img)
-            # draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
+            draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
 
         extracted_texts.append(result['text'])
 
         processed_images.append(img)
 
-    return processed_images[0], extracted_texts[0]
-
+    return processed_images, extracted_texts
+
 def inference_and_run(image_path, prompt, conv_mode="ferret_gemma_instruct", model_path="jadechoghari/Ferret-UI-Gemma2b", box=None):
     """
     Run the inference and capture the errors for debugging.
@@ -64,13 +63,14 @@ def inference_and_run(image_path, prompt, conv_mode="ferret_gemma_instruct", mod
         "--image_path", ".",
         "--answers_file", "eval_output.jsonl",
         "--num_beam", "1",
-        "--max_new_tokens", "1024",
+        "--max_new_tokens", "32",
         "--conv_mode", conv_mode
     ]
 
     if box:
         cmd.extend(["--region_format", "box", "--add_region_feature"])
 
+    try:
     result = subprocess.run(cmd, check=True, capture_output=True, text=True)
     print(f"Subprocess output:\n{result.stdout}")
     print(f"Subprocess error (if any):\n{result.stderr}")