File size: 3,419 Bytes
744d366 73d385f 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 6d56da9 744d366 73d385f 744d366 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import subprocess
import os
import subprocess
from PIL import Image
import re
import json
def process_inference_results(results):
    """
    Load the image and text of the first inference result.

    Only the first entry of ``results`` is used, because a single
    (image, text) pair is returned. The image is opened from the entry's
    'image_path' and converted to RGB. The entry's 'text' is returned
    unchanged, including any ``[[x1, y1, x2, y2]]`` coordinates embedded
    in it. No bounding box is drawn on the image.

    :param results: List of inference result dicts, each providing the
        keys 'image_path' and 'text'.
    :return: (image, text) for the first result, or (None, None) when
        ``results`` is empty.
    """
    # Guard against an empty result list instead of failing on index 0.
    if not results:
        return None, None

    first = results[0]
    # Convert inside the context manager so the underlying file handle is
    # released as soon as the RGB copy exists.
    with Image.open(first['image_path']) as source_image:
        image = source_image.convert("RGB")
    return image, first['text']
def inference_and_run(
    image_path,
    prompt,
    conv_mode="ferret_gemma_instruct",
    model_path="jadechoghari/Ferret-UI-Gemma2b",
    box=None,
):
    """
    Run the ``model_UI`` module on one image and return its first result.

    The request is written to ``eval.json`` in the current working
    directory, ``python -m model_UI`` is run as a subprocess, and the
    answers are read back from ``eval_output.jsonl``. Subprocess output is
    printed to help with debugging.

    NOTE(review): the image is referenced by its base name and the
    subprocess receives ``--image_path .``, so the image presumably has to
    be reachable from the current working directory; confirm against
    ``model_UI``.

    :param image_path: Path to the input image; opened here to read its
        width and height.
    :param prompt: Text of the human turn, prefixed with the ``<image>``
        tag.
    :param conv_mode: Value forwarded as ``--conv_mode``.
    :param model_path: Value forwarded as ``--model_path``.
    :param box: Optional region. When truthy it is stored under
        'box_x1y1x2y2' and the region flags are added to the command.
        Presumably ``[x1, y1, x2, y2]``; verify against ``model_UI``.
    :return: The (image, text) pair from ``process_inference_results``, or
        (None, None) when the subprocess fails or no results are found.
    """
    # Read both dimensions from a single open so the file handle is
    # closed right away instead of being left to the garbage collector.
    with Image.open(image_path) as source_image:
        image_w, image_h = source_image.size

    data_input = [{
        "id": 0,
        "image": os.path.basename(image_path),
        "image_h": image_h,
        "image_w": image_w,
        "conversations": [{"from": "human", "value": f"<image>\n{prompt}"}]
    }]
    if box:
        data_input[0]["box_x1y1x2y2"] = [[box]]

    with open("eval.json", "w") as json_file:
        json.dump(data_input, json_file)
    print("eval.json file created successfully.")

    answers_path = 'eval_output.jsonl'
    cmd = [
        "python", "-m", "model_UI",
        "--model_path", model_path,
        "--data_path", "eval.json",
        "--image_path", ".",
        "--answers_file", answers_path,
        "--num_beam", "1",
        "--max_new_tokens", "1024",
        "--conv_mode", conv_mode
    ]
    if box:
        cmd.extend(["--region_format", "box", "--add_region_feature"])

    # Only the subprocess call can raise CalledProcessError, so keep the
    # try body limited to it.
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error occurred during inference:\n{e}")
        print(f"Subprocess output:\n{e.output}")
        # stderr usually carries the traceback of the failed run.
        print(f"Subprocess error (if any):\n{e.stderr}")
        return None, None

    print(f"Subprocess output:\n{result.stdout}")
    print(f"Subprocess error (if any):\n{result.stderr}")
    print("Inference completed. Output written to eval_output.jsonl")

    # Whether model_UI writes a single file or a folder of chunk files at
    # this path is not visible from here, so accept both layouts.
    if os.path.isdir(answers_path):
        # Sort for a deterministic pick; os.listdir order is arbitrary.
        json_files = sorted(
            name for name in os.listdir(answers_path) if name.endswith(".jsonl")
        )
        if not json_files:
            print("No output JSONL files found.")
            return None, None
        output_file_path = os.path.join(answers_path, json_files[0])
    elif os.path.isfile(answers_path):
        output_file_path = answers_path
    else:
        print("Output folder not found.")
        return None, None

    with open(output_file_path, "r") as output_file:
        # Skip blank lines (e.g. a trailing newline) that are not JSON.
        results = [json.loads(line) for line in output_file if line.strip()]

    if not results:
        print("No inference results found in the output file.")
        return None, None
    return process_inference_results(results)
|