Spaces:
Running
on
L4
Running
on
L4
Upload utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def process_all_vision_info(messages, examples=None):
    """
    Collect every image referenced by messages and in-context examples.

    Both arguments accept a single input or a batch; each is normalized to
    batched form before processing.

    Args:
        messages: List of message dictionaries (single input) OR a list of
            message lists (batch input). Batch input is detected by the
            first element being a list.
        examples: Optional example dictionary or list of example
            dictionaries (single input) OR a list whose entries are example
            lists / None (batch input). Batch form is detected by the first
            entry being a list or None. A falsy value means "no examples".

    Returns:
        A flat list of all images in the correct order:
        - For single input: example images followed by message images
        - For batch input: interleaved as (item1 examples, item1 input,
          item2 examples, item2 input, etc.)
        - None if no images were found

    Raises:
        ValueError: If examples are given and, after normalization, their
            batch length differs from the messages batch length.
    """
    # Normalize inputs to always be batched format.
    is_batch = bool(messages) and isinstance(messages[0], list)
    messages_batch = messages if is_batch else [messages]

    if examples:
        is_batch_examples = isinstance(examples, list) and (
            isinstance(examples[0], list) or examples[0] is None
        )
        examples_batch = examples if is_batch_examples else [examples]
        if len(examples_batch) != len(messages_batch):
            raise ValueError("Examples batch length must match messages batch length")
    else:
        # No examples: pair every message group with an empty placeholder.
        examples_batch = [None] * len(messages_batch)

    # Imported only after the cheap shape validation above, so a malformed
    # call fails with ValueError without loading the vision utilities.
    from qwen_vl_utils import process_vision_info, fetch_image

    def extract_example_images(example_item):
        """Return fetched images for the image-typed examples in one batch slot."""
        if not example_item:
            return []

        # Handle both a list of examples and a single example.
        example_list = example_item if isinstance(example_item, list) else [example_item]
        return [
            fetch_image(example['input'])
            for example in example_list
            if isinstance(example.get('input'), dict)
            and example['input'].get('type') == 'image'
        ]

    # Process all inputs, keeping each item's example images ahead of its
    # own message images.
    all_images = []
    for message_group, example_item in zip(messages_batch, examples_batch):
        all_images.extend(extract_example_images(example_item))
        # The first element of process_vision_info's result is used as the
        # image list; a falsy value (e.g. None) is treated as "no images".
        all_images.extend(process_vision_info(message_group)[0] or [])

    return all_images or None
|