lkhl committed on
Commit
16a7483
·
verified ·
1 Parent(s): 5629617

Update processing_videollama3.py

Browse files
Files changed (1) hide show
  1. processing_videollama3.py +9 -8
processing_videollama3.py CHANGED
@@ -680,14 +680,15 @@ class Videollama3Qwen2Processor(ProcessorMixin):
680
  kwargs.pop("padding")
681
  kwargs.pop("padding_side")
682
 
683
- image_idx = 0
684
- while DEFAULT_IMAGE_TOKEN in text:
685
- num_tokens = self._get_visual_seq_len(grid_sizes[image_idx])
686
- text = text.replace(DEFAULT_IMAGE_TOKEN, "<placeholder>" * num_tokens, 1)
687
- image_idx += 1
688
- text = text.replace("<placeholder>", DEFAULT_IMAGE_TOKEN)
689
-
690
- assert len(grid_sizes) == image_idx, "Number of images does not match the number of image tokens in the text."
 
691
 
692
  text_inputs = self.tokenizer(text, **kwargs)
693
  return text_inputs
 
680
  kwargs.pop("padding")
681
  kwargs.pop("padding_side")
682
 
683
+ if len(grid_sizes) > 0:
684
+ image_idx = 0
685
+ while DEFAULT_IMAGE_TOKEN in text:
686
+ num_tokens = self._get_visual_seq_len(grid_sizes[image_idx])
687
+ text = text.replace(DEFAULT_IMAGE_TOKEN, "<placeholder>" * num_tokens, 1)
688
+ image_idx += 1
689
+ text = text.replace("<placeholder>", DEFAULT_IMAGE_TOKEN)
690
+
691
+ assert len(grid_sizes) == image_idx, "Number of images does not match the number of image tokens in the text."
692
 
693
  text_inputs = self.tokenizer(text, **kwargs)
694
  return text_inputs