Pectics committed on
Commit
469e885
·
verified ·
1 Parent(s): 374e122

Re-enable TOKENIZERS_PARALLELISM

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -19,12 +19,18 @@ processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_path, min_pixe
19
 
20
  @GPU
21
  def infer(
22
- inputs: BatchFeature,
23
  max_tokens: int,
24
  temperature: float,
25
  top_p: float,
26
  ):
27
- inputs = inputs.to("cuda")
 
 
 
 
 
 
28
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
29
  kwargs = dict(
30
  **inputs,
@@ -53,14 +59,7 @@ def respond(
53
  messages.append({"role": "user", "content": message})
54
  text_inputs = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
55
  image_inputs, video_inputs = process_vision_info(messages)
56
- inputs = processor(
57
- text = [text_inputs],
58
- images = image_inputs,
59
- videos = video_inputs,
60
- padding = True,
61
- return_tensors = "pt",
62
- )
63
- for response in infer(inputs, max_tokens, temperature, top_p):
64
  yield response
65
 
66
  app = ChatInterface(
 
19
 
20
  @GPU
21
  def infer(
22
+ inputs: tuple,
23
  max_tokens: int,
24
  temperature: float,
25
  top_p: float,
26
  ):
27
+ inputs = processor(
28
+ text=[inputs[0]],
29
+ images=inputs[1],
30
+ videos=inputs[2],
31
+ padding=True,
32
+ return_tensors="pt",
33
+ ).to("cuda")
34
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
35
  kwargs = dict(
36
  **inputs,
 
59
  messages.append({"role": "user", "content": message})
60
  text_inputs = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
61
  image_inputs, video_inputs = process_vision_info(messages)
62
+ for response in infer((text_inputs, image_inputs, video_inputs), max_tokens, temperature, top_p):
 
 
 
 
 
 
 
63
  yield response
64
 
65
  app = ChatInterface(