Pectics committed on
Commit
a2fc719
·
verified ·
1 Parent(s): aa819ab

asnyc trial

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -18,9 +18,9 @@ max_pixels = 1280 * 28 * 28
18
  processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_path, min_pixels=min_pixels, max_pixels=max_pixels)
19
 
20
  @GPU
21
- def infer(inputs: BatchFeature, **kwargs):
22
  inputs = inputs.to("cuda")
23
- Thread(target=model.generate, kwargs=kwargs).start()
24
 
25
  def respond(
26
  message,
@@ -45,13 +45,12 @@ def respond(
45
  )
46
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
47
  kwargs = dict(
48
- inputs=inputs,
49
  streamer=streamer,
50
  max_new_tokens=max_tokens,
51
  temperature=temperature,
52
  top_p=top_p,
53
  )
54
- infer(**kwargs)
55
  response = ""
56
  for token in streamer:
57
  response += token
 
18
  processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_path, min_pixels=min_pixels, max_pixels=max_pixels)
19
 
20
  @GPU
21
+ async def infer(inputs: BatchFeature, **kwargs):
22
  inputs = inputs.to("cuda")
23
+ model.generate(**inputs, **kwargs)
24
 
25
  def respond(
26
  message,
 
45
  )
46
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
47
  kwargs = dict(
 
48
  streamer=streamer,
49
  max_new_tokens=max_tokens,
50
  temperature=temperature,
51
  top_p=top_p,
52
  )
53
+ infer(inputs, **kwargs)
54
  response = ""
55
  for token in streamer:
56
  response += token