Pectics committed on
Commit
af0c8f0
·
verified ·
1 Parent(s): a2fc719

Embedded generator

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -18,9 +18,14 @@ max_pixels = 1280 * 28 * 28
18
  processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_path, min_pixels=min_pixels, max_pixels=max_pixels)
19
 
20
  @GPU
21
- async def infer(inputs: BatchFeature, **kwargs):
22
  inputs = inputs.to("cuda")
23
- model.generate(**inputs, **kwargs)
 
 
 
 
 
24
 
25
  def respond(
26
  message,
@@ -50,10 +55,7 @@ def respond(
50
  temperature=temperature,
51
  top_p=top_p,
52
  )
53
- infer(inputs, **kwargs)
54
- response = ""
55
- for token in streamer:
56
- response += token
57
  yield response
58
 
59
  app = ChatInterface(
 
18
  processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_path, min_pixels=min_pixels, max_pixels=max_pixels)
19
 
20
@GPU
def infer(inputs: BatchFeature, streamer, kwargs: dict):
    """Stream a model response, yielding the text accumulated so far.

    Runs ``model.generate`` in a background thread (it blocks until
    generation finishes) while this generator consumes tokens from the
    streamer and yields the growing response string.

    Args:
        inputs: Processed model inputs (a ``BatchFeature``); moved to
            CUDA here before generation.
        streamer: Token iterator that ``model.generate`` feeds
            (presumably a ``TextIteratorStreamer`` — confirm at caller).
        kwargs: Extra keyword arguments forwarded to ``model.generate``
            (e.g. sampling settings).

    Yields:
        str: The response text accumulated so far, once per token.
    """
    inputs = inputs.to("cuda")
    # generate() blocks until done, so run it in a worker thread and
    # consume the streamer concurrently on this one.
    thread = Thread(target=model.generate, kwargs={**inputs, **kwargs})
    thread.start()
    try:
        response = ""
        for token in streamer:
            response += token
            yield response
    finally:
        # Always reap the worker, even if the consumer stops iterating
        # early (e.g. client disconnect); otherwise the generation
        # thread is orphaned and its failures are silently lost.
        thread.join()
29
 
30
  def respond(
31
  message,
 
55
  temperature=temperature,
56
  top_p=top_p,
57
  )
58
+ for response in infer(inputs, streamer, kwargs):
 
 
 
59
  yield response
60
 
61
  app = ChatInterface(