Spaces:
Sleeping
Sleeping
v1
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ freeze_model(meteor)
|
|
32 |
# previous length
|
33 |
previous_length = 0
|
34 |
|
35 |
-
def threading_function(inputs, image_token_number, streamer, device):
|
36 |
|
37 |
# Meteor Mamba
|
38 |
mmamba_inputs = mmamba.eval_process(inputs=inputs, tokenizer=tok_meteor, device=device, img_token_number=image_token_number)
|
@@ -50,14 +50,14 @@ def threading_function(inputs, image_token_number, streamer, device):
|
|
50 |
generation_kwargs = meteor_inputs
|
51 |
generation_kwargs.update({'streamer': streamer})
|
52 |
generation_kwargs.update({'do_sample': True})
|
53 |
-
generation_kwargs.update({'max_new_tokens':
|
54 |
-
generation_kwargs.update({'top_p':
|
55 |
-
generation_kwargs.update({'temperature':
|
56 |
generation_kwargs.update({'use_cache': True})
|
57 |
return meteor.generate(**generation_kwargs)
|
58 |
|
59 |
@spaces.GPU
|
60 |
-
def bot_streaming(message, history):
|
61 |
|
62 |
# param
|
63 |
for param in mmamba.parameters():
|
@@ -80,7 +80,13 @@ def bot_streaming(message, history):
|
|
80 |
streamer = TextIteratorStreamer(tok_meteor, skip_special_tokens=True)
|
81 |
|
82 |
# Threading generation
|
83 |
-
thread = Thread(target=threading_function, kwargs=dict(inputs=inputs,
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
thread.start()
|
85 |
|
86 |
# generated text
|
@@ -98,7 +104,9 @@ def bot_streaming(message, history):
|
|
98 |
time.sleep(0.02)
|
99 |
yield buffer
|
100 |
|
101 |
-
demo = gr.ChatInterface(fn=bot_streaming,
|
|
|
|
|
102 |
description="Meteor is efficient 7B size Large Language and Vision Model built on the help of traversal of rationale",
|
103 |
stop_btn="Stop Generation", multimodal=True)
|
104 |
demo.launch()
|
|
|
32 |
# previous length
|
33 |
previous_length = 0
|
34 |
|
35 |
+
def threading_function(inputs, image_token_number, streamer, device, temperature, new_max_token, top_p):
|
36 |
|
37 |
# Meteor Mamba
|
38 |
mmamba_inputs = mmamba.eval_process(inputs=inputs, tokenizer=tok_meteor, device=device, img_token_number=image_token_number)
|
|
|
50 |
generation_kwargs = meteor_inputs
|
51 |
generation_kwargs.update({'streamer': streamer})
|
52 |
generation_kwargs.update({'do_sample': True})
|
53 |
+
generation_kwargs.update({'max_new_tokens': new_max_token})
|
54 |
+
generation_kwargs.update({'top_p': top_p})
|
55 |
+
generation_kwargs.update({'temperature': temperature})
|
56 |
generation_kwargs.update({'use_cache': True})
|
57 |
return meteor.generate(**generation_kwargs)
|
58 |
|
59 |
@spaces.GPU
|
60 |
+
def bot_streaming(message, history, temperature, new_max_token, top_p):
|
61 |
|
62 |
# param
|
63 |
for param in mmamba.parameters():
|
|
|
80 |
streamer = TextIteratorStreamer(tok_meteor, skip_special_tokens=True)
|
81 |
|
82 |
# Threading generation
|
83 |
+
thread = Thread(target=threading_function, kwargs=dict(inputs=inputs,
|
84 |
+
image_token_number=image_token_number,
|
85 |
+
streamer=streamer,
|
86 |
+
device=accel.device,
|
87 |
+
temperature=temperature,
|
88 |
+
new_max_token=new_max_token,
|
89 |
+
top_p=top_p))
|
90 |
thread.start()
|
91 |
|
92 |
# generated text
|
|
|
104 |
time.sleep(0.02)
|
105 |
yield buffer
|
106 |
|
107 |
+
demo = gr.ChatInterface(fn=bot_streaming,
|
108 |
+
additional_inputs = [gr.Slider(0, 1, 0.9, label="temperature"), gr.Slider(1, 2048, 128, label="new_max_token"), gr.Slider(0, 1, 0.95, label="top_p")],
|
109 |
+
title="☄️ Meteor",
|
110 |
description="Meteor is efficient 7B size Large Language and Vision Model built on the help of traversal of rationale",
|
111 |
stop_btn="Stop Generation", multimodal=True)
|
112 |
demo.launch()
|