File size: 2,456 Bytes
dc15a3f 374e122 03d2f46 374e122 161b347 03d2f46 161b347 03d2f46 dc15a3f 03d2f46 dc15a3f 03d2f46 2941c6d dc15a3f 469e885 dc15a3f 469e885 dc15a3f 374e122 af0c8f0 1325e72 161b347 1325e72 03d2f46 469e885 161b347 2941c6d 161b347 03d2f46 161b347 2941c6d 161b347 03d2f46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
from spaces import GPU
from threading import Thread
from transformers import Qwen2VLForConditionalGeneration, Qwen2VLProcessor, TextIteratorStreamer, AutoProcessor, BatchFeature
from qwen_vl_utils import process_vision_info
from gradio import ChatInterface, Textbox, Slider
# Hub repository id used for both the model weights and the processor.
model_path = "Pectics/Softie-VL-7B-250123"

# Pixel bounds forwarded to the processor, both written as multiples of 28 * 28.
_PIXEL_UNIT = 28 * 28
min_pixels = 256 * _PIXEL_UNIT
max_pixels = 1280 * _PIXEL_UNIT

# Keyword options for loading the weights, kept together for readability.
_model_load_options = {
    "torch_dtype": "auto",
    "device_map": "auto",
    "attn_implementation": "flash_attention_2",
}
model = Qwen2VLForConditionalGeneration.from_pretrained(model_path, **_model_load_options)

processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(
    model_path,
    min_pixels=min_pixels,
    max_pixels=max_pixels,
)
@GPU
def infer(
    inputs: tuple,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the model's reply for one prepared prompt.

    Generation runs in a background thread while this generator drains the
    streamer, so partial text can be yielded as soon as it is decoded.

    Args:
        inputs: Sequence whose element 0 is the prompt text (wrapped in a
            one-item list for the processor) and whose elements 1 and 2 are
            forwarded to the processor as ``images=`` and ``videos=``.
        max_tokens: Forwarded to ``model.generate`` as ``max_new_tokens``.
        temperature: Forwarded to ``model.generate`` unchanged.
        top_p: Forwarded to ``model.generate`` unchanged.

    Yields:
        The response text accumulated so far, once per streamed chunk.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here after the stream has been drained.
    """
    model_inputs = processor(
        text=[inputs[0]],
        images=inputs[1],
        videos=inputs[2],
        padding=True,
        return_tensors="pt",
    ).to("cuda")
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    # NOTE(review): temperature/top_p presumably rely on sampling being enabled
    # by the model's generation config -- confirm, since do_sample is not set here.
    generate_kwargs = dict(
        **model_inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    failures = []

    def _generate():
        # Without this guard, an exception in the worker thread would leave the
        # consumer loop below waiting on the streamer forever and lose the error.
        try:
            model.generate(**generate_kwargs)
        except Exception as exc:
            failures.append(exc)
            # Close the stream so the consumer loop can finish.
            streamer.end()

    Thread(target=_generate).start()
    response = ""
    for token in streamer:
        response += token
        yield response
    if failures:
        # Surface the worker-thread error to the caller instead of ending silently.
        raise failures[0]
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Chat callback: build the prompt from the conversation and stream the reply.

    The conversation is the system message, then every history entry reduced
    to its ``role`` and ``content`` keys, then the new user message. It is
    rendered to text with the processor's chat template, passed through
    ``process_vision_info``, and handed to ``infer``.

    Args:
        message: Content of the new user turn.
        history: Iterable of mappings that each provide ``"role"`` and
            ``"content"``.
        system_message: Content of the leading system turn.
        max_tokens: Passed through to ``infer``.
        temperature: Passed through to ``infer``.
        top_p: Passed through to ``infer``.

    Yields:
        Each value produced by ``infer``, in order.
    """
    conversation = [
        {"role": "system", "content": system_message},
        *({"role": turn["role"], "content": turn["content"]} for turn in history),
        {"role": "user", "content": message},
    ]
    prompt = processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    images, videos = process_vision_info(conversation)
    yield from infer((prompt, images, videos), max_tokens, temperature, top_p)
# Extra controls shown with the chat box. Their values are passed to `respond`
# after `message` and `history`, in this order.
_system_prompt_box = Textbox(value="You are Softie, a helpful assistant.", label="系统设定")
_max_tokens_slider = Slider(minimum=1, maximum=2048, value=512, step=1, label="最大生成长度")
_temperature_slider = Slider(minimum=0.01, maximum=4.0, value=0.75, step=0.01, label="温度系数(Temperature)")
_top_p_slider = Slider(minimum=0.01, maximum=1.0, value=0.5, step=0.01, label="核取样系数(Top-p)")

app = ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        _system_prompt_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    app.launch()
|