File size: 2,342 Bytes
1325e72 aa819ab 1325e72 03d2f46 161b347 03d2f46 161b347 03d2f46 aa819ab 61f2a3d 03d2f46 2941c6d a2fc719 1325e72 a2fc719 1325e72 161b347 1325e72 03d2f46 1325e72 03d2f46 161b347 03d2f46 a2fc719 03d2f46 161b347 2941c6d 161b347 03d2f46 161b347 2941c6d 161b347 03d2f46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
from gradio import ChatInterface, Textbox, Slider
from spaces import GPU
from threading import Thread
from torch import bfloat16
from transformers import Qwen2VLForConditionalGeneration, Qwen2VLProcessor, TextIteratorStreamer, AutoProcessor, BatchFeature
from qwen_vl_utils import process_vision_info
# Hub identifier shared by the model weights and the processor loaded below.
model_path = "Pectics/Softie-VL-7B-250123"

# Lower and upper pixel bounds forwarded to the processor.
min_pixels = 28 * 28 * 256
max_pixels = 28 * 28 * 1280

# The model is loaded once, at import time, in bfloat16 with flash-attention 2;
# device placement is left to `device_map="auto"`.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_path,
    device_map="auto",
    attn_implementation="flash_attention_2",
    torch_dtype=bfloat16,
)

# Processor used both for chat templating / tensor preparation and as the
# decoder handed to the text streamer.
processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(
    model_path,
    max_pixels=max_pixels,
    min_pixels=min_pixels,
)
@GPU
async def infer(inputs: BatchFeature, **kwargs):
    """Run one generation pass with the module-level model.

    Moves ``inputs`` to CUDA and calls ``model.generate`` with them plus every
    extra keyword argument. The value returned by ``generate`` is discarded,
    so output is only observable through objects passed in ``kwargs`` (for
    example a ``streamer``).

    NOTE(review): this is declared ``async`` although nothing is awaited
    inside. Unless the ``GPU`` decorator changes this, a bare call only
    creates a coroutine; the body runs once that coroutine is awaited or
    driven by an event loop. Confirm ``async`` is intended.
    """
    on_device = inputs.to("cuda")
    model.generate(**on_device, **kwargs)
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply for one chat turn.

    Builds the conversation (system turn, prior history, new user message),
    prepares model inputs with the processor, starts generation on a
    background thread and yields the growing reply as chunks arrive.

    Args:
        message: Content of the new user turn.
        history: Earlier turns as dicts with ``"role"`` and ``"content"`` keys.
        system_message: Content of the leading system turn.
        max_tokens: Forwarded to generation as ``max_new_tokens``.
        temperature: Forwarded to generation as ``temperature``.
        top_p: Forwarded to generation as ``top_p``.

    Yields:
        The reply accumulated so far, once per chunk produced by the streamer.

    Raises:
        Exception: Whatever the generation call raised, re-raised here once
            the stream has been drained.
    """
    # Local imports keep this block self-contained; both are standard library.
    import asyncio
    import inspect

    messages = [
        {"role": "system", "content": system_message},
        *(
            {"role": turn["role"], "content": turn["content"]}
            for turn in (history or [])
        ),
        {"role": "user", "content": message},
    ]

    text_inputs = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text_inputs],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )

    streamer = TextIteratorStreamer(
        processor, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    errors = []

    def _generate():
        """Run `infer` to completion, closing the stream if it fails."""
        try:
            pending = infer(inputs, **generation_kwargs)
            # `infer` is written as a coroutine function, so a bare call does
            # not execute it. It is also wrapped by a decorator whose return
            # type is not visible here, so a plain (already executed) call is
            # accepted as well.
            if inspect.iscoroutine(pending):
                asyncio.run(pending)
        except Exception as exc:  # surfaced to the caller after the loop
            errors.append(exc)
            # Without this the consumer loop below would wait forever for
            # an end-of-stream marker that a failed generation never sends.
            streamer.end()

    # Generation must run concurrently with the loop below: the streamer is
    # filled by `generate` and drained here, chunk by chunk.
    worker = Thread(target=_generate, daemon=True)
    worker.start()

    response = ""
    for token in streamer:
        response += token
        yield response

    worker.join()
    if errors:
        raise errors[0]
# Chat UI around `respond`. The four extra widgets supply, in order, the
# system prompt, the generation length limit, the temperature and the top-p
# value (presumably mapped onto `respond`'s trailing parameters by position —
# see the Gradio ChatInterface docs).
app = ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=[
        Textbox(label="系统设定", value="You are Softie, a helpful assistant."),
        Slider(label="最大生成长度", minimum=1, maximum=2048, step=1, value=512),
        Slider(label="温度系数(Temperature)", minimum=0.01, maximum=4.0, step=0.01, value=0.75),
        Slider(label="核取样系数(Top-p)", minimum=0.01, maximum=1.0, step=0.01, value=0.5),
    ],
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()
|