# Softie / app.py
# Pectics's picture
# Update app.py
# dc15a3f verified
# raw
# history blame
# 2.49 kB
from threading import Thread
from transformers import Qwen2VLForConditionalGeneration, Qwen2VLProcessor, TextIteratorStreamer, AutoProcessor, BatchFeature
from gradio import ChatInterface, Textbox, Slider
from spaces import GPU
from qwen_vl_utils import process_vision_info
# Hub repository that provides both the model weights and the processor config.
model_path = "Pectics/Softie-VL-7B-250123"

# Lower / upper bound on image size (in pixels) handed to the processor.
# NOTE(review): per the Qwen2-VL model card one visual token covers a 28x28
# pixel patch, so this presumably means ~256 to ~1280 visual tokens per image.
min_pixels = 256 * 28 * 28
max_pixels = 1280 * 28 * 28

# Loaded once at import time; dtype and device placement are left to
# transformers ("auto"), attention uses the FlashAttention-2 implementation.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_path,
    attn_implementation="flash_attention_2",
    device_map="auto",
    torch_dtype="auto",
)

# The processor builds model inputs in `respond` and decodes the streamed
# tokens in `infer`.
processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(
    model_path,
    max_pixels=max_pixels,
    min_pixels=min_pixels,
)
@GPU
def infer(
    inputs: BatchFeature,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a model completion for already-processed inputs.

    Generation runs in a background thread and feeds a
    ``TextIteratorStreamer``; this generator consumes the streamer and
    yields the text produced so far after every new chunk.

    Args:
        inputs: Processor output; moved to CUDA before generation.
        max_tokens: Forwarded to ``generate`` as ``max_new_tokens``.
        temperature: Forwarded to ``generate`` unchanged.
        top_p: Forwarded to ``generate`` unchanged.

    Yields:
        The cumulative response text (each item extends the previous one).

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once the stream has been drained.
    """
    inputs = inputs.to("cuda")
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    errors = []

    def _generate():
        # Run generation; on failure, record the error and close the stream.
        try:
            model.generate(**kwargs)
        except Exception as exc:
            errors.append(exc)
            # generate() only signals end-of-stream when it finishes normally.
            # Without this the consumer loop below would block forever.
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()
    response = ""
    for token in streamer:
        response += token
        yield response
    # The end-of-stream signal has been seen, so the worker is finishing.
    thread.join()
    if errors:
        # Surface the worker's failure to the caller instead of losing it
        # in the background thread.
        raise errors[0]
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Chat callback: build the prompt from the conversation and stream a reply.

    Args:
        message: The newest user message.
        history: Earlier turns; each item is indexed by ``"role"`` and
            ``"content"``.
        system_message: Content of the leading system turn.
        max_tokens: Passed through to ``infer``.
        temperature: Passed through to ``infer``.
        top_p: Passed through to ``infer``.

    Yields:
        Whatever ``infer`` yields, unchanged.
    """
    # System turn first, then the prior conversation (keeping only the role
    # and content of each turn), then the new user message.
    messages = [
        {"role": "system", "content": system_message},
        *({"role": turn["role"], "content": turn["content"]} for turn in history),
        {"role": "user", "content": message},
    ]
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    images, videos = process_vision_info(messages)
    batch = processor(
        text=[prompt],
        images=images,
        videos=videos,
        padding=True,
        return_tensors="pt",
    )
    yield from infer(batch, max_tokens, temperature, top_p)
# Chat UI. The extra inputs are listed in the same order as the trailing
# parameters of `respond`: system_message, max_tokens, temperature, top_p.
app = ChatInterface(
    respond,
    additional_inputs=[
        # Label: "System prompt".
        Textbox(label="系统设定", value="You are Softie, a helpful assistant."),
        # Label: "Maximum generation length".
        Slider(label="最大生成长度", value=512, minimum=1, maximum=2048, step=1),
        # Label: "Temperature coefficient".
        Slider(label="温度系数(Temperature)", value=0.75, minimum=0.01, maximum=4.0, step=0.01),
        # Label: "Nucleus sampling coefficient (Top-p)".
        Slider(label="核取样系数(Top-p)", value=0.5, minimum=0.01, maximum=1.0, step=0.01),
    ],
    type="messages",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    app.launch()