# o3 / V3.py
# Happzy-WHU
# load from huggingface
# 5a54d5d
# raw
# history blame
# 1.03 kB
import os
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from huggingface_hub import snapshot_download
# Model identifier handed to both the tokenizer loader and the vLLM engine.
model_path = "happzy2633/qwen2.5-7b-ins-v3"

# The tokenizer is used below only to render conversations through its chat
# template.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Decoding settings shared by every generation request made in this module.
sampling_params = SamplingParams(
    temperature=0.7,
    top_p=0.8,
    repetition_penalty=1.05,
    max_tokens=8192,
)

# The engine is constructed at import time, so importing this module loads
# the model.
llm = LLM(model=model_path)
def api_call_batch(batch_messages):
    """Generate one completion per conversation in ``batch_messages``.

    Args:
        batch_messages: A sequence of conversations. Each conversation is
            passed as ``conversation=`` to ``tokenizer.apply_chat_template``;
            elsewhere in this file it is a list of
            ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A list of strings: the text of the first candidate of each output
        returned by ``llm.generate``, in the order the engine returned them.
        An empty ``batch_messages`` yields an empty list without invoking
        the engine.
    """
    # Nothing to generate: skip the engine call entirely.
    if not batch_messages:
        return []

    # Render every conversation to a prompt string. ``return_tensors`` is
    # deliberately not passed: it has no effect when ``tokenize=False``.
    prompts = [
        tokenizer.apply_chat_template(
            conversation=messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        for messages in batch_messages
    ]

    outputs = llm.generate(prompts, sampling_params)

    # Keep only the first candidate's text for each prompt.
    return [output.outputs[0].text for output in outputs]
def api_call(messages):
    """Return the completion for a single conversation.

    Wraps ``messages`` in a one-element batch, delegates to
    ``api_call_batch``, and returns the first element of its result.
    """
    single_batch = [messages]
    completions = api_call_batch(single_batch)
    return completions[0]
def call_gpt(history, prompt):
    """Send ``prompt`` as a new user turn after ``history`` and return the reply.

    Args:
        history: Prior conversation turns; concatenated with a one-element
            list, so it must support ``history + [...]`` (e.g. a list).
        prompt: Content of the user turn appended after ``history``.

    Returns:
        Whatever ``api_call`` returns for the extended conversation.
    """
    user_turn = {"role": "user", "content": prompt}
    # ``+`` builds a new conversation rather than appending in place.
    conversation = history + [user_turn]
    return api_call(conversation)
if __name__ == "__main__":
    # Smoke test: submit the same single-turn conversation four times as one
    # batch and print the resulting completions.
    # (A leftover ``breakpoint()`` call was removed here; it dropped every
    # script run into the debugger before any generation happened.)
    messages = [{"role": "user", "content": "你是谁?"}]
    print(api_call_batch([messages] * 4))