# SPDX-License-Identifier: Apache-2.0
import os
import subprocess

# Print the CUDA and Python versions for debugging.
os.system('nvcc --version')
os.system('python --version')

model_url = 'http://localhost:8000/v1'
model = "sijieaaa/CodeModel-V1-3B-2024-02-07"
stop_token_ids = ''  # Comma-separated token IDs that stop generation; empty means none
# Set up the LLM API server
# -- llama-factory
def start_api():
    # Start the API server without blocking. `lmf` is LLaMA-Factory's CLI
    # alias; its model flag is --model_name_or_path. stderr is merged into
    # stdout so that reading a single pipe cannot deadlock.
    process = subprocess.Popen(
        ["lmf", "api", "--model_name_or_path", model, "--template", "qwen"],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
    )
    # Watch the startup log for a line that signals the server is up
    while True:
        line = process.stdout.readline()
        if not line:
            break  # EOF: the process exited before the server came up
        print(line.strip())  # Echo the API startup log
        if "Running on local URL:" in line or "API started" in line or "http://localhost:" in line:
            print("✅ API started successfully!")
            return process  # Return the running API process
    print("❌ API failed to start!")
    process.terminate()
    return None

api_process = start_api()

# Proceed only once the API is up
if api_process:
    print("🎯 Running follow-up steps…")
else:
    raise Exception("API failed to start!")
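
# A minimal readiness sketch: besides scanning the log, one could poll the
# OpenAI-compatible /v1/models route until it answers. This assumes the
# server exposes that route at model_url and that `requests` is installed;
# wait_until_ready() is a hypothetical helper, not part of LLaMA-Factory.
import time
import requests

def wait_until_ready(url=model_url, retries=30, delay=2.0):
    # Poll GET {url}/models until the server responds with 200 or we give up
    for _ in range(retries):
        try:
            if requests.get(f"{url}/models", timeout=5).status_code == 200:
                return True
        except requests.RequestException:
            pass  # Server not accepting connections yet
        time.sleep(delay)
    return False

# One could call wait_until_ready() here before creating the client below.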
# Set up the OpenAI API client
from openai import OpenAI

openai_api_key = '0'  # Placeholder; a local server does not validate the key
openai_api_base = model_url

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
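
# Quick smoke test (a sketch, assuming the server exposes the standard
# OpenAI-compatible /v1/models route, as vLLM and LLaMA-Factory's APIs do):
# list the models being served, without crashing the app on failure.
try:
    for served in client.models.list().data:
        print("Serving model:", served.id)
except Exception as exc:
    print("Model listing failed:", exc)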

def predict(message, history):
    # Convert the Gradio chat history to the OpenAI message format
    history_openai_format = [{
        "role": "system",
        "content": "You are a great AI assistant."
    }]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({
            "role": "assistant",
            "content": assistant
        })
    history_openai_format.append({"role": "user", "content": message})
    # Create a chat completion request and stream it from the API server
    stream = client.chat.completions.create(
        model=model,  # Model name to use
        messages=history_openai_format,  # Chat history
        temperature=0.95,  # Sampling temperature
        stream=True,  # Stream the response token by token
        top_p=0.7,
        extra_body={
            'repetition_penalty': 1.0,  # 1.0 disables the penalty
            'stop_token_ids': [
                int(token_id.strip()) for token_id in stop_token_ids.split(',')
                if token_id.strip()
            ] if stop_token_ids else []
        })
    # Read the response stream and yield the accumulated text
    partial_message = ""
    for chunk in stream:
        partial_message += (chunk.choices[0].delta.content or "")
        yield partial_message
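
# Cleanup sketch: register termination of the background API process at
# interpreter exit, so a crashed or interrupted app does not leak it.
# (Registered before launch() below, because launch() blocks the main thread.)
import atexit

if api_process:
    atexit.register(api_process.terminate)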

import gradio as gr

# Create and launch a chat interface with Gradio
gr.ChatInterface(predict).queue().launch(
    # server_name=host,
    # server_port=port,
    share=True
)
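
# Usage sketch: predict() is a plain generator, so it can also be exercised
# without the UI (assumes the API server above is running; launch() blocks,
# so this is shown as a comment rather than live code):
# for partial in predict("Write a haiku about GPUs.", history=[]):
#     print(partial)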