# SPDX-License-Identifier: Apache-2.0
import os
import subprocess

# Print environment info (CUDA and Python versions) to the startup log
os.system('nvcc --version')
os.system('python --version')

# Local OpenAI-compatible endpoint that the LLaMA-Factory API will serve
model_url = 'http://localhost:8000/v1'
model = "sijieaaa/CodeModel-V1-3B-2024-02-07"
# Comma-separated token IDs at which generation should stop (empty = none)
stop_token_ids = ''
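# For example, stop_token_ids = '151643,151645' would stop generation at
# Qwen's <|endoftext|> and <|im_end|> tokens (illustrative IDs; verify them
# against the model's tokenizer before use).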
# Set up the LLM API server
# -- llama-factory
def start_api():
    # Launch the API server as a non-blocking subprocess
    process = subprocess.Popen(
        ["lmf", "api", "--model_name", "sijieaaa/CodeModel-V1-3B-2024-02-07", "--template", "qwen"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    # Watch the startup log for a line indicating the server is ready
    while True:
        line = process.stdout.readline()
        if not line:
            break  # stdout closed: the process exited before the server came up
        print(line.strip())  # echo the API startup log
        if "Running on local URL:" in line or "API started" in line or "http://localhost:" in line:
            print("✅ API started successfully!")
            return process  # hand back the running API process
    print("❌ API failed to start!")
    process.terminate()
    return None
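# Alternative readiness check (a sketch, not wired into the flow below):
# instead of scraping stdout, poll the OpenAI-compatible /v1/models route
# until it answers. Assumes the LLaMA-Factory API serves that route at
# model_url; adjust the timeout to the model's load time.
def wait_for_api(url=model_url, timeout=300):
    import time
    import urllib.request
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(f"{url}/models", timeout=2) as resp:
                if resp.status == 200:
                    return True  # server is accepting requests
        except OSError:
            time.sleep(1)  # server not up yet; retry shortly
    return False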
api_process = start_api()

# Continue only once the API is up
if api_process:
    print("🎯 Proceeding with the next steps...")
else:
    raise Exception("API failed to start!")
# Set up the OpenAI API client; the local server is not configured with an
# API key, so a placeholder value is enough
from openai import OpenAI

openai_api_key = '0'
openai_api_base = model_url
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
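# Optional smoke test (commented out): one minimal, non-streaming request to
# confirm the client/server wiring before launching the UI. Illustrative only.
# resp = client.chat.completions.create(
#     model=model,
#     messages=[{"role": "user", "content": "Say hi."}],
#     max_tokens=8,
# )
# print(resp.choices[0].message.content)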
def predict(message, history):
    # Convert the chat history to OpenAI message format
    history_openai_format = [{
        "role": "system",
        "content": "You are a great AI assistant."
    }]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({
            "role": "assistant",
            "content": assistant
        })
    history_openai_format.append({"role": "user", "content": message})

    # Create a chat completion request and send it to the API server
    stream = client.chat.completions.create(
        model=model,                     # model name to use
        messages=history_openai_format,  # chat history
        temperature=0.95,                # sampling temperature
        stream=True,                     # stream the response
        top_p=0.7,
        extra_body={
            'repetition_penalty': 1,
            'stop_token_ids': [
                int(id.strip()) for id in stop_token_ids.split(',')
                if id.strip()
            ] if stop_token_ids else []
        })

    # Accumulate and yield generated text from the response stream
    partial_message = ""
    for chunk in stream:
        partial_message += (chunk.choices[0].delta.content or "")
        yield partial_message
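# Gradio's ChatInterface calls predict(message, history) with history as a
# list of (user, assistant) tuples and re-renders each yielded string, so the
# reply appears to stream token by token. Driving it by hand looks like:
#   for text in predict("Write hello world in Python", []):
#       print(text)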
import gradio as gr

# Create and launch a chat interface with Gradio
gr.ChatInterface(predict).queue().launch(
    # server_name=host,
    # server_port=port,
    share=True
)
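# Note: share=True asks Gradio for a public tunnel URL; on a Hugging Face
# Space the app is already exposed, so the flag mainly matters for local runs.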