import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
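# Model configuration: a small instruct model in GGUF format. The Q8_0
# quantization keeps quality close to the original weights while fitting
# comfortably in a free-tier Space's memory.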
repo_id = "bartowski/Qwen2.5-1.5B-Instruct-GGUF"
filename = "Qwen2.5-1.5B-Instruct-Q8_0.gguf"
CONTEXT_SIZE = 1024
N_THREADS = 2  # the free-tier Space CPU has 2 cores
llm = None
model_loaded = False
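# The model is loaded lazily via demo.load() below, so the UI appears
# immediately and requests that arrive before loading finishes get a
# "please wait" message instead of crashing.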
def load_model(progress=gr.Progress()):
    """Download the GGUF file from the Hub and load it with llama.cpp."""
    global llm, model_loaded
    progress(0, desc="Starting model download")
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    progress(0.5, desc="Loading model into memory")
    llm = Llama(
        model_path=model_path,
        n_threads=N_THREADS,
        n_batch=8,
        verbose=False,
        n_ctx=CONTEXT_SIZE,
    )
    progress(1, desc="Model loading finished")
    model_loaded = True
    return "Model loading finished."
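# Returns an iterator over llama.cpp completion chunks. On error (or before
# the model is ready) it returns a one-element list shaped like a chunk, so
# the caller can consume both cases with the same loop.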
def get_llama_response(prompt, temperature):
    global llm, model_loaded
    if not model_loaded:
        return [{"choices": [{"text": "The model is still loading. Please wait..."}]}]
    try:
        return llm(prompt, max_tokens=1024, temperature=temperature, top_p=0.95, repeat_penalty=1.1, stream=True)
    except Exception as e:
        return [{"choices": [{"text": f"An error occurred: {e}"}]}]
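# Generator that accumulates streamed chunks and yields the growing text;
# Gradio re-renders the output textbox on every yield, producing the
# streaming effect.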
def greet(prompt, temperature):
    global model_loaded
    if not model_loaded:
        # greet is a generator, so the message must be yielded, not returned;
        # a bare return just ends the stream.
        yield "The model is still loading. Please wait..."
        return
    full_response = ""
    for output in get_llama_response(prompt, temperature):
        if len(output["choices"]) > 0:
            full_response += output["choices"][0]["text"]
            yield full_response
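# UI: a notice banner, prompt input, temperature slider, model name display,
# and a loading-status field updated by load_model().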
with gr.Blocks() as demo:
    gr.Markdown("# LLM Chatbot (Streaming)")
    gr.HighlightedText(
        value=[
            ("", None),
            ("This is an LLM", "positive"),
            (" ", None),
            ("test application", "neutral"),
            (".\n", None),
            ("Its output is experimental", "neutral"),
            (", so please ", None),
            ("do not use it for important decisions", "negative"),
            (".", None),
        ],
        label="Notice",
        show_label=False,
    )
    with gr.Row():
        input_text = gr.Textbox(label="Enter your prompt")
        temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
    output_text = gr.Textbox(label="Generated response")
    submit_button = gr.Button("Submit")
    gr.Textbox(value=filename, label="Model", interactive=False)
    loading_status = gr.Textbox(label="Loading Status")
    submit_button.click(fn=greet, inputs=[input_text, temperature], outputs=output_text)
    input_text.submit(fn=greet, inputs=[input_text, temperature], outputs=output_text)
    # Kick off model loading as soon as the page opens.
    demo.load(fn=load_model, outputs=loading_status)
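# Queueing lets generator handlers stream partial outputs to the browser.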
demo.queue()
demo.launch()