# Hugging Face Spaces app — Qwen2.5-1.5B-Instruct (GGUF) streaming chatbot.
# NOTE(review): the original paste carried Spaces UI residue ("Spaces:",
# "Runtime error") and trailing " | |" artifacts on every line, which made
# the file invalid Python; those have been removed.
import os
import threading
import time

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Model to fetch from the Hugging Face Hub (Q8_0 GGUF quantization of Qwen2.5).
repo_id = "bartowski/Qwen2.5-1.5B-Instruct-GGUF"
filename = "Qwen2.5-1.5B-Instruct-Q8_0.gguf"

# Context window (tokens) passed to llama.cpp.
CONTEXT_SIZE = 1024
# The free Spaces tier provides 2 CPU cores, so use 2 inference threads.
N_THREADS = 2

# Global model handle and readiness flag; populated by load_model().
llm = None
model_loaded = False
def load_model(progress=gr.Progress()):
    """Download the GGUF model and load it into memory.

    Sets the module-level ``llm`` and ``model_loaded`` globals and returns
    a status string for the UI.  ``progress`` is Gradio's progress tracker;
    the mutable default is the documented Gradio idiom, not a bug.
    """
    global llm, model_loaded
    progress(0, desc="モデルのダウンロードを開始")
    # hf_hub_download caches the file locally, so repeated loads are cheap.
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    progress(0.5, desc="モデルをメモリに読み込み中")
    llm = Llama(
        model_path=model_path,
        n_threads=N_THREADS,
        n_batch=8,
        verbose=False,
        n_ctx=CONTEXT_SIZE,
    )
    progress(1, desc="モデルの読み込み完了")
    model_loaded = True
    return "モデルの読み込みが完了しました。"
def get_llama_response(prompt, temperature):
    """Run streaming inference and return an iterable of llama.cpp chunks.

    If the model is not yet loaded, or the initial call fails, a
    one-element list shaped like the streaming output is returned so
    callers can iterate uniformly over either case.
    """
    global llm, model_loaded
    if not model_loaded:
        return [{"choices": [{"text": "モデルを読み込んでいます。しばらくお待ちください..."}]}]
    try:
        # NOTE(review): with stream=True this returns a generator, so only
        # errors raised by the initial call are caught here — exceptions
        # raised *during* iteration propagate to the consumer (greet).
        return llm(prompt, max_tokens=1024, temperature=temperature, top_p=0.95, repeat_penalty=1.1, stream=True)
    except Exception as e:
        return [{"choices": [{"text": f"エラーが発生しました: {str(e)}"}]}]
def greet(prompt, temperature):
    """Stream the model's response to Gradio, yielding the growing text.

    This function contains ``yield`` and is therefore a generator.  The
    original code ``return``-ed the "please wait" message, which a
    generator silently discards as the StopIteration value — the user
    never saw it.  Fixed by yielding the message instead.
    """
    global model_loaded
    if not model_loaded:
        yield "モデルを読み込んでいます。しばらくお待ちください..."
        return
    full_response = ""
    for output in get_llama_response(prompt, temperature):
        # Skip empty chunks defensively; accumulate and re-yield the full
        # text so the Gradio textbox shows the whole response so far.
        if len(output['choices']) > 0:
            text_chunk = output['choices'][0]['text']
            full_response += text_chunk
            yield full_response
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    # Original used an f-string with no placeholders; plain literal is equivalent.
    gr.Markdown("# LLMチャットボット(Streaming)")
    # Color-coded disclaimer banner shown above the inputs.
    gr.HighlightedText(
        value=[("", None),
               ("これはLLM", "positive"),
               ("の", None),
               ("テストアプリケーション", "neutral"), ("です。\n", None),
               ("内容は実験的", "neutral"), ("なため", None),
               ("重要な意思決定に用いない", "negative"),
               ("でください。", None)
               ],
        label="注意",
        show_label=False,
    )
    with gr.Row():
        input_text = gr.Textbox(label="プロンプトを入力してください")
        temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
    output_text = gr.Textbox(label="生成されたレスポンス")
    submit_button = gr.Button("送信")
    gr.Textbox(value=filename, label="モデル", interactive=False)
    loading_status = gr.Textbox(label="Loading Status")

    # greet is a generator, so these handlers stream into output_text.
    submit_button.click(fn=greet, inputs=[input_text, temperature], outputs=output_text)
    input_text.submit(fn=greet, inputs=[input_text, temperature], outputs=output_text)
    # Kick off the (slow) model download/load as soon as the page opens.
    demo.load(fn=load_model, outputs=loading_status)

demo.queue()
demo.launch()