import subprocess
import sys

# Function to install dependencies if missing
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# List of required libraries
required_packages = ["transformers", "torch", "accelerate", "gradio", "huggingface_hub", "safetensors"]

# Install any missing packages
for package in required_packages:
    try:
        __import__(package)
    except ImportError:
        install(package)
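# Note: the bootstrap above assumes each PyPI package name matches its import
# name, which happens to hold for every entry in required_packages.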
import os
import re

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download

# 🔹 Set torch num threads to the number of available CPU cores
torch.set_num_threads(os.cpu_count() or 1)
# 🔹 Download the model from Hugging Face
model_name = "HyperX-Sen/Qwen-2.5-7B-Reasoning"
model_path = snapshot_download(repo_id=model_name, repo_type="model")

# 🔹 Load the model & tokenizer
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_path)
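# device_map="auto" (via accelerate) places the weights on a GPU when one is
# available and falls back to CPU otherwise; torch_dtype="auto" keeps the dtype
# stored in the checkpoint.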
# 🔹 System prompt
SYSTEM_PROMPT = """
Respond in the following format:
<reasoning>
...
</reasoning>
<answer>
...
</answer>
"""
# 🔹 Function to extract reasoning and answer
def extract_response(full_response):
    reasoning_match = re.search(r"<reasoning>(.*?)</reasoning>", full_response, re.DOTALL)
    answer_match = re.search(r"<answer>(.*?)</answer>", full_response, re.DOTALL)
    reasoning = reasoning_match.group(1).strip() if reasoning_match else ""
    answer = answer_match.group(1).strip() if answer_match else ""
    return f"<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>"
# 🔹 Function to generate response
def chat_response(user_input, top_p, top_k, temperature, max_length):
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=int(max_length),  # generate expects integer length limits
            do_sample=True,              # sampling makes top_p/top_k/temperature take effect
            top_p=top_p,
            top_k=int(top_k),            # top_k must be an integer
            temperature=temperature,
        )
    # Decode only the newly generated tokens, so the echoed prompt (and the
    # placeholder tags inside SYSTEM_PROMPT) cannot leak into the extraction
    generated = output[0][inputs["input_ids"].shape[-1]:]
    full_response = tokenizer.decode(generated, skip_special_tokens=True)
    return extract_response(full_response)
# 🔹 Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot")
    with gr.Row():
        chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)
    with gr.Row():
        user_input = gr.Textbox(label="Your Prompt", placeholder="Ask me anything...", lines=2)
    with gr.Accordion("🔧 Advanced Settings", open=False):
        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-p")
        top_k = gr.Slider(1, 100, value=50, label="Top-k")
        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
        max_length = gr.Slider(128, 1024, value=512, label="Max Length")
    with gr.Row():
        submit_button = gr.Button("Generate Response")
    submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot])

# 🔹 Launch the Gradio app
demo.launch()
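# On Hugging Face Spaces, demo.launch() is sufficient; for a purely local run,
# demo.launch(share=True) would also expose a temporary public URL.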