Qwen2.5-72B / app.py
mimifuel2018's picture
Update app.py
b589ee9 verified
raw
history blame
5.25 kB
import os
import gradio as gr
from http import HTTPStatus
from typing import List, Optional, Tuple, Dict
import dashscope
from dashscope import Generation
from dashscope.api_entities.dashscope_response import Role
import requests # <-- Add this line to import the requests library
# Configuration
# System prompt used whenever no other prompt has been supplied.
default_system = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'
# The DashScope API key is taken from the HF_TOKEN environment variable
# (os.getenv returns None when the variable is unset).
dashscope.api_key = os.getenv('HF_TOKEN')
# Typing definitions
# Chat history: one (user message, assistant reply) pair per turn.
History = List[Tuple[str, str]]
# Chat messages: dicts carrying 'role' and 'content' keys.
Messages = List[Dict[str, str]]
# Function to log chat history to logs.txt
def log_history_to_file(query: str, response: str, file_path: str = "logs.txt") -> None:
    """Append one user/assistant exchange to a plain-text log file.

    Args:
        query: The user's message.
        response: The assistant's reply (or an error message) to record.
        file_path: Log file to append to; it is created if it does not exist.
    """
    # Pin the encoding: chat text routinely contains non-ASCII characters
    # (emoji, CJK), which the platform's default encoding may be unable to
    # represent.
    with open(file_path, "a", encoding="utf-8") as f:
        f.write(f"User: {query}\nAssistant: {response}\n\n")
# Function to clear session history
def clear_session() -> Tuple[str, History]:
    """Return the values that reset the chat UI.

    Returns:
        An empty string and an empty history list, in that order.
    """
    return '', []
# Function to modify system session prompt
def modify_system_session(system: str) -> Tuple[str, str, History]:
    """Apply a new system prompt and reset the conversation.

    Args:
        system: The requested system prompt. An empty (or otherwise falsy)
            value selects ``default_system`` instead.

    Returns:
        The effective system prompt twice, followed by an empty history list.
    """
    system = system or default_system
    return system, system, []
# Convert history to messages format
def history_to_messages(history: History, system: str) -> Messages:
    """Flatten chat history into a role-tagged message list.

    The result opens with the system prompt; each history entry then
    contributes a user message (element 0) followed by an assistant message
    (element 1).
    """
    messages = [{'role': Role.SYSTEM, 'content': system}]
    for turn in history:
        messages.extend((
            {'role': Role.USER, 'content': turn[0]},
            {'role': Role.ASSISTANT, 'content': turn[1]},
        ))
    return messages
# Convert messages back to history format
def messages_to_history(messages: Messages) -> Tuple[str, History]:
    """Split a message list back into its system prompt and history pairs.

    The first message must carry the system role. The remaining messages are
    paired up positionally (odd index with the following even index); an
    unpaired trailing message is dropped by ``zip``.
    """
    head = messages[0]
    assert head['role'] == Role.SYSTEM
    system = head['content']
    user_msgs = messages[1::2]
    assistant_msgs = messages[2::2]
    history = [
        (user['content'], assistant['content'])
        for user, assistant in zip(user_msgs, assistant_msgs)
    ]
    return system, history
# Main function for chat
# Upper bound (seconds) on a single inference request, so that a stalled
# connection cannot block a worker indefinitely.
REQUEST_TIMEOUT_SECONDS = 120


def _extract_generated_text(response_data) -> str:
    """Return 'generated_text' from a success body (a dict, or a list whose first item is one)."""
    first = response_data[0] if isinstance(response_data, list) else response_data
    return first.get('generated_text', '')


def _describe_http_error(response) -> str:
    """Build the 'Error <status>: <detail>' message for a non-200 response."""
    try:
        body = response.json()
    except ValueError:
        # Body is not JSON (e.g. an HTML error page): fall back to raw text
        # rather than losing the status code to a decoding exception.
        body = None
    detail = body.get('error', response.text) if isinstance(body, dict) else response.text
    return f"Error {response.status_code}: {detail}"


def model_chat(query: Optional[str], history: Optional[History], system: str) -> Tuple[str, History, str]:
    """Send one user turn to the inference endpoint and record the exchange.

    Args:
        query: The user's message; ``None`` is treated as an empty string.
        history: Previous turns; ``None`` is treated as an empty history.
            On success the new turn is appended to this list in place.
        system: The system prompt placed at the head of the message list.

    Returns:
        A ``(text, history, system)`` tuple. ``text`` is the generated reply
        on success, or an ``"Error ..."`` / ``"Exception: ..."`` message on
        failure, in which case ``history`` is returned without a new turn.
        Every outcome is also appended to the log file.
    """
    if query is None:
        query = ''
    if history is None:
        history = []
    # Queries containing either trigger phrase are replaced wholesale by a
    # fixed, explicit request for numbers.
    if 'next numbers' in query or 'give me numbers after' in query:
        query = "Please give me the next 10 numbers after 10, starting from 11."
    messages = history_to_messages(history, system)
    messages.append({'role': 'user', 'content': query})
    payload = {"inputs": query, "parameters": {"max_new_tokens": 150}, "history": messages}
    headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
    try:
        response = requests.post(
            "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-32B-Instruct",
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            error_message = _describe_http_error(response)
            log_history_to_file(query, error_message)
            return error_message, history, system
        response_text = _extract_generated_text(response.json())
        # Log the chat to file
        log_history_to_file(query, response_text)
        # Update history with the new assistant response and return it
        history.append([query, response_text])
        return response_text, history, system
    except Exception as e:
        # UI boundary: network failures, timeouts and malformed bodies are
        # reported to the user as text instead of crashing the handler.
        error_message = f"Exception: {str(e)}"
        log_history_to_file(query, error_message)
        return error_message, history, system
# Gradio Interface Setup
with gr.Blocks() as demo:
    gr.Markdown("<center><font size=8>Qwen2.5-32B-Instruct👾</center>")

    # System-prompt editor. The hidden textbox is what the chat handler reads;
    # it is only updated when the "Set system prompt" button is pressed.
    with gr.Row():
        with gr.Column(scale=3):
            system_input = gr.Textbox(value=default_system, lines=1, label='System')
        with gr.Column(scale=1):
            modify_system = gr.Button("🛠️ Set system prompt and clear history", scale=2)
        system_state = gr.Textbox(value=default_system, visible=False)

    chatbot = gr.Chatbot(label='Qwen2.5-32B-Instruct')
    textbox = gr.Textbox(lines=1, label='Input')

    with gr.Row():
        clear_history = gr.Button("🧹 Clear history")
        submit = gr.Button("🚀 Send")

    # Pressing Enter in the textbox and clicking "Send" run the same handler
    # with the same wiring; only their concurrency limits differ.
    chat_inputs = [textbox, chatbot, system_state]
    chat_outputs = [textbox, chatbot, system_input]
    textbox.submit(model_chat, inputs=chat_inputs, outputs=chat_outputs, concurrency_limit=5)
    submit.click(model_chat, inputs=chat_inputs, outputs=chat_outputs, concurrency_limit=20)

    clear_history.click(
        fn=clear_session,
        inputs=[],
        outputs=[textbox, chatbot],
        concurrency_limit=20,
    )
    modify_system.click(
        fn=modify_system_session,
        inputs=[system_input],
        outputs=[system_state, system_input, chatbot],
        concurrency_limit=20,
    )

# Launch the interface with a reduced thread count for the free plan.
demo.queue(api_open=False)
demo.launch(max_threads=20)