import gradio as gr
from huggingface_hub import AsyncInferenceClient
from typing import AsyncGenerator, Dict, List, Tuple

# Async client so the streaming loop in respond() can use `async for`.
client = AsyncInferenceClient("AuriLab/gpt-bi-instruct-cesar")

def format_messages(history: List[Tuple[str, str]], system_message: str, user_message: str) -> List[Dict[str, str]]:
    """Flatten Gradio's (user, assistant) history tuples into chat-completion messages."""
    messages = [{"role": "system", "content": system_message}]
    messages.extend([
        # Within each (user, assistant) turn, index 0 is the user and index 1 the assistant.
        {"role": "user" if i % 2 == 0 else "assistant", "content": str(msg)}  # Coerce msg to str
        for turn in history
        for i, msg in enumerate(turn)
        if msg is not None
    ])
    messages.append({"role": "user", "content": str(user_message)})  # Coerce user_message to str
    return messages
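
# A quick illustration of the output shape (hypothetical history values, not
# taken from the Space itself):
#   format_messages([("Kaixo!", "Kaixo! Zer moduz?")],
#                   "You are a helpful AI assistant.", "nola duzu izena?")
#   -> [{"role": "system", "content": "You are a helpful AI assistant."},
#       {"role": "user", "content": "Kaixo!"},
#       {"role": "assistant", "content": "Kaixo! Zer moduz?"},
#       {"role": "user", "content": "nola duzu izena?"}]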

async def respond(message: str, history: List[Tuple[str, str]]) -> AsyncGenerator[str, None]:
    system_message = "You are a helpful AI assistant."
    max_tokens = 1000
    temperature = 0.7
    top_p = 0.85

    messages = format_messages(history, system_message, message)
    response = ""
    try:
        # AsyncInferenceClient.chat_completion is a coroutine; awaiting it with
        # stream=True returns an async iterator of completion chunks.
        stream = await client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        async for msg in stream:
            if hasattr(msg.choices[0].delta, "content"):
                token = msg.choices[0].delta.content
                if token is not None:
                    response += token
                    yield response  # Stream the accumulated reply so far
        if not response:  # Handle the empty-response case
            yield "No response generated."
    except Exception as e:
        yield f"Error: {str(e)}"

# Gradio's ChatInterface accepts async generator callbacks, so respond() can
# stream partial replies into the UI as tokens arrive.
demo = gr.ChatInterface(
    fn=respond,
    title="Demo GPT-BI instruct",
    # Basque example prompts (roughly: "what is your name?", "how are you doing?").
    examples=["nola duzu izena?", "Nola egiten duzu?"],
)

if __name__ == "__main__":
    demo.launch(share=False)
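
# To run locally (a sketch; assumes this file is saved as app.py, the usual
# Space entry point, and that the model is reachable via the Hugging Face
# Inference API):
#   pip install gradio huggingface_hub
#   python app.py
# Gradio prints a local URL (http://127.0.0.1:7860 by default); passing
# share=True to demo.launch() also creates a temporary public link.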