from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
from typing import List, Tuple
import torch

# Load model and tokenizer
model_name = "AuriLab/gpt-bi-instruct-cesar"
tokenizer_name = "AuriLab/gpt-bi"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# If the tokenizer has no pad token (common for GPT-style models), reuse EOS
# so that padding and pad_token_id during generation are well defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
def format_messages(history: List[Tuple[str, str]], system_message: str, user_message: str) -> str:
    # Flatten the system message and conversation history into a single prompt string
    formatted_prompt = system_message + "\n\n"
    for user, assistant in history:
        if user:
            formatted_prompt += f"User: {user}\n"
        if assistant:
            formatted_prompt += f"Assistant: {assistant}\n"
    formatted_prompt += f"User: {user_message}\nAssistant:"
    return formatted_prompt
def respond(message: str, history: List[Tuple[str, str]]) -> str:
    system_message = """You are a helpful assistant. Follow these rules:
1. Provide diverse and varied responses
2. Avoid repeating the same words or phrases
3. Use synonyms and alternative expressions
4. Be concise and direct"""
    prompt = format_messages(history, system_message, message)
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)

    # Generate a response; pass the attention mask along with the input ids
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.85,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the text after the final "Assistant:" marker
    response = response.split("Assistant:")[-1].strip()
    return response
# Create the Gradio chat interface with a custom title
demo = gr.ChatInterface(
    fn=respond,
    title="Demo GPT-BI instruct",
)

if __name__ == "__main__":
    demo.launch()
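
As a quick sanity check of the generation pipeline without starting the web UI, the handler can be called directly, the same way gr.ChatInterface invokes it (one user message plus the running history). The sample message below is purely illustrative:

# Illustrative only: call the chat handler with an empty history,
# mirroring how gr.ChatInterface calls it for the first user turn.
sample_reply = respond("Kaixo! Zer moduz?", [])
print(sample_reply)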