import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Leo022/Gemma_QA_For_Telegram_Bot")
model = AutoModelForCausalLM.from_pretrained("Leo022/Gemma_QA_For_Telegram_Bot")
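
# Optional addition (not in the original app): run on GPU when one is
# available. torch is importable here because transformers depends on it;
# the inputs are moved to model.device before generation below.
import torch
model.to("cuda" if torch.cuda.is_available() else "cpu")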

def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """
    Function to generate response from the model.
    Args:
        message (str): The user's input message.
        history (list): The conversation history.
        system_message (str): The system message.
        max_tokens (int): Maximum number of tokens for output.
        temperature (float): Sampling temperature.
        top_p (float): Nucleus sampling parameter.
    Returns:
        str: The model's response.
    """
    # Initialize messages list with the system message
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to messages
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # Build the prompt with the tokenizer's chat template so the role
    # structure is preserved; joining raw contents with spaces would discard
    # the chat formatting the model was fine-tuned on. (If this checkpoint's
    # template rejects a "system" role, as some Gemma templates do, fold
    # system_message into the first user turn instead.)
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Generate a continuation; max_new_tokens bounds only the generated
    # tokens, whereas max_length would also count the prompt and could
    # silently truncate long conversations
    output = model.generate(
        input_ids,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    # Decode only the newly generated tokens; output[0] also contains the
    # echoed prompt, which should not appear in the reply
    response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

    return response
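
# Note: gr.ChatInterface also accepts generator functions, so respond() could
# yield partial strings (e.g. fed by transformers.TextIteratorStreamer with
# generate() running in a background thread) to stream tokens to the UI; it
# is kept non-streaming here for simplicity.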

# Define the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    # Launch the Gradio app
    demo.launch()
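    # By default Gradio serves the app locally (typically at
    # http://127.0.0.1:7860) and prints the URL; passing share=True to
    # launch() creates a temporary public link.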