import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the HelpingAI2.5-2B model
model = AutoModelForCausalLM.from_pretrained("OEvortex/HelpingAI2.5-2B")
tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI2.5-2B")

# Move model to GPU (if available) or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define the function for generating responses
def generate_response(user_input):
    # Define the chat input structure
    chat = [
        { "role": "system", "content": "You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style." },
        { "role": "user", "content": user_input }
    ]

    # Format the conversation with the tokenizer's chat template so the model sees
    # the prompt format it was trained on (assumes the tokenizer defines a chat
    # template, as HelpingAI models typically do)
    chat_input = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    # Tokenize the input
    inputs = tokenizer(chat_input, return_tensors="pt").to(device)

    # Generate text (pass the attention mask explicitly so padding is handled correctly)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=256,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )

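    # Keep only the newly generated tokens (everything after the prompt) and decode them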
    response = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)

# Create the Gradio interface; generation runs when the user submits the prompt
# rather than on every keystroke (live=True would trigger a full generation per keypress)
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
)

# Launch the Gradio app
iface.launch()