import asyncio
import os
import threading

import discord
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the DeepScaleR model and tokenizer.
MODEL_NAME = "agentica-org/DeepScaleR-1.5B-Preview"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Request half precision only when CUDA is available. Without a GPU the
# weights are expected to stay on the CPU, where float16 kernels are missing
# or very slow in many PyTorch builds, so float32 is used there instead.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# Define the function to generate responses
def generate_response(prompt):
    """Generate a sampled continuation of ``prompt`` with the loaded model.

    Args:
        prompt: Text to feed to the model. ``None``, empty, or
            whitespace-only prompts are not sent to the model.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not echoed back), or ``""`` when there is nothing to respond to.
    """
    # An empty prompt can tokenize to zero tokens, which generate() cannot
    # continue from; answer with an empty string instead of failing.
    if not prompt or not prompt.strip():
        return ""

    # Put the inputs on whatever device the model was placed on rather than
    # assuming CUDA, since the model is loaded with device_map="auto".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Passing the whole encoding forwards the attention mask as well.
    # max_new_tokens bounds only the generated part; max_length would also
    # count the prompt and leave no room to answer long prompts.
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        top_p=0.95,
        top_k=60,
    )

    # For a causal LM the output sequence starts with the prompt tokens;
    # drop them so callers receive just the reply.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

# Set up Gradio interface
def gradio_interface(input_text):
    """Return the model's reply for ``input_text``.

    Thin adapter for the Gradio UI: the text is forwarded unchanged to
    ``generate_response`` and its result is returned as-is.
    """
    reply = generate_response(input_text)
    return reply

def _chat_turn(message, history):
    """Append one user/bot exchange to the chat history.

    Args:
        message: Text currently in the input box.
        history: Current value of the chatbot component (may be ``None``
            before the first exchange).

    Returns:
        A new history list with the ``(message, reply)`` pair appended, or
        the unchanged history when ``message`` is empty or whitespace-only.
    """
    updated = list(history or [])
    if not message or not message.strip():
        return updated
    # gr.Chatbot displays a conversation history, not a bare string, so the
    # reply is returned as part of the history.
    # NOTE(review): this uses the (user, bot) pair format accepted by
    # gr.Chatbot() created without a `type` argument; newer Gradio releases
    # favour the "messages" dict format -- confirm against the installed version.
    updated.append((message, gradio_interface(message)))
    return updated


with gr.Blocks() as demo:
    gr.Markdown("## DeepScaleR Chatbot")
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(show_label=False, placeholder="Enter your message")
        with gr.Column():
            submit_btn = gr.Button("Send")
    # Feed the existing history in alongside the new message so each click
    # extends the conversation instead of replacing it.
    submit_btn.click(_chat_turn, inputs=[user_input, chatbot], outputs=chatbot)

# Discord bot setup
# Read the bot token from the DISCORD_TOKEN environment variable so the secret
# does not have to be committed to source; the original placeholder is kept as
# the fallback when the variable is unset.
DISCORD_TOKEN = os.environ.get("DISCORD_TOKEN", "YOUR_DISCORD_BOT_TOKEN")

# Start from the library's default intents and additionally enable
# message_content; the on_message handler below reads message.content.
intents = discord.Intents.default()
intents.message_content = True
# Single shared client; the event handlers below are registered on it.
client = discord.Client(intents=intents)

@client.event
async def on_ready():
    """Print which user the bot is logged in as.

    Registered on ``client`` as its ``on_ready`` event handler.
    """
    login_notice = f'Logged in as {client.user}'
    print(login_notice)

@client.event
async def on_message(message):
    """Reply to an incoming Discord message with a model-generated response.

    Messages authored by the bot itself are ignored, as are messages with no
    text content. The reply is sent to the channel the message came from,
    split into several messages when it exceeds Discord's length limit.

    Args:
        message: The received message object passed in by the client.
    """
    if message.author == client.user:
        return

    prompt = message.content
    # Nothing to answer when the message carries no text.
    if not prompt or not prompt.strip():
        return

    # generate_response is a blocking call; run it in the default executor
    # so the client's event loop stays responsive while the model generates.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, generate_response, prompt)

    # Sending an empty message is rejected by Discord, so skip it.
    if not response or not response.strip():
        return

    # Discord caps a message at 2000 characters; send longer replies in parts.
    limit = 2000
    for start in range(0, len(response), limit):
        part = response[start:start + limit]
        if part.strip():
            await message.channel.send(part)

# Run the Gradio app and Discord bot concurrently
def start_gradio() -> None:
    """Launch the Gradio ``demo`` app; used as a thread target in the entry point."""
    demo.launch()

def start_discord_bot():
    """Run the Discord client with ``DISCORD_TOKEN`` on a fresh event loop.

    ``asyncio.run`` drives ``client.start`` until that coroutine finishes.
    """
    bot_session = client.start(DISCORD_TOKEN)
    asyncio.run(bot_session)

if __name__ == "__main__":
    # Start each service in its own thread: Gradio first, then the Discord bot.
    for service_entry in (start_gradio, start_discord_bot):
        worker = threading.Thread(target=service_entry)
        worker.start()