import os
import time
import threading
import asyncio
import discord
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from dotenv import load_dotenv
# Load environment variables (from Hugging Face Secrets and .env if available)
load_dotenv()
DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
HF_TOKEN = os.getenv("HF_TOKEN") # Optional: only needed if your model repo is private
if not DISCORD_TOKEN:
    raise ValueError("Discord bot token is missing. Set DISCORD_TOKEN in the environment variables.")
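
# For reference, a minimal .env file might look like the following (the values
# below are placeholders, not real credentials):
#
#   DISCORD_TOKEN=your-discord-bot-token
#   HF_TOKEN=hf_your_hugging_face_token   # optional, only for private repos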
# Specify the model repository name.
# For DeepScaleR-1.5B-Preview, we use the official repository:
MODEL_NAME = "agentica-org/DeepScaleR-1.5B-Preview"
# Load the tokenizer and model. Passing token=None is equivalent to anonymous
# access, so a single call handles both the public and private-repo cases.
# Note: device_map="auto" requires the accelerate package to be installed.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, token=HF_TOKEN, torch_dtype=torch.float16, device_map="auto"
)
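
# Quick sanity check, as a sketch (not executed at import time): it assumes
# the model fits on the detected device and confirms a single forward pass
# works before wiring up the bot.
#
#   sample = tokenizer("Hello", return_tensors="pt").to(model.device)
#   print(tokenizer.decode(model.generate(**sample, max_new_tokens=5)[0]))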
# Define a function to generate AI responses.
def generate_response(prompt):
    # Move inputs to wherever device_map="auto" placed the model.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,    # sample rather than greedy-decode
        top_p=0.9,         # nucleus sampling
        temperature=0.7,
    )
    # Decode only the newly generated tokens; outputs[0] also contains the
    # prompt, which would otherwise be echoed back to the user.
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(generated, skip_special_tokens=True)
    # Replace any instance of the internal model name with the bot's identity.
    response = response.replace("DeepScaleR", "Shiv Yantra AI")
    return response
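
# Example usage (output will vary from run to run because sampling is on):
#   print(generate_response("Introduce yourself."))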
# --------------------------
# Discord Bot Setup
# --------------------------
intents = discord.Intents.default()
intents.message_content = True # Required to read message contents
client = discord.Client(intents=intents)
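
# Note: message content is a privileged intent; it must also be enabled for
# this bot in the Discord Developer Portal, or message.content will be empty.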
@client.event
async def on_ready():
    print(f"Logged in as {client.user}")
@client.event
async def on_message(message):
    # Skip messages from the bot itself.
    if message.author == client.user:
        return
    user_input = message.content.strip()
    if user_input:
        try:
            # Run the synchronous generate_response function in a separate
            # thread so the event loop is not blocked during inference.
            ai_response = await asyncio.to_thread(generate_response, user_input)
        except Exception as e:
            print(f"Error during generation: {e}")
            ai_response = "Error processing your request."
        # Discord rejects messages longer than 2000 characters.
        await message.channel.send(ai_response[:2000])
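
# Note: asyncio.to_thread requires Python 3.9+. On older interpreters the
# equivalent is asyncio.get_running_loop().run_in_executor(None,
# generate_response, user_input).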
def run_discord_bot():
    client.run(DISCORD_TOKEN)
# --------------------------
# (Optional) Gradio Interface Setup
# --------------------------
# Provides a web UI alongside the Discord bot; remove this section if not needed.
import gradio as gr
def gradio_api(input_text):
    return generate_response(input_text)

iface = gr.Interface(fn=gradio_api, inputs="text", outputs="text", title="Shiv Yantra AI")

def run_gradio():
    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
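
# Note: iface.launch() blocks the thread it runs on, which is why it is
# started in a daemon thread below. Port 7860 is the default port Hugging
# Face Spaces expects a web app to listen on.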
# --------------------------
# Start Services Concurrently
# --------------------------
if __name__ == "__main__":
    # Optionally start the Gradio interface in a daemon thread.
    threading.Thread(target=run_gradio, daemon=True).start()
    # Start the Discord bot in a separate daemon thread.
    threading.Thread(target=run_discord_bot, daemon=True).start()
    # Keep the main thread alive; sleeping avoids the 100% CPU usage of a
    # bare busy-wait loop.
    while True:
        time.sleep(60)