Spaces:

Hardik5456
/

Wan2.1playground

Runtime error

App Files Files Community

Hardik5456 committed on Mar 17

Commit

cf25aa7

verified ·

1 Parent(s): 87adfb1

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -53

app.py CHANGED Viewed

@@ -2,55 +2,38 @@ import os
 import threading
 import asyncio
 import discord
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 from dotenv import load_dotenv
-# Load environment variables (from Hugging Face Secrets and .env if available)
 load_dotenv()
 DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
-HF_TOKEN = os.getenv("HF_TOKEN")  # Optional: only needed if your model repo is private
 if not DISCORD_TOKEN:
     raise ValueError("Discord bot token is missing. Set DISCORD_TOKEN in the environment variables.")
-# Specify the model repository name.
-# For DeepScaleR-1.5B-Preview, we use the official repository:
-MODEL_NAME = "agentica-org/DeepScaleR-1.5B-Preview"
-# Load the tokenizer and model.
-if HF_TOKEN:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_NAME, token=HF_TOKEN, torch_dtype=torch.float16, device_map="auto"
-    )
-else:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
-    )
-# Define a function to generate AI responses.
 def generate_response(prompt):
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=200,
-        do_sample=True,
-        top_p=0.9,
-        temperature=0.7
-    )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Replace any instance of the internal model name with the bot's identity.
     response = response.replace("DeepScaleR", "Shiv Yantra AI")
     return response
-# --------------------------
 # Discord Bot Setup
-# --------------------------
 intents = discord.Intents.default()
-intents.message_content = True  # Required to read message contents
 client = discord.Client(intents=intents)
 @client.event
@@ -59,14 +42,14 @@ async def on_ready():
 @client.event
 async def on_message(message):
-    # Skip messages from the bot itself.
     if message.author == client.user:
         return
     user_input = message.content.strip()
     if user_input:
         try:
-            # Run the synchronous generate_response function in a separate thread.
             ai_response = await asyncio.to_thread(generate_response, user_input)
         except Exception as e:
             print(f"Error during generation: {e}")
@@ -76,25 +59,11 @@ async def on_message(message):
 def run_discord_bot():
     client.run(DISCORD_TOKEN)
-# --------------------------
-# (Optional) Gradio Interface Setup
-# --------------------------
-# If you want a web UI (you can disable this if not needed)
-import gradio as gr
-def gradio_api(input_text):
-    return generate_response(input_text)
-iface = gr.Interface(fn=gradio_api, inputs="text", outputs="text", title="Shiv Yantra AI")
-def run_gradio():
-    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
-# --------------------------
-# Start Services Concurrently
-# --------------------------
 if __name__ == "__main__":
-    # Optionally, start the Gradio interface in a daemon thread.
-    threading.Thread(target=run_gradio, daemon=True).start()
-    # Start the Discord bot in a separate thread.
     threading.Thread(target=run_discord_bot, daemon=True).start()
     # Keep the main thread alive.

 import threading
 import asyncio
 import discord
 from dotenv import load_dotenv
+from llama_cpp import Llama  # Library for GGUF models
+# Load environment variables (set these via Hugging Face Secrets)
 load_dotenv()
 DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
 if not DISCORD_TOKEN:
     raise ValueError("Discord bot token is missing. Set DISCORD_TOKEN in the environment variables.")
+# Set the local path to your quantized model file.
+# Ensure that this file (e.g. DeepScaleR-1.5B-Preview-Q8_0.gguf) is uploaded to your repository.
+MODEL_PATH = "./DeepScaleR-1.5B-Preview-Q8_0.gguf"
+# Initialize the model with appropriate settings.
+# Adjust n_threads and other parameters as needed.
+llm = Llama(model_path=MODEL_PATH, n_threads=4)
 def generate_response(prompt):
     """Return the model's completion for ``prompt`` as a string.

     Calls the module-level ``llm`` (a llama-cpp ``Llama`` instance) with
     max_tokens=200, temperature=0.7 and top_p=0.9; ``echo=False`` is passed
     so the prompt is not repeated in the returned text. Every occurrence of
     "DeepScaleR" in the result is replaced with "Shiv Yantra AI" to enforce
     the bot's identity.
     """
     # Adjust max_tokens / temperature / top_p for speed/quality tradeoffs.
     output = llm(prompt=prompt, max_tokens=200, temperature=0.7, top_p=0.9, echo=False)
     # llama-cpp returns an OpenAI-style completion dict: the generated text
     # lives under choices[0]["text"]. There is no top-level "text" key, so
     # the previous output["text"] lookup raised KeyError on every call.
     response = output["choices"][0]["text"]
     response = response.replace("DeepScaleR", "Shiv Yantra AI")
     return response
+# ----------------------------
 # Discord Bot Setup
+# ----------------------------
 intents = discord.Intents.default()
+intents.message_content = True  # Enable reading message content
 client = discord.Client(intents=intents)
 @client.event
 @client.event
 async def on_message(message):
+    # Ignore messages from the bot itself.
     if message.author == client.user:
         return
     user_input = message.content.strip()
     if user_input:
         try:
+            # Run the generate_response function in a separate thread so as not to block Discord's event loop.
             ai_response = await asyncio.to_thread(generate_response, user_input)
         except Exception as e:
             print(f"Error during generation: {e}")
 def run_discord_bot():
     """Run the Discord client with the token read from the DISCORD_TOKEN environment variable."""
     client.run(DISCORD_TOKEN)
+# ----------------------------
+# Start the Discord Bot
+# ----------------------------
 if __name__ == "__main__":
+    # Start the Discord bot in a separate daemon thread.
     threading.Thread(target=run_discord_bot, daemon=True).start()
     # Keep the main thread alive.