import os
import threading
import asyncio
import time
import discord
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Load environment variables from Hugging Face Secrets (.env file is optional)
load_dotenv()
DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
if not DISCORD_TOKEN:
    raise ValueError("Discord bot token is missing. Set DISCORD_TOKEN in the environment variables.")
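# On Hugging Face Spaces, DISCORD_TOKEN should be added as a Space secret;
# secrets are injected into the environment at runtime, so no .env file is needed there.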
# Model details: using the quantized Q8 version from bartowski's repository.
MODEL_REPO = "bartowski/agentica-org_DeepScaleR-1.5B-Preview-GGUF"
MODEL_FILENAME = "agentica-org_DeepScaleR-1.5B-Preview-Q8_0.gguf"
MODEL_PATH = f"./{MODEL_FILENAME}"
# Download the model file if it does not exist locally.
if not os.path.exists(MODEL_PATH):
    print("Model file not found locally. Downloading now...")
    MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
    print(f"Model downloaded to {MODEL_PATH}")
else:
    print(f"Model found locally at {MODEL_PATH}")
# Initialize the model using llama-cpp-python.
# Adjust n_threads based on available CPU cores.
print("Initializing model...")
llm = Llama(model_path=MODEL_PATH, n_threads=4)
print("Model initialization complete.")
# Generate a response with the quantized model. llama.cpp contexts are not
# safe for concurrent calls, so a lock serializes access across the worker
# threads spawned by asyncio.to_thread (a precaution, not in the original).
llm_lock = threading.Lock()

def generate_response(prompt):
    try:
        with llm_lock:
            output = llm(prompt=prompt, max_tokens=200, temperature=0.7, top_p=0.9, echo=False)
        # llama-cpp-python returns an OpenAI-style completion dict; the text
        # lives under choices[0], not at the top level.
        response = output["choices"][0]["text"]
        # Enforce the bot's identity in any self-references.
        response = response.replace("DeepScaleR", "Shiv Yantra AI")
        return response
    except Exception as e:
        print(f"Error in generate_response: {e}")
        return "Error processing your request."
# ----------------------------
# Discord Bot Setup
# ----------------------------
intents = discord.Intents.default()
intents.message_content = True # Ensure we can read message contents
client = discord.Client(intents=intents)
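# Note: besides setting intents.message_content here, the Message Content
# intent must be enabled for the bot in the Discord Developer Portal,
# otherwise message.content arrives empty.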
@client.event
async def on_ready():
    print(f"Discord bot logged in as {client.user}")
@client.event
async def on_message(message):
    # Skip the bot's own messages.
    if message.author == client.user:
        return
    user_input = message.content.strip()
    if user_input:
        try:
            # Run generate_response in a separate thread to avoid blocking the event loop.
            ai_response = await asyncio.to_thread(generate_response, user_input)
        except Exception as e:
            print(f"Error during generation in on_message: {e}")
            ai_response = "Error processing your request."
        await message.channel.send(ai_response)
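# A minimal sketch of chunked sending for long replies (Discord rejects
# messages over 2,000 characters; send_chunked is a hypothetical helper,
# not wired into on_message above):
async def send_chunked(channel, text, limit=2000):
    # Send the text in order, one limit-sized slice at a time.
    for i in range(0, len(text), limit):
        await channel.send(text[i:i + limit])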
def run_discord_bot():
    client.run(DISCORD_TOKEN)
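# client.run() blocks and manages its own asyncio event loop, which is why it
# is started on a separate daemon thread below.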
# ----------------------------
# Start Services Concurrently
# ----------------------------
if __name__ == "__main__":
print("Starting Discord bot...")
threading.Thread(target=run_discord_bot, daemon=True).start()
print("Discord bot started. Keeping main thread alive.")
while True:
time.sleep(60) |