import os
import threading
import asyncio
import time
import discord
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Load environment variables (from Hugging Face Secrets and .env if available)
load_dotenv()
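# Note: load_dotenv() does not override variables that are already set, so
# secrets injected by the host environment (e.g. Hugging Face) take precedence.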
DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")

if not DISCORD_TOKEN:
    raise ValueError("Discord bot token is missing. Set DISCORD_TOKEN in the environment variables.")

# Model details: Using the Q6_K_L variant from bartowski’s GGUF collection.
MODEL_REPO = "bartowski/agentica-org_DeepScaleR-1.5B-Preview-GGUF"
MODEL_FILENAME = "agentica-org_DeepScaleR-1.5B-Preview-Q6_K_L.gguf"
MODEL_PATH = f"./{MODEL_FILENAME}"

# Download the model file if it doesn't exist locally.
if not os.path.exists(MODEL_PATH):
    print("Model file not found locally. Downloading now...")
    MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
    print(f"Model downloaded to {MODEL_PATH}")
else:
    print(f"Model found locally at {MODEL_PATH}")

# Initialize the model using llama-cpp-python.
print("Initializing model...")
llm = Llama(model_path=MODEL_PATH, n_threads=4)
print("Model initialization complete.")

# Define a function to generate responses using the model.
def generate_response(prompt):
    try:
        # Cap max_tokens at 50 to keep generation fast.
        output = llm(prompt=prompt, max_tokens=50, temperature=0.7, top_p=0.9, echo=False)
        # llama-cpp-python returns an OpenAI-style completion dict; the generated
        # text lives under choices[0]["text"], not at the top level.
        response = output["choices"][0]["text"].strip()
        # Enforce bot identity: replace the internal model name with "Shiv Yantra AI".
        response = response.replace("DeepScaleR", "Shiv Yantra AI")
        return response
    except Exception as e:
        print(f"Error in generate_response: {e}")
        return "Error processing your request."

# ----------------------------
# Discord Bot Setup
# ----------------------------
intents = discord.Intents.default()
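# message_content is a privileged intent: it must also be enabled for the bot
# in the Discord Developer Portal, or messages will arrive with empty content.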
intents.message_content = True  # Required to read message content
client = discord.Client(intents=intents)

@client.event
async def on_ready():
    print(f"Discord bot logged in as {client.user}")

@client.event
async def on_message(message):
    if message.author == client.user:
        return  # Skip messages from the bot itself
    user_input = message.content.strip()
    if user_input:
        try:
            # Run generate_response in a separate thread to avoid blocking.
            ai_response = await asyncio.to_thread(generate_response, user_input)
        except Exception as e:
            print(f"Error during generation in on_message: {e}")
            ai_response = "Error processing your request."
        # Discord rejects messages longer than 2000 characters, so truncate.
        await message.channel.send(ai_response[:2000])

def run_discord_bot():
    client.run(DISCORD_TOKEN)

# ----------------------------
# Run the Bot in a Background Thread
# ----------------------------
if __name__ == "__main__":
    print("Starting Discord bot...")
    threading.Thread(target=run_discord_bot, daemon=True).start()
    print("Discord bot started. Keeping main thread alive.")
    # Use a sleep loop instead of busy-waiting.
    while True:
        time.sleep(60)