import os
import threading
import asyncio
import time
import discord
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Load environment variables. Hugging Face Secrets are already present in the
# environment; load_dotenv() additionally picks up a local .env file if one exists.
load_dotenv()
DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")

if not DISCORD_TOKEN:
    raise ValueError("Discord bot token is missing. Set DISCORD_TOKEN in the environment variables.")
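
# Illustrative only (not part of the original script): for local runs, a .env
# file next to this script is picked up by load_dotenv(), e.g.
#   DISCORD_TOKEN=your-bot-token-here
# On a Hugging Face Space, the token is instead set under the Space's secrets.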

# Model details: using the quantized Q8 version from bartowski's repository.
MODEL_REPO = "bartowski/agentica-org_DeepScaleR-1.5B-Preview-GGUF"
MODEL_FILENAME = "agentica-org_DeepScaleR-1.5B-Preview-Q8_0.gguf"
MODEL_PATH = f"./{MODEL_FILENAME}"

# Download the model file if it does not exist locally.
if not os.path.exists(MODEL_PATH):
    print("Model file not found locally. Downloading now...")
    MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
    print(f"Model downloaded to {MODEL_PATH}")
else:
    print(f"Model found locally at {MODEL_PATH}")

# Initialize the model using llama-cpp-python.
# Adjust n_threads based on available CPU cores.
print("Initializing model...")
llm = Llama(model_path=MODEL_PATH, n_threads=4)
print("Model initialization complete.")

# Define a function to generate a response.
def generate_response(prompt):
    try:
        # Generate a completion with the quantized model.
        output = llm(prompt=prompt, max_tokens=200, temperature=0.7, top_p=0.9, echo=False)
        # llama-cpp-python returns an OpenAI-style dict; the text is under "choices".
        response = output["choices"][0]["text"]
        # Present the bot under its own identity instead of the base model's name.
        response = response.replace("DeepScaleR", "Shiv Yantra AI")
        return response
    except Exception as e:
        print(f"Error in generate_response: {e}")
        return "Error processing your request."

# ----------------------------
# Discord Bot Setup
# ----------------------------
intents = discord.Intents.default()
intents.message_content = True  # Ensure we can read message contents
client = discord.Client(intents=intents)

@client.event
async def on_ready():
    print(f"Discord bot logged in as {client.user}")

@client.event
async def on_message(message):
    # Skip bot's own messages.
    if message.author == client.user:
        return
    user_input = message.content.strip()
    if user_input:
        try:
            # Run generate_response in a worker thread so the model call does
            # not block the Discord event loop.
            ai_response = await asyncio.to_thread(generate_response, user_input)
        except Exception as e:
            print(f"Error during generation in on_message: {e}")
            ai_response = "Error processing your request."
        await message.channel.send(ai_response)
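
# Note (beyond the original code): Discord rejects messages longer than 2,000
# characters, so with a larger max_tokens the send above may need to truncate
# or chunk the reply, e.g. await message.channel.send(ai_response[:2000]).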

def run_discord_bot():
    client.run(DISCORD_TOKEN)

# ----------------------------
# Keep the Process Alive
# ----------------------------
if __name__ == "__main__":
    print("Starting Discord bot...")
    # client.run() blocks, so run it in a daemon thread and keep the main
    # thread alive (a common pattern on hosts such as Hugging Face Spaces).
    threading.Thread(target=run_discord_bot, daemon=True).start()
    print("Discord bot started. Keeping main thread alive.")
    while True:
        time.sleep(60)