Hardik5456 committed on
Commit
cf25aa7
·
verified ·
1 Parent(s): 87adfb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -53
app.py CHANGED
@@ -2,55 +2,38 @@ import os
2
  import threading
3
  import asyncio
4
  import discord
5
- import torch
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from dotenv import load_dotenv
 
8
 
9
- # Load environment variables (from Hugging Face Secrets and .env if available)
10
  load_dotenv()
11
  DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
12
- HF_TOKEN = os.getenv("HF_TOKEN") # Optional: only needed if your model repo is private
13
 
14
  if not DISCORD_TOKEN:
15
  raise ValueError("Discord bot token is missing. Set DISCORD_TOKEN in the environment variables.")
16
 
17
- # Specify the model repository name.
18
- # For DeepScaleR-1.5B-Preview, we use the official repository:
19
- MODEL_NAME = "agentica-org/DeepScaleR-1.5B-Preview"
20
 
21
- # Load the tokenizer and model.
22
- if HF_TOKEN:
23
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
24
- model = AutoModelForCausalLM.from_pretrained(
25
- MODEL_NAME, token=HF_TOKEN, torch_dtype=torch.float16, device_map="auto"
26
- )
27
- else:
28
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
29
- model = AutoModelForCausalLM.from_pretrained(
30
- MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
31
- )
32
 
33
- # Define a function to generate AI responses.
34
  def generate_response(prompt):
35
- device = "cuda" if torch.cuda.is_available() else "cpu"
36
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)
37
- outputs = model.generate(
38
- **inputs,
39
- max_new_tokens=200,
40
- do_sample=True,
41
- top_p=0.9,
42
- temperature=0.7
43
- )
44
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
45
- # Replace any instance of the internal model name with the bot's identity.
46
  response = response.replace("DeepScaleR", "Shiv Yantra AI")
47
  return response
48
 
49
- # --------------------------
50
  # Discord Bot Setup
51
- # --------------------------
52
  intents = discord.Intents.default()
53
- intents.message_content = True # Required to read message contents
54
  client = discord.Client(intents=intents)
55
 
56
  @client.event
@@ -59,14 +42,14 @@ async def on_ready():
59
 
60
  @client.event
61
  async def on_message(message):
62
- # Skip messages from the bot itself.
63
  if message.author == client.user:
64
  return
65
 
66
  user_input = message.content.strip()
67
  if user_input:
68
  try:
69
- # Run the synchronous generate_response function in a separate thread.
70
  ai_response = await asyncio.to_thread(generate_response, user_input)
71
  except Exception as e:
72
  print(f"Error during generation: {e}")
@@ -76,25 +59,11 @@ async def on_message(message):
76
  def run_discord_bot():
77
  client.run(DISCORD_TOKEN)
78
 
79
- # --------------------------
80
- # (Optional) Gradio Interface Setup
81
- # --------------------------
82
- # If you want a web UI (you can disable this if not needed)
83
- import gradio as gr
84
- def gradio_api(input_text):
85
- return generate_response(input_text)
86
- iface = gr.Interface(fn=gradio_api, inputs="text", outputs="text", title="Shiv Yantra AI")
87
-
88
- def run_gradio():
89
- iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
90
-
91
- # --------------------------
92
- # Start Services Concurrently
93
- # --------------------------
94
  if __name__ == "__main__":
95
- # Optionally, start the Gradio interface in a daemon thread.
96
- threading.Thread(target=run_gradio, daemon=True).start()
97
- # Start the Discord bot in a separate thread.
98
  threading.Thread(target=run_discord_bot, daemon=True).start()
99
 
100
  # Keep the main thread alive.
 
2
  import threading
3
  import asyncio
4
  import discord
 
 
5
  from dotenv import load_dotenv
6
+ from llama_cpp import Llama # Library for GGUF models
7
 
8
+ # Load environment variables (set these via Hugging Face Secrets)
9
  load_dotenv()
10
  DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
 
11
 
12
  if not DISCORD_TOKEN:
13
  raise ValueError("Discord bot token is missing. Set DISCORD_TOKEN in the environment variables.")
14
 
15
+ # Set the local path to your quantized model file.
16
+ # Ensure that this file (e.g. DeepScaleR-1.5B-Preview-Q8_0.gguf) is uploaded to your repository.
17
+ MODEL_PATH = "./DeepScaleR-1.5B-Preview-Q8_0.gguf"
18
 
19
+ # Initialize the model with appropriate settings.
20
+ # Adjust n_threads and other parameters as needed.
21
+ llm = Llama(model_path=MODEL_PATH, n_threads=4)
 
 
 
 
 
 
 
 
22
 
 
23
  def generate_response(prompt):
24
+ # Generate text using llama-cpp's Llama instance.
25
+ # Adjust parameters (max_tokens, temperature, top_p) for speed/quality tradeoffs.
26
+ output = llm(prompt=prompt, max_tokens=200, temperature=0.7, top_p=0.9, echo=False)
27
+ response = output["text"]
28
+ # Optionally enforce your bot identity:
 
 
 
 
 
 
29
  response = response.replace("DeepScaleR", "Shiv Yantra AI")
30
  return response
31
 
32
+ # ----------------------------
33
  # Discord Bot Setup
34
+ # ----------------------------
35
  intents = discord.Intents.default()
36
+ intents.message_content = True # Enable reading message content
37
  client = discord.Client(intents=intents)
38
 
39
  @client.event
 
42
 
43
  @client.event
44
  async def on_message(message):
45
+ # Ignore messages from the bot itself.
46
  if message.author == client.user:
47
  return
48
 
49
  user_input = message.content.strip()
50
  if user_input:
51
  try:
52
+ # Run the generate_response function in a separate thread so as not to block Discord's event loop.
53
  ai_response = await asyncio.to_thread(generate_response, user_input)
54
  except Exception as e:
55
  print(f"Error during generation: {e}")
 
59
  def run_discord_bot():
60
  client.run(DISCORD_TOKEN)
61
 
62
+ # ----------------------------
63
+ # Start the Discord Bot
64
+ # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
65
  if __name__ == "__main__":
66
+ # Start the Discord bot in a separate daemon thread.
 
 
67
  threading.Thread(target=run_discord_bot, daemon=True).start()
68
 
69
  # Keep the main thread alive.