prithivMLmods committed on
Commit
7f06ad0
·
verified ·
1 Parent(s): eebf65b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -9,6 +9,7 @@ import cv2
9
  from datetime import datetime, timedelta
10
  from threading import Thread
11
 
 
12
  import gradio as gr
13
  import spaces
14
  import numpy as np
@@ -88,9 +89,12 @@ SYSTEM_PROMPT = load_system_prompt(MODEL_ID, "SYSTEM_PROMPT.txt")
88
  # If you prefer a hardcoded system prompt, you can use:
89
  # SYSTEM_PROMPT = "You are a conversational agent that always answers straight to the point, and ends with an ASCII cat."
90
 
 
 
 
91
  # Initialize the Mistral LLM via vllm.
92
  # Note: Running this model on GPU may require very high VRAM.
93
- llm = LLM(model=MODEL_ID, tokenizer_mode="mistral")
94
 
95
  # -----------------------------------------------------------------------------
96
  # Main Generation Function
 
9
  from datetime import datetime, timedelta
10
  from threading import Thread
11
 
12
+ import torch
13
  import gradio as gr
14
  import spaces
15
  import numpy as np
 
89
  # If you prefer a hardcoded system prompt, you can use:
90
  # SYSTEM_PROMPT = "You are a conversational agent that always answers straight to the point, and ends with an ASCII cat."
91
 
92
+ # Set the device explicitly (vLLM requires an explicit device specification)
93
+ device = "cuda" if torch.cuda.is_available() else "cpu"
94
+
95
  # Initialize the Mistral LLM via vllm.
96
  # Note: Running this model on GPU may require very high VRAM.
97
+ llm = LLM(model=MODEL_ID, tokenizer_mode="mistral", device=device)
98
 
99
  # -----------------------------------------------------------------------------
100
  # Main Generation Function