Spaces: Running on Zero

Fix compatibility for ZeroGPU

profanity_detector.py  CHANGED  (+27 -16)
@@ -16,13 +16,6 @@ from html import escape
 import traceback
 import spaces  # Required for Hugging Face ZeroGPU compatibility
 
-# ZeroGPU COMPATIBILITY NOTES:
-# The @spaces.GPU decorators throughout this code enable compatibility with Hugging Face ZeroGPU.
-# - They request GPU resources only when needed and release them after function completion
-# - They have no effect when running in local environments or standard GPU Spaces
-# - Custom durations can be specified for functions requiring longer processing times
-# - For local development, you'll need: pip install huggingface_hub[spaces]
-
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -31,6 +24,26 @@ logging.basicConfig(
 )
 logger = logging.getLogger('profanity_detector')
 
+# ZeroGPU COMPATIBILITY NOTES:
+# The @spaces.GPU decorators throughout this code enable compatibility with Hugging Face ZeroGPU.
+# - They request GPU resources only when needed and release them after function completion
+# - They have no effect when running in local environments or standard GPU Spaces
+# - Custom durations can be specified for functions requiring longer processing times
+# - For local development, you'll need: pip install huggingface_hub[spaces]
+
+# Detect if we're running in a ZeroGPU environment
+IS_ZEROGPU = os.environ.get("SPACE_RUNTIME_STATELESS", "0") == "1"
+
+# Define device strategy that works in both environments
+if IS_ZEROGPU:
+    # In ZeroGPU: initialize on CPU, will use GPU only in @spaces.GPU functions
+    device = torch.device("cpu")
+    logger.info("ZeroGPU environment detected. Using CPU for initial loading.")
+else:
+    # For local runs: use CUDA if available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    logger.info(f"Local environment. Using device: {device}")
+
 # Define device at the top of the script (global scope)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logger.info(f"Using device: {device}")
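The notes added above mention that custom durations can be passed to the decorator. As a minimal sketch of what that looks like (the 120-second figure and the function name are illustrative assumptions, not part of this commit):

import spaces

# Hypothetical example: request a longer GPU window for a slow function.
# The duration value and function name are illustrative, not from this commit.
@spaces.GPU(duration=120)
def transcribe_long_audio(audio_path: str) -> str:
    ...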
@@ -66,12 +79,11 @@ def load_models():
     # Load model with memory optimization using half-precision
     profanity_model = AutoModelForSequenceClassification.from_pretrained(PROFANITY_MODEL)
 
-    #
-    if torch.cuda.is_available():
+    # Only move to device for local runs
+    if not IS_ZEROGPU and torch.cuda.is_available():
         profanity_model = profanity_model.to(device)
-        # Convert to half precision to save memory (if possible)
         try:
-            profanity_model = profanity_model.half()
+            profanity_model = profanity_model.half()
             logger.info("Successfully converted profanity model to half precision")
         except Exception as e:
             logger.warning(f"Could not convert to half precision: {str(e)}")
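The try/except around .half() guards against checkpoints whose weights or custom layers cannot run in fp16. An alternative sketch, assuming a model that tolerates half precision, is to request the dtype at load time via transformers' torch_dtype argument instead of converting afterwards:

import torch
from transformers import AutoModelForSequenceClassification

# Sketch: load weights directly in fp16 rather than calling .half() after the fact.
# PROFANITY_MODEL is the module-level checkpoint name already used in load_models().
profanity_model = AutoModelForSequenceClassification.from_pretrained(
    PROFANITY_MODEL,
    torch_dtype=torch.float16,
)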
@@ -84,7 +96,7 @@ def load_models():
     t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL)
 
     # Move to GPU if available and optimize with half-precision where possible
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         t5_model = t5_model.to(device)
         # Convert to half precision to save memory (if possible)
         try:
@@ -95,7 +107,7 @@ def load_models():
 
     logger.info("Loading Whisper speech-to-text model...")
     whisper_model = whisper.load_model("large")
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         whisper_model = whisper_model.to(device)
 
     logger.info("Loading Text-to-Speech model...")
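The same idea can be pushed into the Whisper load itself: openai-whisper's load_model accepts a device argument, so a sketch following this commit's IS_ZEROGPU convention might read:

import torch
import whisper

# Sketch: choose the load device up front instead of moving the model afterwards.
# IS_ZEROGPU is the flag this commit defines near the top of the file.
load_device = "cpu" if IS_ZEROGPU or not torch.cuda.is_available() else "cuda"
whisper_model = whisper.load_model("large", device=load_device)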
@@ -106,13 +118,13 @@ def load_models():
     vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 
     # Move models to appropriate device
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         tts_model = tts_model.to(device)
         vocoder = vocoder.to(device)
 
     # Speaker embeddings for TTS
     speaker_embeddings = torch.zeros((1, 512))
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         speaker_embeddings = speaker_embeddings.to(device)
 
     models_loaded = True
@@ -127,7 +139,6 @@ def load_models():
 # ZeroGPU decorator: Requests GPU resources when function is called and releases them when completed.
 # This enables efficient GPU sharing in Hugging Face Spaces while having no effect in local environments.
 @spaces.GPU
-@spaces.GPU
 def detect_profanity(text: str, threshold: float = 0.5):
     """
     Detect profanity in text with adjustable threshold