nightey3s committed
Commit ee2109f · unverified · 1 Parent(s): bdef09a

Fix compatability for ZeroGPU

Files changed (1)
  1. profanity_detector.py +27 -16
profanity_detector.py CHANGED

@@ -16,13 +16,6 @@ from html import escape
 import traceback
 import spaces  # Required for Hugging Face ZeroGPU compatibility
 
-# ZeroGPU COMPATIBILITY NOTES:
-# The @spaces.GPU decorators throughout this code enable compatibility with Hugging Face ZeroGPU.
-# - They request GPU resources only when needed and release them after function completion
-# - They have no effect when running in local environments or standard GPU Spaces
-# - Custom durations can be specified for functions requiring longer processing times
-# - For local development, you'll need: pip install huggingface_hub[spaces]
-
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -31,6 +24,26 @@ logging.basicConfig(
 )
 logger = logging.getLogger('profanity_detector')
 
+# ZeroGPU COMPATIBILITY NOTES:
+# The @spaces.GPU decorators throughout this code enable compatibility with Hugging Face ZeroGPU.
+# - They request GPU resources only when needed and release them after function completion
+# - They have no effect when running in local environments or standard GPU Spaces
+# - Custom durations can be specified for functions requiring longer processing times
+# - For local development, you'll need: pip install huggingface_hub[spaces]
+
+# Detect if we're running in a ZeroGPU environment
+IS_ZEROGPU = os.environ.get("SPACE_RUNTIME_STATELESS", "0") == "1"
+
+# Define device strategy that works in both environments
+if IS_ZEROGPU:
+    # In ZeroGPU: initialize on CPU, will use GPU only in @spaces.GPU functions
+    device = torch.device("cpu")
+    logger.info("ZeroGPU environment detected. Using CPU for initial loading.")
+else:
+    # For local runs: use CUDA if available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    logger.info(f"Local environment. Using device: {device}")
+
 # Define device at the top of the script (global scope)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logger.info(f"Using device: {device}")
@@ -66,12 +79,11 @@ def load_models():
     # Load model with memory optimization using half-precision
     profanity_model = AutoModelForSequenceClassification.from_pretrained(PROFANITY_MODEL)
 
-    # Move to GPU if available and optimize with half-precision where possible
-    if torch.cuda.is_available():
+    # Only move to device for local runs
+    if not IS_ZEROGPU and torch.cuda.is_available():
         profanity_model = profanity_model.to(device)
-        # Convert to half precision to save memory (if possible)
         try:
-            profanity_model = profanity_model.half()  # Convert to FP16
+            profanity_model = profanity_model.half()
             logger.info("Successfully converted profanity model to half precision")
         except Exception as e:
             logger.warning(f"Could not convert to half precision: {str(e)}")
@@ -84,7 +96,7 @@ def load_models():
     t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL)
 
     # Move to GPU if available and optimize with half-precision where possible
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         t5_model = t5_model.to(device)
         # Convert to half precision to save memory (if possible)
         try:
@@ -95,7 +107,7 @@ def load_models():
 
     logger.info("Loading Whisper speech-to-text model...")
     whisper_model = whisper.load_model("large")
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         whisper_model = whisper_model.to(device)
 
     logger.info("Loading Text-to-Speech model...")
@@ -106,13 +118,13 @@ def load_models():
     vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 
     # Move models to appropriate device
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         tts_model = tts_model.to(device)
         vocoder = vocoder.to(device)
 
     # Speaker embeddings for TTS
     speaker_embeddings = torch.zeros((1, 512))
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         speaker_embeddings = speaker_embeddings.to(device)
 
     models_loaded = True
@@ -127,7 +139,6 @@ def load_models():
 # ZeroGPU decorator: Requests GPU resources when function is called and releases them when completed.
 # This enables efficient GPU sharing in Hugging Face Spaces while having no effect in local environments.
 @spaces.GPU
-@spaces.GPU
 def detect_profanity(text: str, threshold: float = 0.5):
     """
     Detect profanity in text with adjustable threshold
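The compatibility notes in the diff mention that custom durations can be specified for functions needing longer processing times. For reference, a minimal sketch of that variant of the decorator; duration is the decorator's documented keyword argument, but the function name and body here are illustrative placeholders, not code from this commit:

import spaces
import torch

@spaces.GPU(duration=120)  # hold the GPU for up to 120 seconds per call
def transcribe_batch(paths: list[str]) -> list[str]:
    # Placeholder body: on ZeroGPU, CUDA only exists inside this call,
    # so the device check must happen here rather than at import time.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return [f"{p} (processed on {run_device})" for p in paths]

Outside Hugging Face Spaces the decorator is a no-op, so the same function runs unchanged in local development.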
 
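All of the deletions in this commit follow one pattern: unconditional `if torch.cuda.is_available():` moves at load time are gated with `not IS_ZEROGPU`, because on ZeroGPU no GPU is attached while the module is importing; one only appears inside @spaces.GPU functions. A hedged sketch of the inference half of that pattern, using a hypothetical classifier (the body of detect_profanity itself is not shown in this diff):

import spaces
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_ID = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder model

# Loaded on CPU at import time, matching the IS_ZEROGPU strategy above.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)

@spaces.GPU  # on ZeroGPU, a GPU is attached only for the duration of each call
def classify(text: str) -> list[float]:
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    m = model.to(run_device)  # safe here: CUDA exists inside the decorated call
    inputs = tokenizer(text, return_tensors="pt").to(run_device)
    with torch.no_grad():
        logits = m(**inputs).logits
    return torch.softmax(logits, dim=-1)[0].tolist()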