Spaces:

nightey3s
/

profanity-detection

Running on Zero

App Files Files Community

nightey3s committed on Mar 16

Commit

984bc80

unverified ·

1 Parent(s): ee2109f

Fix compatibility for ZeroGPU

Browse files

Files changed (1) hide show

profanity_detector.py +60 -34

profanity_detector.py CHANGED Viewed

@@ -76,53 +76,79 @@ def load_models():
         PROFANITY_MODEL = "parsawar/profanity_model_3.1"
         profanity_tokenizer = AutoTokenizer.from_pretrained(PROFANITY_MODEL)
-        # Load model with memory optimization using half-precision
-        profanity_model = AutoModelForSequenceClassification.from_pretrained(PROFANITY_MODEL)
-        # Only move to device for local runs
-        if not IS_ZEROGPU and torch.cuda.is_available():
-            profanity_model = profanity_model.to(device)
-            try:
-                profanity_model = profanity_model.half()
-                logger.info("Successfully converted profanity model to half precision")
-            except Exception as e:
-                logger.warning(f"Could not convert to half precision: {str(e)}")
         logger.info("Loading detoxification model...")
         T5_MODEL = "s-nlp/t5-paranmt-detox"
         t5_tokenizer = AutoTokenizer.from_pretrained(T5_MODEL)
-        # Load model with memory optimization
-        t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL)
-        # Move to GPU if available and optimize with half-precision where possible
-        if not IS_ZEROGPU and torch.cuda.is_available():
-            t5_model = t5_model.to(device)
-            # Convert to half precision to save memory (if possible)
-            try:
-                t5_model = t5_model.half()  # Convert to FP16
-                logger.info("Successfully converted T5 model to half precision")
-            except Exception as e:
-                logger.warning(f"Could not convert to half precision: {str(e)}")
         logger.info("Loading Whisper speech-to-text model...")
-        whisper_model = whisper.load_model("large")
-        if not IS_ZEROGPU and torch.cuda.is_available():
-            whisper_model = whisper_model.to(device)
         logger.info("Loading Text-to-Speech model...")
         TTS_MODEL = "microsoft/speecht5_tts"
         tts_processor = SpeechT5Processor.from_pretrained(TTS_MODEL)
-        # Load TTS models without automatic device mapping
-        tts_model = SpeechT5ForTextToSpeech.from_pretrained(TTS_MODEL)
-        vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
-        # Move models to appropriate device
-        if not IS_ZEROGPU and torch.cuda.is_available():
-            tts_model = tts_model.to(device)
-            vocoder = vocoder.to(device)
-        # Speaker embeddings for TTS
         speaker_embeddings = torch.zeros((1, 512))
         if not IS_ZEROGPU and torch.cuda.is_available():
             speaker_embeddings = speaker_embeddings.to(device)

         PROFANITY_MODEL = "parsawar/profanity_model_3.1"
         profanity_tokenizer = AutoTokenizer.from_pretrained(PROFANITY_MODEL)
+        # Load model without moving to CUDA directly
+        if IS_ZEROGPU:
+            logger.info("ZeroGPU mode: Loading model without CUDA initialization")
+            # For ZeroGPU, use device_map='auto' or just stay on CPU
+            profanity_model = AutoModelForSequenceClassification.from_pretrained(
+                PROFANITY_MODEL,
+                device_map=None,  # Explicitly stay on CPU
+                low_cpu_mem_usage=True
+            )
+        else:
+            # For local runs, normal loading with CUDA if available
+            profanity_model = AutoModelForSequenceClassification.from_pretrained(PROFANITY_MODEL)
+            if torch.cuda.is_available():
+                profanity_model = profanity_model.to(device)
+                try:
+                    profanity_model = profanity_model.half()
+                    logger.info("Successfully converted profanity model to half precision")
+                except Exception as e:
+                    logger.warning(f"Could not convert to half precision: {str(e)}")
+        # Apply similar changes to all other model loading...
         logger.info("Loading detoxification model...")
         T5_MODEL = "s-nlp/t5-paranmt-detox"
         t5_tokenizer = AutoTokenizer.from_pretrained(T5_MODEL)
+        if IS_ZEROGPU:
+            t5_model = AutoModelForSeq2SeqLM.from_pretrained(
+                T5_MODEL,
+                device_map=None,
+                low_cpu_mem_usage=True
+            )
+        else:
+            t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL)
+            if torch.cuda.is_available():
+                t5_model = t5_model.to(device)
+                try:
+                    t5_model = t5_model.half()
+                    logger.info("Successfully converted T5 model to half precision")
+                except Exception as e:
+                    logger.warning(f"Could not convert to half precision: {str(e)}")
         logger.info("Loading Whisper speech-to-text model...")
+        if IS_ZEROGPU:
+            # For ZeroGPU, stay on CPU in the main process
+            whisper_model = whisper.load_model("medium", device="cpu")
+        else:
+            whisper_model = whisper.load_model("large")
+            if torch.cuda.is_available():
+                whisper_model = whisper_model.to(device)
         logger.info("Loading Text-to-Speech model...")
         TTS_MODEL = "microsoft/speecht5_tts"
         tts_processor = SpeechT5Processor.from_pretrained(TTS_MODEL)
+        if IS_ZEROGPU:
+            tts_model = SpeechT5ForTextToSpeech.from_pretrained(
+                TTS_MODEL,
+                device_map=None,
+                low_cpu_mem_usage=True
+            )
+            vocoder = SpeechT5HifiGan.from_pretrained(
+                "microsoft/speecht5_hifigan",
+                device_map=None,
+                low_cpu_mem_usage=True
+            )
+        else:
+            tts_model = SpeechT5ForTextToSpeech.from_pretrained(TTS_MODEL)
+            vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+            if torch.cuda.is_available():
+                tts_model = tts_model.to(device)
+                vocoder = vocoder.to(device)
+        # Speaker embeddings - always on CPU for ZeroGPU
         speaker_embeddings = torch.zeros((1, 512))
         if not IS_ZEROGPU and torch.cuda.is_available():
             speaker_embeddings = speaker_embeddings.to(device)