nightey3s committed
Commit ee2109f · unverified · 1 Parent(s): bdef09a

Fix compatability for ZeroGPU

Files changed (1)
  1. profanity_detector.py +27 -16
profanity_detector.py CHANGED

@@ -16,13 +16,6 @@ from html import escape
 import traceback
 import spaces  # Required for Hugging Face ZeroGPU compatibility
 
-# ZeroGPU COMPATIBILITY NOTES:
-# The @spaces.GPU decorators throughout this code enable compatibility with Hugging Face ZeroGPU.
-# - They request GPU resources only when needed and release them after function completion
-# - They have no effect when running in local environments or standard GPU Spaces
-# - Custom durations can be specified for functions requiring longer processing times
-# - For local development, you'll need: pip install huggingface_hub[spaces]
-
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -31,6 +24,26 @@ logging.basicConfig(
 )
 logger = logging.getLogger('profanity_detector')
 
+# ZeroGPU COMPATIBILITY NOTES:
+# The @spaces.GPU decorators throughout this code enable compatibility with Hugging Face ZeroGPU.
+# - They request GPU resources only when needed and release them after function completion
+# - They have no effect when running in local environments or standard GPU Spaces
+# - Custom durations can be specified for functions requiring longer processing times
+# - For local development, you'll need: pip install huggingface_hub[spaces]
+
+# Detect if we're running in a ZeroGPU environment
+IS_ZEROGPU = os.environ.get("SPACE_RUNTIME_STATELESS", "0") == "1"
+
+# Define device strategy that works in both environments
+if IS_ZEROGPU:
+    # In ZeroGPU: initialize on CPU, will use GPU only in @spaces.GPU functions
+    device = torch.device("cpu")
+    logger.info("ZeroGPU environment detected. Using CPU for initial loading.")
+else:
+    # For local runs: use CUDA if available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    logger.info(f"Local environment. Using device: {device}")
+
 # Define device at the top of the script (global scope)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logger.info(f"Using device: {device}")
@@ -66,12 +79,11 @@ def load_models():
     # Load model with memory optimization using half-precision
     profanity_model = AutoModelForSequenceClassification.from_pretrained(PROFANITY_MODEL)
 
-    # Move to GPU if available and optimize with half-precision where possible
-    if torch.cuda.is_available():
+    # Only move to device for local runs
+    if not IS_ZEROGPU and torch.cuda.is_available():
         profanity_model = profanity_model.to(device)
-        # Convert to half precision to save memory (if possible)
         try:
-            profanity_model = profanity_model.half()  # Convert to FP16
+            profanity_model = profanity_model.half()
             logger.info("Successfully converted profanity model to half precision")
         except Exception as e:
             logger.warning(f"Could not convert to half precision: {str(e)}")
@@ -84,7 +96,7 @@ def load_models():
     t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL)
 
     # Move to GPU if available and optimize with half-precision where possible
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         t5_model = t5_model.to(device)
         # Convert to half precision to save memory (if possible)
         try:
@@ -95,7 +107,7 @@ def load_models():
 
     logger.info("Loading Whisper speech-to-text model...")
     whisper_model = whisper.load_model("large")
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         whisper_model = whisper_model.to(device)
 
     logger.info("Loading Text-to-Speech model...")
@@ -106,13 +118,13 @@ def load_models():
     vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 
     # Move models to appropriate device
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         tts_model = tts_model.to(device)
         vocoder = vocoder.to(device)
 
     # Speaker embeddings for TTS
     speaker_embeddings = torch.zeros((1, 512))
-    if torch.cuda.is_available():
+    if not IS_ZEROGPU and torch.cuda.is_available():
         speaker_embeddings = speaker_embeddings.to(device)
 
     models_loaded = True
@@ -127,7 +139,6 @@ def load_models():
 # ZeroGPU decorator: Requests GPU resources when function is called and releases them when completed.
 # This enables efficient GPU sharing in Hugging Face Spaces while having no effect in local environments.
 @spaces.GPU
-@spaces.GPU
 def detect_profanity(text: str, threshold: float = 0.5):
     """
     Detect profanity in text with adjustable threshold
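The compatibility notes in the diff mention that custom durations can be specified for functions needing longer processing times. For reference, a minimal sketch of that variant of the decorator; duration is the decorator's documented keyword argument, but the function name and body here are illustrative placeholders, not code from this commit:

import spaces
import torch

@spaces.GPU(duration=120)  # hold the GPU for up to 120 seconds per call
def transcribe_batch(paths: list[str]) -> list[str]:
    # Placeholder body: on ZeroGPU, CUDA only exists inside this call,
    # so the device check must happen here rather than at import time.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return [f"{p} (processed on {run_device})" for p in paths]

Outside Hugging Face Spaces the decorator is a no-op, so the same function runs unchanged in local development.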
 
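All of the deletions in this commit follow one pattern: unconditional `if torch.cuda.is_available():` moves at load time are gated with `not IS_ZEROGPU`, because on ZeroGPU no GPU is attached while the module is importing; one only appears inside @spaces.GPU functions. A hedged sketch of the inference half of that pattern, using a hypothetical classifier (the body of detect_profanity itself is not shown in this diff):

import spaces
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_ID = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder model

# Loaded on CPU at import time, matching the IS_ZEROGPU strategy above.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)

@spaces.GPU  # on ZeroGPU, a GPU is attached only for the duration of each call
def classify(text: str) -> list[float]:
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    m = model.to(run_device)  # safe here: CUDA exists inside the decorated call
    inputs = tokenizer(text, return_tensors="pt").to(run_device)
    with torch.no_grad():
        logits = m(**inputs).logits
    return torch.softmax(logits, dim=-1)[0].tolist()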