Tony4 committed on
Commit
945e29c
·
verified ·
1 Parent(s): 200adf9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -32
app.py CHANGED
@@ -1,57 +1,44 @@
1
  import gradio as gr
2
- import spaces
3
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
4
  import torch
5
  import os
6
  import soundfile as sf
7
  from scipy.signal import resample
8
 
9
- print(f"Is CUDA available: {torch.cuda.is_available()}")
10
- if torch.cuda.is_available():
11
- print(f"Using CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
12
- else:
13
- print("No GPU detected, defaulting to CPU.")
14
-
15
  # Define the model ID
16
  MODEL_ID = "WMRNORDIC/whisper-swedish-telephonic"
17
 
18
- # Load token from environment variables
19
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
20
  if not HF_API_TOKEN:
21
- raise ValueError("HF_API_TOKEN is not set. Please set it in the environment variables or Space settings.")
22
-
23
- # GPU Initialization
24
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
25
- print(f"Running on device: {DEVICE}")
26
 
27
- # Load model and processor with ZeroGPU integration
28
- @spaces.GPU
29
  def initialize_model():
30
- try:
31
- print("Loading model and processor...")
32
- processor = WhisperProcessor.from_pretrained(MODEL_ID, use_auth_token=HF_API_TOKEN)
33
- model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID, use_auth_token=HF_API_TOKEN)
34
- model = model.to(DEVICE) # Move model to GPU or CPU
35
- print("Model loaded successfully.")
36
- return processor, model
37
- except Exception as e:
38
- print(f"Error loading model or processor: {e}")
39
- raise e
40
-
41
- processor, model = initialize_model()
42
 
43
  # Function to resample audio to 16kHz
44
  def resample_audio(audio_data, original_rate, target_rate=16000):
45
  if original_rate != target_rate:
46
- print(f"Resampling audio from {original_rate}Hz to {target_rate}Hz...")
47
  num_samples = int(len(audio_data) * target_rate / original_rate)
48
  return resample(audio_data, num_samples)
49
  return audio_data
50
 
51
- # Transcription function with GPU allocation
52
- @spaces.GPU
53
  def transcribe_audio(audio):
54
  try:
 
 
 
 
 
 
55
  if isinstance(audio, tuple): # Microphone input
56
  audio_data = audio[1]
57
  sample_rate = audio[0]
@@ -61,8 +48,8 @@ def transcribe_audio(audio):
61
  audio_data = resample_audio(audio_data, sample_rate)
62
 
63
  # Preprocess and perform inference
64
- input_features = processor(audio_data, return_tensors="pt", sampling_rate=16000).input_features
65
- input_features = input_features.to(DEVICE) # Move input to GPU or CPU
66
  with torch.no_grad():
67
  predicted_ids = model.generate(input_features)
68
 
 
1
  import gradio as gr
 
2
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
  import torch
4
  import os
5
  import soundfile as sf
6
  from scipy.signal import resample
7
 
 
 
 
 
 
 
8
  # Define the model ID
9
  MODEL_ID = "WMRNORDIC/whisper-swedish-telephonic"
10
 
11
+ # Load the Hugging Face token from the environment
12
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
13
  if not HF_API_TOKEN:
14
+ raise ValueError("HF_API_TOKEN not found in environment variables. Please set it in the Space settings.")
 
 
 
 
15
 
16
+ # Function to initialize the model and processor lazily
 
17
  def initialize_model():
18
+ # This function will be executed only when Gradio is processing a request
19
+ print("Loading model and processor...")
20
+ processor = WhisperProcessor.from_pretrained(MODEL_ID, token=HF_API_TOKEN)
21
+ model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID, token=HF_API_TOKEN)
22
+ model = model.to("cuda" if torch.cuda.is_available() else "cpu") # Ensure GPU is used if available
23
+ print("Model loaded successfully.")
24
+ return processor, model
 
 
 
 
 
25
 
26
  # Function to resample audio to 16kHz
27
  def resample_audio(audio_data, original_rate, target_rate=16000):
28
  if original_rate != target_rate:
 
29
  num_samples = int(len(audio_data) * target_rate / original_rate)
30
  return resample(audio_data, num_samples)
31
  return audio_data
32
 
33
+ # Transcription function
 
34
  def transcribe_audio(audio):
35
  try:
36
+ # Lazy-load the model and processor inside the request handler
37
+ global processor, model
38
+ if 'processor' not in globals() or 'model' not in globals():
39
+ processor, model = initialize_model()
40
+
41
+ # Handle microphone input or uploaded file
42
  if isinstance(audio, tuple): # Microphone input
43
  audio_data = audio[1]
44
  sample_rate = audio[0]
 
48
  audio_data = resample_audio(audio_data, sample_rate)
49
 
50
  # Preprocess and perform inference
51
+ device = "cuda" if torch.cuda.is_available() else "cpu"
52
+ input_features = processor(audio_data, return_tensors="pt", sampling_rate=16000).input_features.to(device)
53
  with torch.no_grad():
54
  predicted_ids = model.generate(input_features)
55