Spaces:
Sleeping
Sleeping
Manjot Singh
committed on
Commit
·
15002e4
1
Parent(s):
102c75b
reduced spaces duration
Browse files
- app.py +0 -1
- audio_processing.py +5 -5
- requirements.txt +0 -2
app.py
CHANGED
@@ -10,7 +10,6 @@ if torch.cuda.is_available():
|
|
10 |
else:
|
11 |
print("No CUDA GPUs available. Running on CPU.")
|
12 |
|
13 |
-
# @spaces.GPU(duration=180)
|
14 |
def transcribe_audio(audio_file, translate, model_size):
|
15 |
language_segments, final_segments = process_audio(audio_file, translate=translate, model_size=model_size)
|
16 |
|
|
|
10 |
else:
|
11 |
print("No CUDA GPUs available. Running on CPU.")
|
12 |
|
|
|
13 |
def transcribe_audio(audio_file, translate, model_size):
|
14 |
language_segments, final_segments = process_audio(audio_file, translate=translate, model_size=model_size)
|
15 |
|
audio_processing.py
CHANGED
@@ -16,11 +16,11 @@ OVERLAP=0
|
|
16 |
import whisperx
|
17 |
import torch
|
18 |
import numpy as np
|
19 |
-
logging.basicConfig(level=logging.INFO)
|
20 |
-
logger = logging.getLogger(__name__)
|
21 |
-
import spaces
|
22 |
|
23 |
|
|
|
|
|
|
|
24 |
|
25 |
|
26 |
def preprocess_audio(audio, chunk_size=CHUNK_LENGTH*16000, overlap=OVERLAP*16000): # 2 seconds overlap
|
@@ -32,7 +32,7 @@ def preprocess_audio(audio, chunk_size=CHUNK_LENGTH*16000, overlap=OVERLAP*16000
|
|
32 |
chunks.append(chunk)
|
33 |
return chunks
|
34 |
|
35 |
-
@spaces.GPU(
|
36 |
def process_audio(audio_file, translate=False, model_size="small"):
|
37 |
start_time = time.time()
|
38 |
|
@@ -113,7 +113,7 @@ def process_audio(audio_file, translate=False, model_size="small"):
|
|
113 |
logger.error(f"An error occurred during audio processing: {str(e)}")
|
114 |
raise
|
115 |
|
116 |
-
def merge_nearby_segments(segments, time_threshold=0.5, similarity_threshold=0.
|
117 |
merged = []
|
118 |
for segment in segments:
|
119 |
if not merged or segment['start'] - merged[-1]['end'] > time_threshold:
|
|
|
16 |
import whisperx
|
17 |
import torch
|
18 |
import numpy as np
|
|
|
|
|
|
|
19 |
|
20 |
|
21 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
22 |
+
logger = logging.getLogger(__name__)
|
23 |
+
import spaces
|
24 |
|
25 |
|
26 |
def preprocess_audio(audio, chunk_size=CHUNK_LENGTH*16000, overlap=OVERLAP*16000): # 2 seconds overlap
|
|
|
32 |
chunks.append(chunk)
|
33 |
return chunks
|
34 |
|
35 |
+
@spaces.GPU()
|
36 |
def process_audio(audio_file, translate=False, model_size="small"):
|
37 |
start_time = time.time()
|
38 |
|
|
|
113 |
logger.error(f"An error occurred during audio processing: {str(e)}")
|
114 |
raise
|
115 |
|
116 |
+
def merge_nearby_segments(segments, time_threshold=0.5, similarity_threshold=0.9):
|
117 |
merged = []
|
118 |
for segment in segments:
|
119 |
if not merged or segment['start'] - merged[-1]['end'] > time_threshold:
|
requirements.txt
CHANGED
@@ -19,5 +19,3 @@ nvidia-cublas-cu11
|
|
19 |
torchvision
|
20 |
cdifflib
|
21 |
pydub
|
22 |
-
|
23 |
-
|
|
|
19 |
torchvision
|
20 |
cdifflib
|
21 |
pydub
|
|
|
|