demavior committed
Commit a2978c1 · verified · 1 Parent(s): aaf280a

Update app.py

Files changed (1)
  1. app.py +4 -28
app.py CHANGED
@@ -10,29 +10,6 @@ import numpy as np
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 def transcribe(audio):
-    # Extract the sample rate and audio data from the tuple
-    sample_rate, audio_data = audio
-
-    # Ensure the audio data is a numpy array
-    if not isinstance(audio_data, np.ndarray):
-        audio_data = np.array(audio_data)
-
-    # Convert to a tensor and ensure it's a floating-point type
-    audio_tensor = torch.tensor(audio_data, dtype=torch.float32)
-
-    # Convert to mono if the audio is stereo
-    if audio_tensor.ndim > 1:
-        audio_tensor = torch.mean(audio_tensor, dim=0, keepdim=True)
-
-    # Resample to 16kHz if necessary
-    if sample_rate != 16000:
-        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
-        audio_tensor = resampler(audio_tensor)
-
-    # Convert back to a NumPy array and ensure it's in the correct shape
-    audio_np = audio_tensor.cpu().numpy()
-    if audio_np.ndim == 2:
-        audio_np = audio_np
 
     pipe = pipeline(
         "automatic-speech-recognition",
@@ -40,13 +17,12 @@ def transcribe(audio):
         chunk_length_s=30,
         device=device,
     )
-
-    # prediction = pipe(audio_np)["text"]
 
-    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
-    sample = ds[0]["audio"]
+    # ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+    # sample = ds[0]["audio"]
 
-    prediction = pipe(sample.copy(), batch_size=8)["text"]
+    # prediction = pipe(sample.copy(), batch_size=8)["text"]
+    prediction = pipe(audio)["text"]
     print(prediction)
 
     return prediction
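
For context, a minimal sketch of what transcribe looks like after this commit. The pipeline's model argument sits on a line elided between the two hunks, so the model name below is only an assumed placeholder, and the sketch assumes audio is an input the ASR pipeline accepts directly (for example, a file path from a Gradio component):

import torch
from transformers import pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"

def transcribe(audio):
    # The pipeline is still constructed on every call, as in the committed file.
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",  # placeholder: the real model name is on a line not shown in the diff
        chunk_length_s=30,
        device=device,
    )

    # The LibriSpeech dummy sample used for testing is now commented out;
    # the incoming audio is passed straight to the pipeline.
    prediction = pipe(audio)["text"]
    print(prediction)
    return prediction

The net effect of the commit is that the manual numpy/torchaudio preprocessing (mono conversion, 16 kHz resampling) and the hard-coded LibriSpeech test sample are removed, and the incoming audio is transcribed directly.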