Pijush2023 committed on
Commit
49d906d
·
verified ·
1 Parent(s): b2042b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -1
app.py CHANGED
@@ -439,6 +439,33 @@ pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=proce
439
 
440
  base_audio_drive = "/data/audio"
441
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  def transcribe_function(stream, new_chunk):
443
  try:
444
  sr, y = new_chunk[0], new_chunk[1]
@@ -446,6 +473,13 @@ def transcribe_function(stream, new_chunk):
446
  print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
447
  return stream, "", None
448
 
 
 
 
 
 
 
 
449
  y = y.astype(np.float32) / np.max(np.abs(y))
450
 
451
  if stream is not None:
@@ -455,7 +489,7 @@ def transcribe_function(stream, new_chunk):
455
 
456
  result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
457
 
458
- full_text = result.get("text","")
459
 
460
  return stream, full_text, result
461
 
 
439
 
440
  base_audio_drive = "/data/audio"
441
 
442
+ # Normal code (input sample rate is 44100 Hz)
443
+
444
+ # def transcribe_function(stream, new_chunk):
445
+ # try:
446
+ # sr, y = new_chunk[0], new_chunk[1]
447
+ # except TypeError:
448
+ # print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
449
+ # return stream, "", None
450
+
451
+ # y = y.astype(np.float32) / np.max(np.abs(y))
452
+
453
+ # if stream is not None:
454
+ # stream = np.concatenate([stream, y])
455
+ # else:
456
+ # stream = y
457
+
458
+ # result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
459
+
460
+ # full_text = result.get("text","")
461
+
462
+ # return stream, full_text, result
463
+
464
+ # Resampling code: converts the input audio to 16000 Hz
465
+
466
+ import numpy as np
467
+ from scipy.signal import resample
468
+
469
  def transcribe_function(stream, new_chunk):
470
  try:
471
  sr, y = new_chunk[0], new_chunk[1]
 
473
  print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
474
  return stream, "", None
475
 
476
+ # Resample to 16000 Hz
477
+ target_sr = 16000
478
+ if sr != target_sr:
479
+ num_samples = int(len(y) * float(target_sr) / sr)
480
+ y = resample(y, num_samples)
481
+ sr = target_sr
482
+
483
  y = y.astype(np.float32) / np.max(np.abs(y))
484
 
485
  if stream is not None:
 
489
 
490
  result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
491
 
492
+ full_text = result.get("text", "")
493
 
494
  return stream, full_text, result
495