asahi417 committed on
Commit
6abc055
·
verified ·
1 Parent(s): 94efc09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -5,6 +5,7 @@ from typing import Optional
5
  import spaces
6
  import torch
7
  import gradio as gr
 
8
  from transformers import pipeline
9
  from transformers.pipelines.audio_utils import ffmpeg_read
10
 
@@ -60,7 +61,10 @@ def transcribe(inputs: str):
60
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
61
  with open(inputs, "rb") as f:
62
  inputs = f.read()
63
- prediction = get_prediction({"array": ffmpeg_read(inputs, sampling_rate), "sampling_rate": sampling_rate})
 
 
 
64
  output = ""
65
  for n, s in enumerate(prediction["speakers"]):
66
  text_timestamped = "\n".join([f"- **{format_time(*c['timestamp'])}** {c['text']}" for c in prediction[f"chunks/{s}"]])
 
5
  import spaces
6
  import torch
7
  import gradio as gr
8
+ import numpy as np
9
  from transformers import pipeline
10
  from transformers.pipelines.audio_utils import ffmpeg_read
11
 
 
61
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
62
  with open(inputs, "rb") as f:
63
  inputs = f.read()
64
+ inputs = ffmpeg_read(inputs, sampling_rate)
65
+ array_pad = np.zeros(int(pipe.feature_extractor.sampling_rate * 0.5))
66
+ inputs = np.concatenate([array_pad, inputs, array_pad])
67
+ prediction = get_prediction({"array": inputs, "sampling_rate": sampling_rate})
68
  output = ""
69
  for n, s in enumerate(prediction["speakers"]):
70
  text_timestamped = "\n".join([f"- **{format_time(*c['timestamp'])}** {c['text']}" for c in prediction[f"chunks/{s}"]])