Spaces:
Runtime error
Runtime error
File size: 2,259 Bytes
8aa42aa fd51276 8aa42aa b3b871d a7c2026 8aa42aa a7c2026 e05af3f b5f4a75 e05af3f 8aa42aa 3de00ec 8aa42aa 23a9064 8aa42aa de6b9df 8aa42aa de6b9df 8aa42aa de6b9df 8aa42aa b5f4a75 23a9064 a7c2026 8aa42aa de6b9df 8aa42aa 871b8a4 8aa42aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import os
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper
from flask import Flask, jsonify, request
import requests
import time
from transformers import pipeline
from datasets import load_dataset
model = whisper.load_model("small")
pipe = pipeline(
"automatic-speech-recognition",
model="openai/whisper-small.en",
chunk_length_s=15,
device=model.device,
)
app = Flask(__name__)
app.config['TIMEOUT'] = 60 * 10 # 10 mins
@app.route("/")
def indexApi():
return jsonify({"output": "okay"})
@app.route("/run", methods=['POST'])
def runApi():
start_time = time.time()
audio_url = request.form.get("audio_url")
response = requests.get(audio_url)
if response.status_code == requests.codes.ok:
with open("audio.mp3", "wb") as f:
f.write(response.content)
else:
return jsonify({
"result": "Unable to save file, status code: {response.status_code}" ,
}), 400
audio = "audio.mp3"
audioOri = whisper.load_audio(audio)
audio = whisper.pad_or_trim(audioOri)
mel = whisper.log_mel_spectrogram(audio).to(model.device)
_, probs = model.detect_language(mel)
options = whisper.DecodingOptions(fp16 = False)
result = whisper.decode(model, mel, options)
# test 2
# ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# sample = ds[0]["audio"]
prediction = pipe(audioOri)["text"]
print(prediction)
end_time = time.time()
total_time = end_time - start_time
return jsonify({
"audio_url": audio_url,
"result": result.text,
"exec_time_sec": total_time
})
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)
# def inference(audio):
# audio = whisper.load_audio(audio)
# audio = whisper.pad_or_trim(audio)
# mel = whisper.log_mel_spectrogram(audio).to(model.device)
# _, probs = model.detect_language(mel)
# options = whisper.DecodingOptions(fp16 = False)
# result = whisper.decode(model, mel, options)
# # print(result.text)
# return result.text, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
|