Spaces:
Runtime error
Runtime error
File size: 2,336 Bytes
8aa42aa 6c87da3 fd51276 3e6bd64 8aa42aa b3b871d 6c87da3 8aa42aa a7c2026 6c87da3 8aa42aa 3de00ec 8aa42aa 3e6bd64 fab4dca 8aa42aa fab4dca 8aa42aa fab4dca 8aa42aa fab4dca 8aa42aa b5f4a75 23a9064 3e6bd64 fab4dca a7c2026 8aa42aa fab4dca 8aa42aa 871b8a4 8aa42aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import os
os.system("pip install git+https://github.com/sanchit-gandhi/whisper-jax.git")
# import whisper
from flask import Flask, jsonify, request
import requests
import time
from transformers import pipeline
from whisper_jax import FlaxWhisperPipline
model = whisper.load_model("small")
# pipe = pipeline(
# "automatic-speech-recognition",
# model="openai/whisper-small.en",
# chunk_length_s=15,
# device=model.device,
# )
pipe = FlaxWhisperPipline("openai/whisper-large-v2")
app = Flask(__name__)
app.config['TIMEOUT'] = 60 * 10 # 10 mins
@app.route("/")
def indexApi():
return jsonify({"output": "okay"})
@app.route("/run", methods=['POST'])
def runApi():
start_time = time.time()
audio_url = request.form.get("audio_url")
response = requests.get(audio_url)
if response.status_code == requests.codes.ok:
with open("audio.mp3", "wb") as f:
f.write(response.content)
else:
return jsonify({
"result": "Unable to save file, status code: {response.status_code}" ,
}), 400
audio = "audio.mp3"
# audioOri = whisper.load_audio(audio)
# audio = whisper.pad_or_trim(audioOri)
# mel = whisper.log_mel_spectrogram(audio).to(model.device)
# _, probs = model.detect_language(mel)
# options = whisper.DecodingOptions(fp16 = False)
# result = whisper.decode(model, mel, options)
# test 2
# ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# sample = ds[0]["audio"]
prediction = pipe(audio)["text"]
end_time = time.time()
total_time = end_time - start_time
return jsonify({
"audio_url": audio_url,
"result": prediction,
"exec_time_sec": total_time
})
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)
# def inference(audio):
# audio = whisper.load_audio(audio)
# audio = whisper.pad_or_trim(audio)
# mel = whisper.log_mel_spectrogram(audio).to(model.device)
# _, probs = model.detect_language(mel)
# options = whisper.DecodingOptions(fp16 = False)
# result = whisper.decode(model, mel, options)
# # print(result.text)
# return result.text, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
|