Spaces:
Sleeping
Sleeping
import os | |
import tempfile | |
import bentoml | |
from bentoml.io import JSON, File | |
from runners.audio_transcriber import AudioTranscriber | |
from runners.keyword_extractor import KeywordExtractor | |
from runners.transcription_zipper import TranscriptionZipper | |
from runners.audio_amplitude import AudioAmplitude | |
# One BentoML runner per pipeline stage. Each wraps a runnable class imported
# from the local ``runners`` package and is registered under an explicit name.
runner_audio_transcriber = bentoml.Runner(AudioTranscriber, name="audio_transcriber")
runner_audio_amplitude = bentoml.Runner(AudioAmplitude, name="audio_amplitude")
runner_keyword_extractor = bentoml.Runner(KeywordExtractor, name="keyword_extractor")
runner_transcription_zipper = bentoml.Runner(TranscriptionZipper, name="transcription_zipper")
# The service object that ties the pipeline together; every runner used by
# the functions in this module is registered with it here.
svc = bentoml.Service(
    "speech_to_text_pipeline",
    runners=[
        runner_audio_transcriber,
        runner_audio_amplitude,
        runner_keyword_extractor,
        runner_transcription_zipper,
    ],
)
async def process_uploaded_file(file):
    """Transcribe an uploaded audio file and summarise the result.

    The upload is copied to a temporary file on disk because the transcriber
    runner is invoked with a file path. The temporary file is removed once
    processing has finished, whether it succeeded or raised.

    Args:
        file: Uploaded file object. It must provide ``read()`` returning the
            raw bytes and a ``name`` attribute.

    Returns:
        dict: ``file_name``, ``transcript``, ``language``, ``file_size``
        (size in bytes of the temporary copy) and ``segments``.
    """
    # delete=False keeps the file on disk after it is closed, so the runner
    # can open it by path; removal is therefore done explicitly below.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_file_path = temp_file.name
    try:
        # Close (and thereby flush) the copy before handing its path over.
        with temp_file:
            temp_file.write(file.read())

        transcript, info, segment_info = (
            await runner_audio_transcriber.transcribe_audio_faster.async_run(temp_file_path)
        )
        # amplitudes = await runner_audio_amplitude.get_audio_amplitude.async_run(temp_file_path)

        return {
            "file_name": file.name,
            "transcript": transcript,
            "language": info.language,
            "file_size": os.stat(temp_file_path).st_size,
            "segments": segment_info,
        }
    finally:
        # Previously the temporary copy was never deleted, so every processed
        # upload left a file behind in the temp directory.
        try:
            os.unlink(temp_file_path)
        except FileNotFoundError:
            pass  # already gone; nothing left to clean up
async def zip_transcription(transcription):
    """Pass ``transcription`` to the transcription-zipper runner.

    Args:
        transcription: Value forwarded unchanged to the runner's
            ``zip_transcription`` method.

    Returns:
        Whatever the runner's ``zip_transcription`` call produces.
    """
    return await runner_transcription_zipper.zip_transcription.async_run(transcription)