"""Transcribe an audio file with faster-whisper, reporting progress via a task state object."""

import os
import time
from datetime import datetime, timedelta

from faster_whisper import WhisperModel
from tqdm import tqdm

from App.Transcription.Schemas import TranscriptionMetadata, TranscriptionResult

current_time = time.localtime()

# Minimum wall-clock gap between two "TRANSCRIBING" progress updates.
_PROGRESS_INTERVAL = timedelta(seconds=30)


def _remove_file(file_path):
    """Delete *file_path*, tolerating the case where it is already gone."""
    try:
        os.remove(file_path)
    except FileNotFoundError:
        # The goal (file absent) is already met; do not mask the real outcome.
        pass


def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe ``file_path`` word by word and delete the file afterwards.

    Progress is pushed through ``state.update_state``: "PROGRESS" before and
    after the audio is opened, "TRANSCRIBING" at most once per
    ``_PROGRESS_INTERVAL`` while words are being produced, and "FAILED" if
    iterating the transcription raises.

    Args:
        state: Task-like object exposing ``update_state(state=..., meta=...)``
            and ``request.id``.  NOTE(review): presumably a bound Celery task
            -- confirm against the caller.
        file_path: Path of the audio file.  The file is removed before this
            function returns or raises, whatever the outcome.
        model_size: Model name/size handed to ``WhisperModel``.

    Returns:
        On success, ``TranscriptionResult.dict()`` whose ``content`` is a list
        of ``{"start", "end", "text"}`` dicts (one per word) and whose
        ``status`` is ``"SUCCESS"``.  If iterating the segments fails, the
        string ``"Falied error <exception>"`` is returned instead.

    Raises:
        Exception: Errors raised while loading the model or opening the audio
            are not caught here and propagate to the caller (after the input
            file has been removed).
    """
    print(model_size, 'audio-transcription')
    result = []
    metadata = TranscriptionMetadata()
    last_update = datetime.now()

    try:
        state.update_state(
            state="PROGRESS",
            meta=metadata.dict(),
        )

        model = WhisperModel(model_size, device="cpu", compute_type="int8")
        segments, info = model.transcribe(
            file_path, beam_size=5, vad_filter=True, word_timestamps=True
        )

        total_duration = round(info.duration, 2)
        metadata.language = info.language
        metadata.duration = int(total_duration)
        state.update_state(
            state="PROGRESS",
            meta=metadata.dict(),
        )

        try:
            with tqdm(total=total_duration, unit=" seconds") as pbar:
                for segment in segments:
                    for word in segment.words:
                        result.append(
                            {
                                "start": word.start,
                                "end": word.end,
                                "text": word.word,
                            }
                        )

                        # Throttle state updates so the backend is not
                        # written to once per word.
                        if datetime.now() - last_update > _PROGRESS_INTERVAL:
                            print('UPDATED TIME')
                            last_update = datetime.now()
                            # Guard against a zero-length duration report.
                            if total_duration:
                                metadata.percentage = (
                                    f"{(word.end / total_duration) * 100}"
                                )
                            state.update_state(
                                state="TRANSCRIBING", meta=metadata.dict()
                            )

                        # Advance the bar per word so every word is counted.
                        pbar.update(word.end - word.start)
        except Exception as e:
            state.update_state(
                state="FAILED",
                meta=metadata.dict(),
            )
            # The message text (including its spelling) is kept as-is because
            # consumers of the task result may match on it.
            # NOTE(review): returning normally here presumably lets the task
            # framework record its own success state over "FAILED" -- confirm.
            return f"Falied error {e}"
    finally:
        # Always delete the input file, including when model loading or audio
        # decoding fails before the transcription loop starts.
        _remove_file(file_path)

    transcription_result = TranscriptionResult(
        **metadata.dict(), task_id=state.request.id
    )
    transcription_result.content = result
    transcription_result.status = "SUCCESS"
    return transcription_result.dict()