|
from faster_whisper import WhisperModel |
|
from datetime import datetime, timedelta |
|
from tqdm import tqdm |
|
import os, time |
|
from App.Transcription.Schemas import TranscriptionMetadata, TranscriptionResult |
|
|
|
# Local time captured once, when this module is first imported; it is not
# refreshed afterwards. Nothing in the visible part of this file reads it.
current_time = time.localtime()
|
|
|
|
|
|
|
def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file word by word, reporting progress via ``state``.

    Args:
        state: Task handle exposing ``update_state(state=..., meta=...)`` and
            ``request.id`` (presumably a bound Celery task -- confirm with
            the caller).
        file_path: Path of the audio file to transcribe. The file is deleted
            once the word loop has finished, whether it succeeded or failed.
        model_size: Model size/name handed to ``WhisperModel``.

    Returns:
        On success, ``TranscriptionResult(...).dict()`` with ``content`` set
        to a list of ``{"start", "end", "text"}`` dicts (one per word) and
        ``status`` set to ``"SUCCESS"``. If iterating the segments raises,
        the state is set to ``"FAILED"`` and an error string is returned
        instead.

    Note:
        Exceptions raised while constructing the model or calling
        ``model.transcribe`` propagate to the caller, and the file is left
        in place in that case.
    """
    print(model_size, 'audio-transcription')

    words = []
    metadata = TranscriptionMetadata()
    # Timestamp of the most recent progress push; used to throttle updates.
    last_update = datetime.now()
    progress_interval = timedelta(seconds=30)

    state.update_state(state="PROGRESS", meta=metadata.dict())

    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(
        file_path, beam_size=5, vad_filter=True, word_timestamps=True
    )

    total_duration = round(info.duration, 2)
    metadata.language = info.language
    metadata.duration = int(total_duration)
    state.update_state(state="PROGRESS", meta=metadata.dict())

    try:
        with tqdm(total=total_duration, unit=" seconds") as pbar:
            for segment in segments:
                # ``words`` may be None when a segment carries no word timing.
                for word in segment.words or ():
                    words.append(
                        {
                            "start": word.start,
                            "end": word.end,
                            "text": word.word,
                        }
                    )

                    # Push progress at most once per interval.
                    if datetime.now() - last_update > progress_interval:
                        print('UPDATED TIME')
                        last_update = datetime.now()
                        # Skip the ratio when the duration rounded down to 0.
                        if total_duration > 0:
                            metadata.percentage = (
                                f"{((word.end / total_duration)*100)}"
                            )
                        state.update_state(
                            state="TRANSCRIBING", meta=metadata.dict()
                        )

                    pbar.update(word.end - word.start)
    except Exception as e:
        state.update_state(state="FAILED", meta=metadata.dict())
        return f"Failed error {e}"
    finally:
        # Single cleanup point: runs after both the success and failure paths.
        os.remove(file_path)

    transcription_result = TranscriptionResult(
        **metadata.dict(), task_id=state.request.id
    )
    transcription_result.content = words
    transcription_result.status = "SUCCESS"
    return transcription_result.dict()
|
|