tranny / App /Transcription /Utils /audio_transcription.py
Mbonea's picture
updated time
6e7272c
raw
history blame
2.29 kB
from faster_whisper import WhisperModel
from datetime import datetime, timedelta
from tqdm import tqdm
import os, time
from App.Transcription.Schemas import TranscriptionMetadata, TranscriptionResult
# Local time captured once, when this module is imported. Nothing in the
# visible part of this file reads it afterwards.
current_time = time.localtime()
# model_size = "tiny"
def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file word by word, reporting progress via ``state``.

    The file is decoded with a CPU ``WhisperModel`` (int8, beam size 5, VAD
    filtering and word timestamps enabled).  Every recognised word is stored
    as ``{"start", "end", "text"}``.  Progress is pushed through
    ``state.update_state`` at most once every 30 seconds while words are
    being consumed.  The input file is deleted once the word loop finishes,
    whether it succeeded or failed.

    Args:
        state: Task handle exposing ``update_state(state=..., meta=...)`` and
            ``request.id`` (presumably a bound Celery task -- confirm with
            the caller).
        file_path: Path of the audio file to transcribe; removed afterwards.
        model_size: Model identifier handed to ``WhisperModel``.

    Returns:
        ``TranscriptionResult(...).dict()`` with ``content`` set to the word
        list and ``status`` set to ``"SUCCESS"``; or, if consuming the
        segments raised, a plain error string (the task state is set to
        ``"FAILED"`` first).

    Raises:
        Whatever model loading, ``model.transcribe`` or ``os.remove`` raise;
        those calls are not covered by the error handler.
    """
    print(model_size, 'audio-transcription')
    result = []
    metadata = TranscriptionMetadata()
    # Timestamp of the last throttled progress push; it starts before the
    # model is loaded, so load time counts towards the first interval.
    last_update = datetime.now()
    update_interval = timedelta(seconds=30)

    state.update_state(
        state="PROGRESS",
        meta=metadata.dict(),
    )

    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(
        file_path, beam_size=5, vad_filter=True, word_timestamps=True
    )

    total_duration = round(info.duration, 2)
    metadata.language = info.language
    metadata.duration = int(total_duration)
    state.update_state(
        state="PROGRESS",
        meta=metadata.dict(),
    )

    try:
        with tqdm(total=total_duration, unit=" seconds") as pbar:
            for segment in segments:
                # ``words`` is optional on a segment; treat a missing list as
                # empty instead of failing the whole task with a TypeError.
                for word in segment.words or ():
                    result.append(
                        {
                            "start": word.start,
                            "end": word.end,
                            "text": word.word,
                        }
                    )

                    if datetime.now() - last_update > update_interval:
                        print('UPDATED TIME')
                        last_update = datetime.now()
                        # A very short clip can round to a duration of 0;
                        # leave the percentage untouched rather than divide
                        # by zero and mark a good transcription as failed.
                        if total_duration > 0:
                            metadata.percentage = (
                                f"{((word.end / total_duration) * 100)}"
                            )
                        state.update_state(
                            state="TRANSCRIBING", meta=metadata.dict()
                        )
                    pbar.update(word.end - word.start)
    except Exception as e:
        state.update_state(
            state="FAILED",
            meta=metadata.dict(),
        )
        # Message text (spelling included) is kept unchanged because callers
        # may match on it.
        return f"Falied error {e}"
    finally:
        # Single cleanup point: the file is removed on success, on failure,
        # and when the loop is interrupted by a non-Exception error.
        os.remove(file_path)

    transcription_result = TranscriptionResult(
        **metadata.dict(), task_id=state.request.id
    )
    transcription_result.content = result
    transcription_result.status = "SUCCESS"
    return transcription_result.dict()