tranny / App /Transcription /Utils /audio_transcription.py
Mbonea's picture
I forgot to refresh the start time
1875ed6
raw
history blame
2.11 kB
from faster_whisper import WhisperModel
from datetime import datetime, timedelta
from tqdm import tqdm
import os, time
from App.Transcription.Schemas import TranscriptionMetadata, TranscriptionResult
# Local time captured once, when this module is imported. Nothing in the
# visible code reads it afterwards.
current_time = time.localtime()
# Module-level model size. Note that transcribe_file() declares a parameter
# with the same name (and its own "tiny" default), which shadows this value
# inside the function.
model_size = "tiny"
def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file word by word, reporting progress through ``state``.

    The file at ``file_path`` is treated as single-use: it is deleted once the
    job ends, whether transcription succeeded, failed while producing words,
    or raised while loading the model / opening the audio.

    Args:
        state: Object exposing ``update_state(state=..., meta=...)``
            (presumably a Celery task instance -- confirm against the caller).
        file_path: Path of the audio file to transcribe. Removed afterwards.
        model_size: Model size/name handed to ``WhisperModel``.

    Returns:
        On success, ``TranscriptionResult(...).dict()`` whose ``transcript`` is
        a list of ``{"start", "end", "text"}`` dicts, one per word.
        If an exception occurs while iterating over the segments, the state is
        set to ``"FAILED"`` and a ``"Falied error ..."`` string is returned
        instead.

    Raises:
        Any exception raised while constructing the model or starting the
        transcription propagates to the caller unchanged (the input file is
        still removed first).
    """
    result = []
    metadata = TranscriptionMetadata()
    # Progress is pushed to `state` at most once per interval to avoid
    # issuing an update for every single word.
    report_interval = timedelta(seconds=30)
    last_report = datetime.now()

    try:
        state.update_state(state="PROGRESS", meta=metadata.dict())

        model = WhisperModel(model_size, device="cpu", compute_type="int8")
        segments, info = model.transcribe(
            file_path, beam_size=5, vad_filter=True, word_timestamps=True
        )

        total_duration = round(info.duration, 2)
        metadata.language = info.language
        metadata.duration = int(total_duration)
        state.update_state(state="PROGRESS", meta=metadata.dict())

        try:
            with tqdm(total=total_duration, unit=" seconds") as pbar:
                for segment in segments:
                    for word in segment.words:
                        result.append(
                            {
                                "start": word.start,
                                "end": word.end,
                                "text": word.word,
                            }
                        )
                        if datetime.now() - last_report >= report_interval:
                            # Restart the interval so the next report is
                            # another full interval away.
                            last_report = datetime.now()
                            metadata.percentage = f"{((word.end / total_duration)*100)}"
                            state.update_state(state="PROGRESS", meta=metadata.dict())
                        # Advance the bar by the length of the word just handled.
                        pbar.update(word.end - word.start)
        except Exception as e:
            state.update_state(state="FAILED", meta=metadata.dict())
            # Message text (including its spelling) is kept byte-for-byte:
            # callers may be matching on it.
            return f"Falied error {e}"
    finally:
        # Single cleanup point for every exit path, so the file is not left
        # behind when model loading or the transcribe() call itself raises.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone: the goal of the cleanup is met, and raising here
            # would mask the real error or discard a finished transcript.
            pass

    transcription_result = TranscriptionResult(**metadata.dict())
    transcription_result.transcript = result
    return transcription_result.dict()