File size: 2,287 Bytes
252d749
b8952b7
252d749
f00035f
b8952b7
252d749
f00035f
4884454
252d749
 
 
8e88e09
ff72527
f55cf0b
b8952b7
b712add
 
f55cf0b
b712add
252d749
6278580
 
 
252d749
 
f55cf0b
5e865c5
252d749
 
f55cf0b
252d749
 
f55cf0b
 
 
aeb3fbb
71ae1f7
aeb3fbb
 
 
5fdc6a1
aeb3fbb
 
b8952b7
6e7272c
 
1875ed6
f00035f
6e7272c
71ae1f7
f55cf0b
 
 
 
 
 
5b8a285
252d749
b712add
 
252d749
f544ac0
 
 
aab387d
9b258b1
b8952b7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from faster_whisper import WhisperModel
from datetime import datetime, timedelta
from tqdm import tqdm
import os, time
from App.Transcription.Schemas import TranscriptionMetadata, TranscriptionResult

# Local time captured once, when this module is first imported; it is not
# refreshed afterwards.
# NOTE(review): not referenced by the code visible in this file — confirm
# whether another module imports it before removing.
current_time = time.localtime()
# NOTE(review): leftover commented-out setting; the model size is now the
# `model_size` parameter of transcribe_file (default "tiny").
# model_size = "tiny"


def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file word by word, reporting progress via ``state``.

    The input file is single-use: it is deleted once this function finishes,
    whether it returns normally, returns the failure string, or raises.

    Args:
        state: Task handle exposing ``update_state(state=..., meta=...)`` and
            ``request.id`` (presumably a bound Celery task -- confirm against
            the caller).
        file_path: Path of the audio file to transcribe; removed afterwards.
        model_size: Model size/name handed to ``WhisperModel``.

    Returns:
        On success, ``TranscriptionResult(...).dict()`` with ``content`` set
        to a list of ``{"start", "end", "text"}`` entries (one per word) and
        ``status`` set to ``"SUCCESS"``. If iterating the segments fails, the
        state is set to ``"FAILED"`` and a ``"Falied error ..."`` string is
        returned instead.

    Raises:
        Exception: Errors raised while loading the model or starting the
            transcription are not caught here and propagate unchanged (the
            input file is still removed first).
    """
    print(model_size, 'audio-transcription')
    progress_interval = 30  # minimum seconds between two progress updates
    result = []
    metadata = TranscriptionMetadata()
    # Monotonic clock: the throttle must not be affected by wall-clock
    # adjustments. Started here so model loading counts towards the first
    # interval, as it always has.
    last_update = time.monotonic()

    try:
        state.update_state(
            state="PROGRESS",
            meta=metadata.dict(),
        )
        model = WhisperModel(model_size, device="cpu", compute_type="int8")
        segments, info = model.transcribe(
            file_path, beam_size=5, vad_filter=True, word_timestamps=True
        )

        total_duration = round(info.duration, 2)
        metadata.language = info.language
        metadata.duration = int(total_duration)
        state.update_state(
            state="PROGRESS",
            meta=metadata.dict(),
        )

        try:
            with tqdm(total=total_duration, unit=" seconds") as pbar:
                for segment in segments:
                    for word in segment.words:
                        result.append(
                            {
                                "start": word.start,
                                "end": word.end,
                                "text": word.word,
                            }
                        )
                        if time.monotonic() - last_update > progress_interval:
                            print('UPDATED TIME')
                            last_update = time.monotonic()
                            # Progress is published as a string: the share of
                            # the total duration covered by the latest word.
                            metadata.percentage = f"{((word.end / total_duration)*100)}"
                            state.update_state(
                                state="TRANSCRIBING", meta=metadata.dict()
                            )
                        # The bar advances by the duration of each word.
                        pbar.update(word.end - word.start)
        except Exception as e:
            # Failures while consuming the segments are reported through the
            # task state and the return value rather than re-raised.
            state.update_state(
                state="FAILED",
                meta=metadata.dict(),
            )
            return f"Falied error {e}"
    finally:
        # Single cleanup point for every exit path, including failures during
        # model loading / transcription start-up that previously left the
        # file behind. A file that is already gone must not mask the real
        # error (or a successful result).
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass

    transcription_result = TranscriptionResult(
        **metadata.dict(), task_id=state.request.id
    )
    transcription_result.content = result
    transcription_result.status = "SUCCESS"
    return transcription_result.dict()