Thao Pham committed on
Commit
f33d0ed
·
1 Parent(s): 9c77f6f

add utils function for transcribe

Browse files
Files changed (1) hide show
  1. utils.py +40 -2
utils.py CHANGED
@@ -7,6 +7,46 @@ from tqdm import tqdm
7
  from pytubefix import YouTube, Stream
8
  import cv2
9
  import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Taken from the course: https://www.deeplearning.ai/short-courses/multimodal-rag-chat-with-videos/
12
  def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int=-1) -> str:
@@ -14,8 +54,6 @@ def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int=-1) -> str:
14
 
15
  if format == 'vtt':
16
  write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
17
- elif format == 'srt':
18
- write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
19
  else:
20
  raise Exception("Unknown format " + format)
21
 
 
7
  from pytubefix import YouTube, Stream
8
  import cv2
9
  import json
10
+ import textwrap
11
+
12
+
13
+ # Helper function to convert a time in seconds to the timestamp format used in .vtt or .srt files
14
def format_timestamp(seconds: float, always_include_hours: bool = False, fractionalSeperator: str = '.'):
    """Render a time offset in seconds as a subtitle timestamp string.

    The offset is rounded to whole milliseconds and formatted as
    ``[HH:]MM:SS<sep>mmm``.

    Args:
        seconds: Non-negative time offset in seconds.
        always_include_hours: Emit the ``HH:`` prefix even when the hour
            component is zero.
        fractionalSeperator: String placed between the seconds and the
            milliseconds fields.

    Returns:
        The formatted timestamp string.

    Raises:
        AssertionError: If ``seconds`` is negative (while asserts are enabled).
    """
    assert seconds >= 0, "non-negative timestamp expected"

    # Work in integer milliseconds and peel off each unit in turn.
    total_ms = round(seconds * 1000.0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, millis = divmod(remainder, 1_000)

    if always_include_hours or hours > 0:
        prefix = f"{hours:02d}:"
    else:
        prefix = ""
    return f"{prefix}{minutes:02d}:{whole_seconds:02d}{fractionalSeperator}{millis:03d}"
29
+
30
+
31
+ def _processText(text: str, maxLineWidth=None):
32
+ if (maxLineWidth is None or maxLineWidth < 0):
33
+ return text
34
+
35
+ lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
36
+ return '\n'.join(lines)
37
+
38
+ # Helper function to write transcripts generated by Whisper to a .vtt file
39
def write_vtt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
    """Write transcript segments to ``file`` as a WebVTT document.

    Args:
        transcript: Iterable of segments; each is indexed by the keys
            ``'text'``, ``'start'`` and ``'end'``.
        file: Text stream that receives the output.
        maxLineWidth: Forwarded to ``_processText`` to control line wrapping
            of each segment's text.
    """
    # Header line; print appends a second newline, giving the blank line
    # that separates the header from the first cue.
    print("WEBVTT\n", file=file)

    for cue in transcript:
        wrapped = _processText(cue['text'], maxLineWidth)
        # "-->" is the cue timing separator, so it is replaced in cue text.
        body = wrapped.replace('-->', '->')
        begin = format_timestamp(cue['start'])
        finish = format_timestamp(cue['end'])
        print(f"{begin} --> {finish}\n{body}\n", file=file, flush=True)
50
 
51
  # Taken from the course: https://www.deeplearning.ai/short-courses/multimodal-rag-chat-with-videos/
52
  def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int=-1) -> str:
 
54
 
55
  if format == 'vtt':
56
  write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
 
 
57
  else:
58
  raise Exception("Unknown format " + format)
59