Spaces:
Runtime error
Runtime error
Commit
·
ee0e4c2
1
Parent(s):
b9354c2
Delete whisper_post_processor.py
Browse files- whisper_post_processor.py +0 -46
whisper_post_processor.py
DELETED
@@ -1,46 +0,0 @@
|
|
1 |
-
from interpreter import WhisperInterpreter
|
2 |
-
from utils import VIDEO_INFO, json_dump
|
3 |
-
from yt_dlp.postprocessor import PostProcessor
|
4 |
-
from datasets import Dataset
|
5 |
-
import re
|
6 |
-
|
7 |
-
class WhisperPP(PostProcessor):
|
8 |
-
def __init__(self,data,**whisper_options):
|
9 |
-
super().__init__()
|
10 |
-
self._options = whisper_options
|
11 |
-
interpreter = WhisperInterpreter(self._options.pop("model_size","base"))
|
12 |
-
self.data = data
|
13 |
-
self._process = getattr(interpreter, self._options.pop("mode","transcribe"))
|
14 |
-
self._write = self._options.pop("write")
|
15 |
-
self.videos_to_process = self._options.pop("number_videos",0)
|
16 |
-
self.repoId = self._get_name()
|
17 |
-
|
18 |
-
def run(self, info):
|
19 |
-
self.to_screen(f"Processing Video {info['id']}")
|
20 |
-
result = {key: info[key] for key in VIDEO_INFO}
|
21 |
-
result.update(self._process(info["filepath"], **self._options))
|
22 |
-
self.to_screen(f"Processed Video {info['id']} and appended results.")
|
23 |
-
self._update_data(result)
|
24 |
-
if self._write:
|
25 |
-
json_dump(result, f"{info['filepath'].split('.')[0]}.json")
|
26 |
-
return [], info
|
27 |
-
|
28 |
-
def _update_data(self, record):
|
29 |
-
dataType = type(self.data)
|
30 |
-
if dataType == list:
|
31 |
-
self.data.append(record)
|
32 |
-
else:
|
33 |
-
self.data = self.data.add_item(record)
|
34 |
-
if self.data.num_rows >= self.videos_to_process and self.videos_to_process != 0:
|
35 |
-
self.data.push_to_hub(self.repoId)
|
36 |
-
|
37 |
-
def get_data(self):
|
38 |
-
return self.data
|
39 |
-
|
40 |
-
def _get_name(self):
|
41 |
-
if self.data.info.download_checksums is not None:
|
42 |
-
regex = r"(?<=datasets\/)(.*?)(?=\/resolve)"
|
43 |
-
repoId = re.compile(regex)
|
44 |
-
url = list(self.data.info.download_checksums.keys())[0]
|
45 |
-
return repoId.findall(url)[0]
|
46 |
-
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|