Commit
·
bec78d1
1
Parent(s):
4f772d6
change default values in submit_video. improve readme and html.
Browse files- README.md +5 -17
- cli.py +0 -53
- static/landing_page.html +2 -1
- static/submit_video.html +2 -2
README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
## Multilang ASR Captioner
|
2 |
|
3 |
-
A multilingual automatic speech recognition and video captioning tool using faster whisper
|
|
|
|
|
4 |
|
5 |
<video width="400" height="300" src="https://github.com/marquesafonso/multilang-asr-captioner/assets/79766107/fcff8ac1-cdfc-4400-821c-f797d84c2d8a"></video>
|
6 |
|
@@ -26,7 +28,7 @@ Check the [landing page](http://127.0.0.1:8000).
|
|
26 |
|
27 |
From there you will see the [submit_video endpoint](http://127.0.0.1:8000/submit_video/) and the [documentation](http://127.0.0.1:8000/docs/)
|
28 |
|
29 |
-
**Tip**: on Linux or Mac localhost will resolve directly to 0.0.0.0 but on windows you will need to change it to 127.0.0.1
|
30 |
|
31 |
### Local
|
32 |
|
@@ -62,18 +64,4 @@ Then check the [landing page](http://127.0.0.1:8000).
|
|
62 |
|
63 |
From there you will see the [submit_video endpoint](http://127.0.0.1:8000/submit_video/) and the [documentation](http://127.0.0.1:8000/docs/)
|
64 |
|
65 |
-
**Tip**: on Linux or Mac localhost will resolve directly to 0.0.0.0 but on windows you will need to change it to 127.0.0.1
|
66 |
-
|
67 |
-
### Command Line Interface
|
68 |
-
|
69 |
-
Run the following code to use the CLI. The input file must be in mp4 format.
|
70 |
-
|
71 |
-
```
|
72 |
-
pipenv run python cli.py --invideo_filename '<your_file_name>' --max_words_per_line 8
|
73 |
-
```
|
74 |
-
|
75 |
-
Fontsize, Font, Background Color and Text Color arguments are available:
|
76 |
-
|
77 |
-
```
|
78 |
-
pipenv run python cli.py --invideo_filename '<your_file>' --max_words_per_line 8 --fontsize 28 --font "Arial-Bold" --bg_color None --text_color 'white'
|
79 |
-
```
|
|
|
1 |
## Multilang ASR Captioner
|
2 |
|
3 |
+
A multilingual automatic speech recognition and video captioning tool using faster whisper.
|
4 |
+
|
5 |
+
Supports real-time translation to English. Runs on a consumer-grade CPU.
|
6 |
|
7 |
<video width="400" height="300" src="https://github.com/marquesafonso/multilang-asr-captioner/assets/79766107/fcff8ac1-cdfc-4400-821c-f797d84c2d8a"></video>
|
8 |
|
|
|
28 |
|
29 |
From there you will see the [submit_video endpoint](http://127.0.0.1:8000/submit_video/) and the [documentation](http://127.0.0.1:8000/docs/)
|
30 |
|
31 |
+
**Tip**: on Linux or Mac, localhost will resolve directly to 0.0.0.0, but on Windows you will need to change it to 127.0.0.1 or localhost.
|
32 |
|
33 |
### Local
|
34 |
|
|
|
64 |
|
65 |
From there you will see the [submit_video endpoint](http://127.0.0.1:8000/submit_video/) and the [documentation](http://127.0.0.1:8000/docs/)
|
66 |
|
67 |
+
**Tip**: on Linux or Mac, localhost will resolve directly to 0.0.0.0, but on Windows you will need to change it to 127.0.0.1 or localhost.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cli.py
DELETED
@@ -1,53 +0,0 @@
|
|
1 |
-
from argparse import ArgumentParser
|
2 |
-
from utils.transcriber import transcriber
|
3 |
-
from utils.subtitler import subtitler
|
4 |
-
from utils.convert_video_to_audio import convert_video_to_audio
|
5 |
-
import logging, os
|
6 |
-
from tqdm import tqdm
|
7 |
-
|
8 |
-
logging.basicConfig(filename='main.log',
|
9 |
-
encoding='utf-8',
|
10 |
-
level=logging.DEBUG,
|
11 |
-
format='%(asctime)s %(levelname)s %(message)s',
|
12 |
-
datefmt='%m/%d/%Y %I:%M:%S %p')
|
13 |
-
|
14 |
-
def main(invideo_filename:str,
|
15 |
-
max_words_per_line:int,
|
16 |
-
fontsize:int,
|
17 |
-
font:str,
|
18 |
-
bg_color:str,
|
19 |
-
text_color:str
|
20 |
-
):
|
21 |
-
INVIDEO_DIR = os.path.join('data/',invideo_filename)
|
22 |
-
os.makedirs(INVIDEO_DIR, exist_ok=True)
|
23 |
-
SRT_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.srt")
|
24 |
-
OUTVIDEO_PATH = os.path.join(INVIDEO_DIR, f"result_{invideo_filename}.mp4")
|
25 |
-
with tqdm(total=100, desc="Overall Progress") as pbar:
|
26 |
-
INVIDEO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.mp4")
|
27 |
-
INAUDIO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.m4a")
|
28 |
-
if not os.path.exists(INAUDIO_PATH):
|
29 |
-
convert_video_to_audio(INVIDEO_PATH,INAUDIO_PATH)
|
30 |
-
pbar.update(50)
|
31 |
-
if not os.path.exists(SRT_PATH):
|
32 |
-
transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line)
|
33 |
-
pbar.update(25)
|
34 |
-
subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
|
35 |
-
pbar.update(25)
|
36 |
-
|
37 |
-
if __name__ == '__main__':
|
38 |
-
parser = ArgumentParser()
|
39 |
-
parser.add_argument('--invideo_filename', required=True, type=str, help='Filename to caption.')
|
40 |
-
parser.add_argument("--max_words_per_line", type=int, default=None, help="the maximum number of words in a segment. (int)")
|
41 |
-
parser.add_argument('--fontsize', required=False, default=32, type=int, help='Font size for captions (int)')
|
42 |
-
parser.add_argument('--font', required=False, default="FuturaPTHeavy", type=str, help='Font style for captions (str)')
|
43 |
-
parser.add_argument('--bg_color', required=False, default="#070a13b3", type=str, help='Hex color value for caption background colour. (str)')
|
44 |
-
parser.add_argument('--text_color', required=False, default="white", type=str, help='color value for caption text. (str)')
|
45 |
-
args = parser.parse_args()
|
46 |
-
# Example usage
|
47 |
-
main(args.invideo_filename,
|
48 |
-
args.max_words_per_line,
|
49 |
-
args.fontsize,
|
50 |
-
args.font,
|
51 |
-
args.bg_color,
|
52 |
-
args.text_color
|
53 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/landing_page.html
CHANGED
@@ -142,7 +142,8 @@
|
|
142 |
<body>
|
143 |
<div class="container">
|
144 |
<h1>Multilang-ASR-Captioner</h1>
|
145 |
-
<p>A multilingual automatic speech recognition and video captioning tool using faster whisper</p>
|
|
|
146 |
<a href="/submit_video" class="button submit">Submit Video</a>
|
147 |
<a href="/docs" class="button docs">Documentation</a>
|
148 |
</div>
|
|
|
142 |
<body>
|
143 |
<div class="container">
|
144 |
<h1>Multilang-ASR-Captioner</h1>
|
145 |
+
<p>A multilingual automatic speech recognition and video captioning tool using faster whisper.</p>
|
146 |
+
<p>Supports real-time translation to English. Runs on a consumer-grade CPU.</p>
|
147 |
<a href="/submit_video" class="button submit">Submit Video</a>
|
148 |
<a href="/docs" class="button docs">Documentation</a>
|
149 |
</div>
|
static/submit_video.html
CHANGED
@@ -105,8 +105,8 @@
|
|
105 |
<option value="transcribe">Transcribe</option>
|
106 |
<option value="translate">Translate</option>
|
107 |
</select><br>
|
108 |
-
Max words per line: <input type="number" name="max_words_per_line" value="
|
109 |
-
Font size: <input type="number" name="fontsize" value="
|
110 |
Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
|
111 |
Background color (Pro tip: #00FFFF00 = transparent): <input type="text" name="bg_color" value="#070a13b3"><br>
|
112 |
Text color: <input type="text" name="text_color" value="white"><br>
|
|
|
105 |
<option value="transcribe">Transcribe</option>
|
106 |
<option value="translate">Translate</option>
|
107 |
</select><br>
|
108 |
+
Max words per line: <input type="number" name="max_words_per_line" value="6"><br>
|
109 |
+
Font size: <input type="number" name="fontsize" value="42"><br>
|
110 |
Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
|
111 |
Background color (Pro tip: #00FFFF00 = transparent): <input type="text" name="bg_color" value="#070a13b3"><br>
|
112 |
Text color: <input type="text" name="text_color" value="white"><br>
|