Commit
·
d63a8c1
1
Parent(s):
97246dc
Add translation task; tweak fontsize and max_words_per_line defaults.
Browse files
- .dockerignore +1 -0
- main.py +6 -4
- static/submit_video.html +13 -0
- utils/archiver.py +3 -2
- utils/process_video.py +2 -1
- utils/transcriber.py +3 -1
.dockerignore
CHANGED
@@ -4,6 +4,7 @@ __pycache__/
|
|
4 |
*.git
|
5 |
data/
|
6 |
temp/
|
|
|
7 |
cli.py
|
8 |
Pipfile
|
9 |
Pipfile.lock
|
|
|
4 |
*.git
|
5 |
data/
|
6 |
temp/
|
7 |
+
archive/
|
8 |
cli.py
|
9 |
Pipfile
|
10 |
Pipfile.lock
|
main.py
CHANGED
@@ -86,14 +86,16 @@ async def get_form():
|
|
86 |
@app.post("/process_video/")
|
87 |
async def process_video_api(video_file: MP4Video = Depends(),
|
88 |
srt_file: SRTFile = Depends(),
|
89 |
-
|
90 |
-
|
|
|
91 |
font: Optional[str] = Form("FuturaPTHeavy"),
|
92 |
bg_color: Optional[str] = Form("#070a13b3"),
|
93 |
text_color: Optional[str] = Form("white"),
|
94 |
username: str = Depends(get_current_user)
|
95 |
):
|
96 |
try:
|
|
|
97 |
logging.info("Creating temporary directories")
|
98 |
temp_dir = os.path.join(os.getcwd(),"temp")
|
99 |
os.makedirs(temp_dir, exist_ok=True)
|
@@ -115,12 +117,12 @@ async def process_video_api(video_file: MP4Video = Depends(),
|
|
115 |
finally:
|
116 |
srt_file.file.close()
|
117 |
logging.info("Processing the video...")
|
118 |
-
output_path, _ = process_video(temp_input_path, SRT_PATH, max_words_per_line, fontsize, font, bg_color, text_color)
|
119 |
logging.info("Zipping response...")
|
120 |
zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, SRT_PATH])
|
121 |
return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
|
122 |
logging.info("Processing the video...")
|
123 |
-
output_path, srt_path = process_video(temp_input_path, None, max_words_per_line, fontsize, font, bg_color, text_color)
|
124 |
logging.info("Zipping response...")
|
125 |
zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, srt_path])
|
126 |
return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
|
|
|
86 |
@app.post("/process_video/")
|
87 |
async def process_video_api(video_file: MP4Video = Depends(),
|
88 |
srt_file: SRTFile = Depends(),
|
89 |
+
task: Optional[str] = Form("transcribe"),
|
90 |
+
max_words_per_line: Optional[int] = Form(6),
|
91 |
+
fontsize: Optional[int] = Form(42),
|
92 |
font: Optional[str] = Form("FuturaPTHeavy"),
|
93 |
bg_color: Optional[str] = Form("#070a13b3"),
|
94 |
text_color: Optional[str] = Form("white"),
|
95 |
username: str = Depends(get_current_user)
|
96 |
):
|
97 |
try:
|
98 |
+
print(task)
|
99 |
logging.info("Creating temporary directories")
|
100 |
temp_dir = os.path.join(os.getcwd(),"temp")
|
101 |
os.makedirs(temp_dir, exist_ok=True)
|
|
|
117 |
finally:
|
118 |
srt_file.file.close()
|
119 |
logging.info("Processing the video...")
|
120 |
+
output_path, _ = process_video(temp_input_path, SRT_PATH, task, max_words_per_line, fontsize, font, bg_color, text_color)
|
121 |
logging.info("Zipping response...")
|
122 |
zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, SRT_PATH])
|
123 |
return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
|
124 |
logging.info("Processing the video...")
|
125 |
+
output_path, srt_path = process_video(temp_input_path, None, task, max_words_per_line, fontsize, font, bg_color, text_color)
|
126 |
logging.info("Zipping response...")
|
127 |
zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, srt_path])
|
128 |
return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
|
static/submit_video.html
CHANGED
@@ -31,6 +31,14 @@
|
|
31 |
border: 1px solid #ddd;
|
32 |
box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
|
33 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
input[type=submit] {
|
36 |
width: 25%;
|
@@ -92,6 +100,11 @@
|
|
92 |
<form action="/process_video/" enctype="multipart/form-data" method="post">
|
93 |
Video File: <input type="file" name="video_file"><br>
|
94 |
Subtitles File: <input type="file" name="srt_file"><br>
|
|
|
|
|
|
|
|
|
|
|
95 |
Max words per line: <input type="number" name="max_words_per_line" value="8"><br>
|
96 |
Font size: <input type="number" name="fontsize" value="36"><br>
|
97 |
Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
|
|
|
31 |
border: 1px solid #ddd;
|
32 |
box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
|
33 |
}
|
34 |
+
select {
|
35 |
+
width: 30%;
|
36 |
+
padding: 10px;
|
37 |
+
margin-bottom: 10px;
|
38 |
+
border-radius: 4px;
|
39 |
+
border: 1px solid #ddd;
|
40 |
+
box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
|
41 |
+
}
|
42 |
|
43 |
input[type=submit] {
|
44 |
width: 25%;
|
|
|
100 |
<form action="/process_video/" enctype="multipart/form-data" method="post">
|
101 |
Video File: <input type="file" name="video_file"><br>
|
102 |
Subtitles File: <input type="file" name="srt_file"><br>
|
103 |
+
<label for="task">Task</label>
|
104 |
+
<select id="task" name="task">
|
105 |
+
<option value="transcribe">Transcribe</option>
|
106 |
+
<option value="translate">Translate</option>
|
107 |
+
</select><br>
|
108 |
Max words per line: <input type="number" name="max_words_per_line" value="8"><br>
|
109 |
Font size: <input type="number" name="fontsize" value="36"><br>
|
110 |
Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
|
utils/archiver.py
CHANGED
@@ -2,14 +2,15 @@ import shutil, os
|
|
2 |
from datetime import datetime
|
3 |
|
4 |
def archiver(timestamp:datetime):
|
5 |
-
|
|
|
6 |
TEMP_DIR = os.path.abspath("temp/")
|
7 |
LOG_FILE = os.path.abspath("main.log")
|
8 |
if os.path.exists(TEMP_DIR):
|
9 |
shutil.make_archive(os.path.join(ARCHIVE, "files"), 'zip', TEMP_DIR)
|
10 |
shutil.rmtree(TEMP_DIR)
|
11 |
if os.path.exists(LOG_FILE):
|
12 |
-
shutil.copy(LOG_FILE, os.path.join(ARCHIVE, f"{
|
13 |
os.remove(LOG_FILE)
|
14 |
|
15 |
if __name__ == '__main__':
|
|
|
2 |
from datetime import datetime
|
3 |
|
4 |
def archiver(timestamp:datetime):
|
5 |
+
TIME = f"{timestamp.year:4d}-{timestamp.month:02d}-{timestamp.day:02d}_{timestamp.hour:02d}-{timestamp.minute:02d}"
|
6 |
+
ARCHIVE = os.path.abspath(f"archive/{TIME}")
|
7 |
TEMP_DIR = os.path.abspath("temp/")
|
8 |
LOG_FILE = os.path.abspath("main.log")
|
9 |
if os.path.exists(TEMP_DIR):
|
10 |
shutil.make_archive(os.path.join(ARCHIVE, "files"), 'zip', TEMP_DIR)
|
11 |
shutil.rmtree(TEMP_DIR)
|
12 |
if os.path.exists(LOG_FILE):
|
13 |
+
shutil.copy(LOG_FILE, os.path.join(ARCHIVE, f"{TIME}.log"))
|
14 |
os.remove(LOG_FILE)
|
15 |
|
16 |
if __name__ == '__main__':
|
utils/process_video.py
CHANGED
@@ -13,6 +13,7 @@ logging.basicConfig(filename='main.log',
|
|
13 |
# API Function
|
14 |
def process_video(invideo_filename:str,
|
15 |
srt_path: str,
|
|
|
16 |
max_words_per_line:int,
|
17 |
fontsize:str,
|
18 |
font:str,
|
@@ -33,7 +34,7 @@ def process_video(invideo_filename:str,
|
|
33 |
SRT_PATH = os.path.abspath(f"{invideo_filename.split('.')[0]}.srt")
|
34 |
logging.info("Transcribing...")
|
35 |
if not os.path.exists(SRT_PATH):
|
36 |
-
transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line)
|
37 |
logging.info("Subtitling...")
|
38 |
subtitler(invideo_filename, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
|
39 |
return OUTVIDEO_PATH, SRT_PATH
|
|
|
13 |
# API Function
|
14 |
def process_video(invideo_filename:str,
|
15 |
srt_path: str,
|
16 |
+
task: str,
|
17 |
max_words_per_line:int,
|
18 |
fontsize:str,
|
19 |
font:str,
|
|
|
34 |
SRT_PATH = os.path.abspath(f"{invideo_filename.split('.')[0]}.srt")
|
35 |
logging.info("Transcribing...")
|
36 |
if not os.path.exists(SRT_PATH):
|
37 |
+
transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line, task)
|
38 |
logging.info("Subtitling...")
|
39 |
subtitler(invideo_filename, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
|
40 |
return OUTVIDEO_PATH, SRT_PATH
|
utils/transcriber.py
CHANGED
@@ -29,13 +29,15 @@ def write_srt(segments, srt_path, max_words_per_line):
|
|
29 |
|
30 |
def transcriber(input_path:str,
|
31 |
srt_path:str,
|
32 |
-
max_words_per_line:int
|
|
|
33 |
#TODO: model_size = "distil-large-v3" -> need to wait for new pypi version of faster-whisper (pull request already merged)
|
34 |
model_size = "large-v3"
|
35 |
model = WhisperModel(model_size, device="cpu", compute_type="int8") #TODO: add condition_on_previous_text=False when using distil-whisper
|
36 |
segments, info = model.transcribe(
|
37 |
input_path,
|
38 |
beam_size=5,
|
|
|
39 |
vad_filter=True,
|
40 |
vad_parameters=dict(min_silence_duration_ms=500),
|
41 |
word_timestamps=True
|
|
|
29 |
|
30 |
def transcriber(input_path:str,
|
31 |
srt_path:str,
|
32 |
+
max_words_per_line:int,
|
33 |
+
task:str):
|
34 |
#TODO: model_size = "distil-large-v3" -> need to wait for new pypi version of faster-whisper (pull request already merged)
|
35 |
model_size = "large-v3"
|
36 |
model = WhisperModel(model_size, device="cpu", compute_type="int8") #TODO: add condition_on_previous_text=False when using distil-whisper
|
37 |
segments, info = model.transcribe(
|
38 |
input_path,
|
39 |
beam_size=5,
|
40 |
+
task=task,
|
41 |
vad_filter=True,
|
42 |
vad_parameters=dict(min_silence_duration_ms=500),
|
43 |
word_timestamps=True
|