marquesafonso committed on
Commit
d63a8c1
·
1 Parent(s): 97246dc

add translation task. tweak fontsize and max_words_per_line defaults.

Browse files
.dockerignore CHANGED
@@ -4,6 +4,7 @@ __pycache__/
4
  *.git
5
  data/
6
  temp/
 
7
  cli.py
8
  Pipfile
9
  Pipfile.lock
 
4
  *.git
5
  data/
6
  temp/
7
+ archive/
8
  cli.py
9
  Pipfile
10
  Pipfile.lock
main.py CHANGED
@@ -86,14 +86,16 @@ async def get_form():
86
  @app.post("/process_video/")
87
  async def process_video_api(video_file: MP4Video = Depends(),
88
  srt_file: SRTFile = Depends(),
89
- max_words_per_line: Optional[int] = Form(8),
90
- fontsize: Optional[int] = Form(36),
 
91
  font: Optional[str] = Form("FuturaPTHeavy"),
92
  bg_color: Optional[str] = Form("#070a13b3"),
93
  text_color: Optional[str] = Form("white"),
94
  username: str = Depends(get_current_user)
95
  ):
96
  try:
 
97
  logging.info("Creating temporary directories")
98
  temp_dir = os.path.join(os.getcwd(),"temp")
99
  os.makedirs(temp_dir, exist_ok=True)
@@ -115,12 +117,12 @@ async def process_video_api(video_file: MP4Video = Depends(),
115
  finally:
116
  srt_file.file.close()
117
  logging.info("Processing the video...")
118
- output_path, _ = process_video(temp_input_path, SRT_PATH, max_words_per_line, fontsize, font, bg_color, text_color)
119
  logging.info("Zipping response...")
120
  zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, SRT_PATH])
121
  return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
122
  logging.info("Processing the video...")
123
- output_path, srt_path = process_video(temp_input_path, None, max_words_per_line, fontsize, font, bg_color, text_color)
124
  logging.info("Zipping response...")
125
  zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, srt_path])
126
  return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
 
86
  @app.post("/process_video/")
87
  async def process_video_api(video_file: MP4Video = Depends(),
88
  srt_file: SRTFile = Depends(),
89
+ task: Optional[str] = Form("transcribe"),
90
+ max_words_per_line: Optional[int] = Form(6),
91
+ fontsize: Optional[int] = Form(42),
92
  font: Optional[str] = Form("FuturaPTHeavy"),
93
  bg_color: Optional[str] = Form("#070a13b3"),
94
  text_color: Optional[str] = Form("white"),
95
  username: str = Depends(get_current_user)
96
  ):
97
  try:
98
+ print(task)
99
  logging.info("Creating temporary directories")
100
  temp_dir = os.path.join(os.getcwd(),"temp")
101
  os.makedirs(temp_dir, exist_ok=True)
 
117
  finally:
118
  srt_file.file.close()
119
  logging.info("Processing the video...")
120
+ output_path, _ = process_video(temp_input_path, SRT_PATH, task, max_words_per_line, fontsize, font, bg_color, text_color)
121
  logging.info("Zipping response...")
122
  zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, SRT_PATH])
123
  return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
124
  logging.info("Processing the video...")
125
+ output_path, srt_path = process_video(temp_input_path, None, task, max_words_per_line, fontsize, font, bg_color, text_color)
126
  logging.info("Zipping response...")
127
  zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, srt_path])
128
  return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
static/submit_video.html CHANGED
@@ -31,6 +31,14 @@
31
  border: 1px solid #ddd;
32
  box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
33
  }
 
 
 
 
 
 
 
 
34
 
35
  input[type=submit] {
36
  width: 25%;
@@ -92,6 +100,11 @@
92
  <form action="/process_video/" enctype="multipart/form-data" method="post">
93
  Video File: <input type="file" name="video_file"><br>
94
  Subtitles File: <input type="file" name="srt_file"><br>
 
 
 
 
 
95
  Max words per line: <input type="number" name="max_words_per_line" value="8"><br>
96
  Font size: <input type="number" name="fontsize" value="36"><br>
97
  Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
 
31
  border: 1px solid #ddd;
32
  box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
33
  }
34
+ select {
35
+ width: 30%;
36
+ padding: 10px;
37
+ margin-bottom: 10px;
38
+ border-radius: 4px;
39
+ border: 1px solid #ddd;
40
+ box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
41
+ }
42
 
43
  input[type=submit] {
44
  width: 25%;
 
100
  <form action="/process_video/" enctype="multipart/form-data" method="post">
101
  Video File: <input type="file" name="video_file"><br>
102
  Subtitles File: <input type="file" name="srt_file"><br>
103
+ <label for="task">Task</label>
104
+ <select id="task" name="task">
105
+ <option value="transcribe">Transcribe</option>
106
+ <option value="translate">Translate</option>
107
+ </select><br>
108
  Max words per line: <input type="number" name="max_words_per_line" value="8"><br>
109
  Font size: <input type="number" name="fontsize" value="36"><br>
110
  Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
utils/archiver.py CHANGED
@@ -2,14 +2,15 @@ import shutil, os
2
  from datetime import datetime
3
 
4
  def archiver(timestamp:datetime):
5
- ARCHIVE = os.path.abspath(f"archive/{timestamp.year:4d}-{timestamp.month:02d}-{timestamp.day:02d}/")
 
6
  TEMP_DIR = os.path.abspath("temp/")
7
  LOG_FILE = os.path.abspath("main.log")
8
  if os.path.exists(TEMP_DIR):
9
  shutil.make_archive(os.path.join(ARCHIVE, "files"), 'zip', TEMP_DIR)
10
  shutil.rmtree(TEMP_DIR)
11
  if os.path.exists(LOG_FILE):
12
- shutil.copy(LOG_FILE, os.path.join(ARCHIVE, f"{timestamp.year:4d}-{timestamp.month:02d}-{timestamp.day:02d}.log"))
13
  os.remove(LOG_FILE)
14
 
15
  if __name__ == '__main__':
 
2
  from datetime import datetime
3
 
4
  def archiver(timestamp:datetime):
5
+ TIME = f"{timestamp.year:4d}-{timestamp.month:02d}-{timestamp.day:02d}_{timestamp.hour:02d}-{timestamp.minute:02d}"
6
+ ARCHIVE = os.path.abspath(f"archive/{TIME}")
7
  TEMP_DIR = os.path.abspath("temp/")
8
  LOG_FILE = os.path.abspath("main.log")
9
  if os.path.exists(TEMP_DIR):
10
  shutil.make_archive(os.path.join(ARCHIVE, "files"), 'zip', TEMP_DIR)
11
  shutil.rmtree(TEMP_DIR)
12
  if os.path.exists(LOG_FILE):
13
+ shutil.copy(LOG_FILE, os.path.join(ARCHIVE, f"{TIME}.log"))
14
  os.remove(LOG_FILE)
15
 
16
  if __name__ == '__main__':
utils/process_video.py CHANGED
@@ -13,6 +13,7 @@ logging.basicConfig(filename='main.log',
13
  # API Function
14
  def process_video(invideo_filename:str,
15
  srt_path: str,
 
16
  max_words_per_line:int,
17
  fontsize:str,
18
  font:str,
@@ -33,7 +34,7 @@ def process_video(invideo_filename:str,
33
  SRT_PATH = os.path.abspath(f"{invideo_filename.split('.')[0]}.srt")
34
  logging.info("Transcribing...")
35
  if not os.path.exists(SRT_PATH):
36
- transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line)
37
  logging.info("Subtitling...")
38
  subtitler(invideo_filename, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
39
  return OUTVIDEO_PATH, SRT_PATH
 
13
  # API Function
14
  def process_video(invideo_filename:str,
15
  srt_path: str,
16
+ task: str,
17
  max_words_per_line:int,
18
  fontsize:str,
19
  font:str,
 
34
  SRT_PATH = os.path.abspath(f"{invideo_filename.split('.')[0]}.srt")
35
  logging.info("Transcribing...")
36
  if not os.path.exists(SRT_PATH):
37
+ transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line, task)
38
  logging.info("Subtitling...")
39
  subtitler(invideo_filename, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
40
  return OUTVIDEO_PATH, SRT_PATH
utils/transcriber.py CHANGED
@@ -29,13 +29,15 @@ def write_srt(segments, srt_path, max_words_per_line):
29
 
30
  def transcriber(input_path:str,
31
  srt_path:str,
32
- max_words_per_line:int):
 
33
  #TODO: model_size = "distil-large-v3" -> need to wait for new pypi version of faster-whisper (pull request already merged)
34
  model_size = "large-v3"
35
  model = WhisperModel(model_size, device="cpu", compute_type="int8") #TODO: add condition_on_previous_text=False when using distil-whisper
36
  segments, info = model.transcribe(
37
  input_path,
38
  beam_size=5,
 
39
  vad_filter=True,
40
  vad_parameters=dict(min_silence_duration_ms=500),
41
  word_timestamps=True
 
29
 
30
  def transcriber(input_path:str,
31
  srt_path:str,
32
+ max_words_per_line:int,
33
+ task:str):
34
  #TODO: model_size = "distil-large-v3" -> need to wait for new pypi version of faster-whisper (pull request already merged)
35
  model_size = "large-v3"
36
  model = WhisperModel(model_size, device="cpu", compute_type="int8") #TODO: add condition_on_previous_text=False when using distil-whisper
37
  segments, info = model.transcribe(
38
  input_path,
39
  beam_size=5,
40
+ task=task,
41
  vad_filter=True,
42
  vad_parameters=dict(min_silence_duration_ms=500),
43
  word_timestamps=True