Maximofn committed on
Commit
e015c08
·
1 Parent(s): 85c53a9

Refactor project structure and update dependencies

Browse files

- Modularize code by creating separate files for audio, transcription, and UI configuration
- Update requirements.txt to remove unnecessary dependencies
- Simplify audio extraction and transcription processes
- Modify app.py to use new modular structure
- Add new utility files like ui_config.py and audio.py
- Update .gitignore to include __pycache__

Files changed (8) hide show
  1. .gitignore +2 -1
  2. app.py +137 -506
  3. audio.py +46 -0
  4. requirements.txt +21 -10
  5. slice_audio.py +53 -33
  6. transcribe.py +77 -40
  7. ui_config.py +57 -0
  8. url_manager.py +87 -0
.gitignore CHANGED
@@ -14,4 +14,5 @@ sepformer.ipynb
14
  modelscope.ipynb
15
  audio_cache
16
  *.png
17
- .DS_Store
 
 
14
  modelscope.ipynb
15
  audio_cache
16
  *.png
17
+ .DS_Store
18
+ __pycache__
app.py CHANGED
@@ -3,14 +3,28 @@ import argparse
3
  import spaces
4
  import os
5
  import torch
 
6
  from time import sleep
7
  from tqdm import tqdm
8
  from lang_list import union_language_dict
9
  # import pyperclip
10
- from pytube import YouTube
11
  import re
12
  from PIL import Image
13
  # import urllib.request
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  NUMBER = 100
16
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -25,317 +39,33 @@ REMOVE_FILES = True
25
  if DEVICE == "cpu":
26
  # I suppose that I am on the Hugging Face server
27
  # Get RAM space
28
- ram = int(os.popen("free -m | grep Mem | awk '{print $2}'").read())
 
29
  factor = 1
30
- SECONDS = int(ram*factor)
31
- print(f"RAM: {ram}, SECONDS: {SECONDS}")
 
 
32
  else:
33
  # I suppose that I am on my computer
34
  # Get VRAM space
35
- SECONDS = 300
 
36
 
37
  YOUTUBE = "youtube"
38
  TWITCH = "twitch"
39
  ERROR = "error"
40
 
41
- subtify_logo = Image.open("./assets/subtify_logo-scaled.png")
42
  subtify_logo_width, subtify_logo_height = subtify_logo.size
43
  factor = 4
44
  new_width = subtify_logo_width // factor
45
  new_height = subtify_logo_height // factor
46
 
47
- BACKGROUND_COLOR = "#0b0f19"
48
- BUTTON_COLOR = "#47515f"
49
- SVG_COLOR = "#f3f4f6"
50
- PANEL_COLOR = "#101827"
51
- PRIMARY_TEXT_COLOR = "#f3f4f6"
52
- SUBDUED_TEXT_COLOR = "#59616f"
53
- BACKGROUND_PRIMARY_COLOR = "#1f2937"
54
- BACKGROUND_SECONDARY_COLOR = "#101827"
55
- PRIMARY_BODER_COLOR = "#323c4c"
56
- BLOCK_TITLE_TEXT_COLOR = "#dfe2e6"
57
- INPUT_BACKGROUND_COLOR = "#2f3947"
58
- INPUT_BORDER_COLOR = "#313b4b"
59
- INPUT_PLACEHOLDER_COLOR = "#616977"
60
- ERROR_BACKGROUND_COLOR = "#101827"
61
- ERROR_TEXT_COLOR = "#f7f2f2"
62
- ERROR_BORDER_COLOR = "#9b3339"
63
- BUTTON_SECONDARY_BACKGROUND_COLOR = "#434d5c"
64
- BUTTON_SECONDARY_BORDER_COLOR = "#444d5b"
65
- BUTTON_SECONDARY_TEXT_COLOR = "#c5c9cc"
66
- RED = "#ff0000"
67
- GREEN = "#00ff00"
68
- BLUE = "#0000ff"
69
-
70
- html_social_media = f'''
71
- <div style="float: right;">
72
- <a href="https://maximofn.com/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
73
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 576 512">
74
- <style>
75
- svg {"{"}
76
- fill: {SVG_COLOR}
77
- {"}"}
78
- </style>
79
- <path d="M208 80c0-26.5 21.5-48 48-48h64c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48h-8v40H464c30.9 0 56 25.1 56 56v32h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H464c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V288c0-4.4-3.6-8-8-8H312v40h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H256c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V280H112c-4.4 0-8 3.6-8 8v32h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V288c0-30.9 25.1-56 56-56H264V192h-8c-26.5 0-48-21.5-48-48V80z"/>
80
- </svg>
81
- </a>
82
- <a href="http://github.com/maximofn" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
83
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 496 512">
84
- <style>
85
- svg {"{"}
86
- fill: {SVG_COLOR}
87
- {"}"}
88
- </style>
89
- <path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/>
90
- </svg>
91
- </a>
92
- <a href="http://linkedin.com/in/MaximoFN/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
93
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 448 512">
94
- <style>
95
- svg {"{"}
96
- fill: {SVG_COLOR}
97
- {"}"}
98
- </style>
99
- <path d="M416 32H31.9C14.3 32 0 46.5 0 64.3v383.4C0 465.5 14.3 480 31.9 480H416c17.6 0 32-14.5 32-32.3V64.3c0-17.8-14.4-32.3-32-32.3zM135.4 416H69V202.2h66.5V416zm-33.2-243c-21.3 0-38.5-17.3-38.5-38.5S80.9 96 102.2 96c21.2 0 38.5 17.3 38.5 38.5 0 21.3-17.2 38.5-38.5 38.5zm282.1 243h-66.4V312c0-24.8-.5-56.7-34.5-56.7-34.6 0-39.9 27-39.9 54.9V416h-66.4V202.2h63.7v29.2h.9c8.9-16.8 30.6-34.5 62.9-34.5 67.2 0 79.7 44.3 79.7 101.9V416z"/>
100
- </svg>
101
- </a>
102
- <a href="http://kaggle.com/maximofn" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
103
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 320 512">
104
- <style>
105
- svg {"{"}
106
- fill: {SVG_COLOR}
107
- {"}"}
108
- </style>
109
- <path d="M304.2 501.5L158.4 320.3 298.2 185c2.6-2.7 1.7-10.5-5.3-10.5h-69.2c-3.5 0-7 1.8-10.5 5.3L80.9 313.5V7.5q0-7.5-7.5-7.5H21.5Q14 0 14 7.5v497q0 7.5 7.5 7.5h51.9q7.5 0 7.5-7.5v-109l30.8-29.3 110.5 140.6c3 3.5 6.5 5.3 10.5 5.3h66.9q5.25 0 6-3z"/>
110
- </svg>
111
- </a>
112
- <a href="https://twitter.com/Maximo_fn" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
113
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
114
- <style>
115
- svg {"{"}
116
- fill: {SVG_COLOR}
117
- {"}"}
118
- </style>
119
- <path d="M389.2 48h70.6L305.6 224.2 487 464H345L233.7 318.6 106.5 464H35.8L200.7 275.5 26.8 48H172.4L272.9 180.9 389.2 48zM364.4 421.8h39.1L151.1 88h-42L364.4 421.8z"/>
120
- </svg>
121
- </a>
122
- <a href="https://www.instagram.com/maximo__fn/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
123
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 448 512">
124
- <style>
125
- svg {"{"}
126
- fill: {SVG_COLOR}
127
- {"}"}
128
- </style>
129
- <path d="M224.1 141c-63.6 0-114.9 51.3-114.9 114.9s51.3 114.9 114.9 114.9S339 319.5 339 255.9 287.7 141 224.1 141zm0 189.6c-41.1 0-74.7-33.5-74.7-74.7s33.5-74.7 74.7-74.7 74.7 33.5 74.7 74.7-33.6 74.7-74.7 74.7zm146.4-194.3c0 14.9-12 26.8-26.8 26.8-14.9 0-26.8-12-26.8-26.8s12-26.8 26.8-26.8 26.8 12 26.8 26.8zm76.1 27.2c-1.7-35.9-9.9-67.7-36.2-93.9-26.2-26.2-58-34.4-93.9-36.2-37-2.1-147.9-2.1-184.9 0-35.8 1.7-67.6 9.9-93.9 36.1s-34.4 58-36.2 93.9c-2.1 37-2.1 147.9 0 184.9 1.7 35.9 9.9 67.7 36.2 93.9s58 34.4 93.9 36.2c37 2.1 147.9 2.1 184.9 0 35.9-1.7 67.7-9.9 93.9-36.2 26.2-26.2 34.4-58 36.2-93.9 2.1-37 2.1-147.8 0-184.8zM398.8 388c-7.8 19.6-22.9 34.7-42.6 42.6-29.5 11.7-99.5 9-132.1 9s-102.7 2.6-132.1-9c-19.6-7.8-34.7-22.9-42.6-42.6-11.7-29.5-9-99.5-9-132.1s-2.6-102.7 9-132.1c7.8-19.6 22.9-34.7 42.6-42.6 29.5-11.7 99.5-9 132.1-9s102.7-2.6 132.1 9c19.6 7.8 34.7 22.9 42.6 42.6 11.7 29.5 9 99.5 9 132.1s2.7 102.7-9 132.1z"/>
130
- </svg>
131
- </a>
132
- <a href="https://www.youtube.com/channel/UCdQwg2JU_fWRsHn3yIlf3tw" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
133
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 576 512">
134
- <style>
135
- svg {"{"}
136
- fill: {SVG_COLOR}
137
- {"}"}
138
- </style>
139
- <path d="M549.655 124.083c-6.281-23.65-24.787-42.276-48.284-48.597C458.781 64 288 64 288 64S117.22 64 74.629 75.486c-23.497 6.322-42.003 24.947-48.284 48.597-11.412 42.867-11.412 132.305-11.412 132.305s0 89.438 11.412 132.305c6.281 23.65 24.787 41.5 48.284 47.821C117.22 448 288 448 288 448s170.78 0 213.371-11.486c23.497-6.321 42.003-24.171 48.284-47.821 11.412-42.867 11.412-132.305 11.412-132.305s0-89.438-11.412-132.305zm-317.51 213.508V175.185l142.739 81.205-142.739 81.201z"/>
140
- </svg>
141
- </a>
142
- <a href="https://www.facebook.com/profile.php?id=100085177670661" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
143
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
144
- <style>
145
- svg {"{"}
146
- fill: {SVG_COLOR}
147
- {"}"}
148
- </style>
149
- <path d="M504 256C504 119 393 8 256 8S8 119 8 256c0 123.78 90.69 226.38 209.25 245V327.69h-63V256h63v-54.64c0-62.15 37-96.48 93.67-96.48 27.14 0 55.52 4.84 55.52 4.84v61h-31.28c-30.8 0-40.41 19.12-40.41 38.73V256h68.78l-11 71.69h-57.78V501C413.31 482.38 504 379.78 504 256z"/>
150
- </svg>
151
- </a>
152
- <a href="https://www.tiktok.com/@maximo__fn" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
153
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 448 512">
154
- <style>
155
- svg {"{"}
156
- fill: {SVG_COLOR}
157
- {"}"}
158
- </style>
159
- <path d="M448,209.91a210.06,210.06,0,0,1-122.77-39.25V349.38A162.55,162.55,0,1,1,185,188.31V278.2a74.62,74.62,0,1,0,52.23,71.18V0l88,0a121.18,121.18,0,0,0,1.86,22.17h0A122.18,122.18,0,0,0,381,102.39a121.43,121.43,0,0,0,67,20.14Z"/>
160
- </svg>
161
- </a>
162
- <a href="https://www.twitch.tv/maximofn/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
163
- <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
164
- <style>
165
- svg {"{"}
166
- fill: {SVG_COLOR}
167
- {"}"}
168
- </style>
169
- <path d="M391.17,103.47H352.54v109.7h38.63ZM285,103H246.37V212.75H285ZM120.83,0,24.31,91.42V420.58H140.14V512l96.53-91.42h77.25L487.69,256V0ZM449.07,237.75l-77.22,73.12H294.61l-67.6,64v-64H140.14V36.58H449.07Z"/>
170
- </svg>
171
- </a>
172
- </div>
173
- '''
174
-
175
- html_subtify_logo = f"""
176
- <div style="display: flex; justify-content: center; align-items: center;">
177
- <img src='https://pub-fb664c455eca46a2ba762a065ac900f7.r2.dev/subtify_logo-scaled.webp' width={new_width}px height={new_height}px >
178
- </div>
179
- """
180
-
181
- html_buy_me_a_coffe = '''
182
- <div style="float: right;">
183
- <a href="https://www.buymeacoffee.com/maximofn" target="_blank">
184
- <img src="https://img.shields.io/badge/Buy_Me_A_Coffee-support_my_work-FFDD00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=white&labelColor=101010" alt="buy me a coffe">
185
- </a>
186
- </div>
187
- '''
188
 
189
  language_dict = union_language_dict()
190
 
191
- # def subtify_no_ui():
192
- # number_works = 6
193
- # progress_bar = tqdm(total=number_works, desc="Subtify")
194
- # folder_chunck = "chunks"
195
- # folder_concatenated = "concatenated_transcriptions"
196
- # folder_translated_transcriptions = "translated_transcriptions"
197
- # if not os.path.exists(folder_chunck):
198
- # os.makedirs(folder_chunck)
199
- # if not os.path.exists(folder_concatenated):
200
- # os.makedirs(folder_concatenated)
201
- # if not os.path.exists(folder_translated_transcriptions):
202
- # os.makedirs(folder_translated_transcriptions)
203
-
204
- # ################## Download video and audio ##################
205
- # if DOWNLOAD:
206
- # print('*'*NUMBER)
207
- # # url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas
208
- # # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, varios hablantes, 3 minutos
209
- # # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos
210
- # # url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos
211
- # # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos
212
- # # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short
213
- # url = "https://www.youtube.com/watch?v=h9xPrgTYP_0" # Letitia 40 segundos
214
- # print(f"Downloading video and audio from {url}")
215
- # python_file = "download.py"
216
- # command = f"python {python_file} {url}"
217
- # os.system(command)
218
- # sleep(1)
219
- # print('*'*NUMBER)
220
- # print("\n\n")
221
- # progress_bar.update(1)
222
-
223
- # ################## Slice audio ##################
224
- # if SLICE_AUDIO:
225
- # print('*'*NUMBER)
226
- # print("Slicing audio")
227
- # python_file = "slice_audio.py"
228
- # audio = "audios/download_audio.mp3"
229
- # command = f"python {python_file} {audio} {SECONDS}"
230
- # os.system(command)
231
- # print('*'*NUMBER)
232
- # print("\n\n")
233
- # progress_bar.update(1)
234
-
235
- # ################# Transcript slices ##################
236
- # if TRANSCRIBE_AUDIO:
237
- # print('*'*NUMBER)
238
- # print("Transcript slices")
239
- # chunks_folder = "chunks"
240
- # if not os.path.exists(chunks_folder):
241
- # os.makedirs(chunks_folder)
242
- # python_file = "transcribe.py"
243
- # chunks_file = "chunks/output_files.txt"
244
- # number_of_speakers = 10
245
- # source_languaje = "English"
246
- # command = f"python {python_file} {chunks_file} {source_languaje} {number_of_speakers} {DEVICE}"
247
- # os.system(command)
248
- # if REMOVE_FILES:
249
- # with open(chunks_file, 'r') as f:
250
- # files = f.read().splitlines()
251
- # for file in files:
252
- # audios_extension = "mp3"
253
- # file_name, _ = file.split(".")
254
- # _, file_name = file_name.split("/")
255
- # vocal = f'{chunks_folder}/{file_name}.{audios_extension}'
256
- # command = f"rm {vocal}"
257
- # os.system(command)
258
- # print('*'*NUMBER)
259
- # print("\n\n")
260
- # progress_bar.update(1)
261
-
262
- # ################## Concatenate transcriptions ##################
263
- # if CONCATENATE_TRANSCRIPTIONS:
264
- # print('*'*NUMBER)
265
- # print("Concatenate transcriptions")
266
- # folder_concatenated = "concatenated_transcriptions"
267
- # if not os.path.exists(folder_concatenated):
268
- # os.makedirs(folder_concatenated)
269
-
270
- # chunck_file = "chunks/output_files.txt"
271
- # python_file = "concat_transcriptions.py"
272
- # command = f"python {python_file} {chunck_file} {SECONDS}"
273
- # os.system(command)
274
- # if REMOVE_FILES:
275
- # with open(chunck_file, 'r') as f:
276
- # files = f.read().splitlines()
277
- # for file in files:
278
- # file_name, _ = file.split(".")
279
- # _, file_name = file_name.split("/")
280
- # transcriptions_folder = "transcriptions"
281
- # transcription_extension = "srt"
282
- # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
283
- # os.system(command)
284
- # print('*'*NUMBER)
285
- # print("\n\n")
286
- # progress_bar.update(1)
287
-
288
- # ################## Translate transcription ##################
289
- # target_languaje = "Español"
290
- # if TRANSLATE_TRANSCRIPTIONS:
291
- # print('*'*NUMBER)
292
- # print("Translate transcription")
293
- # transcription_file = "concatenated_transcriptions/download_audio.srt"
294
- # source_languaje = "English"
295
- # python_file = "translate_transcriptions.py"
296
- # command = f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
297
- # os.system(command)
298
- # if REMOVE_FILES:
299
- # if os.path.exists(transcription_file):
300
- # command = f"rm {transcription_file}"
301
- # os.system(command)
302
- # print('*'*NUMBER)
303
- # print("\n\n")
304
- # progress_bar.update(1)
305
-
306
- # ################## Add subtitles to video ##################
307
- # if ADD_SUBTITLES_TO_VIDEO:
308
- # print('*'*NUMBER)
309
- # print("Add subtitles to video")
310
- # python_file = "add_subtitles_to_video.py"
311
- # transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt"
312
- # input_video_file = "videos/download_video.mp4"
313
- # input_audio_file = "audios/download_audio.mp3"
314
- # command = f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}"
315
- # os.system(command)
316
- # if REMOVE_FILES:
317
- # if os.path.exists(input_video_file):
318
- # command = f"rm {input_video_file}"
319
- # os.system(command)
320
- # if os.path.exists(input_audio_file):
321
- # command = f"rm {input_audio_file}"
322
- # os.system(command)
323
- # if os.path.exists(transcription_file):
324
- # command = f"rm {transcription_file}"
325
- # os.system(command)
326
- # if os.path.exists("chunks/output_files.txt"):
327
- # command = f"rm chunks/output_files.txt"
328
- # os.system(command)
329
- # if os.path.exists("chunks"):
330
- # command = f"rm -r chunks"
331
- # os.system(command)
332
- # if os.path.exists("vocals/speakers.txt"):
333
- # command = f"rm vocals/speakers.txt"
334
- # os.system(command)
335
- # print('*'*NUMBER)
336
- # print("\n\n")
337
- # progress_bar.update(1)
338
-
339
  def remove_all_files():
340
  if os.path.exists("audios"):
341
  command = f"rm -r audios"
@@ -359,13 +89,10 @@ def remove_all_files():
359
  command = f"rm -r vocals"
360
  os.system(command)
361
 
362
- # def paste_url_from_clipboard():
363
- # return pyperclip.paste()
364
-
365
  def reset_frontend():
366
  visible = False
367
  return (
368
- "",
369
  gr.Image(visible=visible),
370
  gr.Dropdown(visible=visible),
371
  gr.Dropdown(visible=visible),
@@ -381,142 +108,47 @@ def reset_frontend():
381
  gr.Textbox(visible=visible),
382
  gr.Textbox(visible=visible),
383
  gr.Textbox(visible=visible),
384
- gr.Textbox(visible=visible),
385
- gr.Textbox(visible=visible),
386
  gr.Video(visible=visible),
387
  )
388
 
389
  def show_auxiliar_block1():
390
  return gr.Textbox(value="URL checked", visible=False)
391
 
392
- def get_youtube_thumbnail(url):
393
- yt = YouTube(url)
394
- thumbnail_url = yt.thumbnail_url
395
- return thumbnail_url
396
-
397
- def is_valid_youtube_url(url):
398
- # This regular expression should match the following YouTube URL formats:
399
- # - https://youtube.com/watch?v=video_id
400
- # - https://www.youtube.com/watch?v=video_id
401
- # - https://youtu.be/video_id
402
- patron_youtube = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+'
403
- return bool(re.match(patron_youtube, url))
404
-
405
- def is_valid_twitch_url(url):
406
- # This regular expression should match the following Twitch URL formats:
407
- # - https://twitch.tv/channel_name
408
- # - https://www.twitch.tv/channel_name
409
- # - https://twitch.tv/videos/video_id
410
- twitch_pattern = r'(https?://)?(www\.)?twitch\.tv/(videos/\d+|\w+)'
411
- return bool(re.match(twitch_pattern, url))
412
-
413
- def is_valid_url(url):
414
- num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
415
- source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
416
- target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
417
- advanced_setings = gr.Accordion(visible=True)
418
- number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
419
- subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)
420
-
421
- # Youtube
422
- if "youtube" in url.lower() or "youtu.be" in url.lower():
423
- if is_valid_youtube_url(url):
424
- thumbnail = get_youtube_thumbnail(url)
425
- if thumbnail:
426
- return (
427
- gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
428
- source_languaje,
429
- target_languaje,
430
- advanced_setings,
431
- number_of_speakers,
432
- subtify_button,
433
- )
434
- else:
435
- return (
436
- gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
437
- source_languaje,
438
- target_languaje,
439
- advanced_setings,
440
- number_of_speakers,
441
- subtify_button,
442
- )
443
-
444
- # Twitch
445
- elif "twitch" in url.lower() or "twitch.tv" in url.lower():
446
- if is_valid_twitch_url(url):
447
- return (
448
- gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
449
- source_languaje,
450
- target_languaje,
451
- advanced_setings,
452
- number_of_speakers,
453
- subtify_button,
454
- )
455
-
456
- # Error
457
- visible = False
458
- image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
459
- source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
460
- target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
461
- advanced_setings = gr.Accordion(visible=visible)
462
- number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
463
- subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
464
- return (
465
- image,
466
- source_languaje,
467
- target_languaje,
468
- advanced_setings,
469
- number_of_speakers,
470
- subtify_button,
471
- )
472
-
473
  def change_visibility_texboxes():
474
-
475
  return (
476
- gr.Textbox(value="Done"),
477
- gr.Textbox(visible=True),
478
- gr.Textbox(visible=True),
479
- gr.Textbox(visible=True),
480
- gr.Textbox(visible=True),
481
- gr.Textbox(visible=True),
482
- gr.Textbox(visible=True),
483
- gr.Textbox(visible=False),
484
  )
485
 
486
- def get_audio_and_video_from_video(url):
487
  print('*'*NUMBER)
488
- print(f"Downloading video and audio from {url}")
489
-
490
- audios_folder = "audios"
491
- videos_folder = "videos"
492
- if not os.path.exists(audios_folder):
493
- os.makedirs(audios_folder)
494
- if not os.path.exists(videos_folder):
495
- os.makedirs(videos_folder)
496
 
497
- python_file = "download.py"
498
- command = f"python {python_file} {url}"
499
- os.system(command)
500
- sleep(1)
501
-
502
- audio = "audios/download_audio.mp3"
503
- video = "videos/download_video.mp4"
504
-
505
- if not os.path.exists(audio):
506
- raise Exception("Error downloading audio")
507
- if not os.path.exists(video):
508
- raise Exception("Error downloading video")
509
-
510
- return (
511
- gr.Textbox(value="Ok"),
512
- gr.Textbox(value=audio),
513
- gr.Textbox(value=video),
514
- )
515
-
516
- def slice_audio(audio_path):
517
  print('*'*NUMBER)
518
- print("Slicing audio")
519
 
 
 
520
  folder_vocals = "vocals"
521
  folder_chunck = "chunks"
522
  if not os.path.exists(folder_vocals):
@@ -524,34 +156,22 @@ def slice_audio(audio_path):
524
  if not os.path.exists(folder_chunck):
525
  os.makedirs(folder_chunck)
526
 
527
- python_file = "slice_audio.py"
528
- command = f"python {python_file} {audio_path} {SECONDS}"
529
- os.system(command)
530
 
531
  return (
532
- gr.Textbox(value="Ok")
533
  )
534
 
535
- def trascribe_audio(source_languaje, number_of_speakers):
536
  print('*'*NUMBER)
537
- print("Transcript slices")
538
-
539
- folder_chunks = "chunks"
540
- python_file = "transcribe.py"
541
- chunks_file = "chunks/output_files.txt"
542
- command = f"python {python_file} {chunks_file} {source_languaje} {number_of_speakers} {DEVICE}"
543
- os.system(command)
544
 
545
- with open(chunks_file, 'r') as f:
546
- files = f.read().splitlines()
547
- for file in files:
548
- audios_extension = "mp3"
549
- file_name, _ = file.split(".")
550
- _, file_name = file_name.split("/")
551
- vocal = f'{folder_chunks}/{file_name}.{audios_extension}'
552
- command = f"rm {vocal}"
553
- os.system(command)
554
 
 
 
 
555
  return (
556
  gr.Textbox(value="Ok")
557
  )
@@ -566,7 +186,7 @@ def concatenate_transcriptions():
566
 
567
  chunck_file = "chunks/output_files.txt"
568
  python_file = "concat_transcriptions.py"
569
- command = f"python {python_file} {chunck_file} {SECONDS}"
570
  os.system(command)
571
 
572
  with open(chunck_file, 'r') as f:
@@ -651,6 +271,23 @@ def hide_textbobes_progress_info():
651
  gr.Textbox(value="Waiting", visible=visible),
652
  )
653
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
  @spaces.GPU
655
  def subtify():
656
  with gr.Blocks(
@@ -700,54 +337,59 @@ def subtify():
700
  gr.HTML(html_social_media)
701
  gr.HTML("<h1 style='text-align: center;'>Subtify</h1>")
702
  gr.HTML(html_subtify_logo)
703
- with gr.Row(variant="panel"):
704
- url_textbox = gr.Textbox(placeholder="Add video URL here and wait a moment", label="Video URL", elem_id="video_url", scale=1, interactive=True)
705
- # paste_button = gr.Button(size="sm", icon="icons/paste.svg", value="paste", min_width="10px", scale=0)
706
- delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="clear", min_width="10px", scale=0)
707
-
708
  visible = False
709
- auxiliar_block1 = gr.Textbox(label="Auxiliar block 1", elem_id="auxiliar_block1", interactive=False, visible=visible)
710
- with gr.Row(equal_height=False):
711
- image = gr.Image(visible=visible, scale=1)
712
- with gr.Column():
 
 
 
 
 
 
 
 
 
 
713
  with gr.Row():
714
- source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
715
- target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
716
- with gr.Accordion("Advanced settings", open=False, visible=visible) as Advanced_setings:
717
- number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
718
- subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
719
 
720
- auxiliar_block2 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 2", elem_id="auxiliar_block2", interactive=False, visible=visible)
721
  with gr.Row():
722
- video_donwloaded_progress_info = gr.Textbox(placeholder="Waiting", label="Video download progress info", elem_id="video_donwloaded_progress_info", interactive=False, visible=visible)
723
- video_sliced_progress_info = gr.Textbox(placeholder="Waiting", label="Video slice progress info", elem_id="video_sliced_progress_info", interactive=False, visible=visible)
724
  video_transcribed_progress_info = gr.Textbox(placeholder="Waiting", label="Transcribe progress info", elem_id="video_transcribed_progress_info", interactive=False, visible=visible)
725
  transcriptions_concatenated_progress_info = gr.Textbox(placeholder="Waiting", label="Concatenate progress info", elem_id="transcriptions_concatenated_progress_info", interactive=False, visible=visible)
726
  video_translated_progress_info = gr.Textbox(placeholder="Waiting", label="Translate progress info", elem_id="transcription_translated_progress_info", interactive=False, visible=visible)
727
  video_subtitled_progress_info = gr.Textbox(placeholder="Waiting", label="Video subtitle progress info", elem_id="video_subtitled_progress_info", interactive=False, visible=visible)
728
 
729
  original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
730
- original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=visible)
731
  original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=visible)
732
  original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
733
  subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
734
  auxiliar_block3 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 3", elem_id="auxiliar_block3", interactive=False, visible=visible)
735
 
 
 
736
  # Events
737
  # paste_button.click(fn=paste_url_from_clipboard, outputs=url_textbox)
738
  delete_button.click(
739
  fn=reset_frontend,
740
  outputs=[
741
- url_textbox,
742
- image,
743
  source_languaje,
744
  target_languaje,
745
  Advanced_setings,
746
  number_of_speakers,
747
  subtify_button,
748
- auxiliar_block2,
749
- video_donwloaded_progress_info,
750
- video_sliced_progress_info,
751
  video_transcribed_progress_info,
752
  transcriptions_concatenated_progress_info,
753
  video_translated_progress_info,
@@ -755,54 +397,43 @@ def subtify():
755
  subtitled_video,
756
  ]
757
  )
758
- url_textbox.change(
759
- fn=show_auxiliar_block1,
760
- outputs=[auxiliar_block1]
761
- )
762
- auxiliar_block1.change(
763
- fn=is_valid_url,
764
- inputs=url_textbox,
765
- outputs=[image, source_languaje, target_languaje, Advanced_setings, number_of_speakers, subtify_button]
766
  )
767
  subtify_button.click(
768
  fn=change_visibility_texboxes,
769
- outputs=[auxiliar_block2, video_donwloaded_progress_info, video_sliced_progress_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info, auxiliar_block1]
770
- )
771
- auxiliar_block2.change(
772
- fn=get_audio_and_video_from_video,
773
- inputs=[url_textbox],
774
- outputs=[video_donwloaded_progress_info, original_audio_path, original_video_path]
775
  )
776
- video_donwloaded_progress_info.change(
777
- fn=slice_audio,
778
- inputs=[original_audio_path],
779
- outputs=[video_sliced_progress_info]
780
  )
781
- video_sliced_progress_info.change(
782
  fn=trascribe_audio,
783
- inputs=[source_languaje, number_of_speakers],
784
  outputs=[video_transcribed_progress_info]
785
  )
786
- video_transcribed_progress_info.change(
787
- fn=concatenate_transcriptions,
788
- outputs=[transcriptions_concatenated_progress_info, original_audio_transcribed_path]
789
- )
790
- transcriptions_concatenated_progress_info.change(
791
- fn=translate_transcription,
792
- inputs=[original_audio_transcribed_path, source_languaje, target_languaje],
793
- outputs=[video_translated_progress_info, original_audio_translated_path]
794
- )
795
- video_translated_progress_info.change(
796
- fn=add_translated_subtitles_to_video,
797
- inputs=[original_video_path, original_audio_path, original_audio_translated_path],
798
- outputs=[subtitled_video, video_subtitled_progress_info, auxiliar_block3]
799
- )
800
- auxiliar_block3.change(
801
- fn=hide_textbobes_progress_info,
802
- outputs=[video_donwloaded_progress_info, video_sliced_progress_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
803
- )
804
-
805
- gr.HTML(html_buy_me_a_coffe)
806
 
807
  demo.launch()
808
 
 
3
  import spaces
4
  import os
5
  import torch
6
+ import shutil
7
  from time import sleep
8
  from tqdm import tqdm
9
  from lang_list import union_language_dict
10
  # import pyperclip
 
11
  import re
12
  from PIL import Image
13
  # import urllib.request
14
+ from ui_config import (
15
+ BACKGROUND_COLOR, BUTTON_COLOR, SVG_COLOR, PANEL_COLOR,
16
+ PRIMARY_TEXT_COLOR, SUBDUED_TEXT_COLOR, BACKGROUND_PRIMARY_COLOR,
17
+ BACKGROUND_SECONDARY_COLOR, PRIMARY_BODER_COLOR, BLOCK_TITLE_TEXT_COLOR,
18
+ INPUT_BACKGROUND_COLOR, INPUT_BORDER_COLOR, INPUT_PLACEHOLDER_COLOR,
19
+ ERROR_BACKGROUND_COLOR, ERROR_TEXT_COLOR, ERROR_BORDER_COLOR,
20
+ BUTTON_SECONDARY_BACKGROUND_COLOR, BUTTON_SECONDARY_BORDER_COLOR,
21
+ BUTTON_SECONDARY_TEXT_COLOR, RED, GREEN, BLUE,
22
+ html_social_media, get_html_subtify_logo, html_buy_me_a_coffe
23
+ )
24
+ # from url_manager import get_youtube_thumbnail, is_valid_youtube_url, is_valid_twitch_url, is_valid_url
25
+ from slice_audio import slice_audio as slice_audio_main
26
+ from audio import get_audio_from_video
27
+ from transcribe import transcribe, get_language_dict
28
 
29
  NUMBER = 100
30
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
39
  if DEVICE == "cpu":
40
  # I suppose that I am on the Hugging Face server
41
  # Get RAM space
42
+ # ram = int(os.popen("free -m | grep Mem | awk '{print $2}'").read())
43
+ ram = 16000
44
  factor = 1
45
+ CHUNK_SECONDS = int(ram*factor)
46
+ CHUNK_SECONDS = 30
47
+ CHUNK_OVERLAP_SECONDS = 5
48
+ print(f"RAM: {ram}, CHUNK_SECONDS: {CHUNK_SECONDS}, CHUNK_OVERLAP_SECONDS: {CHUNK_OVERLAP_SECONDS}")
49
  else:
50
  # I suppose that I am on my computer
51
  # Get VRAM space
52
+ CHUNK_SECONDS = 30
53
+ CHUNK_OVERLAP_SECONDS = 5
54
 
55
  YOUTUBE = "youtube"
56
  TWITCH = "twitch"
57
  ERROR = "error"
58
 
59
+ subtify_logo = Image.open("assets/subtify_logo-scaled.png")
60
  subtify_logo_width, subtify_logo_height = subtify_logo.size
61
  factor = 4
62
  new_width = subtify_logo_width // factor
63
  new_height = subtify_logo_height // factor
64
 
65
+ html_subtify_logo = get_html_subtify_logo(new_width, new_height)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  language_dict = union_language_dict()
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def remove_all_files():
70
  if os.path.exists("audios"):
71
  command = f"rm -r audios"
 
89
  command = f"rm -r vocals"
90
  os.system(command)
91
 
 
 
 
92
  def reset_frontend():
93
  visible = False
94
  return (
95
+ None,
96
  gr.Image(visible=visible),
97
  gr.Dropdown(visible=visible),
98
  gr.Dropdown(visible=visible),
 
108
  gr.Textbox(visible=visible),
109
  gr.Textbox(visible=visible),
110
  gr.Textbox(visible=visible),
 
 
111
  gr.Video(visible=visible),
112
  )
113
 
114
  def show_auxiliar_block1():
115
  return gr.Textbox(value="URL checked", visible=False)
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  def change_visibility_texboxes():
 
118
  return (
119
+ gr.update(value="Done"), # auxiliar_block1
120
+ gr.update(visible=True), # get_audio_from_video_info
121
+ gr.update(visible=True), # video_sliced_progress_info
122
+ gr.update(visible=True), # video_transcribed_progress_info
123
+ gr.update(visible=True), # transcriptions_concatenated_progress_info
124
+ gr.update(visible=True), # video_translated_progress_info
125
+ gr.update(visible=True), # video_subtitled_progress_info
 
126
  )
127
 
128
def get_audio(video_path):
    """
    Extract the audio track of ``video_path`` into the ``audios`` folder.

    Args:
        video_path (str): Path to the video file.

    Returns:
        list: Two Gradio updates, the status text ("Ok" or "Error") for
        ``get_audio_from_video_info`` and the audio path (empty on error) for
        ``original_audio_path``.
    """
    print('*'*NUMBER)
    print(f"Getting audio from video {video_path}")

    audios_folder = "audios"
    try:
        extracted_path = get_audio_from_video(video_path, audios_folder)
    except Exception as e:
        # Report the failure to the UI instead of letting the event crash
        print(f"Error: {e}")
        status, extracted_path = "Error", ""
    else:
        status = "Ok"

    return [
        gr.update(value=status),          # get_audio_from_video_info
        gr.update(value=extracted_path),  # original_audio_path
    ]
145
+
146
+ def slice_audio(input_audio_path):
 
 
 
 
 
147
  print('*'*NUMBER)
148
+ print(f"Slicing audio {input_audio_path} in chunks of {CHUNK_SECONDS} seconds with {CHUNK_OVERLAP_SECONDS} seconds overlap")
149
 
150
+ # Create vocals and chunks folders
151
+ print("Creating vocals and chunks folders")
152
  folder_vocals = "vocals"
153
  folder_chunck = "chunks"
154
  if not os.path.exists(folder_vocals):
 
156
  if not os.path.exists(folder_chunck):
157
  os.makedirs(folder_chunck)
158
 
159
+ slice_audio_main(input_audio_path, folder_chunck, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)
 
 
160
 
161
  return (
162
+ gr.update(value="Ok"), # video_sliced_progress_info
163
  )
164
 
165
def trascribe_audio(input_audio_path, source_languaje):
    """
    Transcribe the extracted audio file in the selected source language.

    Args:
        input_audio_path (str): Path to the audio file. It is empty when the
            audio extraction step reported an error.
        source_languaje (str): Language name, used as a key of the dictionary
            returned by ``get_language_dict()``.

    Returns:
        gr.Textbox: Textbox update with value "Ok", or "Error" when there is
        no audio file to transcribe.
    """
    print('*'*NUMBER)
    print(f"Transcript {input_audio_path}")

    # The extraction step signals failure with an empty path; do not hand
    # that to the model, report the error instead.
    if not input_audio_path:
        print("Error: no audio file to transcribe")
        return gr.Textbox(value="Error")

    # Get language dict
    language_dict = get_language_dict()
    transcriber_language = language_dict[source_languaje]["transcriber"]

    # Transcribe audio file
    transcribe(input_audio_path, transcriber_language, DEVICE, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)

    return gr.Textbox(value="Ok")
 
186
 
187
  chunck_file = "chunks/output_files.txt"
188
  python_file = "concat_transcriptions.py"
189
+ command = f"python {python_file} {chunck_file} {CHUNK_SECONDS} {CHUNK_OVERLAP_SECONDS}"
190
  os.system(command)
191
 
192
  with open(chunck_file, 'r') as f:
 
271
  gr.Textbox(value="Waiting", visible=visible),
272
  )
273
 
274
def process_uploaded_video(video_path):
    """
    Copy the uploaded video into the ``videos`` folder and show the config block.

    Args:
        video_path (str | None): Path of the uploaded video. It is empty or
            ``None`` when the video component holds no file, e.g. after the
            reset handler writes ``None`` to it.

    Returns:
        list: Gradio updates for ``video_input``, ``config_block`` and
        ``original_video_path``, in that order.
    """
    # Nothing uploaded (or the video was cleared): hide the configuration
    # again instead of crashing in shutil.copy.
    if not video_path:
        return [
            gr.update(label="Upload video"),  # video_input
            gr.update(visible=False),         # config_block
            gr.update(value=""),              # original_video_path
        ]

    # Create videos folder
    videos_folder = "videos"
    os.makedirs(videos_folder, exist_ok=True)

    # Copy uploaded video to videos folder
    new_video_path = os.path.join(videos_folder, "download_video.mp4")
    shutil.copy(video_path, new_video_path)

    # Show the config block and publish the new video path
    return [
        gr.update(label="Video uploaded"),  # video_input
        gr.update(visible=True),            # config_block
        gr.update(value=new_video_path),    # original_video_path
    ]
290
+
291
  @spaces.GPU
292
  def subtify():
293
  with gr.Blocks(
 
337
  gr.HTML(html_social_media)
338
  gr.HTML("<h1 style='text-align: center;'>Subtify</h1>")
339
  gr.HTML(html_subtify_logo)
340
+
341
+ # Input block, where the user can upload a video and configure the subtify process
 
 
 
342
  visible = False
343
+ input_block = gr.Row(variant="panel")
344
+ with input_block:
345
+ input_video_block = gr.Row(scale=2)
346
+ with input_video_block:
347
+ video_input = gr.Video(
348
+ label="Upload video",
349
+ sources=["upload"],
350
+ scale=1,
351
+ interactive=True
352
+ )
353
+ delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="clear", min_width="10px", scale=0)
354
+
355
+ config_block = gr.Column(scale=1, visible=visible)
356
+ with config_block:
357
  with gr.Row():
358
+ source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
359
+ target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
360
+ with gr.Accordion("Advanced settings", open=False, visible=True) as Advanced_setings:
361
+ number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
362
+ subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)
363
 
364
+ auxiliar_block1 = gr.Textbox(placeholder="", interactive=False, visible=visible)
365
  with gr.Row():
366
+ get_audio_from_video_info = gr.Textbox(placeholder="Waiting", label="Get audio from video info", elem_id="get_audio_from_video_info", interactive=False, visible=visible)
 
367
  video_transcribed_progress_info = gr.Textbox(placeholder="Waiting", label="Transcribe progress info", elem_id="video_transcribed_progress_info", interactive=False, visible=visible)
368
  transcriptions_concatenated_progress_info = gr.Textbox(placeholder="Waiting", label="Concatenate progress info", elem_id="transcriptions_concatenated_progress_info", interactive=False, visible=visible)
369
  video_translated_progress_info = gr.Textbox(placeholder="Waiting", label="Translate progress info", elem_id="transcription_translated_progress_info", interactive=False, visible=visible)
370
  video_subtitled_progress_info = gr.Textbox(placeholder="Waiting", label="Video subtitle progress info", elem_id="video_subtitled_progress_info", interactive=False, visible=visible)
371
 
372
  original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
373
+ original_video_path = gr.Textbox(label="Original video path", visible=visible)
374
  original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=visible)
375
  original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
376
  subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
377
  auxiliar_block3 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 3", elem_id="auxiliar_block3", interactive=False, visible=visible)
378
 
379
+ gr.HTML(html_buy_me_a_coffe)
380
+
381
  # Events
382
  # paste_button.click(fn=paste_url_from_clipboard, outputs=url_textbox)
383
  delete_button.click(
384
  fn=reset_frontend,
385
  outputs=[
386
+ video_input,
 
387
  source_languaje,
388
  target_languaje,
389
  Advanced_setings,
390
  number_of_speakers,
391
  subtify_button,
392
+ auxiliar_block1,
 
 
393
  video_transcribed_progress_info,
394
  transcriptions_concatenated_progress_info,
395
  video_translated_progress_info,
 
397
  subtitled_video,
398
  ]
399
  )
400
+ video_input.change(
401
+ fn=process_uploaded_video,
402
+ inputs=[video_input],
403
+ outputs=[video_input, config_block, original_video_path]
 
 
 
 
404
  )
405
  subtify_button.click(
406
  fn=change_visibility_texboxes,
407
+ outputs=[auxiliar_block1, get_audio_from_video_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
 
 
 
 
 
408
  )
409
+ auxiliar_block1.change(
410
+ fn=get_audio,
411
+ inputs=[original_video_path],
412
+ outputs=[get_audio_from_video_info, original_audio_path]
413
  )
414
+ get_audio_from_video_info.change(
415
  fn=trascribe_audio,
416
+ inputs=[original_audio_path, source_languaje],
417
  outputs=[video_transcribed_progress_info]
418
  )
419
+ # video_transcribed_progress_info.change(
420
+ # fn=concatenate_transcriptions,
421
+ # outputs=[transcriptions_concatenated_progress_info, original_audio_transcribed_path]
422
+ # )
423
+ # transcriptions_concatenated_progress_info.change(
424
+ # fn=translate_transcription,
425
+ # inputs=[original_audio_transcribed_path, source_languaje, target_languaje],
426
+ # outputs=[video_translated_progress_info, original_audio_translated_path]
427
+ # )
428
+ # video_translated_progress_info.change(
429
+ # fn=add_translated_subtitles_to_video,
430
+ # inputs=[original_video_path, original_audio_path, original_audio_translated_path],
431
+ # outputs=[subtitled_video, video_subtitled_progress_info, auxiliar_block3]
432
+ # )
433
+ # auxiliar_block3.change(
434
+ # fn=hide_textbobes_progress_info,
435
+ # outputs=[video_sliced_progress_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
436
+ # )
 
 
437
 
438
  demo.launch()
439
 
audio.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import ffmpeg
3
+
4
+ DEBUG=True
5
+
6
def get_audio_from_video(video_path: str, output_folder: str) -> str:
    """
    Extract audio from video and save it as mp3.

    Args:
        video_path (str): Path to the video file
        output_folder (str): Path to folder where audio will be saved

    Returns:
        str: Path to the saved audio file

    Raises:
        FileNotFoundError: If video file doesn't exist
        RuntimeError: If there's an error extracting the audio
    """
    # Validate video exists
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # The output name is fixed, so every video writes to the same file
    audio_path = os.path.join(output_folder, "download_audio.mp3")

    # Debug shortcut: reuse a previous extraction, but only when it is at
    # least as recent as the video. Otherwise the audio of an earlier video
    # would be returned for a newly uploaded one.
    if (
        DEBUG
        and os.path.exists(audio_path)
        and os.path.getmtime(audio_path) >= os.path.getmtime(video_path)
    ):
        return audio_path

    try:
        # Extract audio using ffmpeg
        stream = ffmpeg.input(video_path)
        stream = ffmpeg.output(stream, audio_path, acodec='libmp3lame')
        ffmpeg.run(stream, overwrite_output=True)
    except Exception as e:
        # Keep the original error as the cause so the traceback is not lost
        raise RuntimeError(f"Error extracting audio from video: {e}") from e

    return audio_path
requirements.txt CHANGED
@@ -1,20 +1,31 @@
1
  # gradio
2
  gradio
3
 
4
- # Get environment
5
- transformers
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Download youtube and twitch videos
8
- pytube
9
- yt-dlp
10
- twitch-dl
11
 
12
  # Trascribe audios
13
- git+https://github.com/m-bain/whisperx.git
14
  pyannote.audio
15
 
16
- # Translate
17
- protobuf
18
 
19
- # Add subtitles to videos
20
- opencv-python
 
1
  # gradio
2
  gradio
3
 
4
+ # spaces
5
+ spaces
6
+
7
+ # pytorch
8
+ torch
9
+ torchvision
10
+ torchaudio
11
+
12
+ # Transformers
13
+ transformers accelerate
14
+
15
+ # ffmpeg
16
+ ffmpeg-python
17
 
18
  # Download youtube and twitch videos
19
+ # pytube
20
+ # yt-dlp
21
+ # twitch-dl
22
 
23
  # Transcribe audios
24
+ # git+https://github.com/m-bain/whisperx.git
25
  pyannote.audio
26
 
27
+ # # Translate
28
+ # protobuf
29
 
30
+ # # Add subtitles to videos
31
+ # opencv-python
slice_audio.py CHANGED
@@ -4,6 +4,7 @@ from tqdm import tqdm
4
 
5
  START = 00
6
  FOLDER = "chunks"
 
7
 
8
  def seconds_to_hms(seconds):
9
  hour = 00
@@ -22,53 +23,72 @@ def seconds_to_hms(seconds):
22
  def hms_to_seconds(hour, minute, second):
23
  return hour*3600 + minute*60 + second
24
 
25
- def main(args):
26
- input = args.input
27
- # name, extension = input.split(".")
28
- path, filename = os.path.split(input)
 
 
 
 
 
 
 
29
  name, extension = os.path.splitext(filename)
30
- seconds = int(args.seconds)
31
 
32
  # Get audio duration in seconds
33
- duration = float(os.popen(f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {input}').read())
34
  hour, minute, second = seconds_to_hms(int(duration))
 
35
 
36
- # Number of chunks
37
- num_chunks = -(-int(duration) // seconds) # Redondeo hacia arriba
 
 
 
 
 
 
38
 
39
- # Slice audio into seconds chunks
40
- hour, minute, second = seconds_to_hms(seconds) # Duration of each chunk
41
  output_files = []
42
  progress_bar = tqdm(total=num_chunks, desc="Slice audio into chunks progress")
 
43
  for chunk in range(num_chunks):
44
- start_time = chunk * seconds
45
- hour_start, minute_start, second_start = seconds_to_hms(start_time) # Start time of each chunk
46
-
47
- if start_time + seconds > duration:
48
- hour, minute, second = seconds_to_hms(duration - start_time)
49
- else:
50
- hour, minute, second = seconds_to_hms(seconds)
 
 
 
 
 
 
51
 
52
- output = f"{FOLDER}/{name}_chunk{chunk:003d}{extension}"
53
-
54
- if start_time + seconds > duration:
55
- command = f'ffmpeg -i {input} -ss {hour_start:02d}:{minute_start:02d}:{second_start:02d} -loglevel error {output}'
 
 
 
 
 
56
  else:
57
- command = f'ffmpeg -i {input} -ss {hour_start:02d}:{minute_start:02d}:{second_start:02d} -t {hour:02}:{minute:02}:{second:02} -loglevel error {output}'
 
 
58
  os.system(command)
59
-
60
  output_files.append(output)
61
-
62
  progress_bar.update(1)
63
 
64
- # write output files to a txt file
65
- with open(f"{FOLDER}/output_files.txt", "w") as f:
 
 
66
  for output_file in output_files:
67
  f.write(f"{output_file}\n")
68
-
69
- if __name__ == "__main__":
70
- argparser = argparse.ArgumentParser(description='Slice audio into smaller chunks')
71
- argparser.add_argument('input', help='Input audio file')
72
- argparser.add_argument('seconds', help='Duration of each chunk in seconds')
73
- args = argparser.parse_args()
74
- main(args)
 
4
 
5
  START = 00
6
  FOLDER = "chunks"
7
+ DEBUG = True
8
 
9
  def seconds_to_hms(seconds):
10
  hour = 00
 
23
  def hms_to_seconds(hour, minute, second):
24
  return hour*3600 + minute*60 + second
25
 
26
def slice_audio(input_audio_path, output_folder, chunks_seconds, chunk_overlap_seconds):
    """
    Slice audio into chunks with specified duration and overlap.

    The chunks are written to ``output_folder`` and their paths are listed,
    one per line, in ``output_folder/output_files.txt``.

    Args:
        input_audio_path (str): Path to input audio file
        output_folder (str): Path to output folder
        chunks_seconds (int): Duration of each chunk in seconds
        chunk_overlap_seconds (int): Overlap between chunks in seconds

    Raises:
        ValueError: If the overlap is not smaller than the chunk duration, or
            if the audio duration cannot be read with ffprobe.
    """
    # Local import: the commands are run as argument lists (no shell), so
    # paths containing spaces or shell metacharacters are passed verbatim.
    import subprocess

    # Calculate effective chunk duration considering overlap; fail before
    # doing any work when the configuration is impossible.
    effective_chunk = chunks_seconds - chunk_overlap_seconds
    if effective_chunk <= 0:
        raise ValueError("Overlap duration must be less than chunk duration")

    _, filename = os.path.split(input_audio_path)
    name, extension = os.path.splitext(filename)

    # Get audio duration in seconds
    probe = subprocess.run(
        [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            input_audio_path,
        ],
        capture_output=True,
        text=True,
    )
    try:
        duration = float(probe.stdout)
    except ValueError as e:
        raise ValueError(
            f"Could not read duration of {input_audio_path}: {probe.stderr.strip()}"
        ) from e
    hour, minute, second = seconds_to_hms(int(duration))
    print(f"\tDuration ({duration} seconds): {hour:02d}:{minute:02d}:{second:02d}")

    # Calculate number of chunks needed (ceiling division). Audio shorter
    # than the overlap would give zero or fewer chunks, so emit at least one.
    num_chunks = max(1, -(-int(duration - chunk_overlap_seconds) // effective_chunk))

    # Slice audio into chunks with overlap
    output_files = []
    with tqdm(total=num_chunks, desc="Slice audio into chunks progress") as progress_bar:
        for chunk in range(num_chunks):
            # Each chunk starts one effective chunk after the previous one
            start_time = chunk * effective_chunk
            hour_start, minute_start, second_start = seconds_to_hms(start_time)

            # Generate output filename
            output = f"{output_folder}/{name}_chunk{chunk:03d}{extension}"

            # Debug shortcut: reuse an existing chunk only when it is at
            # least as recent as the input audio, never a stale one.
            if (
                DEBUG
                and os.path.exists(output)
                and os.path.getmtime(output) >= os.path.getmtime(input_audio_path)
            ):
                output_files.append(output)
                progress_bar.update(1)
                continue

            # Build ffmpeg command with -y flag to overwrite without asking
            command = [
                "ffmpeg", "-y", "-i", input_audio_path,
                "-ss", f"{hour_start:02d}:{minute_start:02d}:{second_start:02d}",
            ]
            if chunk != num_chunks - 1:
                # Every chunk but the last has a fixed length; the last one
                # simply runs to the end of the audio.
                end_time = min(start_time + chunks_seconds, duration)
                hour_duration, minute_duration, second_duration = seconds_to_hms(end_time - start_time)
                command += ["-t", f"{hour_duration:02d}:{minute_duration:02d}:{second_duration:02d}"]
            command += ["-loglevel", "error", output]

            # Execute command
            subprocess.run(command)
            output_files.append(output)
            progress_bar.update(1)

    # Write output files to a txt file (with overwrite)
    with open(f"{output_folder}/output_files.txt", "w") as f:
        f.writelines(f"{output_file}\n" for output_file in output_files)
 
 
 
 
 
 
 
transcribe.py CHANGED
@@ -2,50 +2,86 @@ import os
2
  import argparse
3
  from lang_list import LANGUAGE_NAME_TO_CODE, WHISPER_LANGUAGES
4
  from tqdm import tqdm
 
 
5
 
6
- # For pyannote.audio diarize
7
- from pyannote.audio import Model
8
- model = Model.from_pretrained("pyannote/segmentation-3.0", use_auth_token="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")
9
 
10
- language_dict = {}
11
- # Iterate over the LANGUAGE_NAME_TO_CODE dictionary
12
- for language_name, language_code in LANGUAGE_NAME_TO_CODE.items():
13
- # Extract the language code (the first two characters before the underscore)
14
- lang_code = language_code.split('_')[0].lower()
15
-
16
- # Check if the language code is present in WHISPER_LANGUAGES
17
- if lang_code in WHISPER_LANGUAGES:
18
- # Construct the entry for the resulting dictionary
19
- language_dict[language_name] = {
20
- "transcriber": lang_code,
21
- "translator": language_code
22
- }
 
 
23
 
24
- def transcribe(audio_file, language, num_speakers, device):
 
 
 
 
 
 
 
 
 
 
25
  output_folder = "transcriptions"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Transcribe audio file
28
- model = "large-v2"
29
- # word_timestamps = True
30
- print_progress = False
31
- if device == "cpu":
32
- # I supose that I am on huggingface server
33
- compute_type = "float32"
34
- else:
35
- compute_type = "float16"
36
- fp16 = True
37
- batch_size = 8
38
- verbose = False
39
- min_speakers = 1
40
- max_speakers = num_speakers
41
- threads = 4
42
- output_format = "srt"
43
- hf_token = "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"
44
- command = f'whisperx {audio_file} --model {model} --batch_size {batch_size} --compute_type {compute_type} \
45
- --output_dir {output_folder} --output_format {output_format} --verbose {verbose} --language {language} \
46
- --fp16 {fp16} --threads {threads} --print_progress {print_progress} --device {device} \
47
- --diarize --max_speakers {max_speakers} --min_speakers {min_speakers} --hf_token {hf_token}'
48
- os.system(command)
49
 
50
  if __name__ == "__main__":
51
  parser = argparse.ArgumentParser(description='Transcribe audio files')
@@ -66,5 +102,6 @@ if __name__ == "__main__":
66
  _, input_name = input_file.split('/')
67
  extension = "mp3"
68
  file = f'{chunks_folder}/{input_name}.{extension}'
 
69
  transcribe(file, language_dict[args.language]["transcriber"], args.num_speakers, args.device)
70
- progress_bar.update(1)
 
2
  import argparse
3
  from lang_list import LANGUAGE_NAME_TO_CODE, WHISPER_LANGUAGES
4
  from tqdm import tqdm
5
+ import torch
6
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
7
 
 
 
 
8
 
9
def get_language_dict():
    """
    Build the mapping from language name to transcriber/translator codes.

    For every entry of LANGUAGE_NAME_TO_CODE the part of the code before the
    underscore is lower-cased; entries whose lower-cased prefix is not in
    WHISPER_LANGUAGES are left out.

    Returns:
        dict: ``{language_name: {"transcriber": prefix, "translator": full_code}}``
    """
    # Pair every language with its full code and its lower-cased prefix
    candidates = (
        (language_name, full_code, full_code.split('_')[0].lower())
        for language_name, full_code in LANGUAGE_NAME_TO_CODE.items()
    )
    # Keep only the languages whose prefix is present in WHISPER_LANGUAGES
    return {
        language_name: {"transcriber": short_code, "translator": full_code}
        for language_name, full_code, short_code in candidates
        if short_code in WHISPER_LANGUAGES
    }
24
 
25
def transcribe(audio_file, language, device, chunk_length_s=30, stride_length_s=5):
    """
    Transcribe audio file using Whisper model.

    The model and processor are loaded on every call. The result is printed
    and returned; no file is written to the ``transcriptions`` folder here.

    Args:
        audio_file (str): Path to audio file
        language (str): Language code for transcription
        device (str): Device to use for inference ('cuda' or 'cpu')
        chunk_length_s (int): Length of audio chunks in seconds
        stride_length_s (int): Stride length between chunks in seconds

    Returns:
        The value returned by the speech-recognition pipeline
        (requested with ``return_timestamps=True``).
    """
    output_folder = "transcriptions"
    os.makedirs(output_folder, exist_ok=True)

    # Choose the precision from the device the model will actually run on,
    # not from whether a GPU merely exists on the machine.
    torch_dtype = torch.float16 if str(device).startswith("cuda") else torch.float32

    # Load model and processor
    model_id = "openai/whisper-large-v3-turbo"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)

    # Create pipeline with timestamp generation
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        torch_dtype=torch_dtype,
        device=device,
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=True
    )

    # Transcribe with timestamps
    result = pipe(
        audio_file,
        return_timestamps=True,
        generate_kwargs={
            "language": language,
            "task": "transcribe",
            "use_cache": True,
            "num_beams": 1
        }
    )

    print(result)
    # Hand the transcription back so callers can use it, not only read the log
    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  if __name__ == "__main__":
87
  parser = argparse.ArgumentParser(description='Transcribe audio files')
 
102
  _, input_name = input_file.split('/')
103
  extension = "mp3"
104
  file = f'{chunks_folder}/{input_name}.{extension}'
105
+ language_dict = get_language_dict()
106
  transcribe(file, language_dict[args.language]["transcriber"], args.num_speakers, args.device)
107
+ progress_bar.update(1)
ui_config.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Color definitions
2
+ BACKGROUND_COLOR = "#0b0f19"
3
+ BUTTON_COLOR = "#47515f"
4
+ SVG_COLOR = "#f3f4f6"
5
+ PANEL_COLOR = "#101827"
6
+ PRIMARY_TEXT_COLOR = "#f3f4f6"
7
+ SUBDUED_TEXT_COLOR = "#59616f"
8
+ BACKGROUND_PRIMARY_COLOR = "#1f2937"
9
+ BACKGROUND_SECONDARY_COLOR = "#101827"
10
+ PRIMARY_BODER_COLOR = "#323c4c"
11
+ BLOCK_TITLE_TEXT_COLOR = "#dfe2e6"
12
+ INPUT_BACKGROUND_COLOR = "#2f3947"
13
+ INPUT_BORDER_COLOR = "#313b4b"
14
+ INPUT_PLACEHOLDER_COLOR = "#616977"
15
+ ERROR_BACKGROUND_COLOR = "#101827"
16
+ ERROR_TEXT_COLOR = "#f7f2f2"
17
+ ERROR_BORDER_COLOR = "#9b3339"
18
+ BUTTON_SECONDARY_BACKGROUND_COLOR = "#434d5c"
19
+ BUTTON_SECONDARY_BORDER_COLOR = "#444d5b"
20
+ BUTTON_SECONDARY_TEXT_COLOR = "#c5c9cc"
21
+ RED = "#ff0000"
22
+ GREEN = "#00ff00"
23
+ BLUE = "#0000ff"
24
+
25
+ # HTML for the social media links
26
+ html_social_media = f'''
27
+ <div style="float: right;">
28
+ <a href="https://maximofn.com/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
29
+ <svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 576 512">
30
+ <style>
31
+ svg {"{"}
32
+ fill: {SVG_COLOR}
33
+ {"}"}
34
+ </style>
35
+ <path d="M208 80c0-26.5 21.5-48 48-48h64c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48h-8v40H464c30.9 0 56 25.1 56 56v32h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H464c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V288c0-4.4-3.6-8-8-8H312v40h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H256c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V280H112c-4.4 0-8 3.6-8 8v32h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V288c0-30.9 25.1-56 56-56H264V192h-8c-26.5 0-48-21.5-48-48V80z"/>
36
+ </svg>
37
+ </a>
38
+ <!-- Resto de los enlaces de redes sociales... -->
39
+ </div>
40
+ '''
41
+
42
+ # HTML for the logo
43
def get_html_subtify_logo(new_width, new_height):
    """
    Return the HTML snippet that shows the centered Subtify logo.

    Args:
        new_width (int): Width of the logo in pixels
        new_height (int): Height of the logo in pixels

    Returns:
        str: HTML with a flex container holding the logo image
    """
    # The width/height attributes take plain quoted pixel counts; a "px"
    # suffix is not valid there.
    return f"""
    <div style="display: flex; justify-content: center; align-items: center;">
        <img src="https://pub-fb664c455eca46a2ba762a065ac900f7.r2.dev/subtify_logo-scaled.webp" alt="Subtify logo" width="{new_width}" height="{new_height}">
    </div>
    """
49
+
50
+ # HTML for the Buy Me a Coffee button
51
+ html_buy_me_a_coffe = '''
52
+ <div style="float: right;">
53
+ <a href="https://www.buymeacoffee.com/maximofn" target="_blank">
54
+ <img src="https://img.shields.io/badge/Buy_Me_A_Coffee-support_my_work-FFDD00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=white&labelColor=101010" alt="buy me a coffe">
55
+ </a>
56
+ </div>
57
+ '''
url_manager.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import gradio as gr
3
+ from pytube import YouTube
4
+ from lang_list import union_language_dict
5
+
6
# Built once at import time; is_valid_url passes it as the `choices` of the
# source/target language dropdowns.
language_dict = union_language_dict()
7
+
8
def get_youtube_thumbnail(url):
    """Return the thumbnail URL pytube reports for *url*, or ``None`` on failure.

    ``is_valid_youtube_url`` only checks the overall URL shape (it accepts ids of
    any length), so pytube may still reject the address, and reading
    ``thumbnail_url`` may need a network round trip. Either failure used to
    propagate out of the UI callback; it is now reported as ``None`` so that
    ``is_valid_url`` can fall back to its placeholder image.

    Args:
        url: Address of the YouTube video.

    Returns:
        The thumbnail URL string, or ``None`` when the lookup failed.
    """
    # Local import keeps this block self-contained (no change to the import header).
    from pytube.exceptions import PytubeError

    try:
        return YouTube(url).thumbnail_url
    except (PytubeError, OSError, ValueError, KeyError):
        # PytubeError: pytube could not parse/extract the video.
        # OSError: urllib network failures (URLError/HTTPError derive from it).
        # ValueError/KeyError: malformed response payloads.
        return None
12
+
13
def is_valid_youtube_url(url):
    """Report whether *url* starts with a recognised YouTube video address.

    Accepted shapes (the scheme and ``www.`` prefix are optional):
      - https://youtube.com/watch?v=video_id
      - https://www.youtube.com/watch?v=video_id
      - https://youtu.be/video_id

    The pattern is anchored at the start only, so any text following the
    video id is tolerated.
    """
    youtube_regex = re.compile(r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+')
    return youtube_regex.match(url) is not None
20
+
21
def is_valid_twitch_url(url):
    """Report whether *url* starts with a recognised Twitch address.

    Accepted shapes (the scheme and ``www.`` prefix are optional):
      - https://twitch.tv/channel_name
      - https://www.twitch.tv/channel_name
      - https://twitch.tv/videos/video_id

    The pattern is anchored at the start only, so trailing text is tolerated.
    """
    twitch_regex = re.compile(r'(https?://)?(www\.)?twitch\.tv/(videos/\d+|\w+)')
    return twitch_regex.match(url) is not None
28
+
29
def _build_option_widgets(visible):
    """Build the five option widgets returned after the preview image."""
    num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
    source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
    target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
    advanced_setings = gr.Accordion(visible=visible)
    number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
    # NOTE(review): min_width is passed as the string "10px"; Gradio documents
    # this parameter as an int number of pixels -- confirm before changing.
    subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
    return (
        source_languaje,
        target_languaje,
        advanced_setings,
        number_of_speakers,
        subtify_button,
    )


def _build_preview_image(value, visible=True):
    """Build the preview image component with the settings shared by all outcomes."""
    return gr.Image(value=value, visible=visible, show_download_button=False, container=False)


def is_valid_url(url):
    """Validate *url* and build the Gradio components that reflect the result.

    A YouTube URL shows the video thumbnail (or a placeholder when no thumbnail
    is available), a Twitch URL shows the Twitch image, and anything else
    yields the same set of components with ``visible=False``.

    The validators are consulted directly instead of first testing for the
    "youtube"/"twitch" substrings: that pre-filter made a valid Twitch URL whose
    channel name contains "youtube" fall into the YouTube branch and be rejected.

    Args:
        url: Text entered by the user. Non-string values are treated as invalid.

    Returns:
        A 6-tuple ``(image, source_languaje, target_languaje, advanced_setings,
        number_of_speakers, subtify_button)`` of Gradio components.
    """
    if not isinstance(url, str):
        # e.g. None from a cleared input: no .lower()/regex crash, just "invalid".
        url = ""

    preview_value = None
    if is_valid_youtube_url(url):
        # A falsy thumbnail means the lookup gave nothing usable.
        preview_value = get_youtube_thumbnail(url) or "assets/youtube-no-thumbnails.webp"
    elif is_valid_twitch_url(url):
        preview_value = "assets/twitch.webp"

    if preview_value is None:
        # Error: return the whole set hidden.
        return (
            _build_preview_image("assets/youtube_error.webp", visible=False),
            *_build_option_widgets(False),
        )
    return (_build_preview_image(preview_value), *_build_option_widgets(True))