YetNak committed on
Commit
50bd4c1
·
verified ·
1 Parent(s): 519e7bb

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -226
app.py DELETED
@@ -1,226 +0,0 @@
1
- import os
2
- import uuid
3
- import asyncio
4
- import subprocess
5
- import json
6
- from zipfile import ZipFile
7
- import stat
8
- import gradio as gr
9
- import ffmpeg
10
- import cv2
11
- import edge_tts
12
- from googletrans import Translator
13
- from huggingface_hub import HfApi
14
- import moviepy.editor as mp
15
- import spaces
16
-
17
# --- Constants and one-time setup -------------------------------------
# The Hugging Face token is read from the environment (set as a Space
# secret); REPO_ID names the Space this app belongs to.
HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = "artificialguybr/video-dubbing"
MAX_VIDEO_DURATION = 180  # seconds

api = HfApi(token=HF_TOKEN)

# Unpack the bundled ffmpeg binary and mark it executable.
ZipFile("ffmpeg.zip").extractall()
os.chmod('ffmpeg', os.stat('ffmpeg').st_mode | stat.S_IEXEC)
28
-
29
# Maps the user-facing language name (shown in the dropdown) to a pair of
# (googletrans destination code, edge-tts voice id). The translation step
# uses the first element; speech synthesis uses the second.
language_mapping = {
    'English': ('en', 'en-US-EricNeural'),
    'Spanish': ('es', 'es-ES-AlvaroNeural'),
    'French': ('fr', 'fr-FR-HenriNeural'),
    'German': ('de', 'de-DE-ConradNeural'),
    'Italian': ('it', 'it-IT-DiegoNeural'),
    'Portuguese': ('pt', 'pt-PT-DuarteNeural'),
    'Polish': ('pl', 'pl-PL-MarekNeural'),
    'Turkish': ('tr', 'tr-TR-AhmetNeural'),
    'Russian': ('ru', 'ru-RU-DmitryNeural'),
    'Dutch': ('nl', 'nl-NL-MaartenNeural'),
    'Czech': ('cs', 'cs-CZ-AntoninNeural'),
    'Arabic': ('ar', 'ar-SA-HamedNeural'),
    'Chinese (Simplified)': ('zh-CN', 'zh-CN-YunxiNeural'),
    'Japanese': ('ja', 'ja-JP-KeitaNeural'),
    'Korean': ('ko', 'ko-KR-InJoonNeural'),
    'Hindi': ('hi', 'hi-IN-MadhurNeural'),
    'Swedish': ('sv', 'sv-SE-MattiasNeural'),
    'Danish': ('da', 'da-DK-JeppeNeural'),
    'Finnish': ('fi', 'fi-FI-HarriNeural'),
    'Greek': ('el', 'el-GR-NestorasNeural')
}

print("Starting the program...")
53
-
54
def generate_unique_filename(extension):
    """Return a collision-free filename: a random UUID4 plus *extension*."""
    return "{}{}".format(uuid.uuid4(), extension)
56
-
57
def cleanup_files(*files):
    """Best-effort removal of temp files; skips falsy or missing paths."""
    for path in files:
        if not path:
            continue
        if os.path.exists(path):
            os.remove(path)
            print(f"Removed file: {path}")
62
-
63
@spaces.GPU(duration=90)
def transcribe_audio(file_path):
    """Transcribe speech from an audio or video file.

    If *file_path* is a video, its audio track is first extracted to a
    temporary WAV. The actual transcription is delegated to the
    ``insanely-fast-whisper`` CLI (openai/whisper-large-v3), which writes
    its result as JSON.

    Returns the transcription text (joined chunk texts when no top-level
    "text" field is present).

    Raises whatever the extraction, the subprocess, or the JSON parse
    raises. Fix vs. original: temp files are now removed in a ``finally``
    block, so they no longer leak when any step fails.
    """
    print(f"Starting transcription of file: {file_path}")
    temp_audio = None

    if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
        print("Video file detected. Extracting audio...")
        try:
            video = mp.VideoFileClip(file_path)
            temp_audio = generate_unique_filename(".wav")
            video.audio.write_audiofile(temp_audio)
            file_path = temp_audio
        except Exception as e:
            print(f"Error extracting audio from video: {e}")
            raise

    output_file = generate_unique_filename(".json")
    command = [
        "insanely-fast-whisper",
        "--file-name", file_path,
        "--device-id", "0",
        "--model-name", "openai/whisper-large-v3",
        "--task", "transcribe",
        "--timestamp", "chunk",
        "--transcript-path", output_file
    ]

    try:
        try:
            result = subprocess.run(command, check=True, capture_output=True, text=True)
            print(f"Transcription output: {result.stdout}")
        except subprocess.CalledProcessError as e:
            print(f"Error running insanely-fast-whisper: {e}")
            raise

        try:
            with open(output_file, "r") as f:
                transcription = json.load(f)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
            raise

        # Prefer the full "text" field; otherwise stitch the chunk texts.
        if "text" in transcription:
            return transcription["text"]
        return " ".join(chunk["text"] for chunk in transcription.get("chunks", []))
    finally:
        # Runs on success AND on every error path above (was success-only).
        cleanup_files(output_file, temp_audio)
109
-
110
async def text_to_speech(text, voice, output_file):
    """Synthesize *text* with the given edge-tts voice and save it to *output_file*."""
    await edge_tts.Communicate(text, voice).save(output_file)
113
-
114
@spaces.GPU
def process_video(video, target_language, use_wav2lip):
    """End-to-end dubbing pipeline for one uploaded video.

    Steps: resize to 720p -> enforce duration limit -> extract and
    band-pass the audio -> transcribe -> translate -> synthesize speech ->
    mux (optionally lip-synced via Wav2Lip) -> clean up intermediates.

    Args:
        video: path to the uploaded video file (from gr.Video).
        target_language: key into ``language_mapping`` or None.
        use_wav2lip: when True, attempt Wav2Lip lip sync; on failure fall
            back to a plain audio-track replacement.

    Returns:
        (output_video_path, "") on success, or (None, error message) on
        failure — the Gradio outputs.

    Security fix vs. original: all subprocess invocations now use
    list-form argv with ``shell=False`` so upload-derived file names
    cannot be interpreted by a shell.
    """
    try:
        if target_language is None:
            raise ValueError("Please select a Target Language for Dubbing.")

        run_uuid = uuid.uuid4().hex[:6]
        output_filename = f"{run_uuid}_resized_video.mp4"
        # Downscale to 720p height; -2 keeps the width even (codec requirement).
        ffmpeg.input(video).output(output_filename, vf='scale=-2:720').run()

        video_path = output_filename

        if not os.path.exists(video_path):
            raise FileNotFoundError(f"Error: {video_path} does not exist.")

        video_info = ffmpeg.probe(video_path)
        # Use the container-level duration: stream 0 is not guaranteed to
        # carry a 'duration' field (nor even to be the video stream).
        video_duration = float(video_info['format']['duration'])

        if video_duration > MAX_VIDEO_DURATION:
            cleanup_files(video_path)
            raise ValueError(f"Video duration exceeds {MAX_VIDEO_DURATION} seconds. Please upload a shorter video.")

        # Extract the audio track as 48 kHz 24-bit PCM.
        ffmpeg.input(video_path).output(f"{run_uuid}_output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()

        # Band-pass the speech (100 Hz–3 kHz) to reduce rumble and hiss.
        subprocess.run(
            ["ffmpeg", "-y", "-i", f"{run_uuid}_output_audio.wav",
             "-af", "lowpass=3000,highpass=100",
             f"{run_uuid}_output_audio_final.wav"],
            check=True,
        )

        whisper_text = transcribe_audio(f"{run_uuid}_output_audio_final.wav")
        print(f"Transcription successful: {whisper_text}")

        target_language_code, voice = language_mapping[target_language]
        translator = Translator()
        translated_text = translator.translate(whisper_text, dest=target_language_code).text
        print(f"Translated text: {translated_text}")

        asyncio.run(text_to_speech(translated_text, voice, f"{run_uuid}_output_synth.wav"))

        # Shared fallback/default mux: copy video, replace audio with the synth track.
        mux_command = [
            "ffmpeg", "-i", video_path, "-i", f"{run_uuid}_output_synth.wav",
            "-c:v", "copy", "-c:a", "aac", "-strict", "experimental",
            "-map", "0:v:0", "-map", "1:a:0", f"{run_uuid}_output_video.mp4",
        ]

        if use_wav2lip:
            try:
                subprocess.run(
                    ["python", "Wav2Lip/inference.py",
                     "--checkpoint_path", "Wav2Lip/checkpoints/wav2lip_gan.pth",
                     "--face", video_path,
                     "--audio", f"{run_uuid}_output_synth.wav",
                     "--pads", "0", "15", "0", "0",
                     "--resize_factor", "1",
                     "--nosmooth",
                     "--outfile", f"{run_uuid}_output_video.mp4"],
                    check=True,
                )
            except subprocess.CalledProcessError as e:
                print(f"Wav2Lip error: {str(e)}")
                gr.Warning("Wav2lip encountered an error. Falling back to simple audio replacement.")
                subprocess.run(mux_command, check=True)
        else:
            subprocess.run(mux_command, check=True)

        output_video_path = f"{run_uuid}_output_video.mp4"
        if not os.path.exists(output_video_path):
            raise FileNotFoundError(f"Error: {output_video_path} was not generated.")

        cleanup_files(
            f"{run_uuid}_resized_video.mp4",
            f"{run_uuid}_output_audio.wav",
            f"{run_uuid}_output_audio_final.wav",
            f"{run_uuid}_output_synth.wav"
        )

        return output_video_path, ""

    except Exception as e:
        # Surface the error to the UI textbox instead of crashing the app.
        print(f"Error in process_video: {str(e)}")
        return None, f"Error: {str(e)}"
176
-
177
# Gradio interface setup.
# Fix vs. original: the notes below advertised a "1 minute" limit while the
# code enforces MAX_VIDEO_DURATION (180 s). The advertised limit is now
# derived from the constant so the UI text cannot drift out of sync.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI Video Dubbing")
    gr.Markdown("This tool uses AI to dub videos into different languages. Upload a video, choose a target language, and get a dubbed version!")

    with gr.Row():
        with gr.Column(scale=2):
            video_input = gr.Video(label="Upload Video")
            target_language = gr.Dropdown(
                choices=list(language_mapping.keys()),
                label="Target Language for Dubbing",
                value="Spanish"
            )
            use_wav2lip = gr.Checkbox(
                label="Use Wav2Lip for lip sync",
                value=False,
                info="Enable this if the video has close-up faces. May not work for all videos."
            )
            submit_button = gr.Button("Process Video", variant="primary")

        with gr.Column(scale=2):
            output_video = gr.Video(label="Processed Video")
            error_message = gr.Textbox(label="Status/Error Message")

    submit_button.click(
        process_video,
        inputs=[video_input, target_language, use_wav2lip],
        outputs=[output_video, error_message]
    )

    # Advertised limit, in minutes, derived from the enforced constant.
    _limit_minutes = MAX_VIDEO_DURATION // 60
    gr.Markdown(f"""
## Notes:
- Video limit is {_limit_minutes} minutes. The tool will dub all speakers using a single voice.
- Processing may take up to 5 minutes.
- This is an alpha version using open-source models.
- Quality vs. speed trade-off was made for scalability and hardware limitations.
- For videos longer than {_limit_minutes} minutes, please duplicate this Space and adjust the limit in the code.
""")

    gr.Markdown("""
---
Developed by [@artificialguybr](https://twitter.com/artificialguybr) using open-source tools.
Special thanks to Hugging Face for GPU support and [@yeswondwer](https://twitter.com/@yeswondwerr) for the original code.

Try our [Video Transcription and Translation](https://huggingface.co/spaces/artificialguybr/VIDEO-TRANSLATION-TRANSCRIPTION) tool!
""")

print("Launching Gradio interface...")
demo.queue()
demo.launch()