ChiBenevisamPas committed on
Commit
c494b01
·
verified ·
1 Parent(s): 536f4b9

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -190
app.py DELETED
@@ -1,190 +0,0 @@
1
- import gradio as gr
2
- import whisper
3
- import os
4
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
- from docx import Document # For Word output
6
- from fpdf import FPDF # For PDF output
7
- from pptx import Presentation # For PowerPoint output
8
- import subprocess # To use ffmpeg for embedding subtitles
9
- import shlex # For better command-line argument handling
10
-
11
# Whisper speech-to-text model, created once when the module is imported.
# The "tiny" checkpoint is used because a smaller model transcribes faster.
model = whisper.load_model("tiny")
13
-
14
- # Load M2M100 translation model for different languages
15
# Hugging Face checkpoint shared by every supported target language.
_M2M100_CHECKPOINT = "facebook/m2m100_418M"

# Process-wide cache of loaded translation models, keyed by checkpoint name.
# Only the model is cached: it is by far the most expensive part to load.
# The tokenizer carries per-request language settings, so each call gets a
# fresh one.
_translation_model_cache = {}


def load_translation_model(target_language):
    """Return an M2M100 tokenizer/model pair set up to translate from English.

    The model weights are loaded on first use and reused by later calls, so
    repeated requests do not reload the checkpoint. A new tokenizer is created
    on every call with ``src_lang`` set to ``"en"`` and ``tgt_lang`` set to
    the requested language; callers translating from another source language
    can overwrite ``tokenizer.src_lang`` on the returned object.

    Args:
        target_language: Language code of the translation target. Supported
            values are ``"fa"`` (Persian), ``"es"`` (Spanish) and ``"fr"``
            (French).

    Returns:
        A ``(tokenizer, translation_model)`` tuple.

    Raises:
        ValueError: If ``target_language`` is not one of the supported codes.
    """
    lang_codes = {
        "fa": "fa",  # Persian (Farsi)
        "es": "es",  # Spanish
        "fr": "fr",  # French
    }
    target_lang_code = lang_codes.get(target_language)
    if not target_lang_code:
        raise ValueError(f"Translation model for {target_language} not supported")

    tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)
    tokenizer.src_lang = "en"
    tokenizer.tgt_lang = target_lang_code

    translation_model = _translation_model_cache.get(_M2M100_CHECKPOINT)
    if translation_model is None:
        translation_model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)
        _translation_model_cache[_M2M100_CHECKPOINT] = translation_model

    return tokenizer, translation_model
33
-
34
def translate_text(text, tokenizer, model):
    """Translate ``text`` into the tokenizer's configured target language.

    Args:
        text: The text to translate.
        tokenizer: Tokenizer object that is callable on text and provides
            ``tgt_lang``, ``get_lang_id()`` and ``decode()``.
        model: Generation model providing ``generate()``.

    Returns:
        The decoded translation. Blank or whitespace-only strings are
        returned unchanged without calling the model.

    Raises:
        RuntimeError: If tokenization, generation or decoding fails. The
            original exception is attached as ``__cause__``.
    """
    # Nothing to translate; skip the model call for empty segments.
    if isinstance(text, str) and not text.strip():
        return text

    try:
        # truncation=True means text longer than the model limit is cut off.
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        # Forcing the first generated token selects the output language.
        target_lang_id = tokenizer.get_lang_id(tokenizer.tgt_lang)
        translated = model.generate(**inputs, forced_bos_token_id=target_lang_id)
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError(f"Error during translation: {e}") from e
41
-
42
- # Helper function to format timestamps in SRT format (hh:mm:ss,ms)
43
def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp (``hh:mm:ss,mmm``).

    The value is rounded to the nearest millisecond before it is split into
    fields. Truncating the fractional part instead loses a millisecond to
    floating-point error (1.001 would become ``00:00:01,000``).

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
50
-
51
- # Corrected write_srt function
52
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription segments to ``output_file`` in SRT format.

    Each segment becomes one numbered cue (starting at 1) with its start and
    end timestamps and its stripped text, followed by a blank line.

    Args:
        transcription: Mapping with a ``'segments'`` list; every segment
            provides ``'start'``, ``'end'`` and ``'text'``.
        output_file: Path of the SRT file to create or overwrite.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When truthy, each segment text is translated with
            ``translate_text`` before it is written.
    """
    # Explicit UTF-8: translated subtitles (e.g. Persian) cannot be encoded
    # with every platform's default locale encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        for index, segment in enumerate(transcription['segments'], start=1):
            text = segment['text']

            if translation_model:
                text = translate_text(text, tokenizer, translation_model)

            start_time = format_timestamp(segment['start'])
            end_time = format_timestamp(segment['end'])

            f.write(f"{index}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text.strip()}\n\n")
68
-
69
def embed_hardsub_in_video(video_file, srt_file, output_video):
    """Use ffmpeg to burn subtitles into the video (hardsub).

    Args:
        video_file: Path of the input video.
        srt_file: Path of the SRT subtitle file to render onto the frames.
        output_video: Path of the re-encoded output video. An existing file
            at this path is overwritten.

    Raises:
        RuntimeError: If ffmpeg cannot be started, exceeds the 300 second
            timeout, or exits with a non-zero status (the message then
            carries ffmpeg's stderr).
    """
    # The arguments are passed as a list so that spaces or quotes in the
    # file names cannot change how the command line is split.
    # NOTE(review): the subtitles filter has its own escaping rules; an SRT
    # path containing ' or : probably still needs filter-level escaping.
    command = [
        "ffmpeg",
        "-y",  # overwrite without prompting; a prompt would block until the timeout
        "-i", str(video_file),
        "-vf", f"subtitles='{srt_file}'",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        str(output_video),
    ]

    print(f"Running command: {shlex.join(command)}")  # Debug statement
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("ffmpeg process timed out.") from e
    except Exception as e:
        raise RuntimeError(f"Error running ffmpeg: {e}") from e

    print(f"ffmpeg output: {process.stdout}")  # Debug statement
    # Checked outside the try block so this error is not re-wrapped by the
    # generic handler above.
    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")
83
-
84
def write_word(transcription, output_file, tokenizer=None, translation_model=None):
    """Save the transcription as a Word document, without timestamps.

    Every segment becomes one paragraph, prefixed with its 1-based number.
    When ``translation_model`` is truthy the segment text is first passed
    through ``translate_text`` together with ``tokenizer``.
    """
    document = Document()
    for number, segment in enumerate(transcription['segments'], start=1):
        line = segment['text']
        if translation_model:
            line = translate_text(line, tokenizer, translation_model)
        document.add_paragraph(f"{number}. {line.strip()}")
    document.save(output_file)
95
-
96
- from fpdf import FPDF # This imports fpdf2, not the older FPDF
97
-
98
# Common install locations of the DejaVu Sans TrueType font, tried in order
# when the caller does not give an explicit font path.
_DEJAVU_FONT_CANDIDATES = (
    "DejaVuSans.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/TTF/DejaVuSans.ttf",
    "/Library/Fonts/DejaVuSans.ttf",
)


def _resolve_font_path(font_path=None):
    """Return an existing font file path or raise FileNotFoundError."""
    candidates = (font_path,) if font_path else _DEJAVU_FONT_CANDIDATES
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    raise FileNotFoundError(
        "No Unicode TrueType font found for PDF output; tried: "
        + ", ".join(str(candidate) for candidate in candidates)
    )


def write_pdf(transcription, output_file, tokenizer=None, translation_model=None, font_path=None):
    """Create a PDF document from the transcription without timestamps.

    Every segment is written as one numbered block of text (numbering starts
    at 1). When ``translation_model`` is truthy the segment text is first
    passed through ``translate_text`` together with ``tokenizer``.

    Args:
        transcription: Mapping with a ``'segments'`` list whose items
            provide ``'text'``.
        output_file: Path of the PDF file to write.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: Enables translation when truthy.
        font_path: Optional path to a Unicode TrueType font. When omitted,
            common install locations of ``DejaVuSans.ttf`` are searched.

    Raises:
        FileNotFoundError: If no usable font file can be found.
    """
    # Resolve the font first so a missing font fails with a clear message
    # before any PDF work starts.
    font_file = _resolve_font_path(font_path)

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # A Unicode font is required so that non-Latin text can be rendered.
    # NOTE(review): right-to-left scripts such as Persian may additionally
    # need text shaping to display correctly; confirm with real output.
    pdf.add_font('DejaVu', '', font_file, uni=True)
    pdf.set_font("DejaVu", size=12)

    for i, segment in enumerate(transcription['segments']):
        text = segment['text']

        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        pdf.multi_cell(0, 10, f"{i + 1}. {text.strip()}")

    pdf.output(output_file)
117
-
118
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Save the transcription as a PowerPoint deck, without timestamps.

    One slide is added per segment, and the segment's numbered text (starting
    at 1) is placed in the slide's title. When ``translation_model`` is truthy
    the text is first passed through ``translate_text`` with ``tokenizer``.
    """
    deck = Presentation()
    for number, segment in enumerate(transcription['segments'], start=1):
        caption = segment['text']
        if translation_model:
            caption = translate_text(caption, tokenizer, translation_model)

        # Layout index 5 must provide a title placeholder, because the text
        # is written into the slide title (so it is not a truly blank layout).
        new_slide = deck.slides.add_slide(deck.slide_layouts[5])
        new_slide.shapes.title.text = f"{number}. {caption.strip()}"
    deck.save(output_file)
133
-
134
def transcribe_video(video_file, language, target_language, output_format):
    """Transcribe a video and return the path of the requested output file.

    Args:
        video_file: The uploaded video, either a path string or an object
            whose ``name`` attribute holds the path.
        language: Language code of the speech in the video.
        target_language: Language code wanted for the subtitles.
        output_format: One of ``"SRT"``, ``"Video with Hardsub"``,
            ``"Word"``, ``"PDF"`` or ``"PowerPoint"``.

    Returns:
        Path of the generated file. An SRT file next to the video is always
        written, whichever format is requested.

    Raises:
        RuntimeError: If the translation model cannot be loaded or the
            subtitles cannot be burned into the video.
        ValueError: If ``output_format`` is not one of the supported values.
    """
    # NOTE(review): depending on the Gradio version/configuration the upload
    # arrives as a plain path or as a temp-file wrapper; both are accepted.
    video_path = video_file if isinstance(video_file, str) else video_file.name

    # English subtitles for a non-English video use Whisper's own translate
    # task; the M2M100 loader in this file only offers fa/es/fr targets.
    whisper_translates = target_language == "en" and language != "en"
    task = "translate" if whisper_translates else "transcribe"

    # Transcribe the video with Whisper
    result = model.transcribe(video_path, language=language, task=task)
    video_name = os.path.splitext(video_path)[0]

    # A separate translation model is only needed when the subtitle language
    # differs from the spoken language and is not English (handled above).
    if target_language in (language, "en"):
        tokenizer, translation_model = None, None
    else:
        try:
            tokenizer, translation_model = load_translation_model(target_language)
        except Exception as e:
            raise RuntimeError(f"Error loading translation model: {e}") from e
        # The transcript is in the video's language, which is not always
        # English, so tell the translator the real source language.
        tokenizer.src_lang = language

    # Save the SRT file
    srt_file = f"{video_name}.srt"
    write_srt(result, srt_file, tokenizer, translation_model)

    # Output based on user's selection
    if output_format == "SRT":
        return srt_file

    if output_format == "Video with Hardsub":
        output_video = f"{video_name}_with_subtitles.mp4"
        try:
            embed_hardsub_in_video(video_path, srt_file, output_video)
        except Exception as e:
            raise RuntimeError(f"Error embedding subtitles in video: {e}") from e
        return output_video

    if output_format == "Word":
        word_file = f"{video_name}.docx"
        write_word(result, word_file, tokenizer, translation_model)
        return word_file

    if output_format == "PDF":
        pdf_file = f"{video_name}.pdf"
        write_pdf(result, pdf_file, tokenizer, translation_model)
        return pdf_file

    if output_format == "PowerPoint":
        ppt_file = f"{video_name}.pptx"
        write_ppt(result, ppt_file, tokenizer, translation_model)
        return ppt_file

    # Fail loudly instead of silently returning None to the UI.
    raise ValueError(f"Unsupported output format: {output_format}")
174
-
175
- # Gradio interface
176
# Input widgets, in the order transcribe_video() receives their values.
_video_input = gr.File(label="Upload Video")
_video_language_input = gr.Dropdown(
    label="Select Video Language",
    choices=["en", "es", "fr", "de", "it", "pt"],
    value="en",
)
_subtitle_language_input = gr.Dropdown(
    label="Select Subtitle Language",
    choices=["en", "fa", "es", "fr"],
    value="fa",
)
_output_format_input = gr.Radio(
    label="Output Format",
    choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"],
    value="Video with Hardsub",
)

# Web UI: a single form whose result is offered as a downloadable file.
iface = gr.Interface(
    fn=transcribe_video,
    inputs=[
        _video_input,
        _video_language_input,
        _subtitle_language_input,
        _output_format_input,
    ],
    outputs=gr.File(label="Download Subtitles, Video, or Document"),
    title="Video Subtitle Generator with Hardsub and Document Formats",
    description="Upload a video file to generate subtitles in SRT format, download the video with hardsubbed subtitles, or generate Word, PDF, or PowerPoint documents using Whisper and M2M100 for translation.",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()