Spaces:

visionaries666
/

DAI_Project

Sleeping

App Files Files Community

ChiBenevisamPas committed on Oct 15, 2024

Commit

c494b01

verified ·

1 Parent(s): 536f4b9

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -190

app.py DELETED Viewed

@@ -1,190 +0,0 @@
-import gradio as gr
-import whisper
-import os
-from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
-from docx import Document  # For Word output
-from fpdf import FPDF  # For PDF output
-from pptx import Presentation  # For PowerPoint output
-import subprocess  # To use ffmpeg for embedding subtitles
-import shlex  # For better command-line argument handling
# Load the Whisper model once at import time; transcribe_video reuses this module-level instance.
model = whisper.load_model("tiny")  # Smaller model for faster transcription
# Load M2M100 translation model for different languages
_M2M100_CHECKPOINT = "facebook/m2m100_418M"
# Holds the loaded weights so later calls in the same process do not reload them.
_m2m100_model_cache = {}


def load_translation_model(target_language, source_language="en"):
    """Return an M2M100 tokenizer/model pair set up for one translation direction.

    Args:
        target_language: Code of the language to translate into. Supported
            values are "fa" (Persian), "es" (Spanish) and "fr" (French).
        source_language: Code of the language of the text that will be
            translated. Defaults to "en", the value that used to be
            hard-coded, so existing callers behave as before.

    Returns:
        A ``(tokenizer, translation_model)`` tuple. The tokenizer has its
        ``src_lang`` and ``tgt_lang`` attributes already set.

    Raises:
        ValueError: If ``target_language`` is not one of the supported codes.
    """
    lang_codes = {
        "fa": "fa",  # Persian (Farsi)
        "es": "es",  # Spanish
        "fr": "fr",  # French
    }
    target_lang_code = lang_codes.get(target_language)
    if not target_lang_code:
        raise ValueError(f"Translation model for {target_language} not supported")

    # A fresh tokenizer per call: it carries the mutable src/tgt language
    # settings, so it is deliberately not shared between callers.
    tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)
    tokenizer.src_lang = source_language
    tokenizer.tgt_lang = target_lang_code

    # The model weights do not depend on the language pair, so load them once.
    translation_model = _m2m100_model_cache.get(_M2M100_CHECKPOINT)
    if translation_model is None:
        translation_model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)
        _m2m100_model_cache[_M2M100_CHECKPOINT] = translation_model
    return tokenizer, translation_model
def translate_text(text, tokenizer, model):
    """Translate ``text`` using the given tokenizer and generation model.

    Args:
        text: The string to translate.
        tokenizer: Callable tokenizer that also provides ``get_lang_id``,
            ``tgt_lang`` and ``decode`` (as configured by
            ``load_translation_model``).
        model: Model exposing ``generate``.

    Returns:
        The decoded translation. ``None`` or whitespace-only input yields an
        empty string without calling the model.

    Raises:
        RuntimeError: If tokenization, generation or decoding fails; the
            original exception is attached as ``__cause__``.
    """
    # Nothing to translate: skip the model call entirely.
    if text is None or not text.strip():
        return ""
    try:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        # Force the first generated token to the target-language id.
        target_lang_id = tokenizer.get_lang_id(tokenizer.tgt_lang)
        translated = model.generate(**inputs, forced_bos_token_id=target_lang_id)
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        # Chain explicitly so the underlying traceback is preserved.
        raise RuntimeError(f"Error during translation: {e}") from e
# Helper function to format timestamps in SRT format (hh:mm:ss,ms)
def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp ``hh:mm:ss,mmm``.

    Args:
        seconds: Offset from the start of the media, in seconds (int or float).

    Returns:
        The timestamp string, rounded to the nearest millisecond. Negative
        offsets are clamped to ``00:00:00,000``.
    """
    # Work in whole milliseconds. Rounding (instead of truncating the
    # fractional part) keeps values such as 1.001 from collapsing to ",000"
    # because of binary floating-point error, and lets 59.9996 carry over
    # into the next second correctly.
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription segments to ``output_file`` in SRT format.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'start'``, ``'end'`` and ``'text'``.
        output_file: Path of the SRT file to create (overwritten if present).
        tokenizer: Tokenizer passed through to ``translate_text``; only used
            when ``translation_model`` is given.
        translation_model: When not ``None``, every segment text is
            translated before being written.
    """
    # Write UTF-8 explicitly: translated text (e.g. Persian) cannot be
    # encoded by every platform-default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        # SRT cue numbers start at 1.
        for cue_number, segment in enumerate(transcription['segments'], start=1):
            text = segment['text']
            if translation_model is not None:
                text = translate_text(text, tokenizer, translation_model)
            start_time = format_timestamp(segment['start'])
            end_time = format_timestamp(segment['end'])
            f.write(f"{cue_number}\n")
            f.write(f"{start_time} --> {end_time}\n")
            # A blank line terminates each cue.
            f.write(f"{text.strip()}\n\n")
def embed_hardsub_in_video(video_file, srt_file, output_video):
    """Uses ffmpeg to burn subtitles into the video (hardsub).

    Args:
        video_file: Path of the input video.
        srt_file: Path of the SRT subtitle file to render onto the frames.
        output_video: Path of the re-encoded video to write. An existing file
            at this path is overwritten.

    Raises:
        RuntimeError: If ffmpeg cannot be started, exceeds the 300 second
            timeout, or exits with a non-zero status (its stderr is included
            in the message).
    """
    # Pass an argument list instead of formatting one string and re-splitting
    # it: paths containing quotes or backslashes then reach ffmpeg unchanged.
    # NOTE(review): srt_file is still embedded in ffmpeg's filter syntax, so a
    # path containing "'" or ":" would need filter-level escaping -- confirm
    # whether such paths can occur here.
    command = [
        "ffmpeg",
        "-y",  # overwrite an existing output instead of prompting/aborting
        "-i", video_file,
        "-vf", f"subtitles='{srt_file}'",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        output_video,
    ]
    print(f"Running command: {shlex.join(map(str, command))}")  # Debug statement
    # Only the process launch is guarded here, so the non-zero-exit error
    # raised below is not caught and wrapped a second time.
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("ffmpeg process timed out.") from e
    except Exception as e:
        raise RuntimeError(f"Error running ffmpeg: {e}") from e
    print(f"ffmpeg output: {process.stdout}")  # Debug statement
    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")
def write_word(transcription, output_file, tokenizer=None, translation_model=None):
    """Save the transcription as a Word document, one numbered paragraph per segment.

    Timestamps are not included. When ``translation_model`` is supplied, each
    segment text is passed through ``translate_text`` first.
    """
    document = Document()
    for number, entry in enumerate(transcription['segments'], start=1):
        paragraph_text = entry['text']
        if translation_model:
            paragraph_text = translate_text(paragraph_text, tokenizer, translation_model)
        document.add_paragraph(f"{number}. {paragraph_text.strip()}")
    document.save(output_file)
-from fpdf import FPDF  # This imports fpdf2, not the older FPDF
# Common install locations probed for DejaVuSans.ttf when no font_path is given.
_DEJAVU_FONT_CANDIDATES = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/TTF/DejaVuSans.ttf",
    "DejaVuSans.ttf",  # next to the app / current working directory
)


def _resolve_pdf_font(font_path=None):
    """Return the path of an existing TrueType font file, or raise FileNotFoundError."""
    if font_path is not None:
        if not os.path.isfile(font_path):
            raise FileNotFoundError(f"PDF font file not found: {font_path}")
        return font_path
    for candidate in _DEJAVU_FONT_CANDIDATES:
        if os.path.isfile(candidate):
            return candidate
    raise FileNotFoundError(
        "DejaVuSans.ttf not found; pass font_path or install the DejaVu fonts "
        f"(looked in: {', '.join(_DEJAVU_FONT_CANDIDATES)})"
    )


def write_pdf(transcription, output_file, tokenizer=None, translation_model=None, font_path=None):
    """Creates a PDF document from the transcription without timestamps.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the PDF to create.
        tokenizer: Tokenizer passed through to ``translate_text``; only used
            when ``translation_model`` is given.
        translation_model: When supplied, every segment text is translated
            before being written.
        font_path: Optional path to a Unicode-capable ``.ttf`` file. When
            omitted, a few common DejaVuSans.ttf locations are tried.

    Raises:
        FileNotFoundError: If no usable font file can be located.
    """
    # Resolve the font first so a missing file fails fast with a clear message
    # (the previous hard-coded '/path/to/DejaVuSans.ttf' was a placeholder).
    resolved_font = _resolve_pdf_font(font_path)

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    # Use a Unicode font like DejaVuSans that supports a wide range of characters
    pdf.add_font('DejaVu', '', resolved_font, uni=True)
    pdf.set_font("DejaVu", size=12)
    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        pdf.multi_cell(0, 10, f"{number}. {text.strip()}")
    pdf.output(output_file)
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Build a PowerPoint deck with one slide per transcription segment (no timestamps).

    Each slide's title holds the numbered segment text. When
    ``translation_model`` is supplied, the text is translated first via
    ``translate_text``.
    """
    deck = Presentation()
    # The text is written into the title placeholder, so the chosen layout
    # must provide one.
    # NOTE(review): index 5 looks like the default template's "Title Only"
    # layout rather than a blank one -- confirm against python-pptx.
    layout = deck.slide_layouts[5]
    for number, entry in enumerate(transcription['segments'], start=1):
        caption = entry['text']
        if translation_model:
            caption = translate_text(caption, tokenizer, translation_model)
        new_slide = deck.slides.add_slide(layout)
        new_slide.shapes.title.text = f"{number}. {caption.strip()}"
    deck.save(output_file)
-def transcribe_video(video_file, language, target_language, output_format):
-    # Transcribe the video with Whisper
-    result = model.transcribe(video_file.name, language=language)
-    video_name = os.path.splitext(video_file.name)[0]
-    # Load the translation model for the selected subtitle language
-    if target_language != "en":
-        try:
-            tokenizer, translation_model = load_translation_model(target_language)
-        except Exception as e:
-            raise RuntimeError(f"Error loading translation model: {e}")
-    else:
-        tokenizer, translation_model = None, None
-    # Save the SRT file
-    srt_file = f"{video_name}.srt"
-    write_srt(result, srt_file, tokenizer, translation_model)
-    # Output based on user's selection
-    if output_format == "SRT":
-        return srt_file
-    elif output_format == "Video with Hardsub":
-        output_video = f"{video_name}_with_subtitles.mp4"
-        try:
-            embed_hardsub_in_video(video_file.name, srt_file, output_video)
-            return output_video
-        except Exception as e:
-            raise RuntimeError(f"Error embedding subtitles in video: {e}")
-    elif output_format == "Word":
-        word_file = f"{video_name}.docx"
-        write_word(result, word_file, tokenizer, translation_model)
-        return word_file
-    elif output_format == "PDF":
-        pdf_file = f"{video_name}.pdf"
-        write_pdf(result, pdf_file, tokenizer, translation_model)
-        return pdf_file
-    elif output_format == "PowerPoint":
-        ppt_file = f"{video_name}.pptx"
-        write_ppt(result, ppt_file, tokenizer, translation_model)
-        return ppt_file
# Gradio interface: the four inputs are listed in the same order as the
# parameters of transcribe_video.
_video_input = gr.File(label="Upload Video")
_video_language_input = gr.Dropdown(
    label="Select Video Language",
    choices=["en", "es", "fr", "de", "it", "pt"],
    value="en",
)
_subtitle_language_input = gr.Dropdown(
    label="Select Subtitle Language",
    choices=["en", "fa", "es", "fr"],
    value="fa",
)
_output_format_input = gr.Radio(
    label="Output Format",
    choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"],
    value="Video with Hardsub",
)

iface = gr.Interface(
    fn=transcribe_video,
    inputs=[
        _video_input,
        _video_language_input,
        _subtitle_language_input,
        _output_format_input,
    ],
    outputs=gr.File(label="Download Subtitles, Video, or Document"),
    title="Video Subtitle Generator with Hardsub and Document Formats",
    description="Upload a video file to generate subtitles in SRT format, download the video with hardsubbed subtitles, or generate Word, PDF, or PowerPoint documents using Whisper and M2M100 for translation.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()