File size: 3,859 Bytes
9e156fa
4f0841f
 
 
9e156fa
 
dbc58d4
9e156fa
4f0841f
 
 
 
 
 
 
 
 
9e156fa
4f0841f
8c56203
4f0841f
c18dcee
4f0841f
c18dcee
 
4f0841f
1aee38e
4f0841f
 
c18dcee
 
 
 
4f0841f
 
 
 
 
 
 
 
c18dcee
4f0841f
 
 
 
 
 
 
 
 
 
 
 
c18dcee
 
 
 
4f0841f
 
c18dcee
 
 
4f0841f
 
 
9e156fa
4f0841f
 
 
 
c98ea09
4f0841f
 
 
c18dcee
4f0841f
 
c18dcee
4f0841f
 
 
9e156fa
4f0841f
 
c18dcee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import functools
import logging
import os
import tempfile

import pandas as pd
import requests

import ffmpeg
import gradio as gr
from faster_whisper import WhisperModel
from transformers import MarianMTModel, MarianTokenizer

# Turn on debug logging so faster-whisper reports its progress on the console.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Download the ISO-639 language table (a markdown table) and parse it with
# pandas; skiprows=2 drops the markdown header and separator rows, and the
# dropna removes the empty columns produced by the leading/trailing "|".
ISO_TABLE_URL = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(ISO_TABLE_URL, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Dropdown choices as (value, label) pairs; both are the two-letter ISO code.
language_options = [(code, f"{code}") for code in df['ISO 639-1']]

@functools.lru_cache(maxsize=2)
def _get_whisper_model(model_size, device, compute_type):
    """Build (and cache) a faster-whisper model; construction is expensive."""
    return WhisperModel(model_size, device=device, compute_type=compute_type)


@functools.lru_cache(maxsize=2)
def _get_translation_pair(source_language, target_language):
    """Build (and cache) the MarianMT tokenizer/model for a language pair.

    Raises whatever ``from_pretrained`` raises when no Helsinki-NLP model
    exists for the requested pair.
    """
    model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript):
    """Transcribe *audio_file*; unless the user asked to edit first, translate it.

    Args:
        audio_file: Path to the audio/video file to transcribe.
        source_language: ISO 639-1 code of the spoken language.
        target_language: ISO 639-1 code to translate into.
        model_size: faster-whisper model size (e.g. "base", "large-v3").
        change_transcript: When True, skip translation and return the raw
            transcript so the user can edit it first.

    Returns:
        (text, editable) — the transcript (translated when applicable) and a
        flag telling the caller whether the user may still edit it.
    """
    device = "cpu"  # use "cuda" for GPU
    compute_type = "int8"  # "float16" or "int8" for GPU, "int8" for CPU
    # Models are cached across calls — previously each request re-built the
    # Whisper model (and re-downloaded the MT model), which is very slow.
    model = _get_whisper_model(model_size, device, compute_type)
    segments, _ = model.transcribe(audio_file)
    transcription = " ".join(segment.text for segment in segments)

    if change_transcript:
        # Return early so the user can edit the transcript before translation.
        return transcription, True

    # Translation — only needed when the languages actually differ.
    if source_language != target_language:
        tokenizer, mt_model = _get_translation_pair(source_language, target_language)
        encoded = tokenizer(transcription, return_tensors="pt", padding=True, truncation=True, max_length=512)
        translated = mt_model.generate(**encoded)
        transcription = tokenizer.decode(translated[0], skip_special_tokens=True)

    return transcription, False

def add_hard_subtitle_to_video(input_video, transcript):
    """Burn *transcript* into *input_video* as hard subtitles.

    Args:
        input_video: Path to the source video file.
        transcript: Subtitle text; assumed to already be in SRT format.

    Returns:
        Path to the rendered MP4 with the subtitles burned in.
    """
    # Unique temp files avoid collisions when several requests run at once
    # (the previous fixed /tmp paths were shared by every caller).
    subtitle_fd, subtitle_path = tempfile.mkstemp(suffix=".srt")
    with os.fdopen(subtitle_fd, "w", encoding="utf-8") as subtitle_file:
        subtitle_file.write(transcript)

    output_fd, output_video_path = tempfile.mkstemp(suffix=".mp4")
    os.close(output_fd)

    # overwrite_output() lets ffmpeg replace the placeholder file created by
    # mkstemp instead of aborting because the target already exists.
    (
        ffmpeg
        .input(input_video)
        .output(output_video_path, vf=f"subtitles={subtitle_path}")
        .overwrite_output()
        .run(quiet=True)
    )

    return output_video_path

def process_video(video, source_language, target_language, model_size='base', change_transcript=False, modified_transcript=None):
    """Run the full pipeline: transcribe, optionally translate, burn subtitles.

    Args:
        video: Path to the uploaded video; it also serves as the audio source.
        source_language: ISO 639-1 code of the spoken language.
        target_language: ISO 639-1 code to translate into.
        model_size: faster-whisper model size to use.
        change_transcript: When True, the transcript may be replaced by
            *modified_transcript* before subtitling.
        modified_transcript: User-edited transcript text, if any.

    Returns:
        Path to the output video with hard subtitles.
    """
    # The video file doubles as the audio input for transcription.
    transcript, can_modify = transcribe_and_optionally_translate(
        video, source_language, target_language, model_size, change_transcript
    )

    # Prefer the user's edited transcript when editing was requested and
    # some text was actually supplied.
    if can_modify and modified_transcript:
        transcript = modified_transcript
        # Translation of the edited text would happen here if necessary
        # (mirroring the step above).

    return add_hard_subtitle_to_video(video, transcript)

# Setup the Gradio app.
app = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(choices=language_options, label="Source Language"),
        gr.Dropdown(choices=language_options, label="Target Language"),
        # Explicit default matches process_video's model_size='base'.
        gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], value="base", label="Model Size"),
        gr.Checkbox(label="Change Transcript before Translation?", value=False),
        gr.TextArea(label="Modified Transcript (if allowed)")
    ],
    # process_video returns the path of the subtitled video file, so the
    # output must be a video player — gr.Text would just show the raw path.
    outputs=gr.Video(label="Subtitled Video"),
    title="Video Transcription and Translation Tool",
    description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles."
)

if __name__ == "__main__":
    app.launch()