Spaces:

Lenylvt
/

VideoSubtitleCreation-API

Sleeping

App Files Files Community

VideoSubtitleCreation-API / app.py

Lenylvt

Update app.py

8c56203 verified over 1 year ago

raw

history blame

4.28 kB

	import gradio as gr
	from faster_whisper import WhisperModel
	import logging
	import os
	from moviepy.editor import VideoFileClip
	import ffmpeg # Make sure to install ffmpeg-python
	from transformers import MarianMTModel, MarianTokenizer
	import pandas as pd
	import pysrt
	import requests

	# Configure logging for debugging purposes
	logging.basicConfig()
	logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

	# Fetch and parse language options from the provided URL.
	# The rows are split on the literal "|" character. The separator is a regular
	# expression, so it is written as a raw string (a plain "\|" is an invalid
	# escape sequence) and the Python parser engine is requested explicitly
	# (pandas would otherwise warn and fall back to it for a regex separator).
	# NOTE(review): the two skipped rows are presumably the Markdown table header
	# and its separator line - confirm against the remote file.
	url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
	df = pd.read_csv(url, sep=r"\|", engine="python", skiprows=2, header=None).dropna(axis=1, how='all')
	df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
	df['ISO 639-1'] = df['ISO 639-1'].str.strip()

	# Prepare language options for the dropdown: pairs whose two elements both
	# carry the stripped ISO 639-1 code.
	language_options = [(code, f"{code}") for code in df['ISO 639-1']]

	def format_timestamp(seconds):
	    """Convert seconds to HH:MM:SS.mmm format.

	    The value is rounded to whole milliseconds *before* it is split into
	    fields, so a rounding carry propagates into the minutes and hours
	    (59.9996 becomes "00:01:00.000", never "00:00:60.000").

	    Args:
	        seconds: Non-negative duration in seconds (int or float).

	    Returns:
	        The duration formatted as a zero-padded "HH:MM:SS.mmm" string.
	    """
	    total_milliseconds = int(round(seconds * 1000))
	    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
	    total_minutes, whole_seconds = divmod(total_seconds, 60)
	    hours, minutes = divmod(total_minutes, 60)
	    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{milliseconds:03d}"

	def extract_audio(video_path):
	    """Extract audio from video to a temporary audio file.

	    The audio is written as 16-bit PCM, mono, with a 16 kHz sample rate to
	    the fixed path ``/tmp/audio.wav``.

	    Args:
	        video_path: Path of the video to read.

	    Returns:
	        The path of the WAV file that was written.
	    """
	    output_audio_path = '/tmp/audio.wav'
	    audio_stream = ffmpeg.input(video_path).output(output_audio_path, acodec='pcm_s16le', ac=1, ar='16k')
	    # The output path is fixed, so the file already exists from the second
	    # call onwards; without the overwrite flag ffmpeg refuses to replace it.
	    audio_stream.run(quiet=True, overwrite_output=True)
	    return output_audio_path

	def transcribe_and_optionally_translate(video_file, source_language, target_language, model_size, allow_modification):
	    """Transcribe a video's speech and return it as SRT subtitle text.

	    The audio is extracted with ``extract_audio`` and transcribed on CPU with
	    faster-whisper. When the source and target languages differ, each
	    segment is translated on its own with the ``Helsinki-NLP/opus-mt`` model
	    for that language pair, so a long transcript is no longer cut off at the
	    512-token limit of a single translation call. The segment timings are
	    kept and the result is rendered as numbered SRT cues, which is the format
	    the subtitle-burning step writes to its ``.srt`` file.

	    Args:
	        video_file: Path of the input video.
	        source_language: Language code of the speech; only used to choose the
	            translation model.
	        target_language: Language code of the subtitles.
	        model_size: Model size or name handed to ``WhisperModel``.
	        allow_modification: Flag that is returned unchanged to the caller.

	    Returns:
	        A ``(srt_text, allow_modification)`` tuple.
	    """
	    audio_file = extract_audio(video_file)

	    # Transcription
	    device = "cpu" # GPU : cuda CPU : cpu
	    compute_type = "int8" # GPU : float16 or int8 - CPU : int8
	    whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
	    # NOTE(review): source_language is not forwarded to transcribe(), so the
	    # spoken language is left to the model - confirm this is intended.
	    segments, _ = whisper_model.transcribe(audio_file)
	    # Read start/end/text of every segment once; the timings are needed for the cues.
	    cues = [(segment.start, segment.end, segment.text.strip()) for segment in segments]
	    texts = [text for _, _, text in cues]

	    # Translation
	    if texts and source_language != target_language:
	        model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
	        tokenizer = MarianTokenizer.from_pretrained(model_name)
	        translator = MarianMTModel.from_pretrained(model_name)
	        translated_texts = []
	        for text in texts:
	            if not text:
	                # Nothing to translate; keep the cue so numbering and timing stay aligned.
	                translated_texts.append(text)
	                continue
	            encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
	            generated = translator.generate(**encoded)
	            translated_texts.append(tokenizer.decode(generated[0], skip_special_tokens=True))
	        texts = translated_texts

	    # Render SRT cues: index line, "start --> end" line, text, blank separator.
	    # SRT separates milliseconds with a comma rather than a dot.
	    srt_cues = []
	    for index, ((start, end, _), text) in enumerate(zip(cues, texts), start=1):
	        start_stamp = format_timestamp(start).replace(".", ",")
	        end_stamp = format_timestamp(end).replace(".", ",")
	        srt_cues.append(f"{index}\n{start_stamp} --> {end_stamp}\n{text}\n")
	    transcription = "\n".join(srt_cues)

	    return transcription, allow_modification

	def add_hard_subtitle_to_video(input_video, transcript):
	    """Add hard subtitles to video.

	    The transcript is written to ``/tmp/subtitle.srt`` and rendered onto the
	    video with ffmpeg's ``subtitles`` filter; the result is written to
	    ``/tmp/output_video.mp4``.

	    Args:
	        input_video: Path of the video to subtitle.
	        transcript: Subtitle text, assumed to be in SRT format.

	    Returns:
	        The path of the subtitled video.
	    """
	    temp_subtitle_path = '/tmp/subtitle.srt'
	    with open(temp_subtitle_path, 'w', encoding='utf-8') as file:
	        file.write(transcript) # Assuming transcript is in SRT format

	    output_video_path = "/tmp/output_video.mp4"
	    subtitled_stream = ffmpeg.input(input_video).output(output_video_path, vf=f"subtitles={temp_subtitle_path}")
	    # The output path is fixed, so the file already exists from the second
	    # call onwards; without the overwrite flag ffmpeg refuses to replace it.
	    subtitled_stream.run(quiet=True, overwrite_output=True)

	    return output_video_path

	# Gradio Interface
	def process_video(video, source_language, target_language, model_size='base', allow_modification=False, modified_transcript=None):
	    """Produce a copy of the video with subtitles burned in.

	    When modification is allowed and a modified transcript is supplied, that
	    text is used as the subtitles and the transcription/translation step is
	    skipped, because its result would be discarded anyway. Otherwise the
	    subtitles come from ``transcribe_and_optionally_translate``.

	    Args:
	        video: Path of the input video.
	        source_language: Language code of the speech.
	        target_language: Language code of the subtitles.
	        model_size: Whisper model size used for transcription.
	        allow_modification: Whether ``modified_transcript`` may replace the
	            generated transcript.
	        modified_transcript: Optional replacement subtitle text.

	    Returns:
	        The path of the subtitled video.
	    """
	    if allow_modification and modified_transcript:
	        transcript = modified_transcript # Use the modified transcript if provided
	    else:
	        transcript, _ = transcribe_and_optionally_translate(video, source_language, target_language, model_size, allow_modification)

	    # Add hard subtitles to the video
	    output_video = add_hard_subtitle_to_video(video, transcript)
	    return output_video

	# Setup the Gradio app
	app = gr.Interface(
	    fn=process_video,
	    inputs=[
	        gr.Video(label="Upload Video"),
	        gr.Dropdown(choices=language_options, label="Source Language"),
	        gr.Dropdown(choices=language_options, label="Target Language"),
	        # Preselect "base": the Interface hands the dropdown's value to
	        # process_video, so the 'base' default in that function's signature is
	        # not what the model receives when the user leaves this field untouched.
	        gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], value="base", label="Model Size"),
	        gr.Checkbox(label="Allow Transcript Modification?", value=False),
	        gr.TextArea(label="Modified Transcript (if allowed)"),
	    ],
	    outputs=gr.Video(label="Processed Video with Hard Subtitles"),
	    title="Video Transcription and Translation Tool",
	    description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles.",
	)

	# Start the web UI only when the file is run as a script.
	if __name__ == "__main__":
	    app.launch()