# videogeneration / app.py
import tempfile

import gradio as gr
import torch
from gtts import gTTS
from moviepy.editor import AudioFileClip, TextClip
from transformers import RagRetriever, RagSequenceForGeneration, RagTokenizer
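
# Note: RagRetriever needs the `datasets` and `faiss-cpu` packages at runtime,
# and moviepy's TextClip renders text through ImageMagick, which must be
# installed on the host.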

# Initialize the RAG model components; the dummy dataset keeps the retrieval
# index small enough for a demo Space.
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True)
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)

# Run on GPU when available
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

def generate_response(input_text):
    """Generate a response to the input text with the RAG model."""
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
    with torch.no_grad():  # inference only, so skip gradient bookkeeping
        generated = model.generate(input_ids)
    response = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
    return response
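
# Generation above uses the library defaults; kwargs such as num_beams or
# max_length can be passed to model.generate() to trade speed for output quality.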

def text_to_speech(text):
    """Synthesize speech for the text with gTTS and return the MP3 path."""
    tts = gTTS(text)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
        tts.save(temp_audio_file.name)
    return temp_audio_file.name
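
# Note: gTTS synthesizes speech via Google's online TTS endpoint, so this step
# requires network access.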

def text_to_video(text, audio_filename):
    """Render the text on a black background and pair it with the audio track."""
    audio_clip = AudioFileClip(audio_filename)
    text_clip = TextClip(text, fontsize=50, color="white", bg_color="black",
                         size=(640, 480), method="caption")  # "caption" wraps long lines
    # Match the clip length to the narration instead of a fixed 10 seconds
    text_clip = text_clip.set_duration(audio_clip.duration)
    video_clip = text_clip.set_audio(audio_clip)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
        # A still TextClip carries no inherent fps, so write_videofile needs one
        video_clip.write_videofile(temp_video_file.name, fps=24, codec="libx264", audio_codec="aac")
    return temp_video_file.name

def process_text(input_text):
    """Pipeline: RAG response -> speech audio -> captioned video."""
    response = generate_response(input_text)
    audio_file = text_to_speech(response)
    video_file = text_to_video(response, audio_file)
    return response, audio_file, video_file

iface = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(label="Enter your text:"),
    outputs=[
        gr.Textbox(label="RAG Model Response"),
        gr.Audio(label="Audio"),
        gr.Video(label="Video"),
    ],
    # No live=True: rerunning the whole generate/TTS/render pipeline on every
    # keystroke is far too heavy, so require an explicit submit.
)

iface.launch()
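
# When running outside Hugging Face Spaces, launch(share=True) serves a temporary
# public URL, and server_name="0.0.0.0" exposes the app on the local network.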