# videogeneration / app.py
# saima730's picture
# Create app.py
# f673178 verified
# raw
# history blame
# 1.98 kB
import gradio as gr
from gtts import gTTS
from moviepy.editor import TextClip, AudioFileClip
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
import torch
import tempfile
# Load the RAG tokenizer, retriever and generator from one checkpoint, then
# place the generator on the GPU when one is available (CPU otherwise).
_RAG_CHECKPOINT = "facebook/rag-sequence-nq"

tokenizer = RagTokenizer.from_pretrained(_RAG_CHECKPOINT)
retriever = RagRetriever.from_pretrained(
    _RAG_CHECKPOINT,
    index_name="exact",
    use_dummy_dataset=True,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = RagSequenceForGeneration.from_pretrained(
    _RAG_CHECKPOINT,
    retriever=retriever,
).to(device)
def generate_response(input_text):
    """Return the RAG model's generated text for *input_text*.

    The text is tokenized into PyTorch tensors, the token ids are moved to
    the module-level ``device``, and the first decoded sequence (with
    special tokens stripped) is returned.
    """
    encoded = tokenizer(input_text, return_tensors="pt")
    prompt_ids = encoded.input_ids.to(device)
    output_ids = model.generate(prompt_ids)
    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
def text_to_speech(text):
    """Synthesize *text* with gTTS and return the path of the saved audio.

    Args:
        text: The text to be spoken.

    Returns:
        Path of a temporary file with an ``.mp3`` suffix. The file is created
        with ``delete=False``, so it is not removed automatically; the caller
        is responsible for deleting it when done.
    """
    tts = gTTS(text)
    # Only reserve a unique path here and close the handle straight away:
    # saving by name while the temporary file is still open fails on
    # platforms that forbid opening the same file twice (notably Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
        audio_path = temp_audio_file.name
    tts.save(audio_path)
    return audio_path
def text_to_video(text, audio_filename, duration=10, fps=24):
    """Render *text* as a still-caption video with an audio track.

    Args:
        text: Caption drawn in white on a black 640x480 frame.
        audio_filename: Path of the audio file attached as the soundtrack.
        duration: Length of the caption clip in seconds. Defaults to 10,
            the value that was previously hard-coded.
        fps: Frame rate handed to the video writer.

    Returns:
        Path of a temporary file with an ``.mp4`` suffix. The file is created
        with ``delete=False``, so the caller is responsible for removing it.
    """
    text_clip = TextClip(text, fontsize=50, color='white', bg_color='black', size=(640, 480))
    text_clip = text_clip.set_duration(duration)
    audio_clip = AudioFileClip(audio_filename)
    try:
        video_clip = text_clip.set_audio(audio_clip)
        # Reserve a unique path and close the handle before encoding, so the
        # writer can open the file by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video_file:
            video_path = temp_video_file.name
        # The caption clip built above is never given a frame rate, so the
        # writer must be told one explicitly or it cannot encode the video.
        video_clip.write_videofile(video_path, codec='libx264', fps=fps)
    finally:
        # Release the audio reader whether or not encoding succeeded.
        audio_clip.close()
    return video_path
def process_text(input_text):
    """Run the full pipeline: generate a response, speak it, and render it.

    Args:
        input_text: The user's prompt.

    Returns:
        A ``(response, audio_file, video_file)`` tuple. For blank input the
        result is ``("", None, None)``; if the model produces a blank
        response, the audio and video entries are ``None``.
    """
    # The interface may invoke this with an empty or whitespace-only box;
    # there is nothing to generate, speak, or render in that case.
    if not input_text or not input_text.strip():
        return "", None, None

    response = generate_response(input_text)
    # A blank response gives speech synthesis nothing to say, so skip the
    # audio and video stages instead of failing inside them.
    if not response or not response.strip():
        return response, None, None

    audio_file = text_to_speech(response)
    video_file = text_to_video(response, audio_file)
    return response, audio_file, video_file
# Assemble the Gradio UI: one text input feeding process_text, whose three
# return values fill the response box, the audio player and the video player.
prompt_box = gr.Textbox(label="Enter your text:")
response_box = gr.Textbox(label="RAG Model Response")
audio_player = gr.Audio(label="Audio")
video_player = gr.Video(label="Video")

iface = gr.Interface(
    fn=process_text,
    inputs=prompt_box,
    outputs=[response_box, audio_player, video_player],
    live=True,
)
iface.launch()