saima730 committed on
Commit
f673178
1 Parent(s): 4a1e343

Create app.py

Files changed (1)
app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
+ import gradio as gr
+ from gtts import gTTS
+ from moviepy.editor import TextClip, AudioFileClip
+ from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
+ import torch
+ import tempfile
+
+ # Initialize RAG model components
+ tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
+ retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True)
+ model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model = model.to(device)
+
+ def generate_response(input_text):
+     input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
+     generated = model.generate(input_ids)
+     response = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
+     return response
+
+ def text_to_speech(text):
+     tts = gTTS(text)
+     with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
+         tts.save(temp_audio_file.name)
+     return temp_audio_file.name
+
+ def text_to_video(text, audio_filename):
+     audio_clip = AudioFileClip(audio_filename)
+     text_clip = TextClip(text, fontsize=50, color='white', bg_color='black', size=(640, 480))
+     text_clip = text_clip.set_duration(audio_clip.duration)  # match the video length to the narration
+     video_clip = text_clip.set_audio(audio_clip)
+     with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video_file:
+         video_clip.write_videofile(temp_video_file.name, codec='libx264')
+     return temp_video_file.name
+
+ def process_text(input_text):
+     response = generate_response(input_text)
+     audio_file = text_to_speech(response)
+     video_file = text_to_video(response, audio_file)
+     return response, audio_file, video_file
+
+ iface = gr.Interface(
+     fn=process_text,
+     inputs=gr.Textbox(label="Enter your text:"),
+     outputs=[gr.Textbox(label="RAG Model Response"), gr.Audio(label="Audio"), gr.Video(label="Video")],
+     live=True
+ )
+
+ iface.launch()
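Note on reuse: because iface.launch() runs at module level, importing app.py from another script (or a test) will load the RAG model and immediately start the Gradio server; the retriever also expects the datasets and faiss packages to be installed, and moviepy's TextClip relies on ImageMagick. A common follow-up, sketched below and not part of this commit, is to guard the launch call so the helper functions can be imported on their own.

# Sketch (not in the committed file): keep the server start behind a
# __main__ guard so generate_response / text_to_speech / text_to_video
# can be imported without launching the Gradio interface.
if __name__ == "__main__":
    iface.launch()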