awacke1 committed
Commit 02a76d9 · verified · 1 Parent(s): 64aa7f5

Update app.py

Files changed (1): app.py (+41 -25)
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+from transformers import pipeline
 from diffusers import StableDiffusionPipeline
 import torch
 from PIL import Image
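
Note: the import slims down because this commit swaps the local SpeechT5 text-to-speech stack for the edge-tts service, so only `pipeline` is still needed from transformers. The dropped SpeechT5 classes (and `soundfile` in the next hunk) were only used by the old text2speech path.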
@@ -7,10 +7,11 @@ import numpy as np
 import os
 import tempfile
 import moviepy.editor as mpe
-import soundfile as sf
 import nltk
 from pydub import AudioSegment
 import warnings
+import asyncio
+import edge_tts
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
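
Note: `asyncio` and `edge_tts` come in where `soundfile` goes out; audio is now synthesized by Microsoft's Edge TTS service rather than a local model, so no extra weights are downloaded. A minimal smoke test of the new dependency, assuming `pip install edge-tts` and network access (the output file name is arbitrary):

import asyncio
import edge_tts

async def main():
    # Synthesize one sentence and save it as an MP3; requires network access.
    communicate = edge_tts.Communicate("Hello from edge-tts.", voice="en-US-AriaNeural")
    await communicate.save("hello.mp3")

asyncio.run(main())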
 
@@ -22,33 +23,36 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if device == "cuda" else torch.float32
 
 # Story generator
-story_generator = pipeline('text-generation', model='gpt2-large', device=0 if device=='cuda' else -1)
+story_generator = pipeline(
+    'text-generation',
+    model='gpt2-large',
+    device=0 if device == 'cuda' else -1
+)
 
 # Stable Diffusion model
 sd_model_id = "runwayml/stable-diffusion-v1-5"
-sd_pipe = StableDiffusionPipeline.from_pretrained(sd_model_id, torch_dtype=torch_dtype)
+sd_pipe = StableDiffusionPipeline.from_pretrained(
+    sd_model_id,
+    torch_dtype=torch_dtype
+)
 sd_pipe = sd_pipe.to(device)
 
-# Text-to-Speech model
-tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
-tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts", torch_dtype=torch_dtype)
-tts_model = tts_model.to(device)
-vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan", torch_dtype=torch_dtype)
-vocoder = vocoder.to(device)
-
+# Text-to-Speech function using edge_tts
 def text2speech(text):
     try:
-        inputs = tts_processor(text=text, return_tensors="pt").to(device)
-        speaker_embeddings = torch.zeros((1, 512), device=device)
-        speech = tts_model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
-
-        output_path = os.path.join(tempfile.gettempdir(), "speech_output.wav")
-        sf.write(output_path, speech.cpu().numpy(), samplerate=16000)
+        output_path = asyncio.run(_text2speech_async(text))
         return output_path
     except Exception as e:
         print(f"Error in text2speech: {str(e)}")
         raise
 
+async def _text2speech_async(text):
+    communicate = edge_tts.Communicate(text, voice="en-US-AriaNeural")
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+        tmp_path = tmp_file.name
+    await communicate.save(tmp_path)
+    return tmp_path
+
 def generate_story(prompt):
     generated = story_generator(prompt, max_length=500, num_return_sequences=1)
     story = generated[0]['generated_text']
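
Note: `asyncio.run` raises RuntimeError when the calling thread already has a running event loop (as in notebooks and some server setups). If that ever bites, a defensive wrapper could look like the sketch below; `run_coro` is a hypothetical helper, not part of this commit:

import asyncio
import threading

def run_coro(coro):
    # Run a coroutine to completion even if this thread already has a loop.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return asyncio.run(coro)  # no loop in this thread: the simple path
    # A loop is already running here, so execute the coroutine in a worker thread.
    result = {}
    worker = threading.Thread(target=lambda: result.update(value=asyncio.run(coro)))
    worker.start()
    worker.join()
    return result["value"]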
@@ -63,7 +67,7 @@ def generate_images(sentences):
     for idx, sentence in enumerate(sentences):
         image = sd_pipe(sentence).images[0]
         # Save image to temporary file
-        temp_file = tempfile.NamedTemporaryFile(suffix=f"_{idx}.png", delete=False)
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f"_{idx}.png")
         image.save(temp_file.name)
         images.append(temp_file.name)
     return images
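
Note: both sides of this hunk create the frame images with `delete=False` (only the keyword order changes), so the PNGs outlive the request. If disk usage matters, a cleanup pass after the video is written would help; `cleanup_files` is a hypothetical helper, not part of this commit:

import os

def cleanup_files(paths):
    # Delete intermediate frame images once the video has been assembled.
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass  # already removed or still in use; safe to ignore here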
@@ -98,18 +102,31 @@ def create_video(images, durations, audio_path):
 
 def process_pipeline(prompt, progress=gr.Progress(track_tqdm=True)):
     try:
-        with gr.Progress(track_tqdm=True, desc="Generating Story"):
+        with progress.tqdm(total=6) as pbar:
+            pbar.set_description("Generating Story")
             story = generate_story(prompt)
-        with gr.Progress(track_tqdm=True, desc="Splitting Story into Sentences"):
+            pbar.update(1)
+
+            pbar.set_description("Splitting Story into Sentences")
             sentences = split_story_into_sentences(story)
-        with gr.Progress(track_tqdm=True, desc="Generating Images for Sentences"):
+            pbar.update(1)
+
+            pbar.set_description("Generating Images for Sentences")
             images = generate_images(sentences)
-        with gr.Progress(track_tqdm=True, desc="Generating Audio"):
+            pbar.update(1)
+
+            pbar.set_description("Generating Audio")
             audio_path, total_duration = generate_audio(story)
-        with gr.Progress(track_tqdm=True, desc="Computing Durations"):
+            pbar.update(1)
+
+            pbar.set_description("Computing Durations")
             durations = compute_sentence_durations(sentences, total_duration)
-        with gr.Progress(track_tqdm=True, desc="Creating Video"):
+            pbar.update(1)
+
+            pbar.set_description("Creating Video")
             video_path = create_video(images, durations, audio_path)
+            pbar.update(1)
+
         return video_path
     except Exception as e:
         print(f"Error in process_pipeline: {str(e)}")
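
Note: the old `with gr.Progress(track_tqdm=True, desc=...)` blocks constructed a fresh tracker per step instead of updating the injected `progress` object, and `gr.Progress` is not documented as a context manager, so they likely never rendered. The replacement drives one tqdm-style bar through the injected object; the `with progress.tqdm(total=6)` form mirrors plain tqdm and may depend on the Gradio version. The documented alternative is to call `progress` directly with a fraction and a description, roughly:

import gradio as gr

def process_pipeline(prompt, progress=gr.Progress()):
    # Sketch using the callable form of gr.Progress; generate_story and the
    # other helpers are the functions defined earlier in app.py.
    progress(0 / 6, desc="Generating Story")
    story = generate_story(prompt)
    progress(1 / 6, desc="Splitting Story into Sentences")
    sentences = split_story_into_sentences(story)
    progress(2 / 6, desc="Generating Images for Sentences")
    images = generate_images(sentences)
    progress(3 / 6, desc="Generating Audio")
    audio_path, total_duration = generate_audio(story)
    progress(4 / 6, desc="Computing Durations")
    durations = compute_sentence_durations(sentences, total_duration)
    progress(5 / 6, desc="Creating Video")
    return create_video(images, durations, audio_path)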
@@ -128,7 +145,6 @@ with gr.Blocks(css=".container { max-width: 800px; margin: auto; }") as demo:
     with gr.Column():
         prompt_input = gr.Textbox(label="Enter a Prompt", lines=2)
         generate_button = gr.Button("Generate Video")
-        progress_bar = gr.Markdown("")
     with gr.Column():
         video_output = gr.Video(label="Generated Video")
 
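
Note: with gr.Progress rendering its own status display, the placeholder `progress_bar = gr.Markdown("")` component is redundant, so it is dropped from the layout.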
 
 
150