Fabrice-TIERCELIN committed on
Commit
133ca11
1 Parent(s): 54ed3cf

Handle seed

Browse files

Hi @soujanyaporia,

This PR lets the user choose whether the seed is randomized on each run or set to a fixed value.

Files changed (1) hide show
  1. app.py +38 -25
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  import json
 
3
  import torch
4
  import wavio
5
  from tqdm import tqdm
@@ -11,6 +12,8 @@ from pydub import AudioSegment
11
  from gradio import Markdown
12
  import spaces
13
 
 
 
14
  # Automatic device detection
15
  if torch.cuda.is_available():
16
  device_type = "cuda"
@@ -89,8 +92,16 @@ def gradio_generate(
89
  output_format="wav",
90
  output_number=3,
91
  steps=100,
92
- guidance=3
 
 
93
  ):
 
 
 
 
 
 
94
  output_wave = tango.generate(prompt, steps, guidance, output_number)
95
  # output_filename = f"{prompt.replace(' ', '_')}_{steps}_{guidance}"[:250] + ".wav"
96
 
@@ -161,39 +172,41 @@ output_audio_2 = gr.Audio(label="Generated Audio #2/3", type="filepath")
161
  output_audio_3 = gr.Audio(label="Generated Audio #3/3", type="filepath")
162
  denoising_steps = gr.Slider(minimum=10, maximum=200, value=100, step=1, label="Steps", interactive=True)
163
  guidance_scale = gr.Slider(minimum=1, maximum=10, value=3, step=0.1, label="Guidance Scale", interactive=True)
 
 
164
 
165
  # Gradio interface
166
  gr_interface = gr.Interface(
167
  fn=gradio_generate,
168
- inputs=[input_text, output_format, output_number, denoising_steps, guidance_scale],
169
  outputs=[output_audio_1, output_audio_2, output_audio_3],
170
  title="Tango 2: Aligning Diffusion-based Text-to-Audio Generations through Direct Preference Optimization",
171
  description=description_text,
172
  allow_flagging=False,
173
  examples=[
174
- ["Quiet speech and then and airplane flying away", "wav", 1, 100, 3],
175
- ["A bicycle peddling on dirt and gravel followed by a man speaking then laughing", "wav", 1, 100, 3],
176
- ["Ducks quack and water splashes with some animal screeching in the background", "wav", 1, 100, 3],
177
- ["Describe the sound of the ocean", "wav", 1, 100, 3],
178
- ["A woman and a baby are having a conversation", "wav", 1, 100, 3],
179
- ["A man speaks followed by a popping noise and laughter", "wav", 1, 100, 3],
180
- ["A cup is filled from a faucet", "wav", 1, 100, 3],
181
- ["An audience cheering and clapping", "wav", 1, 100, 3],
182
- ["Rolling thunder with lightning strikes", "wav", 1, 100, 3],
183
- ["A dog barking and a cat mewing and a racing car passes by", "wav", 1, 100, 3],
184
- ["Gentle water stream, birds chirping and sudden gun shot", "wav", 1, 100, 3],
185
- ["A man talking followed by a goat baaing then a metal gate sliding shut as ducks quack and wind blows into a microphone.", "wav", 1, 100, 3],
186
- ["A dog barking", "wav", 1, 100, 3],
187
- ["A cat meowing", "wav", 1, 100, 3],
188
- ["Wooden table tapping sound while water pouring", "wav", 1, 100, 3],
189
- ["Applause from a crowd with distant clicking and a man speaking over a loudspeaker", "wav", 1, 100, 3],
190
- ["two gunshots followed by birds flying away while chirping", "wav", 1, 100, 3],
191
- ["Whistling with birds chirping", "wav", 1, 100, 3],
192
- ["A person snoring", "wav", 1, 100, 3],
193
- ["Motor vehicles are driving with loud engines and a person whistles", "wav", 1, 100, 3],
194
- ["People cheering in a stadium while thunder and lightning strikes", "wav", 1, 100, 3],
195
- ["A helicopter is in flight", "wav", 1, 100, 3],
196
- ["A dog barking and a man talking and a racing car passes by", "wav", 1, 100, 3],
197
  ],
198
  cache_examples="lazy", # Turn on to cache.
199
  )
 
1
  import gradio as gr
2
  import json
3
+ import random
4
  import torch
5
  import wavio
6
  from tqdm import tqdm
 
12
  from gradio import Markdown
13
  import spaces
14
 
15
+ max_64_bit_int = 2**63 - 1
16
+
17
  # Automatic device detection
18
  if torch.cuda.is_available():
19
  device_type = "cuda"
 
92
  output_format="wav",
93
  output_number=3,
94
  steps=100,
95
+ guidance=3,
96
+ is_randomize_seed=True,
97
+ seed=123
98
  ):
99
+ if is_randomize_seed:
100
+ seed = random.randint(0, max_64_bit_int)
101
+
102
+ random.seed(seed)
103
+ torch.manual_seed(seed)
104
+
105
  output_wave = tango.generate(prompt, steps, guidance, output_number)
106
  # output_filename = f"{prompt.replace(' ', '_')}_{steps}_{guidance}"[:250] + ".wav"
107
 
 
172
  output_audio_3 = gr.Audio(label="Generated Audio #3/3", type="filepath")
173
  denoising_steps = gr.Slider(minimum=10, maximum=200, value=100, step=1, label="Steps", interactive=True)
174
  guidance_scale = gr.Slider(minimum=1, maximum=10, value=3, step=0.1, label="Guidance Scale", interactive=True)
175
+ randomize_seed = gr.Checkbox(label = "\U0001F3B2 Randomize seed", value = True, info = "If checked, result is always different")
176
+ seed = gr.Slider(minimum = 0, maximum = max_64_bit_int, step = 1, randomize = True, label = "Seed")
177
 
178
  # Gradio interface
179
  gr_interface = gr.Interface(
180
  fn=gradio_generate,
181
+ inputs=[input_text, output_format, output_number, denoising_steps, guidance_scale, randomize_seed, seed],
182
  outputs=[output_audio_1, output_audio_2, output_audio_3],
183
  title="Tango 2: Aligning Diffusion-based Text-to-Audio Generations through Direct Preference Optimization",
184
  description=description_text,
185
  allow_flagging=False,
186
  examples=[
187
+ ["Quiet speech and then and airplane flying away", "wav", 1, 100, 3, False, 123],
188
+ ["A bicycle peddling on dirt and gravel followed by a man speaking then laughing", "wav", 1, 100, 3, False, 123],
189
+ ["Ducks quack and water splashes with some animal screeching in the background", "wav", 1, 100, 3, False, 123],
190
+ ["Describe the sound of the ocean", "wav", 1, 100, 3, False, 123],
191
+ ["A woman and a baby are having a conversation", "wav", 1, 100, 3, False, 123],
192
+ ["A man speaks followed by a popping noise and laughter", "wav", 1, 100, 3, False, 123],
193
+ ["A cup is filled from a faucet", "wav", 1, 100, 3, False, 123],
194
+ ["An audience cheering and clapping", "wav", 1, 100, 3, False, 123],
195
+ ["Rolling thunder with lightning strikes", "wav", 1, 100, 3, False, 123],
196
+ ["A dog barking and a cat mewing and a racing car passes by", "wav", 1, 100, 3, False, 123],
197
+ ["Gentle water stream, birds chirping and sudden gun shot", "wav", 1, 100, 3, False, 123],
198
+ ["A man talking followed by a goat baaing then a metal gate sliding shut as ducks quack and wind blows into a microphone.", "wav", 1, 100, 3, False, 123],
199
+ ["A dog barking", "wav", 1, 100, 3, False, 123],
200
+ ["A cat meowing", "wav", 1, 100, 3, False, 123],
201
+ ["Wooden table tapping sound while water pouring", "wav", 1, 100, 3, False, 123],
202
+ ["Applause from a crowd with distant clicking and a man speaking over a loudspeaker", "wav", 1, 100, 3, False, 123],
203
+ ["two gunshots followed by birds flying away while chirping", "wav", 1, 100, 3, False, 123],
204
+ ["Whistling with birds chirping", "wav", 1, 100, 3, False, 123],
205
+ ["A person snoring", "wav", 1, 100, 3, False, 123],
206
+ ["Motor vehicles are driving with loud engines and a person whistles", "wav", 1, 100, 3, False, 123],
207
+ ["People cheering in a stadium while thunder and lightning strikes", "wav", 1, 100, 3, False, 123],
208
+ ["A helicopter is in flight", "wav", 1, 100, 3, False, 123],
209
+ ["A dog barking and a man talking and a racing car passes by", "wav", 1, 100, 3, False, 123],
210
  ],
211
  cache_examples="lazy", # Turn on to cache.
212
  )