gfartenstein committed on
Commit
302a17b
•
1 Parent(s): 73ff61b
Files changed (1) hide show
  1. app.py +31 -43
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import gradio as gr
2
  import torch
3
- import requests
4
  from PIL import Image
5
  import numpy as np
6
  from spectro import wav_bytes_from_spectrogram_image
7
- from io import BytesIO
8
  from diffusers import StableDiffusionPipeline
9
  from diffusers import StableDiffusionImg2ImgPipeline
10
 
@@ -28,30 +28,18 @@ def predict(prompt, negative_prompt, audio_input, duration):
28
  # return style_transfer(prompt, negative_prompt, audio_input)
29
 
30
  def classic(prompt, negative_prompt, duration):
31
- pipe2.safety_checker = dummy_checker
32
- url = "https://huggingface.co/spaces/gfartenstein/text2fart/resolve/main/rootfart-1.jpg"
33
- response = requests.get(url)
34
- im = Image.open(BytesIO(response.content)).convert("RGB")
35
- # spec = pipe(prompt, negative_prompt=negative_prompt, height=512, width=512).images[0]
36
- spec = pipe2(prompt=prompt, negative_prompt=negative_prompt, image=im, strength=0.5, guidance_scale=7).images
37
  print(spec)
38
- # wav = wav_bytes_from_spectrogram_image(spec)
39
- wav = wav_bytes_from_spectrogram_image(spec[0])
40
  with open("output.wav", "wb") as f:
41
  f.write(wav[0].getbuffer())
42
  return spec, 'output.wav', gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
43
 
44
- # def style_transfer(prompt, negative_prompt, audio_input):
45
- # pipe.safety_checker = dummy_checker
46
- # url = "https://huggingface.co/spaces/gfartenstein/text2fart/resolve/main/rootfart-1.jpg"
47
- # response = requests.get(url)
48
- # init_image = Image.open(BytesIO(response.content)).convert("RGB")
49
- # images = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images
50
-
51
-
52
  # spec = spectro_from_wav(audio_input)
53
  # Open the image
54
- # im = Image.open('rootfart-1.jpg')
55
  # im = Image.open(spec)
56
 
57
 
@@ -59,37 +47,37 @@ def classic(prompt, negative_prompt, duration):
59
  # im = image_from_spectrogram(im, 1)
60
 
61
 
62
- # new_spectro = pipe2(prompt=prompt, image=im, strength=0.5, guidance_scale=7).images
63
- # wav = wav_bytes_from_spectrogram_image(new_spectro[0])
64
- # with open("output.wav", "wb") as f:
65
- # f.write(wav[0].getbuffer())
66
- # return new_spectro[0], 'output.wav', gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
67
 
68
- # def image_from_spectrogram(
69
- # spectrogram: np.ndarray, max_volume: float = 50, power_for_image: float = 0.25
70
- # ) -> Image.Image:
71
- # """
72
- # Compute a spectrogram image from a spectrogram magnitude array.
73
- # """
74
- # # Apply the power curve
75
- # data = np.power(spectrogram, power_for_image)
76
 
77
- # # Rescale to 0-255
78
- # data = data * 255 / max_volume
79
 
80
- # # Invert
81
- # data = 255 - data
82
 
83
- # # Convert to a PIL image
84
- # image = Image.fromarray(data.astype(np.uint8))
85
 
86
- # # Flip Y
87
- # image = image.transpose(Image.FLIP_TOP_BOTTOM)
88
 
89
- # # Convert to RGB
90
- # image = image.convert("RGB")
91
 
92
- # return image
93
 
94
  title = """
95
  <div style="text-align: center; max-width: 500px; margin: 0 auto;">
 
1
  import gradio as gr
2
  import torch
3
+
4
  from PIL import Image
5
  import numpy as np
6
  from spectro import wav_bytes_from_spectrogram_image
7
+
8
  from diffusers import StableDiffusionPipeline
9
  from diffusers import StableDiffusionImg2ImgPipeline
10
 
 
28
  # return style_transfer(prompt, negative_prompt, audio_input)
29
 
30
  def classic(prompt, negative_prompt, duration):
31
+ pipe.safety_checker = dummy_checker
32
+ spec = pipe(prompt, negative_prompt=negative_prompt, height=512, width=512).images[0]
 
 
 
 
33
  print(spec)
34
+ wav = wav_bytes_from_spectrogram_image(spec)
 
35
  with open("output.wav", "wb") as f:
36
  f.write(wav[0].getbuffer())
37
  return spec, 'output.wav', gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
38
 
39
+ def style_transfer(prompt, negative_prompt, audio_input):
 
 
 
 
 
 
 
40
  # spec = spectro_from_wav(audio_input)
41
  # Open the image
42
+ im = Image.open('rootfart-1.jpg')
43
  # im = Image.open(spec)
44
 
45
 
 
47
  # im = image_from_spectrogram(im, 1)
48
 
49
 
50
+ new_spectro = pipe2(prompt=prompt, image=im, strength=0.5, guidance_scale=7).images
51
+ wav = wav_bytes_from_spectrogram_image(new_spectro[0])
52
+ with open("output.wav", "wb") as f:
53
+ f.write(wav[0].getbuffer())
54
+ return new_spectro[0], 'output.wav', gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
55
 
56
+ def image_from_spectrogram(
57
+ spectrogram: np.ndarray, max_volume: float = 50, power_for_image: float = 0.25
58
+ ) -> Image.Image:
59
+ """
60
+ Compute a spectrogram image from a spectrogram magnitude array.
61
+ """
62
+ # Apply the power curve
63
+ data = np.power(spectrogram, power_for_image)
64
 
65
+ # Rescale to 0-255
66
+ data = data * 255 / max_volume
67
 
68
+ # Invert
69
+ data = 255 - data
70
 
71
+ # Convert to a PIL image
72
+ image = Image.fromarray(data.astype(np.uint8))
73
 
74
+ # Flip Y
75
+ image = image.transpose(Image.FLIP_TOP_BOTTOM)
76
 
77
+ # Convert to RGB
78
+ image = image.convert("RGB")
79
 
80
+ return image
81
 
82
  title = """
83
  <div style="text-align: center; max-width: 500px; margin: 0 auto;">