nock2 committed on
Commit
d4a8611
·
verified ·
1 Parent(s): b14b378

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -14
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
  from huggingface_hub import login
3
  import torch
4
  import torchaudio
@@ -7,20 +9,20 @@ import gradio as gr
7
  from stable_audio_tools import get_pretrained_model
8
  from stable_audio_tools.inference.generation import generate_diffusion_cond
9
 
10
- # Authenticate
11
  token = os.getenv("HUGGINGFACE_TOKEN")
12
  if not token:
13
  raise RuntimeError("HUGGINGFACE_TOKEN not set")
14
  login(token=token, add_to_git_credential=False)
15
 
16
- # Load model
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
  model, config = get_pretrained_model("stabilityai/stable-audio-open-small")
19
  model = model.to(device)
20
  sample_rate = config["sample_rate"]
21
  sample_size = config["sample_size"]
22
 
23
- # Inference function
24
  def generate_audio(prompt):
25
  conditioning = [{"prompt": prompt, "seconds_total": 11}]
26
  with torch.no_grad():
@@ -37,22 +39,70 @@ def generate_audio(prompt):
37
  torchaudio.save(path, output, sample_rate)
38
  return path
39
 
40
- # 🌀 Hot Prompt Club UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  gr.Interface(
42
- fn=generate_audio,
43
  inputs=gr.Textbox(
44
- label="🎤 Prompt your sonic art here",
45
  placeholder="e.g. 'drunk driving with mario and yung lean'"
46
  ),
47
- outputs=gr.Audio(
48
- type="filepath",
49
- label="🧠 Generated Audio"
50
- ),
51
  title='🌐 Hot Prompts in Your Area: "My Husband Is Dead"',
52
- description="Enter a fun sound idea for music art.",
53
  examples=[
54
- "ghosts peeing in a server room",
55
- "tech startup boss villain entrance music",
56
- "AI doing acid in a technofeudalist dystopia"
57
  ]
58
  ).launch()
 
1
  import os
2
+ import time
3
+ import requests
4
  from huggingface_hub import login
5
  import torch
6
  import torchaudio
 
9
  from stable_audio_tools import get_pretrained_model
10
  from stable_audio_tools.inference.generation import generate_diffusion_cond
11
 
12
+ # Authenticate Hugging Face Hub
13
  token = os.getenv("HUGGINGFACE_TOKEN")
14
  if not token:
15
  raise RuntimeError("HUGGINGFACE_TOKEN not set")
16
  login(token=token, add_to_git_credential=False)
17
 
18
+ # Load audio model
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
20
  model, config = get_pretrained_model("stabilityai/stable-audio-open-small")
21
  model = model.to(device)
22
  sample_rate = config["sample_rate"]
23
  sample_size = config["sample_size"]
24
 
25
+ # Audio generation function
26
  def generate_audio(prompt):
27
  conditioning = [{"prompt": prompt, "seconds_total": 11}]
28
  with torch.no_grad():
 
39
  torchaudio.save(path, output, sample_rate)
40
  return path
41
 
42
+ # Image generation function using Replicate
43
+ def generate_image(prompt):
44
+ replicate_token = os.getenv("REPLICATE_API_TOKEN")
45
+ if not replicate_token:
46
+ raise RuntimeError("REPLICATE_API_TOKEN not set")
47
+
48
+ url = "https://api.replicate.com/v1/predictions"
49
+ headers = {
50
+ "Authorization": f"Token {replicate_token}",
51
+ "Content-Type": "application/json"
52
+ }
53
+ data = {
54
+ "version": "5ee6b41748a4e3e3d3a212ed4a29379d6a13b9265fd00fe59e28c2767a5e82eb",
55
+ "input": {
56
+ "prompt": prompt,
57
+ "style": "surreal"
58
+ }
59
+ }
60
+ response = requests.post(url, headers=headers, json=data)
61
+ response.raise_for_status()
62
+ prediction = response.json()
63
+
64
+ status = prediction["status"]
65
+ get_url = prediction["urls"]["get"]
66
+
67
+ while status not in ["succeeded", "failed"]:
68
+ time.sleep(1.5)
69
+ resp = requests.get(get_url, headers=headers)
70
+ prediction = resp.json()
71
+ status = prediction["status"]
72
+
73
+ if status != "succeeded":
74
+ raise RuntimeError(f"Image generation failed: {prediction}")
75
+
76
+ image_url = prediction["output"]
77
+ image_path = "output.png"
78
+ image_data = requests.get(image_url).content
79
+ with open(image_path, "wb") as f:
80
+ f.write(image_data)
81
+
82
+ return image_path
83
+
84
+ # Combined generation function
85
+ def generate_assets(prompt):
86
+ audio_path = generate_audio(prompt)
87
+ image_path = generate_image(prompt)
88
+ return audio_path, image_path
89
+
90
+ # Gradio UI
91
  gr.Interface(
92
+ fn=generate_assets,
93
  inputs=gr.Textbox(
94
+ label="🎤 Prompt your sonic + visual art",
95
  placeholder="e.g. 'drunk driving with mario and yung lean'"
96
  ),
97
+ outputs=[
98
+ gr.Audio(type="filepath", label="🧠 Generated Audio"),
99
+ gr.Image(type="filepath", label="🎨 Generated Image")
100
+ ],
101
  title='🌐 Hot Prompts in Your Area: "My Husband Is Dead"',
102
+ description="Enter a fun sound idea — generate audio *and* visual from one prompt.",
103
  examples=[
104
+ "ghosts peeing",
105
+ "Tech startup boss villain entrance music",
106
+ "Dolphin hootin'"
107
  ]
108
  ).launch()