Spaces:
Running
Running
app.py
CHANGED
@@ -3,78 +3,141 @@ import gradio as gr
|
|
3 |
import os
|
4 |
import torch
|
5 |
import json
|
|
|
6 |
|
7 |
# Check if CUDA is available and set the device accordingly
|
8 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
9 |
|
10 |
-
|
|
|
|
|
11 |
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN')}"}
|
12 |
|
13 |
def format_error(message):
|
14 |
"""Helper function to format error messages as JSON"""
|
15 |
-
return
|
16 |
|
17 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
"""
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
"""
|
21 |
if audio_file is None:
|
22 |
-
return
|
23 |
|
24 |
try:
|
25 |
-
# Debug: Print token status (masked)
|
26 |
token = os.environ.get('HF_TOKEN')
|
27 |
if not token:
|
28 |
-
return
|
29 |
-
print(f"Token present: {'Yes' if token else 'No'}, Token length: {len(token) if token else 0}")
|
30 |
-
|
31 |
-
# Debug: Print audio file info
|
32 |
-
print(f"Audio file path: {audio_file}")
|
33 |
-
print(f"Audio file size: {os.path.getsize(audio_file)} bytes")
|
34 |
|
|
|
35 |
with open(audio_file, "rb") as f:
|
36 |
data = f.read()
|
37 |
|
38 |
-
print("Sending request to
|
39 |
-
response = requests.post(
|
40 |
-
|
41 |
-
# Print response for debugging
|
42 |
-
print(f"Response status code: {response.status_code}")
|
43 |
-
print(f"Response headers: {dict(response.headers)}")
|
44 |
-
print(f"Response content: {response.content.decode('utf-8', errors='ignore')}")
|
45 |
|
46 |
if response.status_code == 200:
|
47 |
-
|
48 |
-
# Format results
|
49 |
formatted_results = []
|
50 |
-
for result in
|
51 |
formatted_results.append({
|
52 |
'label': result['label'],
|
53 |
'score': f"{result['score']*100:.2f}%"
|
54 |
})
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
elif response.status_code == 401:
|
57 |
-
return
|
58 |
elif response.status_code == 503:
|
59 |
-
return
|
60 |
else:
|
61 |
-
|
62 |
-
error_msg += f"Response headers: {dict(response.headers)}\n"
|
63 |
-
error_msg += f"Response: {response.text}"
|
64 |
-
return format_error(error_msg)
|
65 |
|
66 |
except Exception as e:
|
67 |
import traceback
|
68 |
error_details = traceback.format_exc()
|
69 |
-
return
|
70 |
|
71 |
# Create Gradio interface
|
72 |
iface = gr.Interface(
|
73 |
-
fn=
|
74 |
inputs=gr.Audio(type="filepath", label="Upload Audio File"),
|
75 |
-
outputs=gr.
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
78 |
examples=[],
|
79 |
)
|
80 |
|
|
|
3 |
import os
|
4 |
import torch
|
5 |
import json
|
6 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
7 |
|
8 |
# Run on the GPU when torch reports CUDA support; otherwise fall back to CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
|
10 |
|
11 |
+
# Hugging Face Inference API endpoints used by this app.
AUDIO_API_URL = "https://api-inference.huggingface.co/models/MIT/ast-finetuned-audioset-10-10-0.4593"
JANUS_API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/Janus-1.3B"

# Authorization header shared by every API request. The token is read from
# the HF_TOKEN environment variable once, when this module is imported.
_hf_token = os.environ.get('HF_TOKEN')
headers = {"Authorization": f"Bearer {_hf_token}"}
|
15 |
|
16 |
def format_error(message):
    """Wrap an error message in a dict under the ``"error"`` key.

    Args:
        message: The error text (or any other value) to report.

    Returns:
        A one-entry dict of the form ``{"error": message}``.
    """
    payload = dict(error=message)
    return payload
|
19 |
|
20 |
+
def create_lyrics_prompt(classification_results):
    """Create a prompt for lyrics generation based on classification results.

    Args:
        classification_results: Non-empty sequence of dicts, each holding at
            least a ``'label'`` key. The first entry is treated as the top
            genre; the second and third entries (when present) are listed as
            additional detected elements.

    Returns:
        The prompt text to send to the lyrics-generation model.

    Raises:
        ValueError: If ``classification_results`` is empty.
    """
    # Fail with a clear message instead of a bare IndexError on results[0].
    if not classification_results:
        raise ValueError("classification_results must contain at least one result")

    genre = classification_results[0]['label']
    # Only the two runners-up are mentioned; later entries are ignored.
    extra_labels = ', '.join(r['label'] for r in classification_results[1:3])

    return (
        f"Write song lyrics in the style of {genre} music. "
        "The song should capture the essence of this genre.\n"
        f"Additional musical elements detected: {extra_labels}\n"
        "\n"
        "Please write creative and original lyrics that:\n"
        f"1. Match the {genre} style\n"
        "2. Have a clear structure (verse, chorus)\n"
        "3. Reflect the mood and themes common in this genre\n"
        "\n"
        "Generate the lyrics:\n"
    )
|
39 |
+
|
40 |
+
def generate_lyrics(prompt, timeout=60):
    """Generate lyrics by posting the prompt to the Janus model endpoint.

    Args:
        prompt: Prompt text sent as the ``"inputs"`` field of the request.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        On HTTP 200, the ``"generated_text"`` of the first item in the JSON
        response. On HTTP 503, a fixed "model is loading" message. For any
        other status, or if an exception occurs (including a timeout or an
        unexpected response shape), a human-readable error string.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 200,
            "temperature": 0.7,
            "top_p": 0.9,
            "return_full_text": False,
        },
    }
    try:
        response = requests.post(
            JANUS_API_URL,
            headers=headers,
            json=payload,
            timeout=timeout,
        )

        if response.status_code == 200:
            return response.json()[0]["generated_text"]
        if response.status_code == 503:
            return "Model is loading. Please try again in a few seconds."
        return f"Error generating lyrics: {response.text}"
    except Exception as e:
        # Boundary handler: the caller displays whatever string comes back,
        # so failures are reported as text rather than raised.
        return f"Error: {str(e)}"
|
65 |
+
|
66 |
+
def format_results(classification_results, lyrics, prompt):
    """Format the classification results and generated lyrics for display.

    Args:
        classification_results: Iterable of dicts with ``'label'`` and
            ``'score'`` keys; each becomes one numbered line.
        lyrics: The generated lyrics text (or an error message) to show.
        prompt: Accepted for interface compatibility; not used in the output.

    Returns:
        A single string containing the numbered classification list followed
        by a ``---Generated Lyrics---`` section.
    """
    # Build the numbered list in one pass instead of repeated string +=.
    numbered_lines = "".join(
        f"{rank}. {result['label']}: {result['score']}\n"
        for rank, result in enumerate(classification_results, start=1)
    )
    classification_text = "Classification Results:\n" + numbered_lines

    return (
        "\n"
        f"{classification_text}\n"
        "\n---Generated Lyrics---\n\n"
        f"{lyrics}\n"
    )
|
80 |
+
|
81 |
+
def classify_and_generate(audio_file):
    """Classify the audio and generate matching lyrics.

    Args:
        audio_file: Path to the uploaded audio file, or ``None`` when nothing
            was uploaded.

    Returns:
        A display string: the formatted classification results and lyrics on
        success, otherwise a human-readable message describing what went
        wrong. Exceptions are reported as text rather than raised.
    """
    if audio_file is None:
        return "Please upload an audio file."

    # Bound the HTTP wait so a stalled connection cannot hang the request.
    request_timeout = 60

    try:
        token = os.environ.get('HF_TOKEN')
        if not token:
            return "Error: HF_TOKEN environment variable is not set. Please set your Hugging Face API token."

        # First, classify the audio
        with open(audio_file, "rb") as f:
            data = f.read()

        print("Sending request to Audio Classification API...")
        response = requests.post(
            AUDIO_API_URL,
            headers=headers,
            data=data,
            timeout=request_timeout,
        )

        if response.status_code == 200:
            classification_results = response.json()

            # An empty or non-list payload would otherwise fail further down
            # with an IndexError/TypeError and show the user a traceback.
            if not isinstance(classification_results, list) or not classification_results:
                return "Error: The classification API returned no results for this audio file."

            # Format classification results (scores become percent strings)
            formatted_results = [
                {
                    'label': result['label'],
                    'score': f"{result['score']*100:.2f}%",
                }
                for result in classification_results
            ]

            # Generate lyrics based on classification
            print("Generating lyrics based on classification...")
            prompt = create_lyrics_prompt(formatted_results)
            lyrics = generate_lyrics(prompt)

            # Format and return results
            return format_results(formatted_results, lyrics, prompt)

        elif response.status_code == 401:
            return "Error: Invalid or missing API token. Please check your Hugging Face API token."
        elif response.status_code == 503:
            return "Error: Model is loading. Please try again in a few seconds."
        else:
            return f"Error: API returned status code {response.status_code}\nResponse: {response.text}"

    except Exception as e:
        # Boundary handler: report the failure, including the traceback,
        # as the returned text.
        import traceback
        error_details = traceback.format_exc()
        return f"Error processing request: {str(e)}\nDetails:\n{error_details}"
|
129 |
|
130 |
# Gradio UI: a single audio upload feeds classify_and_generate, and the
# returned text is shown in a multi-line results box.
_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
_results_output = gr.Textbox(
    label="Results",
    lines=15,
    placeholder="Upload an audio file to see classification results and generated lyrics...",
)

iface = gr.Interface(
    fn=classify_and_generate,
    inputs=_audio_input,
    outputs=_results_output,
    title="Music Genre Classifier + Lyric Generator",
    description="Upload an audio file to classify its genre and generate matching lyrics using AI.",
    examples=[],
)
|
143 |
|