jacob-c committed on
Commit
0444752
·
1 Parent(s): 12ceea1
Files changed (1) hide show
  1. app.py +97 -34
app.py CHANGED
@@ -3,78 +3,141 @@ import gradio as gr
3
  import os
4
  import torch
5
  import json
 
6
 
# Select the compute device: CUDA when torch reports it available, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Inference endpoint of the AST audio-classification checkpoint.
API_URL = "https://api-inference.huggingface.co/models/MIT/ast-finetuned-audioset-10-10-0.4593"

# The bearer token is read from HF_TOKEN once, when this module is imported.
headers = {"Authorization": "Bearer {}".format(os.environ.get('HF_TOKEN'))}
12
 
def format_error(message):
    """Wrap *message* in a one-element list holding an ``{"error": ...}`` dict."""
    payload = {"error": message}
    return [payload]
16
 
def classify_audio(audio_file):
    """Classify an uploaded audio file using the Hugging Face AST model.

    Args:
        audio_file: Path of the audio file on disk, or None when nothing
            was uploaded.

    Returns:
        On success, a list of ``{'label': ..., 'score': 'NN.NN%'}`` dicts,
        one per entry of the API response. On any failure, the payload
        produced by ``format_error`` describing what went wrong.
    """
    if audio_file is None:
        return format_error("Please upload an audio file.")

    try:
        token = os.environ.get('HF_TOKEN')
        if not token:
            return format_error("Error: HF_TOKEN environment variable is not set. Please set your Hugging Face API token.")
        # Debug: log only presence and length, never the token itself.
        # The token is known to be non-empty here because of the guard above.
        print(f"Token present: Yes, Token length: {len(token)}")

        # Debug: Print audio file info
        print(f"Audio file path: {audio_file}")
        print(f"Audio file size: {os.path.getsize(audio_file)} bytes")

        with open(audio_file, "rb") as f:
            data = f.read()

        print("Sending request to Hugging Face API...")
        # requests waits forever by default; bound the call so a stalled
        # endpoint becomes a reported error instead of a hung handler.
        response = requests.post(API_URL, headers=headers, data=data, timeout=60)

        # Print response for debugging
        print(f"Response status code: {response.status_code}")
        print(f"Response headers: {dict(response.headers)}")
        print(f"Response content: {response.content.decode('utf-8', errors='ignore')}")

        if response.status_code == 200:
            # Render each raw score as a percentage string for readability.
            return [
                {
                    'label': result['label'],
                    'score': f"{result['score']*100:.2f}%",
                }
                for result in response.json()
            ]
        if response.status_code == 401:
            return format_error("Error: Invalid or missing API token. Please check your Hugging Face API token.")
        if response.status_code == 503:
            return format_error("Error: Model is loading. Please try again in a few seconds.")

        error_msg = f"Error: API returned status code {response.status_code}\n"
        error_msg += f"Response headers: {dict(response.headers)}\n"
        error_msg += f"Response: {response.text}"
        return format_error(error_msg)

    except Exception as e:
        # UI boundary: report every failure (including request timeouts)
        # as an error payload rather than letting it propagate.
        import traceback
        error_details = traceback.format_exc()
        return format_error(f"Error processing audio: {str(e)}\nDetails:\n{error_details}")
70
 
# Gradio UI: a single audio upload in, the classifier's JSON payload out.
iface = gr.Interface(
    title="Audio Classification using AST Model",
    description="Upload an audio file to get its classification results using the Audio Spectrogram Transformer model.",
    fn=classify_audio,
    # type="filepath" hands the callback a path on disk rather than raw samples.
    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
    outputs=gr.JSON(label="Classification Results"),
    examples=[],
)
80
 
 
3
  import os
4
  import torch
5
  import json
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
 
# Select the compute device: CUDA when torch reports it available, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Inference endpoints: one for audio classification, one for lyric generation.
AUDIO_API_URL = "https://api-inference.huggingface.co/models/MIT/ast-finetuned-audioset-10-10-0.4593"
JANUS_API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/Janus-1.3B"

# Shared request headers; the bearer token is read from HF_TOKEN once, at import time.
headers = {"Authorization": "Bearer {}".format(os.environ.get('HF_TOKEN'))}
15
 
def format_error(message):
    """Return *message* wrapped in a single-key ``{"error": ...}`` dict."""
    return dict(error=message)
19
 
def create_lyrics_prompt(classification_results):
    """Create a prompt for lyrics generation based on classification results.

    Args:
        classification_results: Non-empty sequence of dicts, each with a
            ``'label'`` key. The first entry is used as the genre; the
            next two labels (when present) are listed as additional
            musical elements.

    Returns:
        The prompt text to send to the lyrics model.

    Raises:
        ValueError: If ``classification_results`` is empty.
    """
    if not classification_results:
        raise ValueError("classification_results must contain at least one result")

    # Only the label of the top result is needed; its score is not parsed,
    # so results without a percent-formatted score are still accepted.
    genre = classification_results[0]['label']

    # Up to two runner-up labels; fall back to 'none' so the prompt never
    # ends that line with a dangling colon.
    extras = ', '.join(r['label'] for r in classification_results[1:3]) or 'none'

    prompt = f"""Write song lyrics in the style of {genre} music. The song should capture the essence of this genre.
Additional musical elements detected: {extras}

Please write creative and original lyrics that:
1. Match the {genre} style
2. Have a clear structure (verse, chorus)
3. Reflect the mood and themes common in this genre

Generate the lyrics:
"""
    return prompt
39
+
def generate_lyrics(prompt):
    """Generate lyrics using the Janus model.

    Args:
        prompt: Text sent as the ``inputs`` field of the request.

    Returns:
        The generated text on success; otherwise a human-readable message
        describing the failure. Any ``Exception`` raised while making the
        request or reading the response is converted into such a message
        instead of propagating.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 200,
            "temperature": 0.7,
            "top_p": 0.9,
            "return_full_text": False,
        },
    }
    try:
        # requests waits forever by default; bound the call so a stalled
        # endpoint becomes an error message instead of a hung handler.
        response = requests.post(
            JANUS_API_URL,
            headers=headers,
            json=payload,
            timeout=60,
        )

        if response.status_code == 503:
            return "Model is loading. Please try again in a few seconds."
        if response.status_code != 200:
            return f"Error generating lyrics: {response.text}"

        try:
            return response.json()[0]["generated_text"]
        except (KeyError, IndexError, TypeError):
            # A 200 whose body is not [{"generated_text": ...}] would
            # otherwise surface as a cryptic "Error: 'generated_text'".
            return f"Error generating lyrics: unexpected response format: {response.text}"
    except Exception as e:
        return f"Error: {str(e)}"
65
+
def format_results(classification_results, lyrics, prompt):
    """Render the numbered classification list and the lyrics as one text block.

    ``prompt`` is accepted but does not appear in the rendered output.
    """
    # One "N. label: score" line per result, numbered from 1.
    ranked_lines = [
        f"{rank}. {entry['label']}: {entry['score']}\n"
        for rank, entry in enumerate(classification_results, start=1)
    ]
    classification_text = "Classification Results:\n" + "".join(ranked_lines)

    # Classification block first, then the lyrics under a separator line.
    return f"\n{classification_text}\n\n---Generated Lyrics---\n\n{lyrics}\n"
80
+
def classify_and_generate(audio_file):
    """Classify the audio and generate matching lyrics.

    Args:
        audio_file: Path of the uploaded audio file on disk, or None when
            nothing was uploaded.

    Returns:
        A string for display: the formatted classification list followed
        by the generated lyrics on success, or an error message on failure.
    """
    if audio_file is None:
        return "Please upload an audio file."

    try:
        token = os.environ.get('HF_TOKEN')
        if not token:
            return "Error: HF_TOKEN environment variable is not set. Please set your Hugging Face API token."

        # First, classify the audio
        with open(audio_file, "rb") as f:
            data = f.read()

        print("Sending request to Audio Classification API...")
        # requests waits forever by default; bound the call so a stalled
        # endpoint becomes a reported error instead of a hung handler.
        response = requests.post(AUDIO_API_URL, headers=headers, data=data, timeout=60)

        if response.status_code == 401:
            return "Error: Invalid or missing API token. Please check your Hugging Face API token."
        if response.status_code == 503:
            return "Error: Model is loading. Please try again in a few seconds."
        if response.status_code != 200:
            return f"Error: API returned status code {response.status_code}\nResponse: {response.text}"

        # Render each raw score as a percentage string.
        formatted_results = [
            {
                'label': result['label'],
                'score': f"{result['score']*100:.2f}%",
            }
            for result in response.json()
        ]
        if not formatted_results:
            # The prompt builder needs a top result; report the empty
            # response directly instead of crashing into a traceback.
            return "Error: The classification API returned no results."

        # Generate lyrics based on classification
        print("Generating lyrics based on classification...")
        prompt = create_lyrics_prompt(formatted_results)
        lyrics = generate_lyrics(prompt)

        # Format and return results
        return format_results(formatted_results, lyrics, prompt)

    except Exception as e:
        # UI boundary: report every failure (including request timeouts)
        # as text rather than letting it propagate.
        import traceback
        error_details = traceback.format_exc()
        return f"Error processing request: {str(e)}\nDetails:\n{error_details}"
129
 
# Gradio UI: a single audio upload in, one text box with results and lyrics out.
iface = gr.Interface(
    title="Music Genre Classifier + Lyric Generator",
    description="Upload an audio file to classify its genre and generate matching lyrics using AI.",
    fn=classify_and_generate,
    # type="filepath" hands the callback a path on disk rather than raw samples.
    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
    outputs=gr.Textbox(
        placeholder="Upload an audio file to see classification results and generated lyrics...",
        lines=15,
        label="Results",
    ),
    examples=[],
)
143