Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -7,28 +7,11 @@ import numpy as np
|
|
7 |
import os
|
8 |
import sys
|
9 |
from pathlib import Path
|
|
|
10 |
|
11 |
# Model and Tokenizer Loading
|
12 |
MODEL_ID = "Qwen/Qwen-Audio-Chat"
|
13 |
|
14 |
-
# Add the model's directory to sys.path to import its audio module
|
15 |
-
def setup_audio_module():
|
16 |
-
try:
|
17 |
-
from huggingface_hub import snapshot_download
|
18 |
-
|
19 |
-
# Download the model files
|
20 |
-
model_path = snapshot_download(MODEL_ID)
|
21 |
-
if model_path not in sys.path:
|
22 |
-
sys.path.append(model_path)
|
23 |
-
|
24 |
-
# Now we can import the audio module
|
25 |
-
global Audio
|
26 |
-
from audio import Audio
|
27 |
-
return True
|
28 |
-
except Exception as e:
|
29 |
-
print(f"Error setting up audio module: {e}")
|
30 |
-
return False
|
31 |
-
|
32 |
def load_model():
|
33 |
print("Loading model and tokenizer...")
|
34 |
model = AutoModelForCausalLM.from_pretrained(
|
@@ -42,15 +25,28 @@ def load_model():
|
|
42 |
return model, tokenizer
|
43 |
|
44 |
def process_audio(audio_path):
|
45 |
-
"""Process audio file
|
46 |
try:
|
47 |
print(f"Processing audio file: {audio_path}")
|
48 |
-
#
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
except Exception as e:
|
55 |
print(f"Error processing audio: {e}")
|
56 |
return None
|
@@ -74,13 +70,9 @@ def analyze_audio(audio_path: str, question: str = None) -> str:
|
|
74 |
if not os.path.exists(audio_path):
|
75 |
return f"Audio file not found: {audio_path}"
|
76 |
|
77 |
-
# Setup audio module
|
78 |
-
if not setup_audio_module():
|
79 |
-
return "Failed to initialize audio processing module."
|
80 |
-
|
81 |
# Process audio
|
82 |
-
|
83 |
-
if
|
84 |
return "Failed to process the audio file. Please ensure it's a valid audio format."
|
85 |
|
86 |
try:
|
@@ -94,13 +86,14 @@ def analyze_audio(audio_path: str, question: str = None) -> str:
|
|
94 |
"content": [
|
95 |
{
|
96 |
"type": "audio",
|
97 |
-
"
|
|
|
98 |
},
|
99 |
{
|
100 |
"type": "text",
|
101 |
-
"text": query
|
102 |
-
}
|
103 |
-
]
|
104 |
}
|
105 |
]
|
106 |
|
|
|
7 |
import os
|
8 |
import sys
|
9 |
from pathlib import Path
|
10 |
+
import base64
|
11 |
|
12 |
# Model and Tokenizer Loading
|
13 |
MODEL_ID = "Qwen/Qwen-Audio-Chat"
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def load_model():
|
16 |
print("Loading model and tokenizer...")
|
17 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
25 |
return model, tokenizer
|
26 |
|
27 |
def process_audio(audio_path):
    """Process an audio file into the base64 payload expected by the model.

    Reads the file at *audio_path* with ``soundfile``, downmixes stereo to
    mono, casts samples to float32, re-encodes the result as WAV in memory,
    and returns the base64-encoded audio together with its sampling rate.

    Args:
        audio_path: Path to an audio file in any format soundfile can read.

    Returns:
        ``{"audio": <base64 str>, "sampling_rate": <int>}`` on success,
        or ``None`` if reading/encoding fails (the error is printed).
    """
    import io  # local import: only needed for the in-memory WAV buffer

    try:
        print(f"Processing audio file: {audio_path}")
        # Read audio file (audio_data: ndarray, sample_rate: int).
        audio_data, sample_rate = sf.read(audio_path)

        # Convert to mono if stereo by averaging the channels.
        if len(audio_data.shape) > 1:
            audio_data = audio_data.mean(axis=1)

        # Ensure float32 format.
        audio_data = audio_data.astype(np.float32)

        # BUG FIX: the previous code did
        #   audio_bytes = sf.write(file=None, data=..., samplerate=..., format='WAV')
        # but soundfile.write returns None and requires a real path or
        # file-like object, so b64encode(None) raised TypeError and this
        # function always fell through to the except branch.  Encode the
        # WAV into an in-memory buffer instead.
        wav_buffer = io.BytesIO()
        sf.write(wav_buffer, audio_data, sample_rate, format='WAV')
        audio_base64 = base64.b64encode(wav_buffer.getvalue()).decode('utf-8')

        print(f"Audio processed successfully. Sample rate: {sample_rate}, Shape: {audio_data.shape}")
        return {
            "audio": audio_base64,
            "sampling_rate": sample_rate
        }
    except Exception as e:
        print(f"Error processing audio: {e}")
        return None
|
|
|
70 |
if not os.path.exists(audio_path):
|
71 |
return f"Audio file not found: {audio_path}"
|
72 |
|
|
|
|
|
|
|
|
|
73 |
# Process audio
|
74 |
+
audio_data = process_audio(audio_path)
|
75 |
+
if audio_data is None:
|
76 |
return "Failed to process the audio file. Please ensure it's a valid audio format."
|
77 |
|
78 |
try:
|
|
|
86 |
"content": [
|
87 |
{
|
88 |
"type": "audio",
|
89 |
+
"data": audio_data["audio"],
|
90 |
+
"sampling_rate": audio_data["sampling_rate"]
|
91 |
},
|
92 |
{
|
93 |
"type": "text",
|
94 |
+
"text": query
|
95 |
+
}
|
96 |
+
]
|
97 |
}
|
98 |
]
|
99 |
|