desiree committed on
Commit
c69cd11
·
verified ·
1 Parent(s): 0a24813

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -8,6 +8,7 @@ import os
8
  import sys
9
  from pathlib import Path
10
  import base64
 
11
 
12
  # Model and Tokenizer Loading
13
  MODEL_ID = "Qwen/Qwen-Audio-Chat"
@@ -38,9 +39,15 @@ def process_audio(audio_path):
38
  # Ensure float32 format
39
  audio_data = audio_data.astype(np.float32)
40
 
41
- # Convert to base64
42
- audio_bytes = sf.write(file=None, data=audio_data, samplerate=sample_rate, format='WAV')
43
- audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
 
 
 
 
 
 
44
 
45
  print(f"Audio processed successfully. Sample rate: {sample_rate}, Shape: {audio_data.shape}")
46
  return {
@@ -49,6 +56,8 @@ def process_audio(audio_path):
49
  }
50
  except Exception as e:
51
  print(f"Error processing audio: {e}")
 
 
52
  return None
53
 
54
  @spaces.GPU
@@ -142,7 +151,8 @@ demo = gr.Interface(
142
  gr.Audio(
143
  type="filepath",
144
  label="Audio Input",
145
- sources=["upload", "microphone"]
 
146
  ),
147
  gr.Textbox(
148
  label="Question",
 
8
  import sys
9
  from pathlib import Path
10
  import base64
11
+ from io import BytesIO
12
 
13
  # Model and Tokenizer Loading
14
  MODEL_ID = "Qwen/Qwen-Audio-Chat"
 
39
  # Ensure float32 format
40
  audio_data = audio_data.astype(np.float32)
41
 
42
+ # Create in-memory buffer
43
+ audio_buffer = BytesIO()
44
+
45
+ # Write audio to buffer in WAV format
46
+ sf.write(audio_buffer, audio_data, sample_rate, format='WAV')
47
+
48
+ # Get the buffer content and encode to base64
49
+ audio_buffer.seek(0)
50
+ audio_base64 = base64.b64encode(audio_buffer.read()).decode('utf-8')
51
 
52
  print(f"Audio processed successfully. Sample rate: {sample_rate}, Shape: {audio_data.shape}")
53
  return {
 
56
  }
57
  except Exception as e:
58
  print(f"Error processing audio: {e}")
59
+ import traceback
60
+ traceback.print_exc()
61
  return None
62
 
63
  @spaces.GPU
 
151
  gr.Audio(
152
  type="filepath",
153
  label="Audio Input",
154
+ sources=["upload", "microphone"],
155
+ format="mp3" # Specify format to ensure consistent audio format
156
  ),
157
  gr.Textbox(
158
  label="Question",