gperdrizet committed on
Commit
8ce33a6
·
verified ·
1 Parent(s): 1b00802

Added audio resampling with SciPy to match the default AgentAudio sample rate.

Browse files
Files changed (2) hide show
  1. app.py +7 -2
  2. requirements.txt +2 -2
app.py CHANGED
@@ -6,6 +6,7 @@ import pytz
6
  import yaml
7
  import torch
8
  import soundfile as sf
 
9
  from huggingface_hub import InferenceClient
10
  from tools.final_answer import FinalAnswerTool
11
  from tools.visit_webpage import VisitWebpageTool
@@ -31,9 +32,13 @@ class TextToSpeechTool(Tool):
31
  model="ResembleAI/chatterbox",
32
  )
33
 
34
- audio, _ = sf.read(io.BytesIO(output))
35
 
36
- return torch.from_numpy(audio)
 
 
 
 
37
 
38
 
39
  @tool
 
6
  import yaml
7
  import torch
8
  import soundfile as sf
9
+ from scipy import signal
10
  from huggingface_hub import InferenceClient
11
  from tools.final_answer import FinalAnswerTool
12
  from tools.visit_webpage import VisitWebpageTool
 
32
  model="ResembleAI/chatterbox",
33
  )
34
 
35
+ audio, samplerate = sf.read(io.BytesIO(output))
36
 
37
+ new_samplerate = 16_000
38
+ num_samples = int(len(audio) * new_samplerate / samplerate)
39
+ resampled_audio = signal.resample(audio, num_samples)
40
+
41
+ return torch.from_numpy(resampled_audio)
42
 
43
 
44
  @tool
requirements.txt CHANGED
@@ -4,5 +4,5 @@ requests
4
  duckduckgo_search
5
  pandas
6
  gradio==5.23.1
7
- transformers
8
- soundfile
 
4
  duckduckgo_search
5
  pandas
6
  gradio==5.23.1
7
+ soundfile
8
+ scipy