yasserrmd committed on
Commit
7db8704
·
verified ·
1 Parent(s): f7390ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -7,6 +7,7 @@ import os
7
  from pydantic import BaseModel
8
  from groq import Groq
9
  import io
 
10
 
11
  app = FastAPI()
12
 
@@ -46,13 +47,23 @@ async def speech_to_text(file: UploadFile = File(...)):
46
 
47
  @app.post("/tts/")
48
  def text_to_speech(input_text: TextInput):
49
- instance = ggwave.init()
50
- print("input_text:-")
51
- print(input_text)
52
  """Convert text to a WAV audio file using ggwave and return as response."""
53
- encoded_waveform = ggwave.encode(instance, input_text.text)
 
 
 
 
 
 
 
 
54
  buffer = io.BytesIO()
55
- wav.write(buffer, 44100, np.frombuffer(encoded_waveform, dtype=np.uint8))
 
 
 
 
 
56
  buffer.seek(0)
57
  return Response(content=buffer.getvalue(), media_type="audio/wav")
58
 
 
7
  from pydantic import BaseModel
8
  from groq import Groq
9
  import io
10
+ import wave
11
 
12
  app = FastAPI()
13
 
 
47
 
48
  @app.post("/tts/")
49
  def text_to_speech(input_text: TextInput):
 
 
 
50
  """Convert text to a WAV audio file using ggwave and return as response."""
51
+ encoded_waveform = ggwave.encode(instance, input_text.text.encode('utf-8'), protocolId=1, volume=100)
52
+
53
+ # Convert byte data into float32 array
54
+ waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
55
+
56
+ # Normalize float32 data to the range of int16
57
+ waveform_int16 = np.int16(waveform_float32 * 32767)
58
+
59
+ # Save to buffer instead of a file
60
  buffer = io.BytesIO()
61
+ with wave.open(buffer, "wb") as wf:
62
+ wf.setnchannels(1) # Mono audio
63
+ wf.setsampwidth(2) # 2 bytes per sample (16-bit PCM)
64
+ wf.setframerate(48000) # Sample rate
65
+ wf.writeframes(waveform_int16.tobytes()) # Write waveform as bytes
66
+
67
  buffer.seek(0)
68
  return Response(content=buffer.getvalue(), media_type="audio/wav")
69