Spaces:
Edmond98
/
Running on A100

Afrinetwork7 committed on
Commit
5d16050
1 Parent(s): ab322d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -20
app.py CHANGED
@@ -11,6 +11,8 @@ import librosa
11
  from pathlib import Path
12
  import magic # For MIME type detection
13
  from pydub import AudioSegment
 
 
14
 
15
  # Import functions from other modules
16
  from asr import transcribe, ASR_LANGUAGES
@@ -22,6 +24,13 @@ from asr import ASR_SAMPLING_RATE
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
 
 
 
 
 
 
 
25
  app = FastAPI(title="MMS: Scaling Speech Technology to 1000+ languages")
26
 
27
  # Define request models
@@ -71,41 +80,72 @@ async def transcribe_audio(request: AudioRequest):
71
  result = transcribe(audio_array, request.language)
72
  return JSONResponse(content={"transcription": result})
73
  except Exception as e:
74
- logger.error(f"Error in transcribe_audio: {str(e)}")
75
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
76
 
77
  @app.post("/synthesize")
78
  async def synthesize_speech(request: TTSRequest):
79
  try:
80
- logger.info(f"Synthesizing speech for text: {request.text}, language: {request.language}, speed: {request.speed}")
 
 
81
  result, filtered_text = synthesize(request.text, request.language, request.speed)
82
- logger.info(f"Synthesis complete. Filtered text: {filtered_text}")
 
 
 
 
83
 
84
  sample_rate, audio = result
85
- logger.info(f"Sample rate: {sample_rate}, Audio shape: {audio.shape}, Audio dtype: {audio.dtype}")
86
-
87
- # Ensure audio is a numpy array with the correct dtype
88
  audio = np.array(audio, dtype=np.float32)
 
89
 
90
- # Normalize audio to [-1, 1] range
91
- audio = audio / np.max(np.abs(audio))
 
 
 
 
92
 
93
- # Convert to int16 for WAV file
94
  audio = (audio * 32767).astype(np.int16)
 
95
 
96
- # Convert numpy array to bytes
97
  buffer = io.BytesIO()
98
  sf.write(buffer, audio, sample_rate, format='wav')
99
  buffer.seek(0)
 
100
 
101
- return FileResponse(
 
102
  buffer,
103
  media_type="audio/wav",
104
  headers={"Content-Disposition": "attachment; filename=synthesized_audio.wav"}
105
  )
 
 
 
106
  except Exception as e:
107
- logger.error(f"Error in synthesize_speech: {str(e)}")
108
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
109
 
110
  @app.post("/identify")
111
  async def identify_language(request: AudioRequest):
@@ -115,21 +155,42 @@ async def identify_language(request: AudioRequest):
115
  result = identify(audio_array)
116
  return JSONResponse(content={"language_identification": result})
117
  except Exception as e:
118
- logger.error(f"Error in identify_language: {str(e)}")
119
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
120
 
121
  @app.get("/asr_languages")
122
  async def get_asr_languages():
123
  try:
124
  return JSONResponse(content=ASR_LANGUAGES)
125
  except Exception as e:
126
- logger.error(f"Error in get_asr_languages: {str(e)}")
127
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
128
 
129
  @app.get("/tts_languages")
130
  async def get_tts_languages():
131
  try:
132
  return JSONResponse(content=TTS_LANGUAGES)
133
  except Exception as e:
134
- logger.error(f"Error in get_tts_languages: {str(e)}")
135
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
 
11
  from pathlib import Path
12
  import magic # For MIME type detection
13
  from pydub import AudioSegment
14
+ import traceback
15
+ from logging.handlers import RotatingFileHandler
16
 
17
  # Import functions from other modules
18
  from asr import transcribe, ASR_LANGUAGES
 
24
  logging.basicConfig(level=logging.INFO)
25
  logger = logging.getLogger(__name__)
26
 
27
+ # Add a file handler
28
+ file_handler = RotatingFileHandler('app.log', maxBytes=10000000, backupCount=5)
29
+ file_handler.setLevel(logging.INFO)
30
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
31
+ file_handler.setFormatter(formatter)
32
+ logger.addHandler(file_handler)
33
+
34
  app = FastAPI(title="MMS: Scaling Speech Technology to 1000+ languages")
35
 
36
  # Define request models
 
80
  result = transcribe(audio_array, request.language)
81
  return JSONResponse(content={"transcription": result})
82
  except Exception as e:
83
+ logger.error(f"Error in transcribe_audio: {str(e)}", exc_info=True)
84
+ error_details = {
85
+ "error": str(e),
86
+ "traceback": traceback.format_exc()
87
+ }
88
+ return JSONResponse(
89
+ status_code=500,
90
+ content={"message": "An error occurred during transcription", "details": error_details}
91
+ )
92
 
93
  @app.post("/synthesize")
94
  async def synthesize_speech(request: TTSRequest):
95
  try:
96
+ logger.info(f"Synthesize request received: text='{request.text}', language='{request.language}', speed={request.speed}")
97
+
98
+ logger.info("Calling synthesize function")
99
  result, filtered_text = synthesize(request.text, request.language, request.speed)
100
+ logger.info(f"Synthesize function completed. Filtered text: '{filtered_text}'")
101
+
102
+ if result is None:
103
+ logger.error("Synthesize function returned None")
104
+ raise ValueError("Synthesis failed to produce audio")
105
 
106
  sample_rate, audio = result
107
+ logger.info(f"Synthesis result: sample_rate={sample_rate}, audio_shape={audio.shape}, audio_dtype={audio.dtype}")
108
+
109
+ logger.info("Converting audio to numpy array")
110
  audio = np.array(audio, dtype=np.float32)
111
+ logger.info(f"Converted audio shape: {audio.shape}, dtype: {audio.dtype}")
112
 
113
+ logger.info("Normalizing audio")
114
+ max_value = np.max(np.abs(audio))
115
+ if max_value == 0:
116
+ logger.warning("Audio array is all zeros")
117
+ audio = audio / max(max_value, 1e-8) # Avoid division by zero
118
+ logger.info(f"Normalized audio range: [{audio.min()}, {audio.max()}]")
119
 
120
+ logger.info("Converting to int16")
121
  audio = (audio * 32767).astype(np.int16)
122
+ logger.info(f"Int16 audio shape: {audio.shape}, dtype: {audio.dtype}")
123
 
124
+ logger.info("Writing audio to buffer")
125
  buffer = io.BytesIO()
126
  sf.write(buffer, audio, sample_rate, format='wav')
127
  buffer.seek(0)
128
+ logger.info(f"Buffer size: {buffer.getbuffer().nbytes} bytes")
129
 
130
+ logger.info("Preparing FileResponse")
131
+ response = FileResponse(
132
  buffer,
133
  media_type="audio/wav",
134
  headers={"Content-Disposition": "attachment; filename=synthesized_audio.wav"}
135
  )
136
+ logger.info("FileResponse prepared successfully")
137
+
138
+ return response
139
  except Exception as e:
140
+ logger.error(f"Error in synthesize_speech: {str(e)}", exc_info=True)
141
+ error_details = {
142
+ "error": str(e),
143
+ "traceback": traceback.format_exc()
144
+ }
145
+ return JSONResponse(
146
+ status_code=500,
147
+ content={"message": "An error occurred during speech synthesis", "details": error_details}
148
+ )
149
 
150
  @app.post("/identify")
151
  async def identify_language(request: AudioRequest):
 
155
  result = identify(audio_array)
156
  return JSONResponse(content={"language_identification": result})
157
  except Exception as e:
158
+ logger.error(f"Error in identify_language: {str(e)}", exc_info=True)
159
+ error_details = {
160
+ "error": str(e),
161
+ "traceback": traceback.format_exc()
162
+ }
163
+ return JSONResponse(
164
+ status_code=500,
165
+ content={"message": "An error occurred during language identification", "details": error_details}
166
+ )
167
 
168
  @app.get("/asr_languages")
169
  async def get_asr_languages():
170
  try:
171
  return JSONResponse(content=ASR_LANGUAGES)
172
  except Exception as e:
173
+ logger.error(f"Error in get_asr_languages: {str(e)}", exc_info=True)
174
+ error_details = {
175
+ "error": str(e),
176
+ "traceback": traceback.format_exc()
177
+ }
178
+ return JSONResponse(
179
+ status_code=500,
180
+ content={"message": "An error occurred while fetching ASR languages", "details": error_details}
181
+ )
182
 
183
  @app.get("/tts_languages")
184
  async def get_tts_languages():
185
  try:
186
  return JSONResponse(content=TTS_LANGUAGES)
187
  except Exception as e:
188
+ logger.error(f"Error in get_tts_languages: {str(e)}", exc_info=True)
189
+ error_details = {
190
+ "error": str(e),
191
+ "traceback": traceback.format_exc()
192
+ }
193
+ return JSONResponse(
194
+ status_code=500,
195
+ content={"message": "An error occurred while fetching TTS languages", "details": error_details}
196
+ )