Spaces:
Edmond98
/
Running on TPU v5e

Edmond7 committed on
Commit
cd03801
1 Parent(s): 2300584

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -11
app.py CHANGED
@@ -3,10 +3,11 @@ import math
3
  import time
4
  import base64
5
  import io
 
6
  from typing import Dict, Any
7
  from functools import wraps
8
 
9
- from fastapi import FastAPI, Depends, HTTPException
10
  from fastapi.encoders import jsonable_encoder
11
  from pydantic import BaseModel
12
  import jax.numpy as jnp
@@ -50,7 +51,7 @@ compile_time = time.time() - start
50
  logger.debug(f"Compiled in {compile_time}s")
51
 
52
  class TranscribeAudioRequest(BaseModel):
53
- audio_base64: str
54
  task: str = "transcribe"
55
  return_timestamps: bool = False
56
 
@@ -68,23 +69,39 @@ def timeit(func):
68
  return result
69
  return wrapper
70
 
 
 
 
 
 
 
71
  @app.post("/transcribe_audio")
72
  @timeit
73
  async def transcribe_chunked_audio(
74
- request: TranscribeAudioRequest
 
 
75
  ) -> Dict[str, Any]:
76
  logger.debug("Starting transcribe_chunked_audio function")
77
- logger.debug(f"Received parameters - task: {request.task}, return_timestamps: {request.return_timestamps}")
78
 
79
  try:
80
- # Decode base64 audio data
81
- audio_data = base64.b64decode(request.audio_base64)
82
- file_size = len(audio_data)
 
 
 
 
 
 
 
 
83
  file_size_mb = file_size / (1024 * 1024)
84
- logger.debug(f"Decoded audio data size: {file_size} bytes ({file_size_mb:.2f}MB)")
85
  except Exception as e:
86
- logger.error(f"Error decoding base64 audio data: {str(e)}", exc_info=True)
87
- raise HTTPException(status_code=400, detail=f"Error decoding base64 audio data: {str(e)}")
88
 
89
  if file_size_mb > FILE_LIMIT_MB:
90
  logger.warning(f"Max file size exceeded: {file_size_mb:.2f}MB > {FILE_LIMIT_MB}MB")
@@ -101,7 +118,9 @@ async def transcribe_chunked_audio(
101
 
102
  logger.debug("Calling tqdm_generate to transcribe audio")
103
  try:
104
- text, runtime, timing_info = tqdm_generate(inputs, task=request.task, return_timestamps=request.return_timestamps)
 
 
105
  logger.debug(f"Transcription completed. Runtime: {runtime:.2f}s")
106
  except Exception as e:
107
  logger.error(f"Error in tqdm_generate: {str(e)}", exc_info=True)
 
3
  import time
4
  import base64
5
  import io
6
+ import os
7
  from typing import Dict, Any
8
  from functools import wraps
9
 
10
+ from fastapi import FastAPI, Depends, HTTPException, File, UploadFile
11
  from fastapi.encoders import jsonable_encoder
12
  from pydantic import BaseModel
13
  import jax.numpy as jnp
 
51
  logger.debug(f"Compiled in {compile_time}s")
52
 
53
  class TranscribeAudioRequest(BaseModel):
54
+ audio_base64: str = None
55
  task: str = "transcribe"
56
  return_timestamps: bool = False
57
 
 
69
  return result
70
  return wrapper
71
 
72
+ def check_api_key():
73
+ api_key = os.environ.get("WHISPER_API_KEY")
74
+ if not api_key:
75
+ raise HTTPException(status_code=401, detail="API key not found in environment variables")
76
+ return api_key
77
+
78
  @app.post("/transcribe_audio")
79
  @timeit
80
  async def transcribe_chunked_audio(
81
+ request: TranscribeAudioRequest = None,
82
+ file: UploadFile = File(None),
83
+ api_key: str = Depends(check_api_key)
84
  ) -> Dict[str, Any]:
85
  logger.debug("Starting transcribe_chunked_audio function")
86
+ logger.debug(f"Received parameters - task: {request.task if request else 'transcribe'}, return_timestamps: {request.return_timestamps if request else False}")
87
 
88
  try:
89
+ if file:
90
+ logger.debug("Processing uploaded file")
91
+ audio_data = await file.read()
92
+ file_size = len(audio_data)
93
+ elif request and request.audio_base64:
94
+ logger.debug("Processing base64 encoded audio")
95
+ audio_data = base64.b64decode(request.audio_base64)
96
+ file_size = len(audio_data)
97
+ else:
98
+ raise HTTPException(status_code=400, detail="No audio data provided")
99
+
100
  file_size_mb = file_size / (1024 * 1024)
101
+ logger.debug(f"Audio data size: {file_size} bytes ({file_size_mb:.2f}MB)")
102
  except Exception as e:
103
+ logger.error(f"Error processing audio data: {str(e)}", exc_info=True)
104
+ raise HTTPException(status_code=400, detail=f"Error processing audio data: {str(e)}")
105
 
106
  if file_size_mb > FILE_LIMIT_MB:
107
  logger.warning(f"Max file size exceeded: {file_size_mb:.2f}MB > {FILE_LIMIT_MB}MB")
 
118
 
119
  logger.debug("Calling tqdm_generate to transcribe audio")
120
  try:
121
+ task = request.task if request else "transcribe"
122
+ return_timestamps = request.return_timestamps if request else False
123
+ text, runtime, timing_info = tqdm_generate(inputs, task=task, return_timestamps=return_timestamps)
124
  logger.debug(f"Transcription completed. Runtime: {runtime:.2f}s")
125
  except Exception as e:
126
  logger.error(f"Error in tqdm_generate: {str(e)}", exc_info=True)