Spaces:

MJobe
/

document-vqa-v2

Sleeping

MJobe committed on Oct 25

Commit

e6db199

•

1 Parent(s): 76bbd8a

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -181,12 +181,11 @@ async def transcribe_and_answer(
         # Load audio using torchaudio
         waveform, sample_rate = torchaudio.load(wav_buffer)
-        # Check if waveform is in float32, otherwise convert
-        if waveform.dtype != torch.float32:
-            waveform = waveform.to(torch.float32)
         # Step 4: Transcribe the audio
-        transcription_result = nlp_speech_to_text(waveform)
         transcription_text = transcription_result['text']
         # Step 5: Parse the JSON-formatted questions
@@ -212,7 +211,6 @@ async def transcribe_and_answer(
     except Exception as e:
         return JSONResponse(content={"error": f"Error processing audio or answering questions: {str(e)}"}, status_code=500)
 # Set up CORS middleware
 origins = ["*"]  # or specify your list of allowed origins
 app.add_middleware(

         # Load audio using torchaudio
         waveform, sample_rate = torchaudio.load(wav_buffer)
+        # Convert waveform to float32 and ensure it's a numpy array
+        waveform_np = waveform.numpy().astype(np.float32)
         # Step 4: Transcribe the audio
+        transcription_result = nlp_speech_to_text(waveform_np)
         transcription_text = transcription_result['text']
         # Step 5: Parse the JSON-formatted questions
     except Exception as e:
         return JSONResponse(content={"error": f"Error processing audio or answering questions: {str(e)}"}, status_code=500)
 # Set up CORS middleware
 origins = ["*"]  # or specify your list of allowed origins
 app.add_middleware(