Spaces:

tejash300
/

docanalyzer

Runtime error

tejash300 committed on Mar 31

Commit

947634e

verified ·

1 Parent(s): 15a9ec1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,8 +3,9 @@ import io
 import torch
 import uvicorn
 import spacy
 import pdfplumber
-import moviepy.editor as mp
 import librosa
 import soundfile as sf
 import matplotlib.pyplot as plt
@@ -240,12 +241,21 @@ def extract_text_from_pdf(pdf_file):
         raise HTTPException(status_code=400, detail=f"PDF extraction failed: {str(e)}")
 def process_video_to_text(video_file_path):
-    """Extract audio from video and convert to text."""
     try:
         print(f"Processing video file at {video_file_path}")
         temp_audio_path = os.path.join("temp", "extracted_audio.wav")
-        video = mp.VideoFileClip(video_file_path)
-        video.audio.write_audiofile(temp_audio_path, codec='pcm_s16le')
         print(f"Audio extracted to {temp_audio_path}")
         result = speech_to_text(temp_audio_path)
         transcript = result["text"]
@@ -725,4 +735,4 @@ if __name__ == "__main__":
         print(f"\n✅ Your API is publicly available at: {public_url}/docs\n")
     else:
         print("\n⚠️ Ngrok setup failed. API will only be available locally.\n")
-    run()

 import torch
 import uvicorn
 import spacy
+import subprocess  # Added for running ffmpeg commands
 import pdfplumber
+# Removed: import moviepy.editor as mp
 import librosa
 import soundfile as sf
 import matplotlib.pyplot as plt
         raise HTTPException(status_code=400, detail=f"PDF extraction failed: {str(e)}")
 def process_video_to_text(video_file_path):
+    """Extract audio from video using ffmpeg and convert to text."""
     try:
         print(f"Processing video file at {video_file_path}")
         temp_audio_path = os.path.join("temp", "extracted_audio.wav")
+        command = [
+            "ffmpeg",
+            "-y",
+            "-i", video_file_path,
+            "-vn",
+            "-acodec", "pcm_s16le",
+            "-ar", "44100",
+            "-ac", "2",
+            temp_audio_path
+        ]
+        subprocess.run(command, check=True)
         print(f"Audio extracted to {temp_audio_path}")
         result = speech_to_text(temp_audio_path)
         transcript = result["text"]
         print(f"\n✅ Your API is publicly available at: {public_url}/docs\n")
     else:
         print("\n⚠️ Ngrok setup failed. API will only be available locally.\n")
+    run()