tejash300 committed on
Commit
947634e
·
verified ·
1 Parent(s): 15a9ec1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -3,8 +3,9 @@ import io
3
  import torch
4
  import uvicorn
5
  import spacy
 
6
  import pdfplumber
7
- import moviepy.editor as mp
8
  import librosa
9
  import soundfile as sf
10
  import matplotlib.pyplot as plt
@@ -240,12 +241,21 @@ def extract_text_from_pdf(pdf_file):
240
  raise HTTPException(status_code=400, detail=f"PDF extraction failed: {str(e)}")
241
 
242
  def process_video_to_text(video_file_path):
243
- """Extract audio from video and convert to text."""
244
  try:
245
  print(f"Processing video file at {video_file_path}")
246
  temp_audio_path = os.path.join("temp", "extracted_audio.wav")
247
- video = mp.VideoFileClip(video_file_path)
248
- video.audio.write_audiofile(temp_audio_path, codec='pcm_s16le')
 
 
 
 
 
 
 
 
 
249
  print(f"Audio extracted to {temp_audio_path}")
250
  result = speech_to_text(temp_audio_path)
251
  transcript = result["text"]
@@ -725,4 +735,4 @@ if __name__ == "__main__":
725
  print(f"\n✅ Your API is publicly available at: {public_url}/docs\n")
726
  else:
727
  print("\n⚠️ Ngrok setup failed. API will only be available locally.\n")
728
- run()
 
3
  import torch
4
  import uvicorn
5
  import spacy
6
+ import subprocess # Added for running ffmpeg commands
7
  import pdfplumber
8
+ # Removed: import moviepy.editor as mp
9
  import librosa
10
  import soundfile as sf
11
  import matplotlib.pyplot as plt
 
241
  raise HTTPException(status_code=400, detail=f"PDF extraction failed: {str(e)}")
242
 
243
  def process_video_to_text(video_file_path):
244
+ """Extract audio from video using ffmpeg and convert to text."""
245
  try:
246
  print(f"Processing video file at {video_file_path}")
247
  temp_audio_path = os.path.join("temp", "extracted_audio.wav")
248
+ command = [
249
+ "ffmpeg",
250
+ "-y",
251
+ "-i", video_file_path,
252
+ "-vn",
253
+ "-acodec", "pcm_s16le",
254
+ "-ar", "44100",
255
+ "-ac", "2",
256
+ temp_audio_path
257
+ ]
258
+ subprocess.run(command, check=True)
259
  print(f"Audio extracted to {temp_audio_path}")
260
  result = speech_to_text(temp_audio_path)
261
  transcript = result["text"]
 
735
  print(f"\n✅ Your API is publicly available at: {public_url}/docs\n")
736
  else:
737
  print("\n⚠️ Ngrok setup failed. API will only be available locally.\n")
738
+ run()