Spaces:

GoodML
/

Dish-Decode-2

Running

App Files Files Community

GoodML committed on Dec 14, 2024

Commit

dd04276

verified ·

1 Parent(s): a42b144

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -5

app.py CHANGED Viewed

@@ -1,13 +1,10 @@
 import os
-import whisper
 import requests
 from flask import Flask, request, jsonify, render_template
 from dotenv import load_dotenv
-from deepgram import DeepgramClient, PrerecordedOptions
 import tempfile
 import json
 import subprocess
-from youtube_transcript_api import YouTubeTranscriptApi
 import warnings
@@ -53,6 +50,50 @@ def download_audio(url, temp_video_path):
         raise Exception(f"Failed to download audio, status code: {response.status_code}")
 @app.route('/process-video', methods=['POST'])
 def process_video():
@@ -67,9 +108,9 @@ def process_video():
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
             temp_video_path = temp_video_file.name
             download_audio(video_url, temp_video_path)
         # Step 2: get the information from the downloaded MP4 file synchronously
-        video_info = get_information_from_video_using_OCR(temp_video_path)
         if not video_info:
             return jsonify({"error": "video information extraction failed"}), 500

 import os
 import requests
 from flask import Flask, request, jsonify, render_template
 from dotenv import load_dotenv
 import tempfile
 import json
 import subprocess
 import warnings
         raise Exception(f"Failed to download audio, status code: {response.status_code}")
+def preprocess_frame(frame):
+    """Return a binarized grayscale version of ``frame`` prepared for OCR.
+
+    The frame is converted from BGR to grayscale, smoothed with a median
+    blur of aperture size 3, and thresholded to a 0/255 image using Otsu's
+    method.
+    """
+    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+    smoothed = cv2.medianBlur(grayscale, 3)
+    binarize_flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
+    # cv2.threshold returns (threshold_used, image); only the image is needed.
+    return cv2.threshold(smoothed, 0, 255, binarize_flags)[1]
+def clean_ocr_text(text):
+    """Strip OCR noise from ``text`` and return the surviving lines.
+
+    Every character other than ASCII letters, digits, whitespace and the
+    punctuation ``, . ! ? -`` is removed. Each remaining line is trimmed, and
+    lines of two characters or fewer are dropped. The kept lines are joined
+    with newlines.
+    """
+    filtered = re.sub(r'[^A-Za-z0-9\s,.!?-]', '', text)
+    kept_lines = []
+    for raw_line in filtered.splitlines():
+        candidate = raw_line.strip()
+        if len(candidate) <= 2:
+            # Very short fragments are treated as OCR noise.
+            continue
+        kept_lines.append(candidate)
+    return '\n'.join(kept_lines)
+def get_information_from_video_using_OCR(video_path, interval=1):
+    """Extract text from video frames using OCR and return the combined text.
+
+    Frames are sampled roughly every ``interval`` seconds (based on the FPS
+    reported by the video), preprocessed with ``preprocess_frame``, passed to
+    Tesseract, and cleaned with ``clean_ocr_text``.
+
+    Args:
+        video_path: Path of the video file to read.
+        interval: Sampling period in seconds between OCR'd frames.
+
+    Returns:
+        The cleaned text of every sampled frame that produced any, each chunk
+        followed by a blank line. An empty string if nothing was recognized
+        or the video could not be opened.
+    """
+    cap = cv2.VideoCapture(video_path)
+    text_chunks = []
+    try:
+        fps = int(cap.get(cv2.CAP_PROP_FPS))
+        # The reported FPS can be 0 (missing metadata) and interval may be 0;
+        # clamp the step to 1 so the modulo below never divides by zero.
+        frame_interval = max(1, int(interval * fps))
+        frame_count = 0
+        print("Starting text extraction from video...")
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if not ret:
+                break
+            if frame_count % frame_interval == 0:
+                preprocessed_frame = preprocess_frame(frame)
+                text = pytesseract.image_to_string(preprocessed_frame, lang='eng', config='--psm 6 --oem 3')
+                cleaned_text = clean_ocr_text(text)
+                if cleaned_text:
+                    text_chunks.append(cleaned_text)
+                    print(f"Text found at frame {frame_count}: {cleaned_text[:50]}...")
+            frame_count += 1
+    finally:
+        # Release the capture handle even if preprocessing or OCR raises.
+        cap.release()
+    print("Text extraction completed.")
+    return "".join(f"{chunk}\n\n" for chunk in text_chunks)
 @app.route('/process-video', methods=['POST'])
 def process_video():
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
             temp_video_path = temp_video_file.name
             download_audio(video_url, temp_video_path)
+        interval = 1
         # Step 2: get the information from the downloaded MP4 file synchronously
+        video_info = get_information_from_video_using_OCR(temp_video_path, interval)
         if not video_info:
             return jsonify({"error": "video information extraction failed"}), 500