Fix processing of inputs supplied as a link or as audio bytes

Browse files

Files changed (8) hide show

downloader_manager.py +27 -22
file_processor.py +43 -0
flie_processor.py +0 -23
handler.py +58 -8
mediaoutput.py +7 -4
requirements.txt +0 -0
sorter.py +1 -10
video_getter.py +119 -0

downloader_manager.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import logging
-import os
 import tempfile
 from io import BytesIO
@@ -8,43 +7,49 @@ from moviepy.editor import VideoFileClip
 from tqdm import tqdm
 def download_mp4_and_extract_audio(link: str):
-    """Download an MP4 file from a given link and return the video and audio content as bytes."""
     logging.info("Starting the download of the MP4 file...")
     try:
         r = requests.get(link, stream=True)
         r.raise_for_status()
         total_size = int(r.headers.get('content-length', 0))
-        video_content = BytesIO()
         with tqdm(total=total_size, unit='B', unit_scale=True, desc="Downloading...") as bar:
             for data in r.iter_content(chunk_size=1024):
                 bar.update(len(data))
                 video_content.write(data)
-            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video_file:
-                temp_video_file.write(video_content.getvalue())
-                temp_video_file_path = temp_video_file.name
-            logging.info("Extracting audio from video...")
-            with VideoFileClip(temp_video_file_path) as video:
-                audio = video.audio
-                with tempfile.NamedTemporaryFile(suffix=".aac", delete=False) as temp_audio_file:
-                    audio.write_audiofile(temp_audio_file.name, codec='aac')
-                    temp_audio_file_path = temp_audio_file.name
-                with open(temp_audio_file_path, 'rb') as f:
-                    audio_content = BytesIO(f.read())
-            os.remove(temp_video_file_path)
-            os.remove(temp_audio_file_path)
-            logging.info("Download and audio extraction completed")
-            return video_content.getvalue(), audio_content.getvalue()
-    except requests.exceptions.HTTPError as e:
-        logging.error(f"HTTP Error: {e}")
     except Exception as e:
         logging.error(f"Failed to download MP4 and extract audio: {e}")
         return None, None

 import logging
 import tempfile
 from io import BytesIO
 from tqdm import tqdm
def extract_audio(video_path):
    """Extract the audio track of a video file into a sibling MP3 file.

    The audio file is written next to the video, using the video's path with
    its final extension replaced by ``.mp3``.

    :param video_path: path of the video file to read.
    :return: path of the written MP3 file, or ``None`` if the video has no
        audio track or the extraction failed (the error is logged).
    """
    import os  # local import: this module does not import os at top level

    # Swap only the trailing extension. A blanket str.replace('.mp4', '.mp3')
    # would also rewrite a '.mp4' occurring earlier in the path and, for a
    # path without '.mp4', would make the output overwrite the source video.
    audio_path = os.path.splitext(video_path)[0] + '.mp3'
    try:
        with VideoFileClip(video_path) as video:
            audio = video.audio
            if audio is None:
                logging.error("Failed to extract audio: video has no audio track")
                return None
            audio.write_audiofile(audio_path, codec='mp3')
        return audio_path
    except Exception as e:
        logging.error(f"Failed to extract audio: {e}")
        return None


def download_mp4_and_extract_audio(link: str):
    """Download an MP4 file from a given link and return the path to the video and audio files.

    The response is streamed straight into a temporary ``.mp4`` file instead
    of being buffered in memory first, then the audio is extracted with
    :func:`extract_audio`.

    :param link: URL of the MP4 file.
    :return: ``(video_path, audio_path)``. ``audio_path`` is ``None`` when the
        download succeeded but audio extraction failed. ``(None, None)`` is
        returned when the download itself failed; in that case any partially
        written temporary video file is removed. The caller owns the returned
        files and is responsible for deleting them.
    """
    import os  # local import: this module does not import os at top level

    logging.info("Starting the download of the MP4 file...")
    temp_video_file_path = None
    try:
        # (connect, read) timeouts so a stalled server cannot hang the caller
        # forever; the read timeout applies per socket read, not to the whole
        # download. The context manager returns the connection to the pool.
        with requests.get(link, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()
            total_size = int(r.headers.get('content-length', 0))
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video_file:
                temp_video_file_path = temp_video_file.name
                with tqdm(total=total_size, unit='B', unit_scale=True, desc="Downloading...") as bar:
                    for data in r.iter_content(chunk_size=64 * 1024):
                        temp_video_file.write(data)
                        bar.update(len(data))
        logging.info("Starting audio extraction...")
        audio_path = extract_audio(temp_video_file_path)
        if audio_path is not None:
            logging.info("Download and audio extraction completed")
        return temp_video_file_path, audio_path
    except requests.exceptions.RequestException as e:
        logging.error(f"Request Error: {e}")
    except Exception as e:
        logging.error(f"Failed to download MP4 and extract audio: {e}")

    # Failure path: do not leave a partial download behind.
    if temp_video_file_path is not None:
        try:
            os.remove(temp_video_file_path)
        except OSError:
            pass
    return None, None

file_processor.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import argparse
+import logging
+import os
+import sources
+from detector import Detector
+from downloader_manager import download_mp4_and_extract_audio
+from sorter import SlideSorter
def process_video(link):
    """Download a video, detect and sort its slides, and return them with the audio path.

    :param link: URL handed to ``download_mp4_and_extract_audio``.
    :return: ``(sorted_slides, audio_path)`` on success, ``(None, None)`` on
        any failure (the error is logged). On success the caller owns the
        file at ``audio_path``; the temporary video file is always removed
        before returning.
    """
    temp_video_file_path = None
    audio_path = None
    try:
        temp_video_file_path, audio_path = download_mp4_and_extract_audio(link)
        if temp_video_file_path is None:
            # The downloader already logged the cause; do not hand None to
            # the detector just to have it fail.
            logging.error("Failed to execute process_video: download failed")
            return None, None
        detector = Detector(temp_video_file_path)
        detected_slides = detector.detect_slides()
        sorter = SlideSorter(sources.ListSource(detected_slides))
        sorted_slides = sorter.sort()
        return sorted_slides, audio_path
    except Exception as e:
        logging.exception("Failed to execute process_video: %s", e)
        # The caller never learns the audio path on failure, so remove the
        # file here instead of leaking it.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None, None
    finally:
        # The video is only needed for slide detection; remove it on every
        # path, not just on success.
        if temp_video_file_path is not None:
            try:
                os.remove(temp_video_file_path)
            except OSError:
                pass
if __name__ == '__main__':
    # Command-line entry point: process the video behind --link and report
    # whether both the slides and the audio path came back.
    cli = argparse.ArgumentParser(description="File Processor")
    cli.add_argument("-p", "--link")
    options = cli.parse_args()
    try:
        slides, path = process_video(options.link)
        succeeded = slides is not None and path is not None
        print("Video processed successfully." if succeeded else "Failed to process the video.")
    except Exception as e:
        logging.exception("An error occurred in main: %s", e)

flie_processor.py DELETED Viewed

@@ -1,23 +0,0 @@
-import logging
-import tempfile
-import sources
-from detector import Detector
-from downloader_manager import download_mp4_and_extract_audio
-from sorter import SlideSorter
-def process_video(link):
-    try:
-        video_bytes, audio_bytes = download_mp4_and_extract_audio(link)
-        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
-            temp_video.write(video_bytes)
-            temp_video_path = temp_video.name
-        detector = Detector(temp_video_path)
-        sorter = SlideSorter(sources.ListSource(detector.detect_slides()), outpath="sorted_slides/")
-        slides = sorter.sort()
-        return slides, audio_bytes
-    except Exception as e:
-        logging.exception("Failed to execute sorter: %s", e)

handler.py CHANGED Viewed

@@ -1,12 +1,30 @@
 import base64
 import io
 import logging
 from faster_whisper import WhisperModel
-from flie_processor import process_video
-logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 class EndpointHandler:
@@ -15,16 +33,15 @@ class EndpointHandler:
     def __call__(self, data: dict[str, str]):
         inputs = data.pop("inputs")
-        link = data.pop("link")
         language = data.pop("language", "de")
         task = data.pop("task", "transcribe")
-        processing_type = data.pop("type", "audio")
         response = {}
-        if processing_type == "link":
-            slides, audio_bytes = process_video(link)
             slides_list = [slide.to_dict() for slide in slides]
             response.update({"slides": slides_list})
         else:
@@ -32,7 +49,6 @@ class EndpointHandler:
             logging.debug(f"Decoded Bytes Length: {len(audio_bytes_decoded)}")
             audio_bytes = io.BytesIO(audio_bytes_decoded)
-        # run inference pipeline
         logging.info("Running inference...")
         segments, info = self.model.transcribe(audio_bytes, language=language, task=task)
@@ -52,4 +68,38 @@ class EndpointHandler:
         response.update({"audios": full_text})
         logging.debug(response)
         return response

+import argparse
 import base64
 import io
 import logging
+import os
 from faster_whisper import WhisperModel
+from pydub import AudioSegment
+from file_processor import process_video
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def is_cdn_link(link_or_bytes):
    """Return True when the input is an HTTP(S) URL rather than audio data.

    Only a string starting with ``http://`` or ``https://`` (case-insensitive,
    surrounding whitespace ignored) counts as a link. Raw bytes, base64 text,
    local file paths and ``None`` are all reported as "not a link" so they are
    not sent to the downloader.

    :param link_or_bytes: the request's ``inputs`` value.
    :return: ``True`` for an HTTP(S) URL string, ``False`` otherwise.
    """
    logging.info("Checking if the provided link is a CDN link...")
    if not isinstance(link_or_bytes, str):
        return False
    return link_or_bytes.strip().lower().startswith(("http://", "https://"))
def get_audio_bytes(audio_path):
    """Load an audio file into an in-memory binary buffer.

    The file is read as-is instead of being decoded and re-encoded to MP3:
    re-encoding an MP3 is lossy, slow, and keeps the whole decoded signal in
    memory only to produce an equivalent stream.

    NOTE(review): this assumes the consumer of the buffer can decode the
    file's own container/codec; confirm if non-MP3 files are ever passed in.

    :param audio_path: path of the audio file to load.
    :return: ``io.BytesIO`` holding the file's bytes, positioned at offset 0.
    :raises OSError: if the file cannot be opened or read.
    """
    with open(audio_path, 'rb') as audio_file:
        return io.BytesIO(audio_file.read())
 class EndpointHandler:
     def __call__(self, data: dict[str, str]):
         inputs = data.pop("inputs")
         language = data.pop("language", "de")
         task = data.pop("task", "transcribe")
         response = {}
+        audio_path = None
+        if is_cdn_link(inputs):
+            slides, audio_path = process_video(inputs)
+            audio_bytes = get_audio_bytes(audio_path)
             slides_list = [slide.to_dict() for slide in slides]
             response.update({"slides": slides_list})
         else:
             logging.debug(f"Decoded Bytes Length: {len(audio_bytes_decoded)}")
             audio_bytes = io.BytesIO(audio_bytes_decoded)
         logging.info("Running inference...")
         segments, info = self.model.transcribe(audio_bytes, language=language, task=task)
         response.update({"audios": full_text})
         logging.debug(response)
+        if audio_path:
+            os.remove(audio_path)
         return response
if __name__ == '__main__':
    # Manual smoke test: build a request from the command line and run it
    # through the handler. --path is either a link (passed through as-is) or
    # a local MP3 file (re-exported to MP3 and base64-encoded).
    cli = argparse.ArgumentParser(description="EndpointHandler")
    cli.add_argument("-p", "--path")
    cli.add_argument("-l", "--language", default="de")
    cli.add_argument("-t", "--task", default="transcribe")
    cli.add_argument("--type", default="video")
    options = cli.parse_args()
    endpoint = EndpointHandler()

    request_inputs = options.path
    if not is_cdn_link(request_inputs):
        encoded = io.BytesIO()
        AudioSegment.from_mp3(request_inputs).export(encoded, format="mp3")
        request_inputs = base64.b64encode(encoded.getvalue())

    request = {
        "inputs": request_inputs,
        "language": options.language,
        "task": options.task,
    }
    print(endpoint(request))

mediaoutput.py CHANGED Viewed

@@ -1,9 +1,10 @@
-from abc import ABCMeta, abstractmethod
 import datetime
-import cv2
 import math
 import os
-import errno
 class MediaWriter(object):
@@ -70,6 +71,7 @@ class CustomImageWriter(ImageWriter):
     Image Writer that uses a custom name. It takes it as the first
     argument in *args in the write method.
     """
     def __init__(self, prefix=None, file_format='.jpg'):
         """
         Default initializer
@@ -144,6 +146,7 @@ class TimetableWriter(MediaWriter):
     the IncrementalImageWriter. Additionally it outputs a ".txt"
     document containing the slide name and their appearances.
     """
     def __init__(self, output_dir, timetable_loc, file_format):
         """
         Default initializer
@@ -167,7 +170,6 @@ class TimetableWriter(MediaWriter):
             self.txt_writer.write("Slide %d: %s\n" % (i, appearances))
             i += 1
     def close(self):
         self.timetable.close()
@@ -179,6 +181,7 @@ class TextWriter(MediaWriter):
     def write(self, content, *args):
         self.output_file.write(content)
 def setup_dirs(path):
     """
     Takes a path and makes sure that directories to the path

 import datetime
+import errno
 import math
 import os
+from abc import ABCMeta, abstractmethod
+import cv2
 class MediaWriter(object):
     Image Writer that uses a custom name. It takes it as the first
     argument in *args in the write method.
     """
     def __init__(self, prefix=None, file_format='.jpg'):
         """
         Default initializer
     the IncrementalImageWriter. Additionally it outputs a ".txt"
     document containing the slide name and their appearances.
     """
     def __init__(self, output_dir, timetable_loc, file_format):
         """
         Default initializer
             self.txt_writer.write("Slide %d: %s\n" % (i, appearances))
             i += 1
     def close(self):
         self.timetable.close()
     def write(self, content, *args):
         self.output_file.write(content)
 def setup_dirs(path):
     """
     Takes a path and makes sure that directories to the path

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ

sorter.py CHANGED Viewed

@@ -16,8 +16,7 @@ class SlideSorter(Analyzer):
     Sorts the slides according to their timestamp.
     """
-    def __init__(self, source, outpath=None, timetable_loc=None, file_format=".png",
-                 comparator=ic.AbsDiffHistComparator(0.98)):
         """
         Default initializer
         :param path: the path where the slides are located on disk
@@ -25,12 +24,6 @@ class SlideSorter(Analyzer):
         are duplicates.
         """
         self.comparator = comparator
-        self.writer = mediaoutput.NullWriter()
-        if outpath is not None:
-            if timetable_loc is None:
-                timetable_loc = os.path.join(outpath, 'timetable.txt')
-            self.file_format = file_format
-            self.writer = mediaoutput.TimetableWriter(outpath, timetable_loc, self.file_format)
         self.source = source
     def sort(self):
@@ -81,8 +74,6 @@ class SlideSorter(Analyzer):
                 sorted_slides.append(slide)
                 page_counter += 1
             loop_counter += 1
-        self.writer.write(sorted_slides)
-        self.writer.close()
     def analyze(self):
         for _, slide in self.group_slides():

     Sorts the slides according to their timestamp.
     """
+    def __init__(self, source, comparator=ic.AbsDiffHistComparator(0.98)):
         """
         Default initializer
         :param path: the path where the slides are located on disk
         are duplicates.
         """
         self.comparator = comparator
         self.source = source
     def sort(self):
                 sorted_slides.append(slide)
                 page_counter += 1
             loop_counter += 1
     def analyze(self):
         for _, slide in self.group_slides():

video_getter.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import logging
+import multiprocessing
+import threading
+import time
+import cv2
+import imgcomparison
+from detector import InfiniteCounter
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
class VideoGet:
    """Split a video into frame ranges and scan each range on its own thread.

    Each thread opens its own ``cv2.VideoCapture`` on the source, seeks to the
    start of its range and appends the list of frames it collected to
    ``self.results`` (guarded by ``self.results_lock``). Results are appended
    in thread-completion order, not in segment order.
    """

    def __init__(self, src, segments=multiprocessing.cpu_count()):
        """
        :param src: video source handed to ``cv2.VideoCapture``.
        :param segments: desired number of frame ranges; values below 1 are
            treated as 1. Because the range length is rounded down, the
            actual number of ranges can be higher than requested.
        """
        self.device = src
        self.results = []
        self.results_lock = threading.Lock()
        # Open a capture only to read the frame count, and release it again
        # instead of leaving the handle open for the object's lifetime.
        probe = cv2.VideoCapture(src)
        try:
            self.total_frames = int(probe.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            probe.release()
        # Never let the range length drop to 0 (fewer frames than segments),
        # which would make range() raise, and never divide by zero.
        self.segment_length = max(1, self.total_frames // max(1, segments))
        self.segments = self._split_into_segments()
        self.threads = []
        self.comparator = imgcomparison.AbsDiffHistComparator(0.99)
        logging.info(f"VideoGet initialized with {segments} segments")

    def _split_into_segments(self):
        """Return ``(start_frame, end_frame)`` pairs covering ``[0, total_frames)``.

        ``end_frame`` is exclusive and clamped to ``total_frames``. An empty
        list is returned when there are no frames.
        """
        step = max(1, self.segment_length)
        segments = [
            (start_frame, min(start_frame + step, self.total_frames))
            for start_frame in range(0, self.total_frames, step)
        ]
        logging.info(f"Video split into {len(segments)} segments")
        return segments

    def start(self):
        """Start one worker thread per segment."""
        logging.info("Starting video processing")
        for segment in self.segments:
            thread = threading.Thread(target=self.process_segment, args=(segment,))
            thread.start()
            self.threads.append(thread)

    def check_transition(self, local_stream):
        """Yield ``(frame_count, frame)`` pairs while reading ``local_stream``.

        The first frame is yielded as ``(0, frame)``. After that, a pair with
        a frame is yielded when the comparator reports a change followed by
        two consecutive frames that compare as the same, and ``(frame_count,
        None)`` is yielded after every frame read by the outer loop. The
        generator ends when the stream returns no frame.

        NOTE(review): frame_count comes from ``InfiniteCounter`` and is
        presumably relative to where the stream was positioned, not an
        absolute frame index; confirm against ``detector.InfiniteCounter``.
        """
        _, prev_frame = local_stream.read()
        if prev_frame is None:
            logging.warning(f"{threading.current_thread().name} | Initial frame is None")
            return
        yield 0, prev_frame
        frame_counter = InfiniteCounter()
        for frame_count in frame_counter.count():
            _, frame = local_stream.read()
            if frame is None:
                logging.info(f"{threading.current_thread().name} | End of segment reached")
                break
            elif not self.comparator.are_same(prev_frame, frame):
                logging.info(f"{threading.current_thread().name} | Transition detected at frame {frame_count}")
                # Skip ahead until two consecutive frames compare as the same.
                while True:
                    if self.comparator.are_same(prev_frame, frame):
                        break
                    prev_frame = frame
                    _, frame = local_stream.read()
                    frame_counter.increment()
                    if frame is None:
                        # Stream ended mid-transition: stop instead of
                        # handing a None frame to the comparator.
                        logging.info(f"{threading.current_thread().name} | End of segment reached")
                        return
                yield frame_count, frame
            prev_frame = frame
            yield frame_count, None

    def process_segment(self, segment):
        """Collect frames for one ``(start_frame, end_frame)`` range.

        Opens a private capture, seeks to ``start_frame``, collects frames
        into a list and appends that list to ``self.results`` under the lock.

        NOTE(review): this method and ``check_transition`` both call
        ``read()`` on the same capture, so frames consumed by one are not
        seen by the other; confirm that this interleaving is intended.
        """
        start_frame, end_frame = segment
        logging.info(f"{threading.current_thread().name} | Processing segment: Start frame {start_frame}, End frame {end_frame}")
        local_stream = cv2.VideoCapture(self.device)
        local_stream.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        qualifying_frames = []
        last_transition_frame = start_frame
        for transition_frame, frame in self.check_transition(local_stream):
            if transition_frame is not None and last_transition_frame < end_frame:
                while last_transition_frame <= transition_frame and last_transition_frame < end_frame:
                    grabbed, current_frame = local_stream.read()
                    if not grabbed:
                        break
                    qualifying_frames.append(current_frame)
                    last_transition_frame += 1
            if transition_frame is None or transition_frame >= end_frame:
                break
        # Read whatever is left of the range after the transition scan.
        while last_transition_frame < end_frame:
            grabbed, frame = local_stream.read()
            if not grabbed:
                break
            qualifying_frames.append(frame)
            last_transition_frame += 1
        local_stream.release()
        logging.info(f"{threading.current_thread().name} | Segment processed. Start frame: {start_frame}, End frame: {end_frame}")
        with self.results_lock:
            self.results.append(qualifying_frames)

    def stop(self):
        """Block until every worker thread started by ``start`` has finished."""
        for thread in self.threads:
            thread.join()
            logging.info("Thread joined")
        logging.info("Stopping video processing")
if __name__ == '__main__':
    # Command-line entry point: time a full run over the given video.
    # VideoGet requires a source, so it is taken from the command line
    # rather than calling VideoGet() with no arguments (a TypeError).
    import argparse

    cli = argparse.ArgumentParser(description="VideoGet")
    cli.add_argument("src", help="video source to process (passed to cv2.VideoCapture)")
    cli_args = cli.parse_args()

    start_time = time.time()
    video_get = VideoGet(cli_args.src)
    video_get.start()
    video_get.stop()
    end_time = time.time()
    total_time = end_time - start_time
    logging.info(f"Total video processing time: {total_time} seconds")