ManBib committed on
Commit
2b16bc4
·
1 Parent(s): 01b7baf

fixed processing through link or audio bytes

Browse files
Files changed (8) hide show
  1. downloader_manager.py +27 -22
  2. file_processor.py +43 -0
  3. flie_processor.py +0 -23
  4. handler.py +58 -8
  5. mediaoutput.py +7 -4
  6. requirements.txt +0 -0
  7. sorter.py +1 -10
  8. video_getter.py +119 -0
downloader_manager.py CHANGED
@@ -1,5 +1,4 @@
1
  import logging
2
- import os
3
  import tempfile
4
  from io import BytesIO
5
 
@@ -8,43 +7,49 @@ from moviepy.editor import VideoFileClip
8
  from tqdm import tqdm
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def download_mp4_and_extract_audio(link: str):
12
- """Download an MP4 file from a given link and return the video and audio content as bytes."""
13
  logging.info("Starting the download of the MP4 file...")
 
14
  try:
15
  r = requests.get(link, stream=True)
16
  r.raise_for_status()
17
 
18
  total_size = int(r.headers.get('content-length', 0))
19
- video_content = BytesIO()
20
 
21
  with tqdm(total=total_size, unit='B', unit_scale=True, desc="Downloading...") as bar:
22
  for data in r.iter_content(chunk_size=1024):
23
  bar.update(len(data))
24
  video_content.write(data)
 
25
 
26
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video_file:
27
- temp_video_file.write(video_content.getvalue())
28
- temp_video_file_path = temp_video_file.name
29
 
30
- logging.info("Extracting audio from video...")
31
- with VideoFileClip(temp_video_file_path) as video:
32
- audio = video.audio
33
 
34
- with tempfile.NamedTemporaryFile(suffix=".aac", delete=False) as temp_audio_file:
35
- audio.write_audiofile(temp_audio_file.name, codec='aac')
36
- temp_audio_file_path = temp_audio_file.name
37
 
38
- with open(temp_audio_file_path, 'rb') as f:
39
- audio_content = BytesIO(f.read())
40
-
41
- os.remove(temp_video_file_path)
42
- os.remove(temp_audio_file_path)
43
-
44
- logging.info("Download and audio extraction completed")
45
- return video_content.getvalue(), audio_content.getvalue()
46
- except requests.exceptions.HTTPError as e:
47
- logging.error(f"HTTP Error: {e}")
48
  except Exception as e:
49
  logging.error(f"Failed to download MP4 and extract audio: {e}")
50
  return None, None
 
 
 
1
  import logging
 
2
  import tempfile
3
  from io import BytesIO
4
 
 
7
  from tqdm import tqdm
8
 
9
 
10
def extract_audio(video_path):
    """Extract the audio track of a video file into an MP3 file next to it.

    :param video_path: path of the video file on disk.
    :return: path of the written ``.mp3`` file, or ``None`` when the video has
        no audio track or the extraction fails (the error is logged).
    """
    import os  # local import: this module does not import os at file level

    try:
        with VideoFileClip(video_path) as video:
            audio = video.audio
            if audio is None:
                logging.error(f"Failed to extract audio: no audio track in {video_path}")
                return None
            # Swap only the extension. A plain str.replace('.mp4', '.mp3')
            # would also rewrite '.mp4' inside directory names and would return
            # the video's own path when the suffix is missing, overwriting it.
            audio_path = os.path.splitext(video_path)[0] + '.mp3'
            audio.write_audiofile(audio_path, codec='mp3')
            return audio_path
    except Exception as e:
        logging.error(f"Failed to extract audio: {e}")
        return None


def download_mp4_and_extract_audio(link: str):
    """Download an MP4 file from a given link and return the path to the video and audio files.

    The response is streamed straight into a temporary ``.mp4`` file instead of
    being buffered in memory first, then ``extract_audio`` is run on that file.

    :param link: URL of the MP4 file.
    :return: ``(video_path, audio_path)``. ``audio_path`` is ``None`` when the
        download succeeded but audio extraction failed. ``(None, None)`` is
        returned when the download itself failed; in that case the partially
        written temporary file is removed. The caller owns, and must delete,
        every path that is returned.
    """
    import os  # local import: this module does not import os at file level

    logging.info("Starting the download of the MP4 file...")
    temp_video_file_path = None
    try:
        # (connect, read) timeouts so a stalled server cannot hang the caller;
        # the context manager returns the connection to the pool when done.
        with requests.get(link, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()

            total_size = int(r.headers.get('content-length', 0))

            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video_file:
                temp_video_file_path = temp_video_file.name
                with tqdm(total=total_size, unit='B', unit_scale=True, desc="Downloading...") as bar:
                    for data in r.iter_content(chunk_size=1024):
                        bar.update(len(data))
                        temp_video_file.write(data)

        logging.info("Starting audio extraction...")
        audio_path = extract_audio(temp_video_file_path)

        if audio_path is None:
            logging.warning("Download completed, but audio extraction failed")
        else:
            logging.info("Download and audio extraction completed")
        return temp_video_file_path, audio_path

    except requests.exceptions.RequestException as e:
        logging.error(f"Request Error: {e}")
    except Exception as e:
        logging.error(f"Failed to download MP4 and extract audio: {e}")

    # Failure path: the caller never sees the temp path, so clean it up here.
    if temp_video_file_path is not None:
        try:
            os.remove(temp_video_file_path)
        except OSError:
            pass
    return None, None
file_processor.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import logging
3
+ import os
4
+
5
+ import sources
6
+ from detector import Detector
7
+ from downloader_manager import download_mp4_and_extract_audio
8
+ from sorter import SlideSorter
9
+
10
+
11
def process_video(link):
    """Download a video, detect its slides and return them in sorted order.

    :param link: URL passed to ``download_mp4_and_extract_audio``.
    :return: ``(sorted_slides, audio_path)`` on success, ``(None, None)`` on
        any failure. The temporary video file is always removed before
        returning; the audio file is left for the caller on success and
        removed on failure, because the caller never learns its path then.
    """
    temp_video_file_path = None
    audio_path = None
    try:
        temp_video_file_path, audio_path = download_mp4_and_extract_audio(link)
        if temp_video_file_path is None:
            # The downloader already logged the cause; do not hand None to
            # the detector.
            logging.error("Failed to execute process_video: download of %s failed", link)
            return None, None

        detector = Detector(temp_video_file_path)
        detected_slides = detector.detect_slides()

        sorter = SlideSorter(sources.ListSource(detected_slides))
        sorted_slides = sorter.sort()

        return sorted_slides, audio_path

    except Exception as e:
        logging.exception("Failed to execute process_video: %s", e)
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None, None
    finally:
        # Runs on success and on failure, so the temp video never leaks.
        if temp_video_file_path is not None:
            try:
                os.remove(temp_video_file_path)
            except OSError:
                pass
27
+
28
+
29
if __name__ == '__main__':
    # Command-line entry point: process one video link and report the outcome.
    parser = argparse.ArgumentParser(description="File Processor")
    # Required: without a link there is nothing to download.
    parser.add_argument("-p", "--link", required=True, help="URL of the MP4 video to process")
    args = parser.parse_args()

    succeeded = False
    try:
        slides, path = process_video(args.link)
        succeeded = slides is not None and path is not None

        if succeeded:
            print("Video processed successfully.")
        else:
            print("Failed to process the video.")

    except Exception as e:
        logging.exception("An error occurred in main: %s", e)

    if not succeeded:
        # Let shells and calling scripts see the failure.
        raise SystemExit(1)
flie_processor.py DELETED
@@ -1,23 +0,0 @@
1
- import logging
2
- import tempfile
3
-
4
- import sources
5
- from detector import Detector
6
- from downloader_manager import download_mp4_and_extract_audio
7
- from sorter import SlideSorter
8
-
9
-
10
- def process_video(link):
11
- try:
12
- video_bytes, audio_bytes = download_mp4_and_extract_audio(link)
13
-
14
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
15
- temp_video.write(video_bytes)
16
- temp_video_path = temp_video.name
17
-
18
- detector = Detector(temp_video_path)
19
- sorter = SlideSorter(sources.ListSource(detector.detect_slides()), outpath="sorted_slides/")
20
- slides = sorter.sort()
21
- return slides, audio_bytes
22
- except Exception as e:
23
- logging.exception("Failed to execute sorter: %s", e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
handler.py CHANGED
@@ -1,12 +1,30 @@
 
1
  import base64
2
  import io
3
  import logging
 
4
 
5
  from faster_whisper import WhisperModel
 
6
 
7
- from flie_processor import process_video
8
 
9
- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  class EndpointHandler:
@@ -15,16 +33,15 @@ class EndpointHandler:
15
 
16
  def __call__(self, data: dict[str, str]):
17
  inputs = data.pop("inputs")
18
- link = data.pop("link")
19
 
20
  language = data.pop("language", "de")
21
  task = data.pop("task", "transcribe")
22
- processing_type = data.pop("type", "audio")
23
-
24
  response = {}
 
25
 
26
- if processing_type == "link":
27
- slides, audio_bytes = process_video(link)
 
28
  slides_list = [slide.to_dict() for slide in slides]
29
  response.update({"slides": slides_list})
30
  else:
@@ -32,7 +49,6 @@ class EndpointHandler:
32
  logging.debug(f"Decoded Bytes Length: {len(audio_bytes_decoded)}")
33
  audio_bytes = io.BytesIO(audio_bytes_decoded)
34
 
35
- # run inference pipeline
36
  logging.info("Running inference...")
37
  segments, info = self.model.transcribe(audio_bytes, language=language, task=task)
38
 
@@ -52,4 +68,38 @@ class EndpointHandler:
52
 
53
  response.update({"audios": full_text})
54
  logging.debug(response)
 
 
55
  return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
  import base64
3
  import io
4
  import logging
5
+ import os
6
 
7
  from faster_whisper import WhisperModel
8
+ from pydub import AudioSegment
9
 
10
+ from file_processor import process_video
11
 
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
+
14
+
15
def is_cdn_link(link_or_bytes):
    """Tell whether the given input is a downloadable link rather than audio data.

    :param link_or_bytes: either a URL string or an audio payload
        (raw/base64 ``bytes`` or a base64 ``str``).
    :return: ``True`` only for a string that parses as an ``http``/``https``
        URL with a host; ``False`` for bytes, ``None``, local file paths and
        base64 text.
    """
    from urllib.parse import urlparse  # local import keeps this block self-contained

    logging.info("Checking if the provided link is a CDN link...")
    if not isinstance(link_or_bytes, str):
        return False
    # "Anything that is not bytes" is too broad: payloads decoded from JSON
    # arrive as str as well, so the value has to actually look like a URL.
    parsed = urlparse(link_or_bytes.strip())
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)
20
+
21
+
22
def get_audio_bytes(audio_path):
    """Load the audio file at *audio_path* and return it exported as MP3.

    :param audio_path: path handed to ``AudioSegment.from_file``.
    :return: an ``io.BytesIO`` holding the MP3 export, rewound to position 0
        so it can be read from the start.
    """
    mp3_stream = io.BytesIO()
    AudioSegment.from_file(audio_path).export(mp3_stream, format='mp3')
    mp3_stream.seek(0)
    return mp3_stream
28
 
29
 
30
  class EndpointHandler:
 
33
 
34
  def __call__(self, data: dict[str, str]):
35
  inputs = data.pop("inputs")
 
36
 
37
  language = data.pop("language", "de")
38
  task = data.pop("task", "transcribe")
 
 
39
  response = {}
40
+ audio_path = None
41
 
42
+ if is_cdn_link(inputs):
43
+ slides, audio_path = process_video(inputs)
44
+ audio_bytes = get_audio_bytes(audio_path)
45
  slides_list = [slide.to_dict() for slide in slides]
46
  response.update({"slides": slides_list})
47
  else:
 
49
  logging.debug(f"Decoded Bytes Length: {len(audio_bytes_decoded)}")
50
  audio_bytes = io.BytesIO(audio_bytes_decoded)
51
 
 
52
  logging.info("Running inference...")
53
  segments, info = self.model.transcribe(audio_bytes, language=language, task=task)
54
 
 
68
 
69
  response.update({"audios": full_text})
70
  logging.debug(response)
71
+ if audio_path:
72
+ os.remove(audio_path)
73
  return response
74
+
75
+
76
if __name__ == '__main__':
    # Manual smoke test: run the handler once on a link or a local MP3 file.
    parser = argparse.ArgumentParser(description="EndpointHandler")
    # Required: with no path the handler would be asked to process None.
    parser.add_argument("-p", "--path", required=True,
                        help="video link, or path to a local MP3 file")
    parser.add_argument("-l", "--language", default="de")
    parser.add_argument("-t", "--task", default="transcribe")
    # Parsed but not used below; kept so existing command lines keep working.
    parser.add_argument("--type", default="video")
    args = parser.parse_args()

    handler = EndpointHandler()

    if is_cdn_link(args.path):
        # Links are passed through untouched; the handler downloads them.
        test_inputs = args.path
    else:
        # Local audio is sent the way a client would send it: base64 MP3 bytes.
        audio = AudioSegment.from_mp3(args.path)
        buffer = io.BytesIO()
        audio.export(buffer, format="mp3")
        mp3_bytes = buffer.getvalue()
        test_inputs = base64.b64encode(mp3_bytes)

    sample_data = {
        "inputs": test_inputs,
        "language": args.language,
        "task": args.task,
    }

    test = handler(sample_data)
    print(test)
mediaoutput.py CHANGED
@@ -1,9 +1,10 @@
1
- from abc import ABCMeta, abstractmethod
2
  import datetime
3
- import cv2
4
  import math
5
  import os
6
- import errno
 
 
7
 
8
 
9
  class MediaWriter(object):
@@ -70,6 +71,7 @@ class CustomImageWriter(ImageWriter):
70
  Image Writer that uses a custom name. It takes it as the first
71
  argument in *args in the write method.
72
  """
 
73
  def __init__(self, prefix=None, file_format='.jpg'):
74
  """
75
  Default initializer
@@ -144,6 +146,7 @@ class TimetableWriter(MediaWriter):
144
  the IncrementalImageWriter. Additionally it outputs a ".txt"
145
  document containing the slide name and their appearances.
146
  """
 
147
  def __init__(self, output_dir, timetable_loc, file_format):
148
  """
149
  Default initializer
@@ -167,7 +170,6 @@ class TimetableWriter(MediaWriter):
167
  self.txt_writer.write("Slide %d: %s\n" % (i, appearances))
168
  i += 1
169
 
170
-
171
  def close(self):
172
  self.timetable.close()
173
 
@@ -179,6 +181,7 @@ class TextWriter(MediaWriter):
179
  def write(self, content, *args):
180
  self.output_file.write(content)
181
 
 
182
  def setup_dirs(path):
183
  """
184
  Takes a path and makes sure that directories to the path
 
 
1
  import datetime
2
+ import errno
3
  import math
4
  import os
5
+ from abc import ABCMeta, abstractmethod
6
+
7
+ import cv2
8
 
9
 
10
  class MediaWriter(object):
 
71
  Image Writer that uses a custom name. It takes it as the first
72
  argument in *args in the write method.
73
  """
74
+
75
  def __init__(self, prefix=None, file_format='.jpg'):
76
  """
77
  Default initializer
 
146
  the IncrementalImageWriter. Additionally it outputs a ".txt"
147
  document containing the slide name and their appearances.
148
  """
149
+
150
  def __init__(self, output_dir, timetable_loc, file_format):
151
  """
152
  Default initializer
 
170
  self.txt_writer.write("Slide %d: %s\n" % (i, appearances))
171
  i += 1
172
 
 
173
  def close(self):
174
  self.timetable.close()
175
 
 
181
  def write(self, content, *args):
182
  self.output_file.write(content)
183
 
184
+
185
  def setup_dirs(path):
186
  """
187
  Takes a path and makes sure that directories to the path
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
sorter.py CHANGED
@@ -16,8 +16,7 @@ class SlideSorter(Analyzer):
16
  Sorts the slides according to their timestamp.
17
  """
18
 
19
- def __init__(self, source, outpath=None, timetable_loc=None, file_format=".png",
20
- comparator=ic.AbsDiffHistComparator(0.98)):
21
  """
22
  Default initializer
23
  :param path: the path where the slides are located on disk
@@ -25,12 +24,6 @@ class SlideSorter(Analyzer):
25
  are duplicates.
26
  """
27
  self.comparator = comparator
28
- self.writer = mediaoutput.NullWriter()
29
- if outpath is not None:
30
- if timetable_loc is None:
31
- timetable_loc = os.path.join(outpath, 'timetable.txt')
32
- self.file_format = file_format
33
- self.writer = mediaoutput.TimetableWriter(outpath, timetable_loc, self.file_format)
34
  self.source = source
35
 
36
  def sort(self):
@@ -81,8 +74,6 @@ class SlideSorter(Analyzer):
81
  sorted_slides.append(slide)
82
  page_counter += 1
83
  loop_counter += 1
84
- self.writer.write(sorted_slides)
85
- self.writer.close()
86
 
87
  def analyze(self):
88
  for _, slide in self.group_slides():
 
16
  Sorts the slides according to their timestamp.
17
  """
18
 
19
+ def __init__(self, source, comparator=ic.AbsDiffHistComparator(0.98)):
 
20
  """
21
  Default initializer
22
  :param path: the path where the slides are located on disk
 
24
  are duplicates.
25
  """
26
  self.comparator = comparator
 
 
 
 
 
 
27
  self.source = source
28
 
29
  def sort(self):
 
74
  sorted_slides.append(slide)
75
  page_counter += 1
76
  loop_counter += 1
 
 
77
 
78
  def analyze(self):
79
  for _, slide in self.group_slides():
video_getter.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import multiprocessing
3
+ import threading
4
+ import time
5
+
6
+ import cv2
7
+
8
+ import imgcomparison
9
+ from detector import InfiniteCounter
10
+
11
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
+
13
+
14
class VideoGet:
    """Split a video into frame ranges and scan every range on its own thread.

    Each worker opens its own ``cv2.VideoCapture`` on the source, seeks to the
    start of its range and appends the list of frames it collected to
    ``self.results``.
    """

    def __init__(self, src, segments=multiprocessing.cpu_count()):
        """
        :param src: video source handed to ``cv2.VideoCapture``.
        :param segments: requested number of segments. When the frame count is
            not divisible by it, the remainder forms one extra segment.
        """
        self.device = src
        self.results = []
        self.results_lock = threading.Lock()

        # Probe the frame count with a short-lived capture and release it,
        # instead of leaving an anonymous capture open.
        probe = cv2.VideoCapture(src)
        try:
            self.total_frames = int(probe.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            probe.release()

        # Never let the step drop to 0 (fewer frames than segments) and never
        # divide by 0 (segments <= 0): range() rejects a zero step.
        self.segment_length = max(1, self.total_frames // max(1, segments))
        self.segments = self._split_into_segments()
        self.threads = []
        self.comparator = imgcomparison.AbsDiffHistComparator(0.99)
        # Report the real number of segments, which may differ from the request.
        logging.info(f"VideoGet initialized with {len(self.segments)} segments")

    def _split_into_segments(self):
        """Return ``(start_frame, end_frame)`` pairs covering ``[0, total_frames)``.

        ``end_frame`` is exclusive and clipped to ``total_frames``; a video
        without frames yields an empty list.
        """
        step = max(1, self.segment_length)
        segments = [
            (start_frame, min(start_frame + step, self.total_frames))
            for start_frame in range(0, self.total_frames, step)
        ]
        logging.info(f"Video split into {len(segments)} segments")
        return segments

    def start(self):
        """Start one worker thread per segment."""
        logging.info("Starting video processing")
        for segment in self.segments:
            thread = threading.Thread(target=self.process_segment, args=(segment,))
            thread.start()
            self.threads.append(thread)

    def check_transition(self, local_stream):
        """Yield ``(frame_count, frame)`` pairs read from *local_stream*.

        The first frame is yielded as ``(0, frame)``. Afterwards a pair is
        yielded each time the comparator reports that a frame differs from its
        predecessor, once consecutive frames compare equal again. The final
        pair carries ``None`` as frame to mark the end of the stream.

        :param local_stream: an opened capture positioned at the first frame
            to inspect.
        """
        _, prev_frame = local_stream.read()
        if prev_frame is None:
            logging.warning(f"{threading.current_thread().name} | Initial frame is None")
            return
        yield 0, prev_frame

        frame_counter = InfiniteCounter()
        for frame_count in frame_counter.count():

            _, frame = local_stream.read()

            if frame is None:
                logging.info(f"{threading.current_thread().name} | End of segment reached")
                break
            elif not self.comparator.are_same(prev_frame, frame):
                logging.info(f"{threading.current_thread().name} | Transition detected at frame {frame_count}")

                # Skip ahead until two consecutive frames compare equal again.
                while True:
                    # The stream can end in the middle of a transition; stop
                    # instead of handing a None frame to the comparator.
                    if frame is None or self.comparator.are_same(prev_frame, frame):
                        break
                    prev_frame = frame
                    _, frame = local_stream.read()
                    frame_counter.increment()
                yield frame_count, frame

            prev_frame = frame

        yield frame_count, None

    def process_segment(self, segment):
        """Collect frames for one segment and append them to ``self.results``.

        :param segment: ``(start_frame, end_frame)`` pair as produced by
            ``_split_into_segments``.
        """
        start_frame, end_frame = segment
        logging.info(f"{threading.current_thread().name} | Processing segment: Start frame {start_frame}, End frame {end_frame}")
        local_stream = cv2.VideoCapture(self.device)

        qualifying_frames = []
        try:
            local_stream.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

            last_transition_frame = start_frame

            # NOTE(review): check_transition reads from the same capture as the
            # loops below, so both advance one shared read position - confirm
            # this interleaving is intended.
            for transition_frame, frame in self.check_transition(local_stream):
                if transition_frame is not None and last_transition_frame < end_frame:
                    while last_transition_frame <= transition_frame and last_transition_frame < end_frame:
                        grabbed, current_frame = local_stream.read()
                        if not grabbed:
                            break
                        qualifying_frames.append(current_frame)
                        last_transition_frame += 1
                if transition_frame is None or transition_frame >= end_frame:
                    break

            # Read whatever is left of the segment after the last transition.
            while last_transition_frame < end_frame:
                grabbed, frame = local_stream.read()
                if not grabbed:
                    break
                qualifying_frames.append(frame)
                last_transition_frame += 1
        finally:
            # Release the capture even when reading or comparing raised.
            local_stream.release()

        logging.info(f"{threading.current_thread().name} | Segment processed. Start frame: {start_frame}, End frame: {end_frame}")

        # NOTE(review): results are appended in thread completion order, which
        # is not necessarily segment order.
        with self.results_lock:
            self.results.append(qualifying_frames)

    def stop(self):
        """Wait for every worker thread started by ``start`` to finish."""
        for thread in self.threads:
            thread.join()
            logging.info("Thread joined")
        logging.info("Stopping video processing")
110
+
111
+
112
if __name__ == '__main__':
    # Timing harness: process the video given on the command line.
    import sys

    # VideoGet needs a source; calling it without one raises TypeError.
    if len(sys.argv) < 2:
        raise SystemExit("usage: video_getter.py <video-source>")

    start_time = time.time()
    video_get = VideoGet(sys.argv[1])
    video_get.start()
    video_get.stop()
    end_time = time.time()
    total_time = end_time - start_time
    logging.info(f"Total video processing time: {total_time} seconds")