Spaces:

prithivMLmods
/

Imgscope-OCR-Mini

Running on Zero

prithivMLmods committed on Mar 15

Commit

4707b9a

verified ·

1 Parent(s): 7340c37

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -80,27 +80,22 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
         return False
     return True
def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
    """Sample up to 5 evenly spaced frames from a video file.

    Args:
        video_path: Path of the video to open with OpenCV.

    Returns:
        A list of ``(pil_image, timestamp)`` tuples, one for every sampled
        frame that decoded successfully. ``pil_image`` is the frame converted
        from BGR to RGB and wrapped in a PIL image; ``timestamp`` is the frame
        index divided by the reported FPS, rounded to two decimals. When the
        capture reports a non-positive FPS the timestamp falls back to ``0.0``.
        The list is empty when the capture reports no frames.
    """
    vidcap = cv2.VideoCapture(video_path)
    # try/finally so the capture handle is released even if decoding raises.
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        max_frames = 5
        if total_frames <= max_frames:
            # Short clip: take every frame (empty range when count <= 0).
            indices = list(range(total_frames))
        else:
            indices = [
                int(i * (total_frames - 1) / (max_frames - 1))
                for i in range(max_frames)
            ]
        frames = []
        for i in indices:
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = vidcap.read()
            if not success:
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(image)
            # A reported FPS of 0 would otherwise raise ZeroDivisionError.
            timestamp = round(i / fps, 2) if fps > 0 else 0.0
            frames.append((pil_image, timestamp))
        return frames
    finally:
        vidcap.release()
@@ -168,7 +163,7 @@ def process_history(history: list[dict]) -> list[dict]:
                 current_user_content.append({"type": "image", "url": content[0]})
     return messages
-@spaces.GPU(duration=40)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
         yield ""

         return False
     return True
def downsample_video(video_path):
    """Sample up to 10 evenly spaced frames from a video file.

    Args:
        video_path: Path of the video to open with OpenCV.

    Returns:
        A list of ``(pil_image, timestamp)`` tuples, one for every sampled
        frame that decoded successfully. ``pil_image`` is the frame converted
        from BGR to RGB and wrapped in a PIL image; ``timestamp`` is the frame
        index divided by the reported FPS, rounded to two decimals. When the
        capture reports a non-positive FPS the timestamp falls back to ``0.0``.
        The list is empty when the capture reports no frames.
    """
    vidcap = cv2.VideoCapture(video_path)
    # try/finally so the capture handle is released even if decoding raises.
    try:
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        if total_frames <= 0:
            # linspace(0, -1, ...) would produce negative frame indices.
            return []
        frames = []
        # Sample 10 evenly spaced frames; cap the count at the number of
        # available frames so short clips do not yield duplicate frames.
        sample_count = min(10, total_frames)
        frame_indices = np.linspace(0, total_frames - 1, sample_count, dtype=int)
        for frame_index in frame_indices:
            # Use a plain Python int for the OpenCV call and the arithmetic.
            i = int(frame_index)
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = vidcap.read()
            if not success:
                continue
            # Convert from BGR to RGB and then to PIL Image.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(image)
            # Guard against a reported FPS of 0 (would give inf/nan or raise).
            timestamp = round(i / fps, 2) if fps > 0 else 0.0
            frames.append((pil_image, timestamp))
        return frames
    finally:
        vidcap.release()
                 current_user_content.append({"type": "image", "url": content[0]})
     return messages
+@spaces.GPU(duration=60)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
         yield ""