Spaces:

prithivMLmods/Doc-VLMs-OCR

Running on Zero

App Files Community

prithivMLmods committed on Mar 15

Commit

6ed0791

verified ·

1 Parent(s): 557810f

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -25

app.py CHANGED Viewed

@@ -22,18 +22,6 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
-css = '''h1 {
-  text-align: center;
-  display: block;
-}
-#logo {
-  display: block;
-  margin: 0 auto;
-  width: 40%;
-  object-fit: contain;
-}
-'''
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
@@ -80,22 +68,27 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
         return False
     return True
-def downsample_video(video_path):
     vidcap = cv2.VideoCapture(video_path)
-    total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     frames = []
-    # Sample 10 evenly spaced frames.
-    frame_indices = np.linspace(0, total_frames - 1, 10, dtype=int)
-    for i in frame_indices:
         vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
         success, image = vidcap.read()
         if success:
-            # Convert from BGR to RGB and then to PIL Image.
             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
             pil_image = Image.fromarray(image)
             timestamp = round(i / fps, 2)
             frames.append((pil_image, timestamp))
     vidcap.release()
     return frames
@@ -163,7 +156,7 @@ def process_history(history: list[dict]) -> list[dict]:
                 current_user_content.append({"type": "image", "url": content[0]})
     return messages
-@spaces.GPU(duration=60)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
         yield ""
@@ -249,11 +242,11 @@ examples = [
         {
             "text": "Create a short story based on the sequence of images.",
             "files": [
-                "examples/09-1.png",
-                "examples/09-2.png",
-                "examples/09-3.png",
-                "examples/09-4.png",
-                "examples/09-5.png",
             ],
         }
     ],
@@ -342,7 +335,7 @@ demo = gr.ChatInterface(
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,
-    css=css,
     delete_cache=(1800, 1800),
 )

 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
         return False
     return True
+def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
+    total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
+    max_frames = 5 # Limit to 5 frames to prevent memory overload
+    if total_frames <= max_frames:
+        indices = list(range(total_frames))
+    else:
+        indices = [int(i * (total_frames - 1) / (max_frames - 1)) for i in range(max_frames)]
     frames = []
+    for i in indices:
         vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
         success, image = vidcap.read()
         if success:
             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
             pil_image = Image.fromarray(image)
             timestamp = round(i / fps, 2)
             frames.append((pil_image, timestamp))
     vidcap.release()
     return frames
                 current_user_content.append({"type": "image", "url": content[0]})
     return messages
+@spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
         yield ""
         {
             "text": "Create a short story based on the sequence of images.",
             "files": [
+                "assets/sample-images/09-1.png",
+                "assets/sample-images/09-2.png",
+                "assets/sample-images/09-3.png",
+                "assets/sample-images/09-4.png",
+                "assets/sample-images/09-5.png",
             ],
         }
     ],
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,
+    css_paths="style.css",
     delete_cache=(1800, 1800),
 )