Spaces:

TH9817
/

aa

Runtime error

App Files Files Community

TH9817 committed on Oct 31, 2024

Commit

d50ab60

verified ·

1 Parent(s): 893fc64

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -48

app.py CHANGED Viewed

@@ -41,55 +41,55 @@ def read_video_pyav(container, indices):
             frames.append(frame)
     return np.stack([x.to_ndarray(format="rgb24") for x in frames])
 # Download video from the hub
 #video_path_1 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
-video_path="/Users/aa469627/Desktop/videollama/scene/sample1-Scene-049.mp4"
-#video_path_2 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="karate.mp4", repo_type="dataset")
-container = av.open(video_path_1)
-# sample uniformly 8 frames from the video (we can sample more for longer videos)
-total_frames = container.streams.video[0].frames
-indices = np.arange(0, total_frames, total_frames / 8).astype(int)
-clip_baby = read_video_pyav(container, indices)
-#container = av.open(video_path_2)
-# sample uniformly 8 frames from the video (we can sample more for longer videos)
-#total_frames = container.streams.video[0].frames
-#indices = np.arange(0, total_frames, total_frames / 8).astype(int)
-#clip_karate = read_video_pyav(container, indices)
-# Each "content" is a list of dicts and you can add image/video/text modalities
-conversation = [
-      {
-          "role": "user",
-          "content": [
-              {"type": "text", "text": "What happens in the video?"},
-              {"type": "video"},
-              ],
-      },
-]
-conversation_2 = [
-      {
-          "role": "user",
-          "content": [
-              {"type": "text", "text": "What do you see in this video?"},
-              {"type": "video"},
-              ],
-      },
-]
-prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
-#prompt_2 = processor.apply_chat_template(conversation_2, add_generation_prompt=True)
-inputs = processor(prompt, videos=clip_baby, padding=True, return_tensors="pt").to(model.device)
-def chat(i):
-    generate_kwargs = {"max_new_tokens": i, "do_sample": True, "top_p": 0.9}
     output = model.generate(**inputs, **generate_kwargs)
     generated_text = processor.batch_decode(output, skip_special_tokens=True)
@@ -98,7 +98,7 @@ def chat(i):
 demo = gr.Interface(
     fn=chat,
-    inputs=[gr.Slider(100,300)],
     outputs=["text"],
 )

             frames.append(frame)
     return np.stack([x.to_ndarray(format="rgb24") for x in frames])
+def chat(path,token):
 # Download video from the hub
 #video_path_1 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
+    video_path='"'+path+'"'
+    #video_path_2 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="karate.mp4", repo_type="dataset")
+    container = av.open(video_path)
+    # sample uniformly 8 frames from the video (we can sample more for longer videos)
+    total_frames = container.streams.video[0].frames
+    indices = np.arange(0, total_frames, total_frames / 8).astype(int)
+    clip_baby = read_video_pyav(container, indices)
+    #container = av.open(video_path_2)
+    # sample uniformly 8 frames from the video (we can sample more for longer videos)
+    #total_frames = container.streams.video[0].frames
+    #indices = np.arange(0, total_frames, total_frames / 8).astype(int)
+    #clip_karate = read_video_pyav(container, indices)
+    # Each "content" is a list of dicts and you can add image/video/text modalities
+    conversation = [
+          {
+              "role": "user",
+              "content": [
+                  {"type": "text", "text": "What happens in the video?"},
+                  {"type": "video"},
+                  ],
+          },
+    ]
+    conversation_2 = [
+          {
+              "role": "user",
+              "content": [
+                  {"type": "text", "text": "What do you see in this video?"},
+                  {"type": "video"},
+                  ],
+          },
+    ]
+    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+    #prompt_2 = processor.apply_chat_template(conversation_2, add_generation_prompt=True)
+    inputs = processor(prompt, videos=clip_baby, padding=True, return_tensors="pt").to(model.device)
+    generate_kwargs = {"max_new_tokens": token, "do_sample": True, "top_p": 0.9}
     output = model.generate(**inputs, **generate_kwargs)
     generated_text = processor.batch_decode(output, skip_special_tokens=True)
 demo = gr.Interface(
     fn=chat,
+    inputs=["text",gr.Slider(100,300)],
     outputs=["text"],
 )