Spaces:
Runtime error
Runtime error
DongfuJiang
committed on
Commit
·
5854fc5
1
Parent(s):
74aab33
update
Browse files — This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +3 -1
- app.py +127 -160
- examples/1006309.mp4 +0 -0
- examples/2006733/2006733_00.jpg +0 -0
- examples/2006733/2006733_01.jpg +0 -0
- examples/2006733/2006733_02.jpg +0 -0
- examples/2006733/2006733_03.jpg +0 -0
- examples/2006733/2006733_04.jpg +0 -0
- examples/2006733/2006733_05.jpg +0 -0
- examples/2006733/2006733_06.jpg +0 -0
- examples/2006733/2006733_07.jpg +0 -0
- examples/2006733/2006733_08.jpg +0 -0
- examples/2006733/2006733_09.jpg +0 -0
- examples/2006733/2006733_10.jpg +0 -0
- examples/2006733/2006733_11.jpg +0 -0
- examples/2006733/2006733_12.jpg +0 -0
- examples/2006733/2006733_13.jpg +0 -0
- examples/2006733/2006733_14.jpg +0 -0
- examples/2006733/2006733_15.jpg +0 -0
- examples/3005033.mp4 +0 -0
- examples/7004180.mp4 +0 -0
- examples/a400480.mp4 +0 -0
- examples/a500010.mp4 +0 -0
- examples/a500251.mp4 +0 -0
- examples/all_subsets.json +475 -1
- examples/b304986.mp4 +0 -0
- examples/b402727.mp4 +0 -0
- examples/b404675.mp4 +0 -0
- examples/d401950.mp4 +0 -0
- examples/d500506.mp4 +0 -0
- examples/d500937.mp4 +0 -0
- examples/data_subset.json +0 -368
- examples/image1.jpg +0 -0
- examples/image10.jpg +0 -0
- examples/image11.jpg +0 -0
- examples/image12.jpg +0 -0
- examples/image13.jpg +0 -0
- examples/image14.jpg +0 -0
- examples/image15.jpg +0 -0
- examples/image2.jpg +0 -0
- examples/image3.jpg +0 -0
- examples/image4.jpg +0 -0
- examples/image5.jpg +0 -0
- examples/image6.jpg +0 -0
- examples/image7.jpg +0 -0
- examples/image8.jpg +0 -0
- examples/image9.jpg +0 -0
- examples/r000228/r000228_00.jpg +0 -0
- examples/r000228/r000228_01.jpg +0 -0
- examples/r000228/r000228_02.jpg +0 -0
.gitignore
CHANGED
@@ -156,4 +156,6 @@ cython_debug/
|
|
156 |
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
|
|
|
|
156 |
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
|
161 |
+
/.vscode
|
app.py
CHANGED
@@ -3,13 +3,16 @@ import spaces
|
|
3 |
import os
|
4 |
import time
|
5 |
import json
|
|
|
|
|
6 |
from PIL import Image
|
7 |
import functools
|
8 |
from transformers import AutoProcessor, Idefics2ForConditionalGeneration
|
9 |
from models.conversation import conv_templates
|
10 |
from typing import List
|
11 |
-
processor = AutoProcessor.from_pretrained("
|
12 |
-
model = Idefics2ForConditionalGeneration.from_pretrained("
|
|
|
13 |
conv_template = conv_templates["idefics_2"]
|
14 |
|
15 |
with open("./examples/all_subsets.json", 'r') as f:
|
@@ -18,8 +21,33 @@ with open("./examples/all_subsets.json", 'r') as f:
|
|
18 |
for item in examples:
|
19 |
video_id = item['images'][0].split("_")[0]
|
20 |
item['images'] = [os.path.join("./examples", video_id, x) for x in item['images']]
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
@spaces.GPU
|
24 |
def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
|
25 |
global processor, model
|
@@ -32,6 +60,14 @@ def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
|
|
32 |
|
33 |
idefics_2_message = []
|
34 |
cur_img_idx = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
print(history)
|
37 |
for i, message in enumerate(history):
|
@@ -41,6 +77,11 @@ def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
|
|
41 |
"content": []
|
42 |
})
|
43 |
message_text = message["text"]
|
|
|
|
|
|
|
|
|
|
|
44 |
num_image_tokens_in_text = message_text.count("<image>")
|
45 |
if num_image_tokens_in_text > 0:
|
46 |
sub_texts = [x.strip() for x in message_text.split("<image>")]
|
@@ -72,185 +113,111 @@ def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
|
|
72 |
print(idefics_2_message)
|
73 |
prompt = processor.apply_chat_template(idefics_2_message, add_generation_prompt=True)
|
74 |
|
75 |
-
images = [Image.open(x) for x in
|
76 |
inputs = processor(text=prompt, images=images, return_tensors="pt")
|
77 |
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
78 |
outputs = model.generate(**inputs, max_new_tokens=1024)
|
79 |
generated_text = processor.decode(outputs[0, inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
|
80 |
return generated_text
|
81 |
|
82 |
-
def enable_next_image(uploaded_images, image):
|
83 |
-
uploaded_images.append(image)
|
84 |
-
return uploaded_images, gr.MultimodalTextbox(value=None, interactive=False)
|
85 |
-
|
86 |
-
def add_message(history, message):
|
87 |
-
if message["files"]:
|
88 |
-
for file in message["files"]:
|
89 |
-
history.append([(file,), None])
|
90 |
-
if message["text"]:
|
91 |
-
history.append([message["text"], None])
|
92 |
-
return history, gr.MultimodalTextbox(value=None)
|
93 |
-
|
94 |
-
def print_like_dislike(x: gr.LikeData):
|
95 |
-
print(x.index, x.value, x.liked)
|
96 |
-
|
97 |
-
|
98 |
-
def get_chat_history(history):
|
99 |
-
chat_history = []
|
100 |
-
user_role = conv_template.roles[0]
|
101 |
-
assistant_role = conv_template.roles[1]
|
102 |
-
for i, message in enumerate(history):
|
103 |
-
if isinstance(message[0], str):
|
104 |
-
chat_history.append({"role": user_role, "text": message[0]})
|
105 |
-
if i != len(history) - 1:
|
106 |
-
assert message[1], "The bot message is not provided, internal error"
|
107 |
-
chat_history.append({"role": assistant_role, "text": message[1]})
|
108 |
-
else:
|
109 |
-
assert not message[1], "the bot message internal error, get: {}".format(message[1])
|
110 |
-
chat_history.append({"role": assistant_role, "text": ""})
|
111 |
-
return chat_history
|
112 |
-
|
113 |
-
|
114 |
-
def get_chat_images(history):
|
115 |
-
images = []
|
116 |
-
for message in history:
|
117 |
-
if isinstance(message[0], tuple):
|
118 |
-
images.extend(message[0])
|
119 |
-
return images
|
120 |
-
|
121 |
-
|
122 |
-
def bot(history):
|
123 |
-
cur_messages = {"text": "", "images": []}
|
124 |
-
for message in history[::-1]:
|
125 |
-
if message[1]:
|
126 |
-
break
|
127 |
-
if isinstance(message[0], str):
|
128 |
-
cur_messages["text"] = message[0] + " " + cur_messages["text"]
|
129 |
-
elif isinstance(message[0], tuple):
|
130 |
-
cur_messages["images"].extend(message[0])
|
131 |
-
cur_messages["text"] = cur_messages["text"].strip()
|
132 |
-
cur_messages["images"] = cur_messages["images"][::-1]
|
133 |
-
if not cur_messages["text"]:
|
134 |
-
raise gr.Error("Please enter a message")
|
135 |
-
if cur_messages['text'].count("<image>") < len(cur_messages['images']):
|
136 |
-
gr.Warning("The number of images uploaded is more than the number of <image> placeholders in the text. Will automatically prepend <image> to the text.")
|
137 |
-
cur_messages['text'] += "<image> "* (len(cur_messages['images']) - cur_messages['text'].count("<image>"))
|
138 |
-
history[-1][0] = cur_messages["text"]
|
139 |
-
if cur_messages['text'].count("<image>") > len(cur_messages['images']):
|
140 |
-
gr.Warning("The number of images uploaded is less than the number of <image> placeholders in the text. Will automatically remove extra <image> placeholders from the text.")
|
141 |
-
cur_messages['text'] = cur_messages['text'][::-1].replace("<image>"[::-1], "", cur_messages['text'].count("<image>") - len(cur_messages['images']))[::-1]
|
142 |
-
history[-1][0] = cur_messages["text"]
|
143 |
-
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
|
163 |
-
def get_images(video_folder:str):
|
164 |
-
"""
|
165 |
-
video folder contains images files like {video_folder_name}_00.jpg, {video_folder_name}_01.jpg, ...
|
166 |
-
"""
|
167 |
-
images = []
|
168 |
-
for file in os.listdir(video_folder):
|
169 |
-
if file.endswith(".jpg"):
|
170 |
-
images.append(Image.open(os.path.join(video_folder, file)))
|
171 |
-
# sort images by name
|
172 |
-
images.sort(key=lambda x: int(x.filename.split("_")[-1].split(".")[0]))
|
173 |
-
return images
|
174 |
-
|
175 |
def build_demo():
|
176 |
with gr.Blocks() as demo:
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
### [Paper](https://arxiv.org/abs/2405.01483) | [Github](https://github.com/TIGER-AI-Lab/Mantis) | [Models](https://huggingface.co/collections/TIGER-Lab/mantis-6619b0834594c878cdb1d6e4) | [Dataset](https://huggingface.co/datasets/TIGER-Lab/Mantis-Instruct) | [Website](https://tiger-ai-lab.github.io/Mantis/)
|
182 |
-
""")
|
183 |
-
|
184 |
-
gr.Markdown("""## Chat with Mantis
|
185 |
-
Mantis supports interleaved text-image input format, where you can simply use the placeholder `<image>` to indicate the position of uploaded images.
|
186 |
-
The model is optimized for multi-image reasoning, while preserving the ability to chat about text and images in a single conversation.
|
187 |
""")
|
188 |
-
|
189 |
-
chatbot = gr.Chatbot(line_breaks=True)
|
190 |
-
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
|
191 |
-
|
192 |
-
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
193 |
-
|
194 |
-
"""
|
195 |
-
with gr.Accordion(label='Advanced options', open=False):
|
196 |
-
temperature = gr.Slider(
|
197 |
-
label='Temperature',
|
198 |
-
minimum=0.1,
|
199 |
-
maximum=2.0,
|
200 |
-
step=0.1,
|
201 |
-
value=0.2,
|
202 |
-
interactive=True
|
203 |
-
)
|
204 |
-
top_p = gr.Slider(
|
205 |
-
label='Top-p',
|
206 |
-
minimum=0.05,
|
207 |
-
maximum=1.0,
|
208 |
-
step=0.05,
|
209 |
-
value=1.0,
|
210 |
-
interactive=True
|
211 |
-
)
|
212 |
-
"""
|
213 |
-
|
214 |
-
bot_msg = chat_msg.success(bot, chatbot, chatbot, api_name="bot_response")
|
215 |
-
|
216 |
-
chatbot.like(print_like_dislike, None, None)
|
217 |
-
|
218 |
with gr.Row():
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
|
|
|
|
|
|
226 |
)
|
227 |
-
|
228 |
-
|
|
|
229 |
|
230 |
gr.Examples(
|
231 |
-
examples=
|
|
|
232 |
[
|
233 |
item['id'],
|
234 |
-
|
235 |
-
|
236 |
-
"files": item['images']
|
237 |
-
},
|
238 |
item['conversations'][1]['value']
|
239 |
] for item in examples
|
240 |
],
|
241 |
-
inputs=[dummy_id,
|
242 |
)
|
243 |
|
244 |
-
|
245 |
-
## Citation
|
246 |
-
```
|
247 |
-
@article{jiang2024mantis,
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
}
|
253 |
-
```""")
|
254 |
return demo
|
255 |
|
256 |
|
|
|
3 |
import os
|
4 |
import time
|
5 |
import json
|
6 |
+
import numpy as np
|
7 |
+
import av
|
8 |
from PIL import Image
|
9 |
import functools
|
10 |
from transformers import AutoProcessor, Idefics2ForConditionalGeneration
|
11 |
from models.conversation import conv_templates
|
12 |
from typing import List
|
13 |
+
processor = AutoProcessor.from_pretrained("Mantis-VL/mantis-8b-idefics2-video-eval-95k-mantis-2epoch_4096")
|
14 |
+
model = Idefics2ForConditionalGeneration.from_pretrained("Mantis-VL/mantis-8b-idefics2-video-eval-95k-mantis-2epoch_4096")
|
15 |
+
MAX_NUM_FRAMES = 24
|
16 |
conv_template = conv_templates["idefics_2"]
|
17 |
|
18 |
with open("./examples/all_subsets.json", 'r') as f:
|
|
|
21 |
for item in examples:
|
22 |
video_id = item['images'][0].split("_")[0]
|
23 |
item['images'] = [os.path.join("./examples", video_id, x) for x in item['images']]
|
24 |
+
item['video'] = os.path.join("./examples", item['video'])
|
25 |
+
|
26 |
+
VIDEO_EVAL_PROMPT = """
|
27 |
+
Suppose you are an expert in judging and evaluating the quality of AI-generated videos,
|
28 |
+
please watch the following frames of a given video and see the text prompt for generating the video,
|
29 |
+
then give scores from 7 different dimensions:
|
30 |
+
(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color
|
31 |
+
(2) object consistency, the consistency of objects or humans in video
|
32 |
+
(3) dynamic degree, the degree of dynamic changes
|
33 |
+
(4) motion smoothness, the smoothness of motion or movements
|
34 |
+
(5) text-to-video alignment, the alignment between the text prompt and the video content
|
35 |
+
(6) factual consistency, the consistency of the video content with the common-sense and factual knowledge
|
36 |
+
(7) overall score, the overall quality of the video
|
37 |
+
for each dimension, output a number from [1,2,3], in which '1' is 'Bad', '2' is 'Average', '3' is 'Good'.
|
38 |
+
Here is an output example:
|
39 |
+
visual quality: 3
|
40 |
+
object consistency: 2
|
41 |
+
dynamic degree: 2
|
42 |
+
motion smoothness: 1
|
43 |
+
text-to-video alignment: 1
|
44 |
+
factual consistency: 2
|
45 |
+
overall score: 1
|
46 |
+
|
47 |
+
For this video, the text prompt is "{text_prompt}",
|
48 |
+
all the frames of video are as follows:
|
49 |
+
|
50 |
+
"""
|
51 |
@spaces.GPU
|
52 |
def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
|
53 |
global processor, model
|
|
|
60 |
|
61 |
idefics_2_message = []
|
62 |
cur_img_idx = 0
|
63 |
+
cur_vid_idx = 0
|
64 |
+
all_videos = [x for x in images if isinstance(x, list)]
|
65 |
+
flatten_images = []
|
66 |
+
for x in images:
|
67 |
+
if isinstance(x, list):
|
68 |
+
flatten_images.extend(x)
|
69 |
+
else:
|
70 |
+
flatten_images.append(x)
|
71 |
|
72 |
print(history)
|
73 |
for i, message in enumerate(history):
|
|
|
77 |
"content": []
|
78 |
})
|
79 |
message_text = message["text"]
|
80 |
+
num_video_tokens_in_text = message_text.count("<video>")
|
81 |
+
if num_video_tokens_in_text > 0:
|
82 |
+
for _ in range(num_video_tokens_in_text):
|
83 |
+
message_text = message_text.replace("<video>", "<image> " * len(all_videos[cur_vid_idx]), 1)
|
84 |
+
cur_vid_idx += 1
|
85 |
num_image_tokens_in_text = message_text.count("<image>")
|
86 |
if num_image_tokens_in_text > 0:
|
87 |
sub_texts = [x.strip() for x in message_text.split("<image>")]
|
|
|
113 |
print(idefics_2_message)
|
114 |
prompt = processor.apply_chat_template(idefics_2_message, add_generation_prompt=True)
|
115 |
|
116 |
+
images = [Image.open(x) if isinstance(x, str) else x for x in flatten_images]
|
117 |
inputs = processor(text=prompt, images=images, return_tensors="pt")
|
118 |
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
119 |
outputs = model.generate(**inputs, max_new_tokens=1024)
|
120 |
generated_text = processor.decode(outputs[0, inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
|
121 |
return generated_text
|
122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
+
def read_video_pyav(container, indices):
    """Decode selected frames from a video with the PyAV decoder.

    Args:
        container (av.container.input.InputContainer): PyAV container.
        indices (Sequence[int]): Zero-based frame indices to decode. A list
            or a numpy array both work. Order and duplicates are ignored;
            frames are returned in decode order.

    Returns:
        np.ndarray: Decoded frames of shape (num_frames, height, width, 3).
        When ``indices`` is empty, or none of the requested frames exist in
        the stream, an empty array of shape (0, 0, 0, 3) is returned instead
        of raising.
    """
    # A set gives O(1) membership checks for every decoded frame, where the
    # original scanned the whole index sequence once per frame.
    wanted = {int(i) for i in indices}
    if not wanted:
        return np.empty((0, 0, 0, 3), dtype=np.uint8)

    # Use the true maximum rather than the last element so unsorted input
    # does not stop decoding early.
    last_index = max(wanted)

    frames = []
    container.seek(0)
    for i, frame in enumerate(container.decode(video=0)):
        if i > last_index:
            break
        if i in wanted:
            frames.append(frame)

    if not frames:
        # np.stack raises on an empty list; return a well-formed empty batch.
        return np.empty((0, 0, 0, 3), dtype=np.uint8)
    return np.stack([x.to_ndarray(format="rgb24") for x in frames])
|
145 |
+
|
146 |
+
def eval_video(prompt, video:str):
    """Ask the loaded model to score a video against its text prompt.

    Samples at most ``MAX_NUM_FRAMES`` evenly spaced frames from the video,
    fills ``VIDEO_EVAL_PROMPT`` with the text prompt, appends a ``<video>``
    placeholder and passes the frames plus a two-turn chat to ``generate``.

    Args:
        prompt: Text prompt that was used to generate the video.
        video: Path to the video file to evaluate.

    Returns:
        Whatever ``generate`` returns for the evaluation request.

    Raises:
        gr.Error: If no video was provided or the video contains no frames.
    """
    if not video:
        # Clicking the button with no upload would otherwise fail inside av.open.
        raise gr.Error("Please upload a video before evaluating.")

    # The context manager closes the container even if decoding fails.
    with av.open(video) as container:
        total_frames = container.streams.video[0].frames
        if total_frames <= 0:
            # PyAV reports 0 when the frame count is unknown; count by decoding once.
            # read_video_pyav seeks back to the start before it decodes again.
            total_frames = sum(1 for _ in container.decode(video=0))
        if total_frames <= 0:
            raise gr.Error("No frames could be decoded from the video.")

        # Sample at most MAX_NUM_FRAMES frames uniformly across the video.
        if total_frames > MAX_NUM_FRAMES:
            # linspace returns exactly MAX_NUM_FRAMES indices; arange with a
            # float step can overshoot by one because of rounding.
            indices = np.linspace(0, total_frames, MAX_NUM_FRAMES, endpoint=False).astype(int)
        else:
            indices = np.arange(total_frames)
        video_frames = read_video_pyav(container, indices)

    frames = [Image.fromarray(x) for x in video_frames]

    eval_prompt = VIDEO_EVAL_PROMPT.format(text_prompt=prompt)
    # generate() expands each "<video>" into one "<image>" token per frame.
    eval_prompt += "<video>"
    user_role = conv_template.roles[0]
    assistant_role = conv_template.roles[1]
    chat_messages = [
        {
            "role": user_role,
            "text": eval_prompt
        },
        {
            # Empty assistant turn: the slot the model is expected to fill.
            "role": assistant_role,
            "text": ""
        }
    ]
    # The frames are wrapped in a list so generate() treats them as one video.
    response = generate(None, [frames], chat_messages)
    return response
|
175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
def build_demo():
|
177 |
with gr.Blocks() as demo:
|
178 |
+
gr.Markdown("""
|
179 |
+
## Video Evaluation
|
180 |
+
upload a video along with a text prompt when generating the video, this model will evaluate the video's quality from 7 different dimensions.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
with gr.Row():
|
183 |
+
video = gr.Video(width=500, label="Video")
|
184 |
+
with gr.Column():
|
185 |
+
video_prompt = gr.Textbox(label="Text Prompt", interactive=False, lines=1)
|
186 |
+
with gr.Row():
|
187 |
+
eval_button = gr.Button("Evaluate Video")
|
188 |
+
clear_button = gr.ClearButton([video])
|
189 |
+
eval_result = gr.Textbox(label="Evaluation result", interactive=False, lines=7)
|
190 |
+
|
191 |
+
eval_button.click(
|
192 |
+
eval_video, [video_prompt, video], [eval_result]
|
193 |
)
|
194 |
+
|
195 |
+
dummy_id = gr.Textbox("id", label="id", visible=False, min_width=50)
|
196 |
+
dummy_output = gr.Textbox("reference score", label="reference scores", visible=False, lines=7)
|
197 |
|
198 |
gr.Examples(
|
199 |
+
examples=
|
200 |
+
[
|
201 |
[
|
202 |
item['id'],
|
203 |
+
item['prompt'],
|
204 |
+
item['video'],
|
|
|
|
|
205 |
item['conversations'][1]['value']
|
206 |
] for item in examples
|
207 |
],
|
208 |
+
inputs=[dummy_id, video_prompt, video, dummy_output],
|
209 |
)
|
210 |
|
211 |
+
# gr.Markdown("""
|
212 |
+
# ## Citation
|
213 |
+
# ```
|
214 |
+
# @article{jiang2024mantis,
|
215 |
+
# title={MANTIS: Interleaved Multi-Image Instruction Tuning},
|
216 |
+
# author={Jiang, Dongfu and He, Xuan and Zeng, Huaye and Wei, Con and Ku, Max and Liu, Qian and Chen, Wenhu},
|
217 |
+
# journal={arXiv preprint arXiv:2405.01483},
|
218 |
+
# year={2024}
|
219 |
+
# }
|
220 |
+
# ```""")
|
221 |
return demo
|
222 |
|
223 |
|
examples/1006309.mp4
ADDED
Binary file (221 kB). View file
|
|
examples/2006733/2006733_00.jpg
DELETED
Binary file (12.6 kB)
|
|
examples/2006733/2006733_01.jpg
DELETED
Binary file (12.5 kB)
|
|
examples/2006733/2006733_02.jpg
DELETED
Binary file (12.5 kB)
|
|
examples/2006733/2006733_03.jpg
DELETED
Binary file (12.5 kB)
|
|
examples/2006733/2006733_04.jpg
DELETED
Binary file (13.3 kB)
|
|
examples/2006733/2006733_05.jpg
DELETED
Binary file (13.4 kB)
|
|
examples/2006733/2006733_06.jpg
DELETED
Binary file (13.3 kB)
|
|
examples/2006733/2006733_07.jpg
DELETED
Binary file (13.2 kB)
|
|
examples/2006733/2006733_08.jpg
DELETED
Binary file (13.5 kB)
|
|
examples/2006733/2006733_09.jpg
DELETED
Binary file (13.5 kB)
|
|
examples/2006733/2006733_10.jpg
DELETED
Binary file (13.7 kB)
|
|
examples/2006733/2006733_11.jpg
DELETED
Binary file (13.5 kB)
|
|
examples/2006733/2006733_12.jpg
DELETED
Binary file (12.7 kB)
|
|
examples/2006733/2006733_13.jpg
DELETED
Binary file (12.7 kB)
|
|
examples/2006733/2006733_14.jpg
DELETED
Binary file (12.7 kB)
|
|
examples/2006733/2006733_15.jpg
DELETED
Binary file (12.7 kB)
|
|
examples/3005033.mp4
ADDED
Binary file (80.1 kB). View file
|
|
examples/7004180.mp4
ADDED
Binary file (26.4 kB). View file
|
|
examples/a400480.mp4
ADDED
Binary file (27.1 kB). View file
|
|
examples/a500010.mp4
ADDED
Binary file (59.7 kB). View file
|
|
examples/a500251.mp4
ADDED
Binary file (135 kB). View file
|
|
examples/all_subsets.json
CHANGED
@@ -1 +1,475 @@
|
|
1 |
-
[{"id": "real_r003679", "images": ["r003679_00.jpg", "r003679_01.jpg", "r003679_02.jpg", "r003679_03.jpg", "r003679_04.jpg", "r003679_05.jpg", "r003679_06.jpg", "r003679_07.jpg", "r003679_08.jpg", "r003679_09.jpg", "r003679_10.jpg", "r003679_11.jpg", "r003679_12.jpg", "r003679_13.jpg", "r003679_14.jpg", "r003679_15.jpg", "r003679_16.jpg", "r003679_17.jpg", "r003679_18.jpg", "r003679_19.jpg", "r003679_20.jpg", "r003679_21.jpg", "r003679_22.jpg", "r003679_23.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is dog rolls over and falls off couch,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"}]}, {"id": "real_r100916", "images": ["r100916_00.jpg", "r100916_01.jpg", "r100916_02.jpg", "r100916_03.jpg", "r100916_04.jpg", "r100916_05.jpg", "r100916_06.jpg", "r100916_07.jpg", "r100916_08.jpg", "r100916_09.jpg", "r100916_10.jpg", "r100916_11.jpg", 
"r100916_12.jpg", "r100916_13.jpg", "r100916_14.jpg", "r100916_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is A young man is standing in front of a glacier with mountains in the background.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"}]}, {"id": "real_r004061", "images": ["r004061_00.jpg", "r004061_01.jpg", "r004061_02.jpg", "r004061_03.jpg", "r004061_04.jpg", "r004061_05.jpg", "r004061_06.jpg", "r004061_07.jpg", "r004061_08.jpg", "r004061_09.jpg", "r004061_10.jpg", "r004061_11.jpg", "r004061_12.jpg", "r004061_13.jpg", "r004061_14.jpg", "r004061_15.jpg", "r004061_16.jpg", "r004061_17.jpg", "r004061_18.jpg", "r004061_19.jpg", "r004061_20.jpg", "r004061_21.jpg", "r004061_22.jpg", "r004061_23.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the 
text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a man wearing a striped blanket over his backpack runs behind two other men.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"}]}, {"id": "worsen_gen_b404675", "images": ["b404675_00.jpg", "b404675_01.jpg", "b404675_02.jpg", "b404675_03.jpg", "b404675_04.jpg", "b404675_05.jpg", "b404675_06.jpg", "b404675_07.jpg", "b404675_08.jpg", "b404675_09.jpg", "b404675_10.jpg", "b404675_11.jpg", "b404675_12.jpg", "b404675_13.jpg", "b404675_14.jpg", "b404675_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 
'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is After a few years, Ghulam Muhammad became the owner of his business ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 2\n factual consistency: 3\n overall score: 1\n"}]}, {"id": "worsen_gen_b402727", "images": ["b402727_00.jpg", "b402727_01.jpg", "b402727_02.jpg", "b402727_03.jpg", "b402727_04.jpg", "b402727_05.jpg", "b402727_06.jpg", "b402727_07.jpg", "b402727_08.jpg", "b402727_09.jpg", "b402727_10.jpg", "b402727_11.jpg", "b402727_12.jpg", "b402727_13.jpg", "b402727_14.jpg", "b402727_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is view down to the path in the field, moving forward, night, dark. 
,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 1\n"}]}, {"id": "worsen_gen_b304986", "images": ["b304986_00.jpg", "b304986_01.jpg", "b304986_02.jpg", "b304986_03.jpg", "b304986_04.jpg", "b304986_05.jpg", "b304986_06.jpg", "b304986_07.jpg", "b304986_08.jpg", "b304986_09.jpg", "b304986_10.jpg", "b304986_11.jpg", "b304986_12.jpg", "b304986_13.jpg", "b304986_14.jpg", "b304986_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is Old Philippino man, close up, subtle smile, teats of joy, in old wood room ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 3\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 1\n text-to-video alignment: 2\n factual consistency: 3\n overall score: 1\n"}]}, {"id": "static_d500937", "images": 
["d500937_00.jpg", "d500937_01.jpg", "d500937_02.jpg", "d500937_03.jpg", "d500937_04.jpg", "d500937_05.jpg", "d500937_06.jpg", "d500937_07.jpg", "d500937_08.jpg", "d500937_09.jpg", "d500937_10.jpg", "d500937_11.jpg", "d500937_12.jpg", "d500937_13.jpg", "d500937_14.jpg", "d500937_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is write y in the garden with flowers ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 1\n object consistency: 3\n dynamic degree: 1\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"}]}, {"id": "static_d500506", "images": ["d500506_00.jpg", "d500506_01.jpg", "d500506_02.jpg", "d500506_03.jpg", "d500506_04.jpg", "d500506_05.jpg", "d500506_06.jpg", "d500506_07.jpg", "d500506_08.jpg", "d500506_09.jpg", "d500506_10.jpg", "d500506_11.jpg", "d500506_12.jpg", "d500506_13.jpg", "d500506_14.jpg", "d500506_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames 
of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a boy paiting a beautiful landscape ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 1\n object consistency: 3\n dynamic degree: 1\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"}]}, {"id": "static_d401950", "images": ["d401950_00.jpg", "d401950_01.jpg", "d401950_02.jpg", "d401950_03.jpg", "d401950_04.jpg", "d401950_05.jpg", "d401950_06.jpg", "d401950_07.jpg", "d401950_08.jpg", "d401950_09.jpg", "d401950_10.jpg", "d401950_11.jpg", "d401950_12.jpg", "d401950_13.jpg", "d401950_14.jpg", "d401950_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 
3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is sneaker with exclusive design worth 1 billion ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 1\n object consistency: 3\n dynamic degree: 1\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"}]}, {"id": "insf_a500251", "images": ["a500251_00.jpg", "a500251_01.jpg", "a500251_02.jpg", "a500251_03.jpg", "a500251_04.jpg", "a500251_05.jpg", "a500251_06.jpg", "a500251_07.jpg", "a500251_08.jpg", "a500251_09.jpg", "a500251_10.jpg", "a500251_11.jpg", "a500251_12.jpg", "a500251_13.jpg", "a500251_14.jpg", "a500251_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is :sci fi war in the moon ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 1\n object consistency: 1\n 
dynamic degree: 3\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"}]}, {"id": "insf_a400480", "images": ["a400480_00.jpg", "a400480_01.jpg", "a400480_02.jpg", "a400480_03.jpg", "a400480_04.jpg", "a400480_05.jpg", "a400480_06.jpg", "a400480_07.jpg", "a400480_08.jpg", "a400480_09.jpg", "a400480_10.jpg", "a400480_11.jpg", "a400480_12.jpg", "a400480_13.jpg", "a400480_14.jpg", "a400480_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is for a dog accessories brand a welcome image: brand name: hot doggies ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"}]}, {"id": "insf_a500010", "images": ["a500010_00.jpg", "a500010_01.jpg", "a500010_02.jpg", "a500010_03.jpg", "a500010_04.jpg", "a500010_05.jpg", "a500010_06.jpg", "a500010_07.jpg", "a500010_08.jpg", "a500010_09.jpg", "a500010_10.jpg", "a500010_11.jpg", "a500010_12.jpg", "a500010_13.jpg", "a500010_14.jpg", 
"a500010_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a druid in the forest cinematic ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"}]}, {"id": "lab_3005033", "images": ["3005033_00.jpg", "3005033_01.jpg", "3005033_02.jpg", "3005033_03.jpg", "3005033_04.jpg", "3005033_05.jpg", "3005033_06.jpg", "3005033_07.jpg", "3005033_08.jpg", "3005033_09.jpg", "3005033_10.jpg", "3005033_11.jpg", "3005033_12.jpg", "3005033_13.jpg", "3005033_14.jpg", "3005033_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall 
score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a robot vacuum traveling through southeast asian countries ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 3\n object consistency: 2\n dynamic degree: 3\n motion smoothness: 2\n text-to-video alignment: 2\n factual consistency: 3\n overall score: 2\n"}]}, {"id": "lab_7004180", "images": ["7004180_00.jpg", "7004180_01.jpg", "7004180_02.jpg", "7004180_03.jpg", "7004180_04.jpg", "7004180_05.jpg", "7004180_06.jpg", "7004180_07.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is jiaxuan is the most beautiful \uff0celegant\uff0ccharming girl in the world. 
billions of boys fell in love with her ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"}]}, {"id": "lab_1006309", "images": ["1006309_00.jpg", "1006309_01.jpg", "1006309_02.jpg", "1006309_03.jpg", "1006309_04.jpg", "1006309_05.jpg", "1006309_06.jpg", "1006309_07.jpg", "1006309_08.jpg", "1006309_09.jpg", "1006309_10.jpg", "1006309_11.jpg", "1006309_12.jpg", "1006309_13.jpg", "1006309_14.jpg", "1006309_15.jpg"], "conversations": [{"from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is scary halloween skeletons are having a conversation with each other ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"}, {"from": "gpt", "value": "visual quality: 3\n object consistency: 2\n dynamic degree: 3\n motion smoothness: 2\n text-to-video alignment: 3\n factual consistency: 2\n overall score: 3\n"}]}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"id": "real_r003679",
|
4 |
+
"images": [
|
5 |
+
"r003679_00.jpg",
|
6 |
+
"r003679_01.jpg",
|
7 |
+
"r003679_02.jpg",
|
8 |
+
"r003679_03.jpg",
|
9 |
+
"r003679_04.jpg",
|
10 |
+
"r003679_05.jpg",
|
11 |
+
"r003679_06.jpg",
|
12 |
+
"r003679_07.jpg",
|
13 |
+
"r003679_08.jpg",
|
14 |
+
"r003679_09.jpg",
|
15 |
+
"r003679_10.jpg",
|
16 |
+
"r003679_11.jpg",
|
17 |
+
"r003679_12.jpg",
|
18 |
+
"r003679_13.jpg",
|
19 |
+
"r003679_14.jpg",
|
20 |
+
"r003679_15.jpg",
|
21 |
+
"r003679_16.jpg",
|
22 |
+
"r003679_17.jpg",
|
23 |
+
"r003679_18.jpg",
|
24 |
+
"r003679_19.jpg",
|
25 |
+
"r003679_20.jpg",
|
26 |
+
"r003679_21.jpg",
|
27 |
+
"r003679_22.jpg",
|
28 |
+
"r003679_23.jpg"
|
29 |
+
],
|
30 |
+
"conversations": [
|
31 |
+
{
|
32 |
+
"from": "human",
|
33 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is dog rolls over and falls off couch,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"from": "gpt",
|
37 |
+
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
38 |
+
}
|
39 |
+
]
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"id": "real_r100916",
|
43 |
+
"images": [
|
44 |
+
"r100916_00.jpg",
|
45 |
+
"r100916_01.jpg",
|
46 |
+
"r100916_02.jpg",
|
47 |
+
"r100916_03.jpg",
|
48 |
+
"r100916_04.jpg",
|
49 |
+
"r100916_05.jpg",
|
50 |
+
"r100916_06.jpg",
|
51 |
+
"r100916_07.jpg",
|
52 |
+
"r100916_08.jpg",
|
53 |
+
"r100916_09.jpg",
|
54 |
+
"r100916_10.jpg",
|
55 |
+
"r100916_11.jpg",
|
56 |
+
"r100916_12.jpg",
|
57 |
+
"r100916_13.jpg",
|
58 |
+
"r100916_14.jpg",
|
59 |
+
"r100916_15.jpg"
|
60 |
+
],
|
61 |
+
"conversations": [
|
62 |
+
{
|
63 |
+
"from": "human",
|
64 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is A young man is standing in front of a glacier with mountains in the background.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"from": "gpt",
|
68 |
+
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
69 |
+
}
|
70 |
+
]
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"id": "real_r004061",
|
74 |
+
"images": [
|
75 |
+
"r004061_00.jpg",
|
76 |
+
"r004061_01.jpg",
|
77 |
+
"r004061_02.jpg",
|
78 |
+
"r004061_03.jpg",
|
79 |
+
"r004061_04.jpg",
|
80 |
+
"r004061_05.jpg",
|
81 |
+
"r004061_06.jpg",
|
82 |
+
"r004061_07.jpg",
|
83 |
+
"r004061_08.jpg",
|
84 |
+
"r004061_09.jpg",
|
85 |
+
"r004061_10.jpg",
|
86 |
+
"r004061_11.jpg",
|
87 |
+
"r004061_12.jpg",
|
88 |
+
"r004061_13.jpg",
|
89 |
+
"r004061_14.jpg",
|
90 |
+
"r004061_15.jpg",
|
91 |
+
"r004061_16.jpg",
|
92 |
+
"r004061_17.jpg",
|
93 |
+
"r004061_18.jpg",
|
94 |
+
"r004061_19.jpg",
|
95 |
+
"r004061_20.jpg",
|
96 |
+
"r004061_21.jpg",
|
97 |
+
"r004061_22.jpg",
|
98 |
+
"r004061_23.jpg"
|
99 |
+
],
|
100 |
+
"conversations": [
|
101 |
+
{
|
102 |
+
"from": "human",
|
103 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a man wearing a striped blanket over his backpack runs behind two other men.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"from": "gpt",
|
107 |
+
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
108 |
+
}
|
109 |
+
]
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"id": "worsen_gen_b404675",
|
113 |
+
"images": [
|
114 |
+
"b404675_00.jpg",
|
115 |
+
"b404675_01.jpg",
|
116 |
+
"b404675_02.jpg",
|
117 |
+
"b404675_03.jpg",
|
118 |
+
"b404675_04.jpg",
|
119 |
+
"b404675_05.jpg",
|
120 |
+
"b404675_06.jpg",
|
121 |
+
"b404675_07.jpg",
|
122 |
+
"b404675_08.jpg",
|
123 |
+
"b404675_09.jpg",
|
124 |
+
"b404675_10.jpg",
|
125 |
+
"b404675_11.jpg",
|
126 |
+
"b404675_12.jpg",
|
127 |
+
"b404675_13.jpg",
|
128 |
+
"b404675_14.jpg",
|
129 |
+
"b404675_15.jpg"
|
130 |
+
],
|
131 |
+
"conversations": [
|
132 |
+
{
|
133 |
+
"from": "human",
|
134 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is After a few years, Ghulam Muhammad became the owner of his business ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"from": "gpt",
|
138 |
+
"value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 2\n factual consistency: 3\n overall score: 1\n"
|
139 |
+
}
|
140 |
+
]
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"id": "worsen_gen_b402727",
|
144 |
+
"images": [
|
145 |
+
"b402727_00.jpg",
|
146 |
+
"b402727_01.jpg",
|
147 |
+
"b402727_02.jpg",
|
148 |
+
"b402727_03.jpg",
|
149 |
+
"b402727_04.jpg",
|
150 |
+
"b402727_05.jpg",
|
151 |
+
"b402727_06.jpg",
|
152 |
+
"b402727_07.jpg",
|
153 |
+
"b402727_08.jpg",
|
154 |
+
"b402727_09.jpg",
|
155 |
+
"b402727_10.jpg",
|
156 |
+
"b402727_11.jpg",
|
157 |
+
"b402727_12.jpg",
|
158 |
+
"b402727_13.jpg",
|
159 |
+
"b402727_14.jpg",
|
160 |
+
"b402727_15.jpg"
|
161 |
+
],
|
162 |
+
"conversations": [
|
163 |
+
{
|
164 |
+
"from": "human",
|
165 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is view down to the path in the field, moving forward, night, dark. ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
166 |
+
},
|
167 |
+
{
|
168 |
+
"from": "gpt",
|
169 |
+
"value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 1\n"
|
170 |
+
}
|
171 |
+
]
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"id": "worsen_gen_b304986",
|
175 |
+
"images": [
|
176 |
+
"b304986_00.jpg",
|
177 |
+
"b304986_01.jpg",
|
178 |
+
"b304986_02.jpg",
|
179 |
+
"b304986_03.jpg",
|
180 |
+
"b304986_04.jpg",
|
181 |
+
"b304986_05.jpg",
|
182 |
+
"b304986_06.jpg",
|
183 |
+
"b304986_07.jpg",
|
184 |
+
"b304986_08.jpg",
|
185 |
+
"b304986_09.jpg",
|
186 |
+
"b304986_10.jpg",
|
187 |
+
"b304986_11.jpg",
|
188 |
+
"b304986_12.jpg",
|
189 |
+
"b304986_13.jpg",
|
190 |
+
"b304986_14.jpg",
|
191 |
+
"b304986_15.jpg"
|
192 |
+
],
|
193 |
+
"conversations": [
|
194 |
+
{
|
195 |
+
"from": "human",
|
196 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is Old Philippino man, close up, subtle smile, teats of joy, in old wood room ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"from": "gpt",
|
200 |
+
"value": "visual quality: 3\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 1\n text-to-video alignment: 2\n factual consistency: 3\n overall score: 1\n"
|
201 |
+
}
|
202 |
+
]
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"id": "static_d500937",
|
206 |
+
"images": [
|
207 |
+
"d500937_00.jpg",
|
208 |
+
"d500937_01.jpg",
|
209 |
+
"d500937_02.jpg",
|
210 |
+
"d500937_03.jpg",
|
211 |
+
"d500937_04.jpg",
|
212 |
+
"d500937_05.jpg",
|
213 |
+
"d500937_06.jpg",
|
214 |
+
"d500937_07.jpg",
|
215 |
+
"d500937_08.jpg",
|
216 |
+
"d500937_09.jpg",
|
217 |
+
"d500937_10.jpg",
|
218 |
+
"d500937_11.jpg",
|
219 |
+
"d500937_12.jpg",
|
220 |
+
"d500937_13.jpg",
|
221 |
+
"d500937_14.jpg",
|
222 |
+
"d500937_15.jpg"
|
223 |
+
],
|
224 |
+
"conversations": [
|
225 |
+
{
|
226 |
+
"from": "human",
|
227 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is write y in the garden with flowers ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
228 |
+
},
|
229 |
+
{
|
230 |
+
"from": "gpt",
|
231 |
+
"value": "visual quality: 1\n object consistency: 3\n dynamic degree: 1\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"
|
232 |
+
}
|
233 |
+
]
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"id": "static_d500506",
|
237 |
+
"images": [
|
238 |
+
"d500506_00.jpg",
|
239 |
+
"d500506_01.jpg",
|
240 |
+
"d500506_02.jpg",
|
241 |
+
"d500506_03.jpg",
|
242 |
+
"d500506_04.jpg",
|
243 |
+
"d500506_05.jpg",
|
244 |
+
"d500506_06.jpg",
|
245 |
+
"d500506_07.jpg",
|
246 |
+
"d500506_08.jpg",
|
247 |
+
"d500506_09.jpg",
|
248 |
+
"d500506_10.jpg",
|
249 |
+
"d500506_11.jpg",
|
250 |
+
"d500506_12.jpg",
|
251 |
+
"d500506_13.jpg",
|
252 |
+
"d500506_14.jpg",
|
253 |
+
"d500506_15.jpg"
|
254 |
+
],
|
255 |
+
"conversations": [
|
256 |
+
{
|
257 |
+
"from": "human",
|
258 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a boy paiting a beautiful landscape ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
259 |
+
},
|
260 |
+
{
|
261 |
+
"from": "gpt",
|
262 |
+
"value": "visual quality: 1\n object consistency: 3\n dynamic degree: 1\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"
|
263 |
+
}
|
264 |
+
]
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"id": "static_d401950",
|
268 |
+
"images": [
|
269 |
+
"d401950_00.jpg",
|
270 |
+
"d401950_01.jpg",
|
271 |
+
"d401950_02.jpg",
|
272 |
+
"d401950_03.jpg",
|
273 |
+
"d401950_04.jpg",
|
274 |
+
"d401950_05.jpg",
|
275 |
+
"d401950_06.jpg",
|
276 |
+
"d401950_07.jpg",
|
277 |
+
"d401950_08.jpg",
|
278 |
+
"d401950_09.jpg",
|
279 |
+
"d401950_10.jpg",
|
280 |
+
"d401950_11.jpg",
|
281 |
+
"d401950_12.jpg",
|
282 |
+
"d401950_13.jpg",
|
283 |
+
"d401950_14.jpg",
|
284 |
+
"d401950_15.jpg"
|
285 |
+
],
|
286 |
+
"conversations": [
|
287 |
+
{
|
288 |
+
"from": "human",
|
289 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is sneaker with exclusive design worth 1 billion ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"from": "gpt",
|
293 |
+
"value": "visual quality: 1\n object consistency: 3\n dynamic degree: 1\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"
|
294 |
+
}
|
295 |
+
]
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"id": "insf_a500251",
|
299 |
+
"images": [
|
300 |
+
"a500251_00.jpg",
|
301 |
+
"a500251_01.jpg",
|
302 |
+
"a500251_02.jpg",
|
303 |
+
"a500251_03.jpg",
|
304 |
+
"a500251_04.jpg",
|
305 |
+
"a500251_05.jpg",
|
306 |
+
"a500251_06.jpg",
|
307 |
+
"a500251_07.jpg",
|
308 |
+
"a500251_08.jpg",
|
309 |
+
"a500251_09.jpg",
|
310 |
+
"a500251_10.jpg",
|
311 |
+
"a500251_11.jpg",
|
312 |
+
"a500251_12.jpg",
|
313 |
+
"a500251_13.jpg",
|
314 |
+
"a500251_14.jpg",
|
315 |
+
"a500251_15.jpg"
|
316 |
+
],
|
317 |
+
"conversations": [
|
318 |
+
{
|
319 |
+
"from": "human",
|
320 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is :sci fi war in the moon ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"from": "gpt",
|
324 |
+
"value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"
|
325 |
+
}
|
326 |
+
]
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"id": "insf_a400480",
|
330 |
+
"images": [
|
331 |
+
"a400480_00.jpg",
|
332 |
+
"a400480_01.jpg",
|
333 |
+
"a400480_02.jpg",
|
334 |
+
"a400480_03.jpg",
|
335 |
+
"a400480_04.jpg",
|
336 |
+
"a400480_05.jpg",
|
337 |
+
"a400480_06.jpg",
|
338 |
+
"a400480_07.jpg",
|
339 |
+
"a400480_08.jpg",
|
340 |
+
"a400480_09.jpg",
|
341 |
+
"a400480_10.jpg",
|
342 |
+
"a400480_11.jpg",
|
343 |
+
"a400480_12.jpg",
|
344 |
+
"a400480_13.jpg",
|
345 |
+
"a400480_14.jpg",
|
346 |
+
"a400480_15.jpg"
|
347 |
+
],
|
348 |
+
"conversations": [
|
349 |
+
{
|
350 |
+
"from": "human",
|
351 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is for a dog accessories brand a welcome image: brand name: hot doggies ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"from": "gpt",
|
355 |
+
"value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"
|
356 |
+
}
|
357 |
+
]
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"id": "insf_a500010",
|
361 |
+
"images": [
|
362 |
+
"a500010_00.jpg",
|
363 |
+
"a500010_01.jpg",
|
364 |
+
"a500010_02.jpg",
|
365 |
+
"a500010_03.jpg",
|
366 |
+
"a500010_04.jpg",
|
367 |
+
"a500010_05.jpg",
|
368 |
+
"a500010_06.jpg",
|
369 |
+
"a500010_07.jpg",
|
370 |
+
"a500010_08.jpg",
|
371 |
+
"a500010_09.jpg",
|
372 |
+
"a500010_10.jpg",
|
373 |
+
"a500010_11.jpg",
|
374 |
+
"a500010_12.jpg",
|
375 |
+
"a500010_13.jpg",
|
376 |
+
"a500010_14.jpg",
|
377 |
+
"a500010_15.jpg"
|
378 |
+
],
|
379 |
+
"conversations": [
|
380 |
+
{
|
381 |
+
"from": "human",
|
382 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a druid in the forest cinematic ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
383 |
+
},
|
384 |
+
{
|
385 |
+
"from": "gpt",
|
386 |
+
"value": "visual quality: 1\n object consistency: 1\n dynamic degree: 3\n motion smoothness: 1\n text-to-video alignment: 1\n factual consistency: 1\n overall score: 1\n"
|
387 |
+
}
|
388 |
+
]
|
389 |
+
},
|
390 |
+
{
|
391 |
+
"id": "lab_3005033",
|
392 |
+
"images": [
|
393 |
+
"3005033_00.jpg",
|
394 |
+
"3005033_01.jpg",
|
395 |
+
"3005033_02.jpg",
|
396 |
+
"3005033_03.jpg",
|
397 |
+
"3005033_04.jpg",
|
398 |
+
"3005033_05.jpg",
|
399 |
+
"3005033_06.jpg",
|
400 |
+
"3005033_07.jpg",
|
401 |
+
"3005033_08.jpg",
|
402 |
+
"3005033_09.jpg",
|
403 |
+
"3005033_10.jpg",
|
404 |
+
"3005033_11.jpg",
|
405 |
+
"3005033_12.jpg",
|
406 |
+
"3005033_13.jpg",
|
407 |
+
"3005033_14.jpg",
|
408 |
+
"3005033_15.jpg"
|
409 |
+
],
|
410 |
+
"conversations": [
|
411 |
+
{
|
412 |
+
"from": "human",
|
413 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a robot vacuum traveling through southeast asian countries ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
414 |
+
},
|
415 |
+
{
|
416 |
+
"from": "gpt",
|
417 |
+
"value": "visual quality: 3\n object consistency: 2\n dynamic degree: 3\n motion smoothness: 2\n text-to-video alignment: 2\n factual consistency: 3\n overall score: 2\n"
|
418 |
+
}
|
419 |
+
]
|
420 |
+
},
|
421 |
+
{
|
422 |
+
"id": "lab_7004180",
|
423 |
+
"images": [
|
424 |
+
"7004180_00.jpg",
|
425 |
+
"7004180_01.jpg",
|
426 |
+
"7004180_02.jpg",
|
427 |
+
"7004180_03.jpg",
|
428 |
+
"7004180_04.jpg",
|
429 |
+
"7004180_05.jpg",
|
430 |
+
"7004180_06.jpg",
|
431 |
+
"7004180_07.jpg"
|
432 |
+
],
|
433 |
+
"conversations": [
|
434 |
+
{
|
435 |
+
"from": "human",
|
436 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is jiaxuan is the most beautiful \uff0celegant\uff0ccharming girl in the world. billions of boys fell in love with her ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> \n"
|
437 |
+
},
|
438 |
+
{
|
439 |
+
"from": "gpt",
|
440 |
+
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
441 |
+
}
|
442 |
+
]
|
443 |
+
},
|
444 |
+
{
|
445 |
+
"id": "lab_1006309",
|
446 |
+
"images": [
|
447 |
+
"1006309_00.jpg",
|
448 |
+
"1006309_01.jpg",
|
449 |
+
"1006309_02.jpg",
|
450 |
+
"1006309_03.jpg",
|
451 |
+
"1006309_04.jpg",
|
452 |
+
"1006309_05.jpg",
|
453 |
+
"1006309_06.jpg",
|
454 |
+
"1006309_07.jpg",
|
455 |
+
"1006309_08.jpg",
|
456 |
+
"1006309_09.jpg",
|
457 |
+
"1006309_10.jpg",
|
458 |
+
"1006309_11.jpg",
|
459 |
+
"1006309_12.jpg",
|
460 |
+
"1006309_13.jpg",
|
461 |
+
"1006309_14.jpg",
|
462 |
+
"1006309_15.jpg"
|
463 |
+
],
|
464 |
+
"conversations": [
|
465 |
+
{
|
466 |
+
"from": "human",
|
467 |
+
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is scary halloween skeletons are having a conversation with each other ,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
468 |
+
},
|
469 |
+
{
|
470 |
+
"from": "gpt",
|
471 |
+
"value": "visual quality: 3\n object consistency: 2\n dynamic degree: 3\n motion smoothness: 2\n text-to-video alignment: 3\n factual consistency: 2\n overall score: 3\n"
|
472 |
+
}
|
473 |
+
]
|
474 |
+
}
|
475 |
+
]
|
examples/b304986.mp4
ADDED
Binary file (53.6 kB). View file
|
|
examples/b402727.mp4
ADDED
Binary file (43.7 kB). View file
|
|
examples/b404675.mp4
ADDED
Binary file (218 kB). View file
|
|
examples/d401950.mp4
ADDED
Binary file (13 kB). View file
|
|
examples/d500506.mp4
ADDED
Binary file (53.2 kB). View file
|
|
examples/d500937.mp4
ADDED
Binary file (38.6 kB). View file
|
|
examples/data_subset.json
DELETED
@@ -1,368 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"id": "r003679",
|
4 |
-
"images": [
|
5 |
-
"r003679_00.jpg",
|
6 |
-
"r003679_01.jpg",
|
7 |
-
"r003679_02.jpg",
|
8 |
-
"r003679_03.jpg",
|
9 |
-
"r003679_04.jpg",
|
10 |
-
"r003679_05.jpg",
|
11 |
-
"r003679_06.jpg",
|
12 |
-
"r003679_07.jpg",
|
13 |
-
"r003679_08.jpg",
|
14 |
-
"r003679_09.jpg",
|
15 |
-
"r003679_10.jpg",
|
16 |
-
"r003679_11.jpg",
|
17 |
-
"r003679_12.jpg",
|
18 |
-
"r003679_13.jpg",
|
19 |
-
"r003679_14.jpg",
|
20 |
-
"r003679_15.jpg",
|
21 |
-
"r003679_16.jpg",
|
22 |
-
"r003679_17.jpg",
|
23 |
-
"r003679_18.jpg",
|
24 |
-
"r003679_19.jpg",
|
25 |
-
"r003679_20.jpg",
|
26 |
-
"r003679_21.jpg",
|
27 |
-
"r003679_22.jpg",
|
28 |
-
"r003679_23.jpg"
|
29 |
-
],
|
30 |
-
"conversations": [
|
31 |
-
{
|
32 |
-
"from": "human",
|
33 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is dog rolls over and falls off couch,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
34 |
-
},
|
35 |
-
{
|
36 |
-
"from": "gpt",
|
37 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
38 |
-
}
|
39 |
-
]
|
40 |
-
},
|
41 |
-
{
|
42 |
-
"id": "r100916",
|
43 |
-
"images": [
|
44 |
-
"r100916_00.jpg",
|
45 |
-
"r100916_01.jpg",
|
46 |
-
"r100916_02.jpg",
|
47 |
-
"r100916_03.jpg",
|
48 |
-
"r100916_04.jpg",
|
49 |
-
"r100916_05.jpg",
|
50 |
-
"r100916_06.jpg",
|
51 |
-
"r100916_07.jpg",
|
52 |
-
"r100916_08.jpg",
|
53 |
-
"r100916_09.jpg",
|
54 |
-
"r100916_10.jpg",
|
55 |
-
"r100916_11.jpg",
|
56 |
-
"r100916_12.jpg",
|
57 |
-
"r100916_13.jpg",
|
58 |
-
"r100916_14.jpg",
|
59 |
-
"r100916_15.jpg"
|
60 |
-
],
|
61 |
-
"conversations": [
|
62 |
-
{
|
63 |
-
"from": "human",
|
64 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is A young man is standing in front of a glacier with mountains in the background.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
65 |
-
},
|
66 |
-
{
|
67 |
-
"from": "gpt",
|
68 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
69 |
-
}
|
70 |
-
]
|
71 |
-
},
|
72 |
-
{
|
73 |
-
"id": "r004061",
|
74 |
-
"images": [
|
75 |
-
"r004061_00.jpg",
|
76 |
-
"r004061_01.jpg",
|
77 |
-
"r004061_02.jpg",
|
78 |
-
"r004061_03.jpg",
|
79 |
-
"r004061_04.jpg",
|
80 |
-
"r004061_05.jpg",
|
81 |
-
"r004061_06.jpg",
|
82 |
-
"r004061_07.jpg",
|
83 |
-
"r004061_08.jpg",
|
84 |
-
"r004061_09.jpg",
|
85 |
-
"r004061_10.jpg",
|
86 |
-
"r004061_11.jpg",
|
87 |
-
"r004061_12.jpg",
|
88 |
-
"r004061_13.jpg",
|
89 |
-
"r004061_14.jpg",
|
90 |
-
"r004061_15.jpg",
|
91 |
-
"r004061_16.jpg",
|
92 |
-
"r004061_17.jpg",
|
93 |
-
"r004061_18.jpg",
|
94 |
-
"r004061_19.jpg",
|
95 |
-
"r004061_20.jpg",
|
96 |
-
"r004061_21.jpg",
|
97 |
-
"r004061_22.jpg",
|
98 |
-
"r004061_23.jpg"
|
99 |
-
],
|
100 |
-
"conversations": [
|
101 |
-
{
|
102 |
-
"from": "human",
|
103 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a man wearing a striped blanket over his backpack runs behind two other men.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
104 |
-
},
|
105 |
-
{
|
106 |
-
"from": "gpt",
|
107 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
108 |
-
}
|
109 |
-
]
|
110 |
-
},
|
111 |
-
{
|
112 |
-
"id": "r000228",
|
113 |
-
"images": [
|
114 |
-
"r000228_00.jpg",
|
115 |
-
"r000228_01.jpg",
|
116 |
-
"r000228_02.jpg",
|
117 |
-
"r000228_03.jpg",
|
118 |
-
"r000228_04.jpg",
|
119 |
-
"r000228_05.jpg",
|
120 |
-
"r000228_06.jpg",
|
121 |
-
"r000228_07.jpg",
|
122 |
-
"r000228_08.jpg",
|
123 |
-
"r000228_09.jpg",
|
124 |
-
"r000228_10.jpg",
|
125 |
-
"r000228_11.jpg",
|
126 |
-
"r000228_12.jpg",
|
127 |
-
"r000228_13.jpg",
|
128 |
-
"r000228_14.jpg",
|
129 |
-
"r000228_15.jpg",
|
130 |
-
"r000228_16.jpg",
|
131 |
-
"r000228_17.jpg",
|
132 |
-
"r000228_18.jpg",
|
133 |
-
"r000228_19.jpg",
|
134 |
-
"r000228_20.jpg",
|
135 |
-
"r000228_21.jpg",
|
136 |
-
"r000228_22.jpg",
|
137 |
-
"r000228_23.jpg"
|
138 |
-
],
|
139 |
-
"conversations": [
|
140 |
-
{
|
141 |
-
"from": "human",
|
142 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is two people get into sports car,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
143 |
-
},
|
144 |
-
{
|
145 |
-
"from": "gpt",
|
146 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
147 |
-
}
|
148 |
-
]
|
149 |
-
},
|
150 |
-
{
|
151 |
-
"id": "r100887",
|
152 |
-
"images": [
|
153 |
-
"r100887_00.jpg",
|
154 |
-
"r100887_01.jpg",
|
155 |
-
"r100887_02.jpg",
|
156 |
-
"r100887_03.jpg",
|
157 |
-
"r100887_04.jpg",
|
158 |
-
"r100887_05.jpg",
|
159 |
-
"r100887_06.jpg",
|
160 |
-
"r100887_07.jpg",
|
161 |
-
"r100887_08.jpg",
|
162 |
-
"r100887_09.jpg",
|
163 |
-
"r100887_10.jpg",
|
164 |
-
"r100887_11.jpg",
|
165 |
-
"r100887_12.jpg",
|
166 |
-
"r100887_13.jpg",
|
167 |
-
"r100887_14.jpg",
|
168 |
-
"r100887_15.jpg"
|
169 |
-
],
|
170 |
-
"conversations": [
|
171 |
-
{
|
172 |
-
"from": "human",
|
173 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is A person using a spatula to spread something on a pan.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
174 |
-
},
|
175 |
-
{
|
176 |
-
"from": "gpt",
|
177 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
178 |
-
}
|
179 |
-
]
|
180 |
-
},
|
181 |
-
{
|
182 |
-
"id": "r104340",
|
183 |
-
"images": [
|
184 |
-
"r104340_00.jpg",
|
185 |
-
"r104340_01.jpg",
|
186 |
-
"r104340_02.jpg",
|
187 |
-
"r104340_03.jpg",
|
188 |
-
"r104340_04.jpg",
|
189 |
-
"r104340_05.jpg",
|
190 |
-
"r104340_06.jpg",
|
191 |
-
"r104340_07.jpg",
|
192 |
-
"r104340_08.jpg",
|
193 |
-
"r104340_09.jpg",
|
194 |
-
"r104340_10.jpg",
|
195 |
-
"r104340_11.jpg",
|
196 |
-
"r104340_12.jpg",
|
197 |
-
"r104340_13.jpg",
|
198 |
-
"r104340_14.jpg",
|
199 |
-
"r104340_15.jpg"
|
200 |
-
],
|
201 |
-
"conversations": [
|
202 |
-
{
|
203 |
-
"from": "human",
|
204 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is A bartender is making an alcoholic drink by pouring a clear liquid into a glass with ice cubes.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
205 |
-
},
|
206 |
-
{
|
207 |
-
"from": "gpt",
|
208 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
209 |
-
}
|
210 |
-
]
|
211 |
-
},
|
212 |
-
{
|
213 |
-
"id": "r001599",
|
214 |
-
"images": [
|
215 |
-
"r001599_00.jpg",
|
216 |
-
"r001599_01.jpg",
|
217 |
-
"r001599_02.jpg",
|
218 |
-
"r001599_03.jpg",
|
219 |
-
"r001599_04.jpg",
|
220 |
-
"r001599_05.jpg",
|
221 |
-
"r001599_06.jpg",
|
222 |
-
"r001599_07.jpg",
|
223 |
-
"r001599_08.jpg",
|
224 |
-
"r001599_09.jpg",
|
225 |
-
"r001599_10.jpg",
|
226 |
-
"r001599_11.jpg",
|
227 |
-
"r001599_12.jpg",
|
228 |
-
"r001599_13.jpg",
|
229 |
-
"r001599_14.jpg",
|
230 |
-
"r001599_15.jpg",
|
231 |
-
"r001599_16.jpg",
|
232 |
-
"r001599_17.jpg",
|
233 |
-
"r001599_18.jpg",
|
234 |
-
"r001599_19.jpg",
|
235 |
-
"r001599_20.jpg",
|
236 |
-
"r001599_21.jpg",
|
237 |
-
"r001599_22.jpg",
|
238 |
-
"r001599_23.jpg"
|
239 |
-
],
|
240 |
-
"conversations": [
|
241 |
-
{
|
242 |
-
"from": "human",
|
243 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is a man uses a camera,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
244 |
-
},
|
245 |
-
{
|
246 |
-
"from": "gpt",
|
247 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
248 |
-
}
|
249 |
-
]
|
250 |
-
},
|
251 |
-
{
|
252 |
-
"id": "r101298",
|
253 |
-
"images": [
|
254 |
-
"r101298_00.jpg",
|
255 |
-
"r101298_01.jpg",
|
256 |
-
"r101298_02.jpg",
|
257 |
-
"r101298_03.jpg",
|
258 |
-
"r101298_04.jpg",
|
259 |
-
"r101298_05.jpg",
|
260 |
-
"r101298_06.jpg",
|
261 |
-
"r101298_07.jpg",
|
262 |
-
"r101298_08.jpg",
|
263 |
-
"r101298_09.jpg",
|
264 |
-
"r101298_10.jpg",
|
265 |
-
"r101298_11.jpg",
|
266 |
-
"r101298_12.jpg",
|
267 |
-
"r101298_13.jpg",
|
268 |
-
"r101298_14.jpg",
|
269 |
-
"r101298_15.jpg",
|
270 |
-
"r101298_16.jpg",
|
271 |
-
"r101298_17.jpg",
|
272 |
-
"r101298_18.jpg",
|
273 |
-
"r101298_19.jpg",
|
274 |
-
"r101298_20.jpg",
|
275 |
-
"r101298_21.jpg",
|
276 |
-
"r101298_22.jpg",
|
277 |
-
"r101298_23.jpg"
|
278 |
-
],
|
279 |
-
"conversations": [
|
280 |
-
{
|
281 |
-
"from": "human",
|
282 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is A baseball game is being played in a stadium with a large crowd of people watching.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
283 |
-
},
|
284 |
-
{
|
285 |
-
"from": "gpt",
|
286 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
287 |
-
}
|
288 |
-
]
|
289 |
-
},
|
290 |
-
{
|
291 |
-
"id": "r000688",
|
292 |
-
"images": [
|
293 |
-
"r000688_00.jpg",
|
294 |
-
"r000688_01.jpg",
|
295 |
-
"r000688_02.jpg",
|
296 |
-
"r000688_03.jpg",
|
297 |
-
"r000688_04.jpg",
|
298 |
-
"r000688_05.jpg",
|
299 |
-
"r000688_06.jpg",
|
300 |
-
"r000688_07.jpg",
|
301 |
-
"r000688_08.jpg",
|
302 |
-
"r000688_09.jpg",
|
303 |
-
"r000688_10.jpg",
|
304 |
-
"r000688_11.jpg",
|
305 |
-
"r000688_12.jpg",
|
306 |
-
"r000688_13.jpg",
|
307 |
-
"r000688_14.jpg",
|
308 |
-
"r000688_15.jpg",
|
309 |
-
"r000688_16.jpg",
|
310 |
-
"r000688_17.jpg",
|
311 |
-
"r000688_18.jpg",
|
312 |
-
"r000688_19.jpg",
|
313 |
-
"r000688_20.jpg",
|
314 |
-
"r000688_21.jpg",
|
315 |
-
"r000688_22.jpg",
|
316 |
-
"r000688_23.jpg"
|
317 |
-
],
|
318 |
-
"conversations": [
|
319 |
-
{
|
320 |
-
"from": "human",
|
321 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is the child furthest to the right dips her hand into the bowl shared by the other children.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
322 |
-
},
|
323 |
-
{
|
324 |
-
"from": "gpt",
|
325 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
326 |
-
}
|
327 |
-
]
|
328 |
-
},
|
329 |
-
{
|
330 |
-
"id": "r104350",
|
331 |
-
"images": [
|
332 |
-
"r104350_00.jpg",
|
333 |
-
"r104350_01.jpg",
|
334 |
-
"r104350_02.jpg",
|
335 |
-
"r104350_03.jpg",
|
336 |
-
"r104350_04.jpg",
|
337 |
-
"r104350_05.jpg",
|
338 |
-
"r104350_06.jpg",
|
339 |
-
"r104350_07.jpg",
|
340 |
-
"r104350_08.jpg",
|
341 |
-
"r104350_09.jpg",
|
342 |
-
"r104350_10.jpg",
|
343 |
-
"r104350_11.jpg",
|
344 |
-
"r104350_12.jpg",
|
345 |
-
"r104350_13.jpg",
|
346 |
-
"r104350_14.jpg",
|
347 |
-
"r104350_15.jpg",
|
348 |
-
"r104350_16.jpg",
|
349 |
-
"r104350_17.jpg",
|
350 |
-
"r104350_18.jpg",
|
351 |
-
"r104350_19.jpg",
|
352 |
-
"r104350_20.jpg",
|
353 |
-
"r104350_21.jpg",
|
354 |
-
"r104350_22.jpg",
|
355 |
-
"r104350_23.jpg"
|
356 |
-
],
|
357 |
-
"conversations": [
|
358 |
-
{
|
359 |
-
"from": "human",
|
360 |
-
"value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 7 different dimensions:\n(1) visual quality, \n(2) object consistency,\n(3) dynamic degree,\n(4) motion smoothness,\n(5) text-to-video alignment,\n(6) factual consistency, \n(7) overall score\nfor each dimension, output a number from [1,2,3], in which '1' stands for 'Bad', '2' stands for 'Average', '3' stands for 'Good'.\nHere is an output example: \nvisual quality: 3\nobject consistency: 2 \ndynamic degree: 2\nmotion smoothness: 1\ntext-to-video alignment: 1\nfactual consistency: 2\noverall score: 1\n\nFor this item, the text prompt is A young man is talking to a young woman in a room with books on the shelves.,\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n"
|
361 |
-
},
|
362 |
-
{
|
363 |
-
"from": "gpt",
|
364 |
-
"value": "visual quality: 3\n object consistency: 3\n dynamic degree: 3\n motion smoothness: 3\n text-to-video alignment: 3\n factual consistency: 3\n overall score: 3\n"
|
365 |
-
}
|
366 |
-
]
|
367 |
-
}
|
368 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/image1.jpg
DELETED
Binary file (37.3 kB)
|
|
examples/image10.jpg
DELETED
Binary file (13.6 kB)
|
|
examples/image11.jpg
DELETED
Binary file (7.19 kB)
|
|
examples/image12.jpg
DELETED
Binary file (155 kB)
|
|
examples/image13.jpg
DELETED
Binary file (59.1 kB)
|
|
examples/image14.jpg
DELETED
Binary file (99.9 kB)
|
|
examples/image15.jpg
DELETED
Binary file (60.7 kB)
|
|
examples/image2.jpg
DELETED
Binary file (9.86 kB)
|
|
examples/image3.jpg
DELETED
Binary file (40.7 kB)
|
|
examples/image4.jpg
DELETED
Binary file (59.7 kB)
|
|
examples/image5.jpg
DELETED
Binary file (45.2 kB)
|
|
examples/image6.jpg
DELETED
Binary file (45.6 kB)
|
|
examples/image7.jpg
DELETED
Binary file (41.3 kB)
|
|
examples/image8.jpg
DELETED
Binary file (32.2 kB)
|
|
examples/image9.jpg
DELETED
Binary file (61.1 kB)
|
|
examples/r000228/r000228_00.jpg
DELETED
Binary file (15.5 kB)
|
|
examples/r000228/r000228_01.jpg
DELETED
Binary file (16.2 kB)
|
|
examples/r000228/r000228_02.jpg
DELETED
Binary file (16.2 kB)
|
|