File size: 5,533 Bytes
a23f4af
 
e63f3e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bba21a6
e63f3e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb26eb3
e63f3e2
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import gradio as gr

import argparse
import datetime
import json
import os
import time

import gradio as gr
import requests
from PIL import Image

from q_align.model.builder import load_pretrained_model

from q_align.conversation import (default_conversation, conv_templates,
                                   SeparatorStyle)
from q_align.constants import LOGDIR
from q_align.utils import (build_logger, server_error_msg,
    violates_moderation, moderation_msg)

from q_align.evaluate.scorer import QAlignScorer, QAlignAestheticScorer, QAlignVideoScorer

import gradio as gr

def load_video(video_file):
    """Decode a video and return frames sampled at roughly one frame per second.

    Parameters
    ----------
    video_file : str
        Path to a video file readable by decord.

    Returns
    -------
    list[PIL.Image.Image]
        One frame per whole second of video. A clip shorter than one
        second yields its first frame; an empty clip yields [].
    """
    # Imported lazily so the module loads even when decord is absent
    # (only video scoring needs it).
    from decord import VideoReader
    vr = VideoReader(video_file)

    # Average frame rate; used to step one second at a time.
    fps = vr.get_avg_fps()

    # Number of whole seconds covered by the clip (computed once, not per use).
    num_samples = int(len(vr) / fps)
    if num_samples == 0:
        # Sub-second clip: fall back to the first frame instead of
        # calling get_batch([]) as the original code effectively did.
        num_samples = 1 if len(vr) > 0 else 0
    if num_samples == 0:
        return []

    # Clamp so fps rounding can never index past the last frame.
    last = len(vr) - 1
    frame_indices = [min(int(fps * i), last) for i in range(num_samples)]
    frames = vr.get_batch(frame_indices).asnumpy()
    return [Image.fromarray(frame) for frame in frames]


# --- Module-level model setup (runs once at import/startup) ---------------
# Load the unified OneAlign checkpoint; all three scorers below share the
# same tokenizer/model/image processor, so the weights are loaded only once.
pretrained="q-future/one-align"
device="cuda:0"
tokenizer, model, image_processor, _ = load_pretrained_model(pretrained, None, "mplug_owl2", device=device)

# Task-specific scorer heads wrapping the shared backbone.
iqa_scorer = QAlignScorer(tokenizer=tokenizer, model=model, image_processor=image_processor)
iaa_scorer = QAlignAestheticScorer(tokenizer=tokenizer, model=model, image_processor=image_processor)
vqa_scorer = QAlignVideoScorer(tokenizer=tokenizer, model=model, image_processor=image_processor)

# Maps the UI radio-button label to its scorer; keys must match the
# gr.Radio choices defined at the bottom of this file.
scorers = {"Image Aesthetics (IAA)": iaa_scorer, "Image Quality (IQA)": iqa_scorer, "Video Quality (VQA)": vqa_scorer}

# Text-defined rating levels (as emitted by the scorer, best first) and the
# numeric score paired with each level for the expected-value computation.
LEVELS = ["excellent (5)", "good (4)", "fair (3)", "poor (2)", "bad (1)"]
scores = [5,4,3,2,1]
def image_classifier(input_img, input_vid, scorer_type):
    """Score an uploaded image or video with the selected Q-Align scorer.

    Parameters
    ----------
    input_img : PIL.Image.Image | None
        Uploaded image (ignored when a video is also provided).
    input_vid : str | None
        Path of the uploaded video; takes precedence over the image.
    scorer_type : str | None
        One of the keys of ``scorers``; defaults to "Image Quality (IQA)".

    Returns
    -------
    tuple[dict[str, float], float]
        Mapping of rating-level label -> probability, and the
        probability-weighted mean score in [1, 5].

    Raises
    ------
    ValueError
        If neither an image nor a video was uploaded (the original code
        hit an UnboundLocalError here).
    """
    if scorer_type is None:
        scorer_type = "Image Quality (IQA)"
    this_scorer = scorers[scorer_type]

    # Video input takes precedence over the image input (see UI label).
    if input_vid is not None:
        input_ = load_video(input_vid)
    elif input_img is not None:
        input_ = [input_img]
    else:
        raise ValueError("Please upload an image or a video before scoring.")

    if "Video" in scorer_type:
        # The video scorer expects a batch of clips, i.e. a list of frame lists.
        input_ = [input_]

    # Average probabilities over the batch dimension.
    probs = this_scorer(input_).mean(0).tolist()
    prob_dict = {level: prob for level, prob in zip(LEVELS, probs)}
    # Expected score: probability-weighted mean of the numeric levels
    # (loop variable renamed so it no longer shadows the result name).
    score = sum(prob * value for value, prob in zip(scores, probs))
    return prob_dict, score

# HTML/markdown banner rendered as the Gradio Interface description:
# paper title, author list with affiliations, and badge links.
# NOTE: this is runtime-visible UI text — keep the string verbatim.
title_markdown = ("""

<h3 align="center">Q-Align: Teaching LMMs for Visual Scoring via Discrete Text-Defined Levels</h3>

<h3 align="center"> One Unified Model for Visual scoring. </h3>

<h5 align="center">
      <a href="https://teowu.github.io/" target="_blank">Haoning Wu</a><sup>1</sup><sup>*</sup><sup>+</sup>,
      <a href="https://github.com/zzc-1998" target="_blank">Zicheng Zhang</a><sup>2</sup><sup>*</sup>,
    <a href="https://sites.google.com/view/r-panda" target="_blank">Weixia Zhang</a><sup>2</sup>,
    <a href="https://chaofengc.github.io" target="_blank">Chaofeng Chen</a><sup>1</sup>,
      <a href="https://liaoliang92.github.io" target="_blank">Liang Liao</a><sup>1</sup>,
      <a href="https://github.com/lcysyzxdxc" target="_blank">Chunyi Li</a><sup>2</sup>,
</h5>
    

<h5 align="center">
        <a href="https://github.com/YixuanGao98" target="_blank">Yixuan Gao</a><sup>2</sup>,
      <a href="https://github.com/AnnanWangDaniel" target="_blank">Annan Wang</a><sup>1</sup>,
      <a href="https://github.com/ZhangErliCarl/" target="_blank">Erli Zhang</a><sup>1</sup>,
      <a href="https://wenxiusun.com" target="_blank">Wenxiu Sun</a><sup>3</sup>,
      <a href="https://scholar.google.com/citations?user=uT9CtPYAAAAJ&hl=en" target="_blank">Qiong Yan</a><sup>3</sup>,
        <a href="https://sites.google.com/site/minxiongkuo/" target="_blank">Xiongkuo Min</a><sup>2</sup>,
      <a href="https://ee.sjtu.edu.cn/en/FacultyDetail.aspx?id=24&infoid=153&flag=153" target="_blank">Guangtao Zhai</a><sup>2</sup><sup>#</sup>,
      <a href="https://personal.ntu.edu.sg/wslin/Home.html" target="_blank">Weisi Lin</a><sup>1</sup><sup>#</sup>
</h5>

<h5 align="center">
  <sup>1</sup>Nanyang Technological University, <sup>2</sup>Shanghai Jiao Tong University, <sup>3</sup>Sensetime Research
</h5>
<h5 align="center">
<sup>*</sup>Equal contribution. <sup>+</sup>Project Lead. <sup>#</sup>Corresponding author(s). 
</h5>

<h4 align="center"> If you like the OneScorer, please give us a star ✨ on <a href='https://github.com/Q-Future/Q-Align'>GitHub</a> for latest update.  </h4>

<h5 align="center">
    <div style="display:flex; gap: 0.25rem;" align="center">
        <a href='https://q-align.github.io'><img src='https://img.shields.io/badge/Homepage-green'></a>
        <a href='https://github.com/Q-Future/Q-Align'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
        <a href="https://Q-Future.github.io/Q-Align/fig/Q_Align_v0_1_preview.pdf"><img src="https://img.shields.io/badge/Technical-Report-red"></a>
        <a href='https://github.com/Q-Future/Q-Align/stargazers'><img src='https://img.shields.io/github/stars/Q-Future/Q-Align.svg?style=social'></a>
    </div>
</h5>

""")


# --- Gradio UI wiring ------------------------------------------------------
# Two inputs plus a task selector; image_classifier gives the video
# precedence when both are supplied.
input_img = gr.Image(type='pil', label="Upload an Image")
# Fixed typo in the user-facing label: "INGORE" -> "IGNORE".
input_vid = gr.Video(label="Upload a Video (will IGNORE the image if a video is uploaded)")

# Outputs: per-level probability bars and the scalar expected score.
labels = gr.Label(label="Probabilities of rating levels:")
number = gr.Number(label="Output score:", info="Range in [1,5]. Higher is better.")
demo = gr.Interface(
    fn=image_classifier,
    inputs=[
        input_img,
        input_vid,
        # Choices must match the keys of the module-level `scorers` dict.
        gr.Radio(
            ["Image Aesthetics (IAA)", "Image Quality (IQA)", "Video Quality (VQA)"],
            label="Task",
            info="Which Scorer will you need?",
        ),
    ],
    outputs=[labels, number],
    title="OneScorer",
    description=title_markdown,
)
demo.launch(share=True)