Spaces: TIGER-Lab/GenAI-Arena (Running on Zero)

DongfuJiang committed
Merge branch 'main' of https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena

Files changed:
- model/model_manager.py +29 -23
- model/model_registry.py +14 -0
- model/models/__init__.py +1 -1
- requirements.txt +2 -3
model/model_manager.py
CHANGED
@@ -18,6 +18,7 @@ class ModelManager:
         self.model_vg_list = VIDEO_GENERATION_MODELS
         self.excluding_model_list = MUSEUM_UNSUPPORTED_MODELS
         self.desired_model_list = DESIRED_APPEAR_MODEL
+        self.load_guard()
         self.loaded_models = {}
 
     def load_model_pipe(self, model_name):
@@ -28,23 +29,27 @@ class ModelManager:
         pipe = self.loaded_models[model_name]
         return pipe
 
-
-    def NSFW_filter(self, prompt):
+    def load_guard(self):
         model_id = "meta-llama/Meta-Llama-Guard-2-8B"
-        device = "cuda"
+        device = "cuda" if torch.cuda.is_available() else "cpu"
         dtype = torch.bfloat16
-        tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_GUARD'])
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, device_map=device, token=os.environ['HF_GUARD'])
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_GUARD'])
+        self.guard = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, device_map=device, token=os.environ['HF_GUARD'])
+
+    @spaces.GPU(duration=30)
+    def NSFW_filter(self, prompt):
         chat = [{"role": "user", "content": prompt}]
-        input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(device)
-        output = model.generate(input_ids=input_ids, max_new_tokens=100, pad_token_id=0)
+        input_ids = self.tokenizer.apply_chat_template(chat, return_tensors="pt").to('cuda')
+        self.guard.cuda()
+        output = self.guard.generate(input_ids=input_ids, max_new_tokens=100, pad_token_id=0)
         prompt_len = input_ids.shape[-1]
-        result = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
+        result = self.tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
         return result
 
     @spaces.GPU(duration=120)
     def generate_image_ig(self, prompt, model_name):
         if self.NSFW_filter(prompt) == 'safe':
+            print('The prompt is safe')
             pipe = self.load_model_pipe(model_name)
             result = pipe(prompt=prompt)
         else:
@@ -53,6 +58,7 @@ class ModelManager:
 
     def generate_image_ig_api(self, prompt, model_name):
         if self.NSFW_filter(prompt) == 'safe':
+            print('The prompt is safe')
             pipe = self.load_model_pipe(model_name)
             result = pipe(prompt=prompt)
         else:
@@ -119,11 +125,11 @@ class ModelManager:
 
     @spaces.GPU(duration=200)
     def generate_image_ie(self, textbox_source, textbox_target, textbox_instruct, source_image, model_name):
-        if self.NSFW_filter(" ".join([textbox_source, textbox_target, textbox_instruct])) == 'safe':
-            pipe = self.load_model_pipe(model_name)
-            result = pipe(src_image = source_image, src_prompt = textbox_source, target_prompt = textbox_target, instruct_prompt = textbox_instruct)
-        else:
-            result = ''
+        # if self.NSFW_filter(" ".join([textbox_source, textbox_target, textbox_instruct])) == 'safe':
+        pipe = self.load_model_pipe(model_name)
+        result = pipe(src_image = source_image, src_prompt = textbox_source, target_prompt = textbox_target, instruct_prompt = textbox_instruct)
+        # else:
+        #     result = ''
         return result
 
     def generate_image_ie_museum(self, model_name):
@@ -187,19 +193,19 @@ class ModelManager:
 
     @spaces.GPU(duration=150)
     def generate_video_vg(self, prompt, model_name):
-        if self.NSFW_filter(prompt) == 'safe':
-            pipe = self.load_model_pipe(model_name)
-            result = pipe(prompt=prompt)
-        else:
-            result = ''
+        # if self.NSFW_filter(prompt) == 'safe':
+        pipe = self.load_model_pipe(model_name)
+        result = pipe(prompt=prompt)
+        # else:
+        #     result = ''
         return result
 
     def generate_video_vg_api(self, prompt, model_name):
-        if self.NSFW_filter(prompt) == 'safe':
-            pipe = self.load_model_pipe(model_name)
-            result = pipe(prompt=prompt)
-        else:
-            result = ''
+        # if self.NSFW_filter(prompt) == 'safe':
+        pipe = self.load_model_pipe(model_name)
+        result = pipe(prompt=prompt)
+        # else:
+        #     result = ''
         return result
 
     def generate_video_vg_museum(self, model_name):
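The main change above refactors content moderation: Llama Guard 2 was previously downloaded and instantiated inside NSFW_filter on every call, and is now loaded once by load_guard() at construction time and reused, with the filter itself running in a short @spaces.GPU(duration=30) window. For reference, a minimal standalone sketch of the same moderation pattern, assuming transformers is installed and the HF_GUARD environment variable holds a token with access to the gated meta-llama repo:

# Standalone sketch of the Llama Guard 2 check used by ModelManager.NSFW_filter.
# Assumes: transformers installed, HF_GUARD set to a token with access to the
# gated meta-llama repo; mirrors the committed code, not a drop-in replacement.
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-Guard-2-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_GUARD'])
guard = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    token=os.environ['HF_GUARD'],
)

def moderate(prompt):
    # Llama Guard replies with a verdict beginning with 'safe' or 'unsafe'.
    chat = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(guard.device)
    output = guard.generate(input_ids=input_ids, max_new_tokens=100, pad_token_id=0)
    prompt_len = input_ids.shape[-1]
    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

# The Space gates generation on the literal verdict string:
# if moderate(prompt) == 'safe': ...

The self.guard.cuda() call inside NSFW_filter suggests that on ZeroGPU the guard is constructed on CPU at startup and only moved to the GPU once a @spaces.GPU window is active; the device fallback in load_guard points the same way.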
model/model_registry.py
CHANGED
@@ -285,6 +285,20 @@ register_model_info(
     "https://github.com/hpcaitech/Open-Sora",
     "A community-driven opensource implementation of Sora.",
 )
+
+register_model_info(
+    ["videogenhub_OpenSora12_generation"],
+    "OpenSora v1.2",
+    "https://github.com/hpcaitech/Open-Sora",
+    "A community-driven opensource implementation of Sora. v1.2",
+)
+
+register_model_info(
+    ["videogenhub_CogVideoX_generation"],
+    "CogVideoX",
+    "https://github.com/THUDM/CogVideo",
+    "Text-to-Video Diffusion Models with An Expert Transformer.",
+)
 
 register_model_info(
     ["videogenhub_T2VTurbo_generation"],
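Both new entries follow the file's fixed positional pattern: a list of arena model keys, a display name, a project link, and a one-line description. The definition of register_model_info is not part of this diff; a hypothetical sketch of a registry that would satisfy these call sites (the ModelInfo shape and model_info dict below are assumptions, not the repo's actual code):

# Hypothetical registry satisfying the register_model_info calls above.
from collections import namedtuple

ModelInfo = namedtuple("ModelInfo", ["simple_name", "link", "description"])
model_info = {}

def register_model_info(full_names, simple_name, link, description):
    # Map every arena key to the same display metadata.
    info = ModelInfo(simple_name, link, description)
    for full_name in full_names:
        model_info[full_name] = info

# One of the entries added by this commit:
register_model_info(
    ["videogenhub_CogVideoX_generation"],
    "CogVideoX",
    "https://github.com/THUDM/CogVideo",
    "Text-to-Video Diffusion Models with An Expert Transformer.",
)
assert model_info["videogenhub_CogVideoX_generation"].simple_name == "CogVideoX"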
model/models/__init__.py
CHANGED
@@ -19,7 +19,7 @@ VIDEO_GENERATION_MODELS = ['fal_AnimateDiff_text2video',
                            'fal_AnimateDiffTurbo_text2video',
                            'videogenhub_LaVie_generation',
                            'videogenhub_VideoCrafter2_generation',
-                           'videogenhub_ModelScope_generation',
+                           'videogenhub_ModelScope_generation', 'videogenhub_CogVideoX_generation', 'videogenhub_OpenSora12_generation',
                            'videogenhub_OpenSora_generation', 'videogenhub_T2VTurbo_generation','fal_StableVideoDiffusion_text2video']
 MUSEUM_UNSUPPORTED_MODELS = ['videogenhub_OpenSoraPlan_generation']
 DESIRED_APPEAR_MODEL = ['videogenhub_T2VTurbo_generation','fal_StableVideoDiffusion_text2video']
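The one-line change registers the two new backends under keys that follow the list's consistent naming pattern: a backend prefix (videogenhub_ or fal_), the model name, and a task suffix (_generation or _text2video). An illustrative split of that pattern; no such helper exists in the repo as far as this diff shows:

# Illustrative only: how the arena's model keys decompose, inferred from the
# list entries above.
def split_model_key(key):
    backend, rest = key.split("_", 1)   # e.g. 'videogenhub', 'CogVideoX_generation'
    name, task = rest.rsplit("_", 1)    # e.g. 'CogVideoX', 'generation'
    return backend, name, task

assert split_model_key("videogenhub_CogVideoX_generation") == ("videogenhub", "CogVideoX", "generation")
assert split_model_key("fal_StableVideoDiffusion_text2video") == ("fal", "StableVideoDiffusion", "text2video")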
requirements.txt
CHANGED
@@ -7,7 +7,7 @@ h5py
 xformers~=0.0.20
 numpy>=1.23.5
 pandas<2.0.0
-peft
+peft>=0.12
 torch==2.2
 torchvision
 torchaudio
@@ -28,7 +28,6 @@ setuptools>=59.5.0
 transformers
 torchmetrics>=0.6.0
 lpips
-dreamsim
 image-reward
 kornia>=0.6
 diffusers>=0.18.0
@@ -49,7 +48,7 @@ statsmodels
 plotly
 git+https://github.com/TIGER-AI-Lab/ImagenHub.git#egg=imagen-hub
 fal_client
-
+git+https://github.com/TIGER-AI-Lab/VideoGenHub.git@arena#egg=videogen-hub
 open_clip_torch
 decord
 huggingface_hub
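The notable addition is the direct-VCS requirement: pip clones TIGER-AI-Lab/VideoGenHub at the arena ref instead of installing a released videogen-hub package, presumably to pick up the CogVideoX and OpenSora v1.2 pipelines registered above. Shown below via pip's programmatic invocation, since the surrounding code is Python; it is equivalent to installing that single requirements.txt line:

# Installs the same VCS requirement added to requirements.txt.
import subprocess
import sys

subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    "git+https://github.com/TIGER-AI-Lab/VideoGenHub.git@arena#egg=videogen-hub",
])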