Spaces:
Running
on
Zero
Running
on
Zero
from .base_prompter import BasePrompter | |
from ..models.model_manager import ModelManager | |
from ..models import HunyuanDiTCLIPTextEncoder, HunyuanDiTT5TextEncoder | |
from transformers import BertTokenizer, AutoTokenizer | |
import warnings, os | |
class HunyuanDiTPrompter(BasePrompter):
    """Prompt encoder front-end that pairs two tokenizers with two text encoders.

    A ``BertTokenizer`` feeds the encoder stored in ``self.text_encoder`` and
    an ``AutoTokenizer`` feeds the encoder stored in ``self.text_encoder_t5``.
    The tokenizers are loaded at construction time; the encoders start out as
    ``None`` and are attached afterwards through :meth:`fetch_models`.
    """

    def __init__(self, tokenizer_path=None, tokenizer_t5_path=None):
        """Load both tokenizers.

        Args:
            tokenizer_path: Location passed to ``BertTokenizer.from_pretrained``.
                When ``None``, ``tokenizer_configs/hunyuan_dit/tokenizer`` under
                the directory two levels above this file is used.
            tokenizer_t5_path: Location passed to
                ``AutoTokenizer.from_pretrained``. When ``None``,
                ``tokenizer_configs/hunyuan_dit/tokenizer_t5`` under the same
                directory is used.
        """

        def bundled(relative_path):
            # Only evaluated when a default is actually needed.
            package_root = os.path.dirname(os.path.dirname(__file__))
            return os.path.join(package_root, relative_path)

        if tokenizer_path is None:
            tokenizer_path = bundled("tokenizer_configs/hunyuan_dit/tokenizer")
        if tokenizer_t5_path is None:
            tokenizer_t5_path = bundled("tokenizer_configs/hunyuan_dit/tokenizer_t5")

        super().__init__()

        self.tokenizer = BertTokenizer.from_pretrained(tokenizer_path)
        # Warnings raised while loading the second tokenizer are silenced;
        # the first tokenizer is loaded with warnings left enabled.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.tokenizer_t5 = AutoTokenizer.from_pretrained(tokenizer_t5_path)

        # Encoders are not loaded here; see fetch_models().
        self.text_encoder: HunyuanDiTCLIPTextEncoder = None
        self.text_encoder_t5: HunyuanDiTT5TextEncoder = None

    def fetch_models(
        self,
        text_encoder: HunyuanDiTCLIPTextEncoder = None,
        text_encoder_t5: HunyuanDiTT5TextEncoder = None,
    ):
        """Attach the two text encoders, replacing any previously stored ones.

        Omitting an argument stores ``None`` for that encoder.
        """
        self.text_encoder, self.text_encoder_t5 = text_encoder, text_encoder_t5

    def encode_prompt_using_signle_model(self, prompt, text_encoder, tokenizer, max_length, clip_skip, device):
        """Tokenize ``prompt`` and run it through one text encoder.

        NOTE: the method name keeps its historical spelling ("signle") so that
        existing callers continue to work.

        Args:
            prompt: Text handed to ``tokenizer``.
            text_encoder: Callable invoked as
                ``text_encoder(input_ids, attention_mask=..., clip_skip=...)``.
            tokenizer: Tokenizer used to produce the ids and attention mask.
            max_length: Length the tokenizer pads to and truncates at.
            clip_skip: Forwarded unchanged to ``text_encoder``.
            device: Device the ids and the attention mask are moved to.

        Returns:
            ``(prompt_embeds, attention_mask)`` where ``prompt_embeds`` is
            whatever ``text_encoder`` returns and ``attention_mask`` is the
            tokenizer's mask after being moved to ``device``.
        """
        tokenizer_options = dict(
            padding="max_length",
            max_length=max_length,
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt",
        )
        encoded = tokenizer(prompt, **tokenizer_options)

        token_ids = encoded.input_ids
        mask_on_device = encoded.attention_mask.to(device)
        embeddings = text_encoder(
            token_ids.to(device),
            attention_mask=mask_on_device,
            clip_skip=clip_skip,
        )
        return embeddings, mask_on_device

    def encode_prompt(
        self,
        prompt,
        clip_skip=1,
        clip_skip_2=1,
        positive=True,
        device="cuda"
    ):
        """Encode ``prompt`` with both text encoders.

        The prompt is first passed through ``self.process_prompt`` (not defined
        in this class; presumably inherited from ``BasePrompter``), then encoded
        by ``self.text_encoder`` and afterwards by ``self.text_encoder_t5``,
        each padded/truncated to its tokenizer's ``model_max_length``.

        Args:
            prompt: Raw prompt.
            clip_skip: ``clip_skip`` value for ``self.text_encoder``.
            clip_skip_2: ``clip_skip`` value for ``self.text_encoder_t5``.
            positive: Forwarded to ``self.process_prompt``.
            device: Device for the token tensors; defaults to ``"cuda"``.

        Returns:
            ``(prompt_emb, attention_mask, prompt_emb_t5, attention_mask_t5)``.
        """
        prompt = self.process_prompt(prompt, positive=positive)

        # First the CLIP encoder, then the T5 encoder; order defines the
        # layout of the returned tuple.
        encoder_passes = (
            (self.text_encoder, self.tokenizer, clip_skip),
            (self.text_encoder_t5, self.tokenizer_t5, clip_skip_2),
        )
        collected = []
        for encoder, tokenizer, skip in encoder_passes:
            embeddings, mask = self.encode_prompt_using_signle_model(
                prompt, encoder, tokenizer, tokenizer.model_max_length, skip, device
            )
            collected += [embeddings, mask]
        return tuple(collected)