tolgacangoz
/

anytext

Model card Files and versions Community

tolgacangoz committed 6 days ago

Commit

f4ebffc

·

verified ·

1 Parent(s): 6f8882f

Upload anytext.py

Files changed (1) hide show

anytext.py +12 -2

anytext.py CHANGED Viewed

@@ -29,6 +29,7 @@ from functools import partial
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import cv2
 import numpy as np
 import PIL.Image
 import torch
@@ -154,7 +155,12 @@ class EmbeddingManager(nn.Module):
         self.token_dim = token_dim
         self.proj = nn.Linear(40 * 64, token_dim)
-        # self.proj.load_state_dict(load_file("proj.safetensors", device=str(embedder.device)))
         if use_fp16:
             self.proj = self.proj.to(dtype=torch.float16)
@@ -499,7 +505,10 @@ class TextEmbeddingModule(nn.Module):
         # preprocess pos_imgs(if numpy, make sure it's white pos in black bg)
         if draw_pos is None:
             pos_imgs = np.zeros((w, h, 1))
-        if isinstance(draw_pos, str):
             draw_pos = cv2.imread(draw_pos)[..., ::-1]
             if draw_pos is None:
                 raise ValueError(f"Can't read draw_pos image from {draw_pos}!")
@@ -981,6 +990,7 @@ class AnyTextPipeline(
         scheduler: KarrasDiffusionSchedulers,
         safety_checker: StableDiffusionSafetyChecker,
         feature_extractor: CLIPImageProcessor,
         text_embedding_module: TextEmbeddingModule = None,
         auxiliary_latent_module: AuxiliaryLatentModule = None,
         image_encoder: CLIPVisionModelWithProjection = None,

 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import cv2
+import huggingface_hub
 import numpy as np
 import PIL.Image
 import torch
         self.token_dim = token_dim
         self.proj = nn.Linear(40 * 64, token_dim)
+        proj_dir = hf_hub_download(
+            repo_id="tolgacangoz/anytext",
+            filename="text_embedding_module/proj.safetensors",
+            cache_dir=HF_MODULES_CACHE
+        )
+        self.proj.load_state_dict(load_file(proj_dir, device=str(embedder.device)))
         if use_fp16:
             self.proj = self.proj.to(dtype=torch.float16)
         # preprocess pos_imgs(if numpy, make sure it's white pos in black bg)
         if draw_pos is None:
             pos_imgs = np.zeros((w, h, 1))
+        if isinstance(draw_pos, PIL.Image.Image):
+            pos_imgs = np.array(draw_pos)[..., ::-1]
+            pos_imgs = 255 - pos_imgs
+        elif isinstance(draw_pos, str):
             draw_pos = cv2.imread(draw_pos)[..., ::-1]
             if draw_pos is None:
                 raise ValueError(f"Can't read draw_pos image from {draw_pos}!")
         scheduler: KarrasDiffusionSchedulers,
         safety_checker: StableDiffusionSafetyChecker,
         feature_extractor: CLIPImageProcessor,
+        trust_remote_code: bool = False,
         text_embedding_module: TextEmbeddingModule = None,
         auxiliary_latent_module: AuxiliaryLatentModule = None,
         image_encoder: CLIPVisionModelWithProjection = None,