lnyan committed on
Commit
79bfb65
1 Parent(s): 7fa2294
Files changed (2)
  1. app.py +91 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,91 @@
+ import gradio as gr
+ import torch
+ from torch import Tensor, nn
+ from transformers import (CLIPTextModel, CLIPTokenizer, T5EncoderModel,
+                           T5Tokenizer)
+ import spaces
+ import numpy as np
+ import io
+ import base64
+
+ class HFEmbedder(nn.Module):
+     """Thin wrapper exposing a CLIP or T5 text encoder behind one interface."""
+
+     def __init__(self, version: str, max_length: int, **hf_kwargs):
+         super().__init__()
+         self.is_clip = version.startswith("openai")
+         self.max_length = max_length
+         # CLIP yields a pooled vector; T5 yields the full token sequence.
+         self.output_key = "pooler_output" if self.is_clip else "last_hidden_state"
+
+         if self.is_clip:
+             self.tokenizer: CLIPTokenizer = CLIPTokenizer.from_pretrained(version, max_length=max_length)
+             self.hf_module: CLIPTextModel = CLIPTextModel.from_pretrained(version, **hf_kwargs)
+         else:
+             self.tokenizer: T5Tokenizer = T5Tokenizer.from_pretrained(version, max_length=max_length)
+             self.hf_module: T5EncoderModel = T5EncoderModel.from_pretrained(version, **hf_kwargs)
+
+         self.hf_module = self.hf_module.eval().requires_grad_(False)
+
+     def forward(self, text: list[str]) -> Tensor:
+         batch_encoding = self.tokenizer(
+             text,
+             truncation=True,
+             max_length=self.max_length,
+             return_length=False,
+             return_overflowing_tokens=False,
+             padding="max_length",
+             return_tensors="pt",
+         )
+
+         outputs = self.hf_module(
+             input_ids=batch_encoding["input_ids"].to(self.hf_module.device),
+             attention_mask=None,
+             output_hidden_states=False,
+         )
+         return outputs[self.output_key]
+
+
+ def load_t5(device: str | torch.device = "cuda", max_length: int = 512) -> HFEmbedder:
+     # max length 64, 128, 256 and 512 should work (if your sequence is short enough)
+     return HFEmbedder("lnyan/t5-v1_1-xxl-encoder", max_length=max_length, torch_dtype=torch.bfloat16).to(device)
+
+
+ def load_clip(device: str | torch.device = "cuda") -> HFEmbedder:
+     return HFEmbedder("openai/clip-vit-large-patch14", max_length=77, torch_dtype=torch.bfloat16).to(device)
+
+
+ @spaces.GPU(duration=20)
+ def load_encoders():
+     is_schnell = True
+     t5 = load_t5("cuda", max_length=256 if is_schnell else 512)
+     clip = load_clip("cuda")
+     return t5, clip
+
+
+ def b64(txt, vec):
+     # Pack both arrays into one compressed .npz archive, then base64-encode it
+     # so it can travel through a Gradio Textbox.
+     buffer = io.BytesIO()
+     np.savez_compressed(buffer, txt=txt, vec=vec)
+     buffer.seek(0)
+     encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
+     return encoded
+
+
+ t5, clip = load_encoders()
+
+ @spaces.GPU(duration=10)
+ def convert(prompt):
+     if isinstance(prompt, str):
+         prompt = [prompt]
+     txt = t5(prompt)    # T5 sequence embeddings (last_hidden_state)
+     vec = clip(prompt)  # CLIP pooled embedding (pooler_output)
+     # Cast to float32 before export: numpy has no bfloat16 dtype, so calling
+     # .numpy() on the raw bf16 tensors would raise a TypeError.
+     return b64(txt.float().cpu().numpy(), vec.float().cpu().numpy())
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown("""A workaround for flux-flax to fit into 40G VRAM""")
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(label="prompt")
+             convert_btn = gr.Button(value="Convert")
+         with gr.Column():
+             output = gr.Textbox(label="output")
+
+     convert_btn.click(convert, inputs=prompt, outputs=output, api_name="convert")
+
+
+ demo.launch()
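For consumers of this Space: the output Textbox carries a base64-encoded, compressed .npz archive holding two arrays, `txt` (the T5 sequence embeddings) and `vec` (the CLIP pooled embedding). The sketch below, which is not part of this commit, shows one way a client might call the `convert` endpoint and unpack the payload; the Space id is hypothetical, and `gradio_client` is assumed to be installed.

import base64
import io

import numpy as np
from gradio_client import Client

# Hypothetical Space id -- substitute the real one.
client = Client("lnyan/flux-text-encoder")
encoded = client.predict("a photo of a forest", api_name="/convert")

# Reverse b64(): base64 string -> bytes -> .npz -> numpy arrays.
data = np.load(io.BytesIO(base64.b64decode(encoded)))
txt = data["txt"]  # T5 last_hidden_state, e.g. (1, 256, 4096) with the defaults above
vec = data["vec"]  # CLIP pooler_output, e.g. (1, 768)

Since `convert` casts to float32 before packing, the arrays arrive as float32; a downstream flux pipeline would cast them back to its working dtype before use.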
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ torch
+ torchvision
+ opencv-python-headless
+ einops
+ huggingface_hub
+ transformers
+ tokenizers
+ sentencepiece
+ invisible-watermark