subinbabu committed
Commit aee91c4 · verified · 1 Parent(s): 225d9c8

update files

Files changed (4)
  1. 8000.png +0 -0
  2. app.py +57 -134
  3. requirements.txt +6 -6
  4. tokenizer_base.py +132 -0
8000.png ADDED
app.py CHANGED
@@ -1,146 +1,69 @@
-import gradio as gr
-import numpy as np
-import random
-from diffusers import DiffusionPipeline
 import torch
+import onnx
+import onnxruntime as rt
+from torchvision import transforms as T
+from PIL import Image
+from tokenizer_base import Tokenizer
+import pathlib
+import os
+import gradio as gr
+from huggingface_hub import Repository
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-if torch.cuda.is_available():
-    torch.cuda.max_memory_allocated(device=device)
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
-    pipe.enable_xformers_memory_efficient_attention()
-    pipe = pipe.to(device)
-else:
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True)
-    pipe = pipe.to(device)
-
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
-
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    generator = torch.Generator().manual_seed(seed)
-
-    image = pipe(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        guidance_scale = guidance_scale,
-        num_inference_steps = num_inference_steps,
-        width = width,
-        height = height,
-        generator = generator
-    ).images[0]
-
-    return image
-
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-
-css="""
-#col-container {
-    margin: 0 auto;
-    max-width: 520px;
-}
-"""
-
-if torch.cuda.is_available():
-    power_device = "GPU"
-else:
-    power_device = "CPU"
-
-with gr.Blocks(css=css) as demo:
-
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""
-        # Text-to-Image Gradio Template
-        Currently running on {power_device}.
-        """)
-
-        with gr.Row():
-
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-
-            run_button = gr.Button("Run", scale=0)
-
-        result = gr.Image(label="Result", show_label=False)
-
-        with gr.Accordion("Advanced Settings", open=False):
-
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )
-
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-            with gr.Row():
-
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512,
-                )
-
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512,
-                )
-
-            with gr.Row():
-
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,
-                )
-
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=12,
-                    step=1,
-                    value=2,
-                )
-
-        gr.Examples(
-            examples = examples,
-            inputs = [prompt]
-        )
-
-    run_button.click(
-        fn = infer,
-        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs = [result]
-    )
-
-demo.queue().launch()
+
+cwd = pathlib.Path(__file__).parent.resolve()
+#model_file = os.path.join(cwd,"secret_models","captcha.onnx")
+model_file = os.path.join(cwd,"captcha.onnx")
+img_size = (32,128)
+charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+tokenizer_base = Tokenizer(charset)
+
+def get_transform(img_size):
+    transforms = []
+    transforms.extend([
+        T.Resize(img_size, T.InterpolationMode.BICUBIC),
+        T.ToTensor(),
+        T.Normalize(0.5, 0.5)
+    ])
+    return T.Compose(transforms)
+
+def to_numpy(tensor):
+    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
+
+def initialize_model(model_file):
+    transform = get_transform(img_size)
+    # Onnx model loading
+    onnx_model = onnx.load(model_file)
+    onnx.checker.check_model(onnx_model)
+    ort_session = rt.InferenceSession(model_file)
+    return transform,ort_session
+
+def get_text(img_org):
+    # img_org = Image.open(image_path)
+    # Preprocess. Model expects a batch of images with shape: (B, C, H, W)
+    x = transform(img_org.convert('RGB')).unsqueeze(0)
+
+    # compute ONNX Runtime output prediction
+    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
+    logits = ort_session.run(None, ort_inputs)[0]
+    probs = torch.tensor(logits).softmax(-1)
+    preds, probs = tokenizer_base.decode(probs)
+    preds = preds[0]
+    print(preds)
+    return preds
+
+transform,ort_session = initialize_model(model_file=model_file)
+
+gr.Interface(
+    get_text,
+    inputs=gr.Image(type="pil"),
+    outputs=gr.outputs.Textbox(),
+    title="Text Captcha Reader",
+    examples=["8000.png"]
+).launch()
+
+# if __name__ == "__main__":
+#     image_path = "8000.png"
+#     preds,probs = get_text(image_path)
+#     print(preds[0])
 
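Note: a minimal sketch of the new inference path run outside Gradio, mirroring what get_text() above does. It assumes the committed captcha.onnx and 8000.png are in the working directory; all identifiers come from the diff above. (Importing app.py directly would call .launch() at module scope, which is why the sketch re-creates the pieces instead.)

# Sketch: exercise the ONNX captcha reader without launching the Gradio app.
# Assumes captcha.onnx and 8000.png (both part of this repo) are present.
import torch
import onnxruntime as rt
from PIL import Image
from torchvision import transforms as T
from tokenizer_base import Tokenizer

charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
tokenizer = Tokenizer(charset)
transform = T.Compose([
    T.Resize((32, 128), T.InterpolationMode.BICUBIC),  # model expects 32x128 input
    T.ToTensor(),
    T.Normalize(0.5, 0.5),                             # scale pixels to [-1, 1]
])

session = rt.InferenceSession("captcha.onnx")
x = transform(Image.open("8000.png").convert("RGB")).unsqueeze(0)  # (1, 3, 32, 128)
logits = session.run(None, {session.get_inputs()[0].name: x.numpy()})[0]
preds, probs = tokenizer.decode(torch.tensor(logits).softmax(-1))  # greedy decode
print(preds[0])                                                    # captcha text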
requirements.txt CHANGED
@@ -1,6 +1,6 @@
-accelerate
-diffusers
-invisible_watermark
-torch
-transformers
-xformers
+torch==1.11.0
+torchvision==0.12.0
+onnx==1.14.0
+onnxruntime==1.15.1
+Pillow==10.0.0
+numpy==1.24.4
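Note: the diffusers/SDXL stack is dropped entirely in favor of exact pins for the ONNX inference path. A quick sanity check of an installed environment (the versions are the pins above, nothing new):

# Print installed versions; expected output matches the pins above.
import torch, torchvision, onnx, onnxruntime, PIL, numpy
print(torch.__version__, torchvision.__version__, onnx.__version__,
      onnxruntime.__version__, PIL.__version__, numpy.__version__)
# expected: 1.11.0 0.12.0 1.14.0 1.15.1 10.0.0 1.24.4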
tokenizer_base.py ADDED
@@ -0,0 +1,132 @@
+import re
+from abc import ABC, abstractmethod
+from itertools import groupby
+from typing import List, Optional, Tuple
+
+import torch
+from torch import Tensor
+from torch.nn.utils.rnn import pad_sequence
+
+
+class CharsetAdapter:
+    """Transforms labels according to the target charset."""
+
+    def __init__(self, target_charset) -> None:
+        super().__init__()
+        self.charset = target_charset ###
+        self.lowercase_only = target_charset == target_charset.lower()
+        self.uppercase_only = target_charset == target_charset.upper()
+        # self.unsupported = f'[^{re.escape(target_charset)}]'
+
+    def __call__(self, label):
+        if self.lowercase_only:
+            label = label.lower()
+        elif self.uppercase_only:
+            label = label.upper()
+        return label
+
+
+class BaseTokenizer(ABC):
+
+    def __init__(self, charset: str, specials_first: tuple = (), specials_last: tuple = ()) -> None:
+        self._itos = specials_first + tuple(charset+'[UNK]') + specials_last
+        self._stoi = {s: i for i, s in enumerate(self._itos)}
+
+    def __len__(self):
+        return len(self._itos)
+
+    def _tok2ids(self, tokens: str) -> List[int]:
+        return [self._stoi[s] for s in tokens]
+
+    def _ids2tok(self, token_ids: List[int], join: bool = True) -> str:
+        tokens = [self._itos[i] for i in token_ids]
+        return ''.join(tokens) if join else tokens
+
+    @abstractmethod
+    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
+        """Encode a batch of labels to a representation suitable for the model.
+
+        Args:
+            labels: List of labels. Each can be of arbitrary length.
+            device: Create tensor on this device.
+
+        Returns:
+            Batched tensor representation padded to the max label length. Shape: N, L
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
+        """Internal method which performs the necessary filtering prior to decoding."""
+        raise NotImplementedError
+
+    def decode(self, token_dists: Tensor, raw: bool = False) -> Tuple[List[str], List[Tensor]]:
+        """Decode a batch of token distributions.
+
+        Args:
+            token_dists: softmax probabilities over the token distribution. Shape: N, L, C
+            raw: return unprocessed labels (will return list of list of strings)
+
+        Returns:
+            list of string labels (arbitrary length) and
+            their corresponding sequence probabilities as a list of Tensors
+        """
+        batch_tokens = []
+        batch_probs = []
+        for dist in token_dists:
+            probs, ids = dist.max(-1)  # greedy selection
+            if not raw:
+                probs, ids = self._filter(probs, ids)
+            tokens = self._ids2tok(ids, not raw)
+            batch_tokens.append(tokens)
+            batch_probs.append(probs)
+        return batch_tokens, batch_probs
+
+
+class Tokenizer(BaseTokenizer):
+    BOS = '[B]'
+    EOS = '[E]'
+    PAD = '[P]'
+
+    def __init__(self, charset: str) -> None:
+        specials_first = (self.EOS,)
+        specials_last = (self.BOS, self.PAD)
+        super().__init__(charset, specials_first, specials_last)
+        self.eos_id, self.bos_id, self.pad_id = [self._stoi[s] for s in specials_first + specials_last]
+
+    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
+        batch = [torch.as_tensor([self.bos_id] + self._tok2ids(y) + [self.eos_id], dtype=torch.long, device=device)
+                 for y in labels]
+        return pad_sequence(batch, batch_first=True, padding_value=self.pad_id)
+
+    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
+        ids = ids.tolist()
+        try:
+            eos_idx = ids.index(self.eos_id)
+        except ValueError:
+            eos_idx = len(ids)  # Nothing to truncate.
+        # Truncate after EOS
+        ids = ids[:eos_idx]
+        probs = probs[:eos_idx + 1]  # but include prob. for EOS (if it exists)
+        return probs, ids
+
+
+class CTCTokenizer(BaseTokenizer):
+    BLANK = '[B]'
+
+    def __init__(self, charset: str) -> None:
+        # BLANK uses index == 0 by default
+        super().__init__(charset, specials_first=(self.BLANK,))
+        self.blank_id = self._stoi[self.BLANK]
+
+    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
+        # We use a padded representation since we don't want to use CUDNN's CTC implementation
+        batch = [torch.as_tensor(self._tok2ids(y), dtype=torch.long, device=device) for y in labels]
+        return pad_sequence(batch, batch_first=True, padding_value=self.blank_id)
+
+    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
+        # Best path decoding:
+        ids = list(zip(*groupby(ids.tolist())))[0]  # Remove duplicate tokens
+        ids = [x for x in ids if x != self.blank_id]  # Remove BLANKs
+        # `probs` is just pass-through since all positions are considered part of the path
+        return probs, ids
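Note: a short round-trip sketch of the tokenizers above, using a toy charset "abc" for illustration (the app's real charset lives in app.py). It pokes at the private _stoi table purely to build a fake distribution:

# Toy-charset walkthrough of Tokenizer (BOS/EOS/PAD) and CTCTokenizer (blank).
import torch
from tokenizer_base import Tokenizer, CTCTokenizer

tok = Tokenizer("abc")
# _itos = ('[E]', 'a', 'b', 'c', '[', 'U', 'N', 'K', ']', '[B]', '[P]') --
# note that '[UNK]' is appended character-by-character, as written above.
print(tok.encode(["ab", "c"]))          # (2, 4) tensor: BOS + ids + EOS, PAD-filled

# decode() expects softmax probabilities of shape (N, L, C); build a batch
# whose argmax spells 'a', 'b', EOS.
dist = torch.zeros(1, 3, len(tok))
dist[0, 0, tok._stoi['a']] = 1.0
dist[0, 1, tok._stoi['b']] = 1.0
dist[0, 2, tok.eos_id] = 1.0
labels, probs = tok.decode(dist)
print(labels[0])                        # -> 'ab' (everything after EOS is cut)

# CTC best-path decoding collapses repeats, then drops blanks (id 0).
ctc = CTCTokenizer("abc")
dist = torch.zeros(1, 4, len(ctc))
for i, c in enumerate([1, 1, 0, 2]):    # 'a', 'a', blank, 'b'
    dist[0, i, c] = 1.0
print(ctc.decode(dist)[0][0])           # -> 'ab'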