subinbabu committed
Commit aee91c4 · verified · 1 Parent(s): 225d9c8

update files

Files changed (4)
  1. 8000.png +0 -0
  2. app.py +57 -134
  3. requirements.txt +6 -6
  4. tokenizer_base.py +132 -0
8000.png ADDED
app.py CHANGED
@@ -1,146 +1,69 @@
-import gradio as gr
-import numpy as np
-import random
-from diffusers import DiffusionPipeline
 import torch
+import onnx
+import onnxruntime as rt
+from torchvision import transforms as T
+from PIL import Image
+from tokenizer_base import Tokenizer
+import pathlib
+import os
+import gradio as gr
+from huggingface_hub import Repository
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-if torch.cuda.is_available():
-    torch.cuda.max_memory_allocated(device=device)
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
-    pipe.enable_xformers_memory_efficient_attention()
-    pipe = pipe.to(device)
-else:
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True)
-    pipe = pipe.to(device)
-
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
-
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    generator = torch.Generator().manual_seed(seed)
-
-    image = pipe(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        guidance_scale = guidance_scale,
-        num_inference_steps = num_inference_steps,
-        width = width,
-        height = height,
-        generator = generator
-    ).images[0]
-
-    return image
-
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-
-css="""
-#col-container {
-    margin: 0 auto;
-    max-width: 520px;
-}
-"""
-
-if torch.cuda.is_available():
-    power_device = "GPU"
-else:
-    power_device = "CPU"
-
-with gr.Blocks(css=css) as demo:
-
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""
-        # Text-to-Image Gradio Template
-        Currently running on {power_device}.
-        """)
-
-        with gr.Row():
-
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-
-            run_button = gr.Button("Run", scale=0)
-
-        result = gr.Image(label="Result", show_label=False)
-
-        with gr.Accordion("Advanced Settings", open=False):
-
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )
-
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-            with gr.Row():
-
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512,
-                )
-
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512,
-                )
-
-            with gr.Row():
-
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,
-                )
-
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=12,
-                    step=1,
-                    value=2,
-                )
-
-        gr.Examples(
-            examples = examples,
-            inputs = [prompt]
-        )
-
-    run_button.click(
-        fn = infer,
-        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs = [result]
-    )
-
-demo.queue().launch()
+
+cwd = pathlib.Path(__file__).parent.resolve()
+#model_file = os.path.join(cwd,"secret_models","captcha.onnx")
+model_file = os.path.join(cwd,"captcha.onnx")
+img_size = (32,128)
+charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+tokenizer_base = Tokenizer(charset)
+
+def get_transform(img_size):
+    transforms = []
+    transforms.extend([
+        T.Resize(img_size, T.InterpolationMode.BICUBIC),
+        T.ToTensor(),
+        T.Normalize(0.5, 0.5)
+    ])
+    return T.Compose(transforms)
+
+def to_numpy(tensor):
+    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
+
+def initialize_model(model_file):
+    transform = get_transform(img_size)
+    # Onnx model loading
+    onnx_model = onnx.load(model_file)
+    onnx.checker.check_model(onnx_model)
+    ort_session = rt.InferenceSession(model_file)
+    return transform,ort_session
+
+def get_text(img_org):
+    # img_org = Image.open(image_path)
+    # Preprocess. Model expects a batch of images with shape: (B, C, H, W)
+    x = transform(img_org.convert('RGB')).unsqueeze(0)
+
+    # compute ONNX Runtime output prediction
+    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
+    logits = ort_session.run(None, ort_inputs)[0]
+    probs = torch.tensor(logits).softmax(-1)
+    preds, probs = tokenizer_base.decode(probs)
+    preds = preds[0]
+    print(preds)
+    return preds
+
+transform,ort_session = initialize_model(model_file=model_file)
+
+gr.Interface(
+    get_text,
+    inputs=gr.Image(type="pil"),
+    outputs=gr.outputs.Textbox(),
+    title="Text Captcha Reader",
+    examples=["8000.png"]
+).launch()
+
+# if __name__ == "__main__":
+#     image_path = "8000.png"
+#     preds,probs = get_text(image_path)
+#     print(preds[0])
 
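Note: a minimal sketch of the new inference path run outside Gradio, mirroring what get_text() above does. It assumes the committed captcha.onnx and 8000.png are in the working directory; all identifiers come from the diff above. (Importing app.py directly would call .launch() at module scope, which is why the sketch re-creates the pieces instead.)

# Sketch: exercise the ONNX captcha reader without launching the Gradio app.
# Assumes captcha.onnx and 8000.png (both part of this repo) are present.
import torch
import onnxruntime as rt
from PIL import Image
from torchvision import transforms as T
from tokenizer_base import Tokenizer

charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
tokenizer = Tokenizer(charset)
transform = T.Compose([
    T.Resize((32, 128), T.InterpolationMode.BICUBIC),  # model expects 32x128 input
    T.ToTensor(),
    T.Normalize(0.5, 0.5),                             # scale pixels to [-1, 1]
])

session = rt.InferenceSession("captcha.onnx")
x = transform(Image.open("8000.png").convert("RGB")).unsqueeze(0)  # (1, 3, 32, 128)
logits = session.run(None, {session.get_inputs()[0].name: x.numpy()})[0]
preds, probs = tokenizer.decode(torch.tensor(logits).softmax(-1))  # greedy decode
print(preds[0])                                                    # captcha text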
requirements.txt CHANGED
@@ -1,6 +1,6 @@
-accelerate
-diffusers
-invisible_watermark
-torch
-transformers
-xformers
+torch==1.11.0
+torchvision==0.12.0
+onnx==1.14.0
+onnxruntime==1.15.1
+Pillow==10.0.0
+numpy==1.24.4
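Note: the diffusers/SDXL stack is dropped entirely in favor of exact pins for the ONNX inference path. A quick sanity check of an installed environment (the versions are the pins above, nothing new):

# Print installed versions; expected output matches the pins above.
import torch, torchvision, onnx, onnxruntime, PIL, numpy
print(torch.__version__, torchvision.__version__, onnx.__version__,
      onnxruntime.__version__, PIL.__version__, numpy.__version__)
# expected: 1.11.0 0.12.0 1.14.0 1.15.1 10.0.0 1.24.4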
tokenizer_base.py ADDED
@@ -0,0 +1,132 @@
+import re
+from abc import ABC, abstractmethod
+from itertools import groupby
+from typing import List, Optional, Tuple
+
+import torch
+from torch import Tensor
+from torch.nn.utils.rnn import pad_sequence
+
+
+class CharsetAdapter:
+    """Transforms labels according to the target charset."""
+
+    def __init__(self, target_charset) -> None:
+        super().__init__()
+        self.charset = target_charset ###
+        self.lowercase_only = target_charset == target_charset.lower()
+        self.uppercase_only = target_charset == target_charset.upper()
+        # self.unsupported = f'[^{re.escape(target_charset)}]'
+
+    def __call__(self, label):
+        if self.lowercase_only:
+            label = label.lower()
+        elif self.uppercase_only:
+            label = label.upper()
+        return label
+
+
+class BaseTokenizer(ABC):
+
+    def __init__(self, charset: str, specials_first: tuple = (), specials_last: tuple = ()) -> None:
+        self._itos = specials_first + tuple(charset+'[UNK]') + specials_last
+        self._stoi = {s: i for i, s in enumerate(self._itos)}
+
+    def __len__(self):
+        return len(self._itos)
+
+    def _tok2ids(self, tokens: str) -> List[int]:
+        return [self._stoi[s] for s in tokens]
+
+    def _ids2tok(self, token_ids: List[int], join: bool = True) -> str:
+        tokens = [self._itos[i] for i in token_ids]
+        return ''.join(tokens) if join else tokens
+
+    @abstractmethod
+    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
+        """Encode a batch of labels to a representation suitable for the model.
+
+        Args:
+            labels: List of labels. Each can be of arbitrary length.
+            device: Create tensor on this device.
+
+        Returns:
+            Batched tensor representation padded to the max label length. Shape: N, L
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
+        """Internal method which performs the necessary filtering prior to decoding."""
+        raise NotImplementedError
+
+    def decode(self, token_dists: Tensor, raw: bool = False) -> Tuple[List[str], List[Tensor]]:
+        """Decode a batch of token distributions.
+
+        Args:
+            token_dists: softmax probabilities over the token distribution. Shape: N, L, C
+            raw: return unprocessed labels (will return list of list of strings)
+
+        Returns:
+            list of string labels (arbitrary length) and
+            their corresponding sequence probabilities as a list of Tensors
+        """
+        batch_tokens = []
+        batch_probs = []
+        for dist in token_dists:
+            probs, ids = dist.max(-1)  # greedy selection
+            if not raw:
+                probs, ids = self._filter(probs, ids)
+            tokens = self._ids2tok(ids, not raw)
+            batch_tokens.append(tokens)
+            batch_probs.append(probs)
+        return batch_tokens, batch_probs
+
+
+class Tokenizer(BaseTokenizer):
+    BOS = '[B]'
+    EOS = '[E]'
+    PAD = '[P]'
+
+    def __init__(self, charset: str) -> None:
+        specials_first = (self.EOS,)
+        specials_last = (self.BOS, self.PAD)
+        super().__init__(charset, specials_first, specials_last)
+        self.eos_id, self.bos_id, self.pad_id = [self._stoi[s] for s in specials_first + specials_last]
+
+    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
+        batch = [torch.as_tensor([self.bos_id] + self._tok2ids(y) + [self.eos_id], dtype=torch.long, device=device)
+                 for y in labels]
+        return pad_sequence(batch, batch_first=True, padding_value=self.pad_id)
+
+    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
+        ids = ids.tolist()
+        try:
+            eos_idx = ids.index(self.eos_id)
+        except ValueError:
+            eos_idx = len(ids)  # Nothing to truncate.
+        # Truncate after EOS
+        ids = ids[:eos_idx]
+        probs = probs[:eos_idx + 1]  # but include prob. for EOS (if it exists)
+        return probs, ids
+
+
+class CTCTokenizer(BaseTokenizer):
+    BLANK = '[B]'
+
+    def __init__(self, charset: str) -> None:
+        # BLANK uses index == 0 by default
+        super().__init__(charset, specials_first=(self.BLANK,))
+        self.blank_id = self._stoi[self.BLANK]
+
+    def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
+        # We use a padded representation since we don't want to use CUDNN's CTC implementation
+        batch = [torch.as_tensor(self._tok2ids(y), dtype=torch.long, device=device) for y in labels]
+        return pad_sequence(batch, batch_first=True, padding_value=self.blank_id)
+
+    def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
+        # Best path decoding:
+        ids = list(zip(*groupby(ids.tolist())))[0]  # Remove duplicate tokens
+        ids = [x for x in ids if x != self.blank_id]  # Remove BLANKs
+        # `probs` is just pass-through since all positions are considered part of the path
+        return probs, ids
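Note: a short round-trip sketch of the tokenizers above, using a toy charset "abc" for illustration (the app's real charset lives in app.py). It pokes at the private _stoi table purely to build a fake distribution:

# Toy-charset walkthrough of Tokenizer (BOS/EOS/PAD) and CTCTokenizer (blank).
import torch
from tokenizer_base import Tokenizer, CTCTokenizer

tok = Tokenizer("abc")
# _itos = ('[E]', 'a', 'b', 'c', '[', 'U', 'N', 'K', ']', '[B]', '[P]') --
# note that '[UNK]' is appended character-by-character, as written above.
print(tok.encode(["ab", "c"]))          # (2, 4) tensor: BOS + ids + EOS, PAD-filled

# decode() expects softmax probabilities of shape (N, L, C); build a batch
# whose argmax spells 'a', 'b', EOS.
dist = torch.zeros(1, 3, len(tok))
dist[0, 0, tok._stoi['a']] = 1.0
dist[0, 1, tok._stoi['b']] = 1.0
dist[0, 2, tok.eos_id] = 1.0
labels, probs = tok.decode(dist)
print(labels[0])                        # -> 'ab' (everything after EOS is cut)

# CTC best-path decoding collapses repeats, then drops blanks (id 0).
ctc = CTCTokenizer("abc")
dist = torch.zeros(1, 4, len(ctc))
for i, c in enumerate([1, 1, 0, 2]):    # 'a', 'a', blank, 'b'
    dist[0, i, c] = 1.0
print(ctc.decode(dist)[0][0])           # -> 'ab'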