murtazadahmardeh committed on
Commit 133bc8b · 1 Parent(s): 047c82f

test first

Files changed (1)
  1. app.py +62 -55
app.py CHANGED
@@ -1,70 +1,77 @@
  import torch
- import onnx
- import onnxruntime as rt
  from torchvision import transforms as T
- from PIL import Image
- from tokenizer_base import Tokenizer
- import pathlib
- import os
  import gradio as gr
- from huggingface_hub import Repository

- repo = Repository(
-     local_dir="secret_models",
-     repo_type="model",
-     clone_from="docparser/captcha",
-     token=True
- )
- repo.git_pull()

- cwd = pathlib.Path(__file__).parent.resolve()
- model_file = os.path.join(cwd,"secret_models","captcha.onnx")
- img_size = (32,128)
- charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
- tokenizer_base = Tokenizer(charset)

- def get_transform(img_size):
-     transforms = []
-     transforms.extend([
-         T.Resize(img_size, T.InterpolationMode.BICUBIC),
          T.ToTensor(),
          T.Normalize(0.5, 0.5)
      ])
-     return T.Compose(transforms)

- def to_numpy(tensor):
-     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

- def initialize_model(model_file):
-     transform = get_transform(img_size)
-     # Onnx model loading
-     onnx_model = onnx.load(model_file)
-     onnx.checker.check_model(onnx_model)
-     ort_session = rt.InferenceSession(model_file)
-     return transform,ort_session

- def get_text(img_org):
-     # img_org = Image.open(image_path)
-     # Preprocess. Model expects a batch of images with shape: (B, C, H, W)
-     x = transform(img_org.convert('RGB')).unsqueeze(0)

-     # compute ONNX Runtime output prediction
-     ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
-     logits = ort_session.run(None, ort_inputs)[0]
-     probs = torch.tensor(logits).softmax(-1)
-     preds, probs = tokenizer_base.decode(probs)
-     preds = preds[0]
-     print(preds)
-     return preds

- transform,ort_session = initialize_model(model_file=model_file)

- gr.Interface(
-     get_text,
-     inputs=gr.Image(type="pil"),
-     outputs=gr.outputs.Textbox(),
-     title="Text Captcha Reader",
-     examples=["8000.png","11JW29.png","2a8486.jpg","2nbcx.png",
-               "000679.png","000HU.png","00Uga.png.jpg","00bAQwhAZU.jpg",
-               "00h57kYf.jpg","0EoHdtVb.png","0JS21.png","0p98z.png","10010.png"]
- ).launch()
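For reference, the removed version above is a standard onnxruntime pipeline: preprocess with torchvision, run the ONNX session on a numpy batch, then decode with the project's Tokenizer. A minimal sketch of how that old path was driven, reusing the removed helpers (the model and image paths are placeholders, not files guaranteed to exist):

from PIL import Image

# Load the ONNX session and the matching preprocessing transform (helpers from the removed code).
transform, ort_session = initialize_model(model_file="secret_models/captcha.onnx")

# Decode a single captcha image; "sample_captcha.png" is a hypothetical local file.
print(get_text(Image.open("sample_captcha.png")))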
 
 
 
 
 
  import torch
  from torchvision import transforms as T
  import gradio as gr

+ class App:

+     title = 'Scene Text Recognition with<br/>Permuted Autoregressive Sequence Models'
+     models = ['parseq', 'parseq_tiny', 'abinet', 'crnn', 'trba', 'vitstr']

+     def __init__(self):
+         self._model_cache = {}
+         self._preprocess = T.Compose([
+             T.Resize((32, 128), T.InterpolationMode.BICUBIC),
              T.ToTensor(),
              T.Normalize(0.5, 0.5)
          ])

+     def _get_model(self, name):
+         if name in self._model_cache:
+             return self._model_cache[name]
+         model = torch.hub.load('baudm/parseq', name, pretrained=True).eval()
+         self._model_cache[name] = model
+         return model

+     @torch.inference_mode()
+     def __call__(self, model_name, image):
+         if image is None:
+             return '', []
+         model = self._get_model(model_name)
+         image = self._preprocess(image.convert('RGB')).unsqueeze(0)
+         # Greedy decoding
+         pred = model(image).softmax(-1)
+         label, _ = model.tokenizer.decode(pred)
+         raw_label, raw_confidence = model.tokenizer.decode(pred, raw=True)
+         # Format confidence values
+         max_len = 25 if model_name == 'crnn' else len(label[0]) + 1
+         conf = list(map('{:0.1f}'.format, raw_confidence[0][:max_len].tolist()))
+         return label[0], [raw_label[0][:max_len], conf]


+ def main():
+     app = App()

+     with gr.Blocks(analytics_enabled=False, title=app.title.replace('<br/>', ' ')) as demo:
+         gr.Markdown(f"""
+         <div align="center">
+         # {app.title}
+         [![GitHub](https://img.shields.io/badge/baudm-parseq-blue?logo=github)](https://github.com/baudm/parseq)
+         </div>
+         To use this interactive demo for PARSeq and reproduced models:
+         1. Select which model you want to use.
+         2. Upload your own cropped image (or select from the given examples), or sketch on the canvas.
+         3. Click **Read Text**.
+         *NOTE*: None of these models were trained on handwritten text datasets.
+         """)
+         model_name = gr.Radio(app.models, value=app.models[0], label='The STR model to use')
+         with gr.Tabs():
+             with gr.TabItem('Image Upload'):
+                 image_upload = gr.Image(type='pil', source='upload', label='Image')
+                 read_upload = gr.Button('Read Text')
+             with gr.TabItem('Canvas Sketch'):
+                 image_canvas = gr.Image(type='pil', source='canvas', label='Sketch')
+                 read_canvas = gr.Button('Read Text')

+         output = gr.Textbox(max_lines=1, label='Model output')
+         #adv_output = gr.Checkbox(label='Show detailed output')
+         raw_output = gr.Dataframe(row_count=2, col_count=0, label='Raw output with confidence values ([0, 1] interval; [B] - BLANK token; [E] - EOS token)')
+
+         read_upload.click(app, inputs=[model_name, image_upload], outputs=[output, raw_output])
+         read_canvas.click(app, inputs=[model_name, image_canvas], outputs=[output, raw_output])
+         #adv_output.change(lambda x: gr.update(visible=x), inputs=adv_output, outputs=raw_output)
+
+     demo.launch()
+
+
+ if __name__ == '__main__':
+     main()
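The rewritten app pulls pretrained checkpoints from torch.hub ('baudm/parseq') and keeps them in an in-process cache, so the recognition path can also be exercised without launching the Gradio UI. A minimal sketch, assuming network access for torch.hub and a local cropped text image whose filename here is a placeholder:

from PIL import Image

app = App()                           # builds the preprocessing pipeline and an empty model cache
image = Image.open('word_crop.png')   # placeholder path to a cropped text image
text, raw = app('parseq', image)      # the same callable the Read Text buttons invoke
print(text)                           # decoded string
print(raw)                            # [raw tokens, per-token confidence values]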