File size: 1,405 Bytes
d748bf5
 
 
ac7b15a
 
03b7a8b
ac7b15a
 
 
cfa812c
ac7b15a
 
6199455
ac7b15a
 
 
1543781
ac7b15a
 
 
 
72386ad
6c47f29
72386ad
95ea484
ac7b15a
cfa812c
95ea484
ac7b15a
72386ad
95ea484
ac7b15a
423104f
ac7b15a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import subprocess
import sys

# Install detectron2 at startup from the prebuilt wheel index (CUDA 10.2 /
# torch 1.9 build). This runs before the deepdoctection imports below --
# presumably because deepdoctection needs detectron2 at import time; confirm
# before moving it.
#
# pip is invoked through the running interpreter (`python -m pip`) with an
# argument list, so the package lands in this interpreter's environment and no
# shell is involved. `check=False` keeps the original best-effort behaviour:
# a failed install does not abort startup here.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "detectron2",
        "-f",
        "https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html",
    ],
    check=False,
)

from deepdoctection.dataflow import DataFromList
from deepdoctection import get_dd_analyzer
from deepdoctection import Image
import gradio as gr


def analyze_image(img):
    """Run the deepdoctection analyzer on one image and return its results.

    Args:
        img: Image array supplied by the Gradio image input. It is indexed as
            a 3-axis array and its last axis is reversed before analysis
            (presumably RGB -> BGR channel order -- TODO confirm).

    Returns:
        A 3-tuple ``(visualization, text, page_dict)``: the result of
        ``dp.viz(show_table_structure=False)``, the result of
        ``dp.get_text()``, and ``dp.as_dict()`` with its ``"image"`` entry
        removed.

    Raises:
        ValueError: If no image was supplied (``img`` is ``None``).
        RuntimeError: If the analyzer yields no result for the image.
    """
    # Gradio passes None when the form is submitted without an image; fail
    # with a clear message instead of a TypeError on the subscript below.
    if img is None:
        raise ValueError("No image provided: please upload an image to analyze.")

    # creating an image object and passing to the analyzer by using dataflows
    image = Image(file_name="input.png", location="")
    image.image = img[:, :, ::-1]

    df = DataFromList(lst=[image])

    # NOTE(review): the analyzer is rebuilt on every call. Building it once
    # and reusing it would likely be faster, but confirm it is safe to share
    # across concurrent requests before changing this.
    analyzer = get_dd_analyzer()

    df = analyzer.analyze(dataset_dataflow=df)
    df.reset_state()

    # A single image went in, so only the first result is consumed.
    dp = next(iter(df), None)
    if dp is None:
        raise RuntimeError("The analyzer returned no result for the input image.")

    out = dp.as_dict()
    # Drop the image entry from the dict output (presumably to keep the JSON
    # panel small); tolerate its absence.
    out.pop("image", None)

    return dp.viz(show_table_structure=False), dp.get_text(), out

# Gradio interface wiring: one image in; annotated image, plain text and a
# JSON view out (matching the 3-tuple returned by analyze_image).
# NOTE(review): gr.inputs / gr.outputs are Gradio's legacy namespaces while
# gr.JSON() is the newer component style -- consider migrating once the
# pinned Gradio version is confirmed.
inputs = [gr.inputs.Image(type='numpy', label="Original Image")]
outputs = [gr.outputs.Image(type="numpy", label="Output Image"), "text", gr.JSON()]

title = "Deepdoctection - A Document AI Package"
description = (
    "Demonstration of layout analysis and output of a document page. "
    "This demo uses the deepdoctection analyzer with Tesseract's OCR engine. "
    "Models detect text, titles, tables, figures and lists as well as table cells. "
    "Based on the layout it determines reading order and generates a JSON output."
)

# Example images offered in the UI; the files are expected next to this script.
examples = [['sample_1.jpg'],['sample_2.png']]

gr.Interface(analyze_image, inputs, outputs, title=title, description=description, examples=examples).launch()