deepdoctection

Runtime error

App Files Files Community

deepdoctection / app.py

JaMe76

Update app.py

4769339 almost 3 years ago

raw

history blame

2.9 kB

	import os
	import subprocess
	import sys

	# Install detectron2 at start-up from the pre-built wheel index below.
	# The install is invoked as "<this interpreter> -m pip" with an argument list:
	# this guarantees the package lands in the environment that is running this
	# script (a bare `pip` on PATH may belong to another interpreter) and avoids
	# passing a command string through the shell.
	_pip_result = subprocess.run(
	    [
	        sys.executable,
	        "-m",
	        "pip",
	        "install",
	        "detectron2",
	        "-f",
	        "https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html",
	    ],
	    check=False,  # best effort, as before: a failed install does not abort start-up
	)
	if _pip_result.returncode != 0:
	    # Surface the failure instead of silently discarding the exit status.
	    print(
	        "warning: installing detectron2 failed with exit code",
	        _pip_result.returncode,
	        file=sys.stderr,
	    )

	from deepdoctection.dataflow import DataFromList
	from deepdoctection import get_dd_analyzer
	import deepdoctection as dd
	import gradio as gr

	# Paths of the two YAML configuration files used by the analyzer. Both are
	# passed to `_maybe_copy_config_to_cache` inside `get_space_dd_analyzer`; only
	# the path returned for `_DD_ONE` is then loaded as the analyzer configuration.
	_DD_ONE = "deepdoctection/configs/conf_dd_one.yaml"
	_TESSERACT = "deepdoctection/configs/conf_tesseract.yaml"

	# Register the custom Detectron2 layout weights in deepdoctection's model
	# catalog. The registered key is the same string that `get_space_dd_analyzer`
	# assigns to `cfg.WEIGHTS.D2LAYOUT`.

	# Mapping from the model's category ids (as strings) to deepdoctection names.
	_layout_categories = {
	    "1": dd.names.C.TEXT,
	    "2": dd.names.C.TITLE,
	    "3": dd.names.C.LIST,
	    "4": dd.names.C.TAB,
	    "5": dd.names.C.FIG,
	}

	_layout_profile = dd.ModelProfile(
	    name="layout/model_final_inf_only.pt",
	    description="Detectron2 layout detection model trained on private datasets",
	    config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml",
	    size=[274632215],
	    tp_model=False,
	    # The hub repository id is read from the HF_REPO environment variable
	    # (None when the variable is not set).
	    hf_repo_id=os.environ.get("HF_REPO"),
	    hf_model_name="model_final_inf_only.pt",
	    hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
	    categories=_layout_categories,
	)

	dd.ModelCatalog.register("layout/model_final_inf_only.pt", _layout_profile)

	def get_space_dd_analyzer(tables=True, ocr=True, table_refinement=True, language=None):
	    """Build a deepdoctection analyzer that uses the custom layout model.

	    The configuration is loaded from the `_DD_ONE` YAML file, unfrozen,
	    overridden with the values below and frozen again before being handed to
	    `build_analyzer`.

	    The four parameters were previously read as undefined names (raising
	    ``NameError``); they are now keyword parameters so existing zero-argument
	    calls keep working.
	    NOTE(review): the defaults are assumed to mirror deepdoctection's own
	    ``get_dd_analyzer`` signature -- confirm against the pinned version.

	    Args:
	        tables: value written to ``cfg.TAB``.
	        ocr: value written to ``cfg.OCR``.
	        table_refinement: value written to ``cfg.TAB_REF``.
	        language: value written to ``cfg.LANG``.

	    Returns:
	        Whatever ``build_analyzer(cfg)`` returns for the assembled configuration.
	    """
	    # These helpers are not imported anywhere in the visible part of this file.
	    # NOTE(review): import paths are assumed from deepdoctection's package
	    # layout (analyzer/dd.py and utils/metacfg.py) -- confirm for the
	    # installed release.
	    from deepdoctection.analyzer.dd import (
	        _auto_select_lib_and_device,
	        _maybe_copy_config_to_cache,
	        build_analyzer,
	    )
	    from deepdoctection.utils.metacfg import set_config_by_yaml

	    # get a dd analyzer with a special layout model
	    lib, device = _auto_select_lib_and_device()
	    dd_one_config_path = _maybe_copy_config_to_cache(_DD_ONE)
	    # The Tesseract config is passed through the same helper, but its returned
	    # path is not used here.
	    _maybe_copy_config_to_cache(_TESSERACT)

	    # Set up of the configuration and logging
	    cfg = set_config_by_yaml(dd_one_config_path)

	    # Unfreeze so the overrides below can be assigned, then freeze again.
	    cfg.freeze(freezed=False)
	    cfg.LIB = lib
	    cfg.DEVICE = device
	    cfg.TAB = tables
	    cfg.TAB_REF = table_refinement
	    cfg.OCR = ocr
	    cfg.LANG = language
	    # Must match the key registered with dd.ModelCatalog at module level.
	    cfg.WEIGHTS.D2LAYOUT = "layout/model_final_inf_only.pt"
	    cfg.freeze()

	    return build_analyzer(cfg)



	def analyze_image(img):
	    """Run the document analyzer on one uploaded image.

	    Args:
	        img: the image delivered by the Gradio image input, which is declared
	            with ``type='numpy'``. The last axis is reversed before analysis
	            (presumably RGB -> BGR channel order; confirm against what
	            deepdoctection expects).

	    Returns:
	        A 2-tuple ``(visualisation, page_dict)``: the result of
	        ``dp.viz(show_table_structure=False)`` and the page's ``as_dict()``
	        output with its ``"image"`` entry removed.
	    """
	    # creating an image object and passing to the analyzer by using dataflows
	    image = dd.Image(file_name="input.png", location="")
	    image.image = img[:, :, ::-1]

	    df = dd.DataFromList(lst=[image])

	    # Reuse one analyzer across requests instead of rebuilding it (and
	    # reloading the models) on every call.
	    analyzer = _get_cached_analyzer()

	    df = analyzer.analyze(dataset_dataflow=df)
	    df.reset_state()
	    # Exactly one image was fed in, so only the first datapoint is consumed.
	    dp = next(iter(df))
	    out = dp.as_dict()
	    # Drop the "image" entry so it is not part of the JSON output.
	    out.pop("image")

	    return dp.viz(show_table_structure=False), out


	# Lazily created analyzer shared by all calls to `analyze_image`.
	_SPACE_ANALYZER = None


	def _get_cached_analyzer():
	    """Return the shared analyzer, building it on first use."""
	    global _SPACE_ANALYZER
	    if _SPACE_ANALYZER is None:
	        _SPACE_ANALYZER = get_space_dd_analyzer()
	    return _SPACE_ANALYZER

	# Gradio UI wiring. The top-level component classes are used throughout
	# (matching the existing gr.JSON()) instead of the deprecated `gr.inputs` /
	# `gr.outputs` namespaces.
	inputs = [gr.Image(type="numpy", label="Original Image")]
	outputs = [gr.Image(type="numpy", label="Output Image"), gr.JSON()]

	title = "Deepdoctection - A Document AI Package"
	description = "Demonstration of layout analysis and output of a document page. This demo uses the deepdoctection analyzer with Tesseract's OCR engine. Models detect text, titles, tables, figures and lists as well as table cells. Based on the layout it determines reading order and generates an JSON output."

	# Example images offered in the UI; each inner list is one set of inputs.
	examples = [['sample_1.jpg'], ['sample_2.png']]

	# Build the interface around `analyze_image` and start serving it.
	gr.Interface(
	    analyze_image,
	    inputs,
	    outputs,
	    title=title,
	    description=description,
	    examples=examples,
	).launch()