deepdoctection

Runtime error

App Files Files Community

JaMe76 committed on Jul 4, 2023

Commit

a2e256b

1 Parent(s): 397d15f

update space to v.0.25

Browse files

Files changed (1) hide show

app.py +16 -8

app.py CHANGED Viewed

@@ -1,13 +1,17 @@
 import os
-os.system('pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html')
-credentials_kwargs={"aws_access_key_id": os.environ["ACCESS_KEY"],"aws_secret_access_key": os.environ["SECRET_KEY"]}
 # work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
-os.system("pip uninstall -y gradio")
-os.system("pip install gradio==3.4.1")
 os.system(os.environ["DD_ADDONS"])
 from os import getcwd, path, environ
 import deepdoctection as dd
 from deepdoctection.dataflow.serialize import DataFromList
@@ -16,6 +20,7 @@ from dd_addons.extern import PdfTextDetector, PostProcessor, get_xsl_path
 from dd_addons.pipe.conn import PostProcessorService
 import gradio as gr
 _DD_ONE = "conf_dd_one.yaml"
@@ -97,6 +102,9 @@ d_item = dd.D2FrcnnDetector(item_config_path, item_weights_path, categories_item
 pdf_text = PdfTextDetector(_XSL_PATH)
 # text detector
 tex_text = dd.TextractOcrDetector(**credentials_kwargs)
@@ -161,9 +169,9 @@ def build_gradio_analyzer():
         order = dd.TextOrderService(
             text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER,
-            floating_text_block_names=cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK,
-            text_block_names=cfg.TEXT_ORDERING.TEXT_BLOCK,
-            text_containers_to_text_block=cfg.TEXT_ORDERING.TEXT_CONTAINER_TO_TEXT_BLOCK
         )
         pipe_component_list.append(order)
@@ -182,7 +190,7 @@ def analyze_image(img, pdf, max_datapoints):
     analyzer = build_gradio_analyzer()
     if img is not None:
-        image = dd.Image(file_name="input.png", location="")
         image.image = img[:, :, ::-1]
         df = DataFromList(lst=[image])

 import os
+# SECURITY: the add-on install command (which embeds a GitHub access token) and
+# the AWS credentials must never be committed to the repository. Provide
+# DD_ADDONS, ACCESS_KEY and SECRET_KEY as Space secrets / environment variables
+# instead. Any credential that was previously committed here must be treated as
+# compromised and revoked/rotated at the provider.
+_REQUIRED_ENV_VARS = ("DD_ADDONS", "ACCESS_KEY", "SECRET_KEY")
+_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if name not in os.environ]
+if _missing_env_vars:
+    # Fail early with a clear message instead of a bare KeyError further down.
+    raise RuntimeError(
+        "Missing required environment variables: " + ", ".join(_missing_env_vars)
+    )
+# The AWS region is not a secret; keep the previous value as an overridable default.
+os.environ.setdefault("REGION", "eu-west-1")
+os.system('pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html')
 # work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
+#os.system("pip uninstall -y gradio")
+#os.system("pip install gradio==3.4.1")
 os.system(os.environ["DD_ADDONS"])
+import time
 from os import getcwd, path, environ
 import deepdoctection as dd
 from deepdoctection.dataflow.serialize import DataFromList
 from dd_addons.pipe.conn import PostProcessorService
 import gradio as gr
+from botocore.config import Config
 _DD_ONE = "conf_dd_one.yaml"
 pdf_text = PdfTextDetector(_XSL_PATH)
 # text detector
+credentials_kwargs={"aws_access_key_id": os.environ["ACCESS_KEY"],
+                    "aws_secret_access_key": os.environ["SECRET_KEY"],
+                    "config": Config(region_name=os.environ["REGION"])}
 tex_text = dd.TextractOcrDetector(**credentials_kwargs)
         order = dd.TextOrderService(
             text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER,
+            floating_text_block_categories=cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK,
+            text_block_categories=cfg.TEXT_ORDERING.TEXT_BLOCK,
+            include_residual_text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER_TO_TEXT_BLOCK
         )
         pipe_component_list.append(order)
     analyzer = build_gradio_analyzer()
     if img is not None:
+        image = dd.Image(file_name=str(time.time()).replace(".","") + ".png", location="")
         image.image = img[:, :, ::-1]
         df = DataFromList(lst=[image])