diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..e974b8ec0d4822de4851980eaf067259b6475b9f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -23,7 +23,6 @@
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+paddleocr/**/* filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f5f72f2dc86b5717d7c3dae290ca4335bf3b31da..555be4bf8fadf1379718d8dae94998c9f0c37278 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -12,7 +12,7 @@ repos:
         args: ["--allow-missing-credentials"]
       - id: detect-private-key
       - id: check-added-large-files
-        args: ["--maxkb=750"]
+        args: ["--maxkb=30000"]
       - id: debug-statements
   - repo: https://github.com/ambv/black
     rev: 22.3.0
diff --git a/Dockerfile b/Dockerfile
index da86efbde24318ea373904c2c98ab68cce5ded23..617c41d6ce56e24055b503af0ab95eb7012bdcac 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
     build-essential \
     python3.10-dev \
     python3-pip \
+    wget \
     git \
     ffmpeg \
     poppler-utils \
diff --git a/app.py b/app.py
index 5faf64fb4d6e726f26d9c0505f1ec432ea07bdf3..73863fa5c7ce1f8efa81539bb59ecf446304738f 100644
--- a/app.py
+++ b/app.py
@@ -1,6 +1,7 @@
-from utils import fix_problematic_imports
+from utils import fix_problematic_imports, prepare_env_mineru
 
 fix_problematic_imports()  # noqa
+prepare_env_mineru()  # noqa
 
 
 import time
diff --git a/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams b/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..9ff3226b17581322da2d8055e9513737ee9161a4
--- /dev/null
+++ b/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1efda1b80e174b4fcb168a035ac96c1af4938892bd86a55f300a6027105d08c
+size 539978
diff --git a/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams.info b/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d66b6be00c1cfaad98b41770623e5d64aadf32ee
--- /dev/null
+++ b/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee0c2eaa4c09814802bf520c01dfdbc1345dc2879dc9e67424d32c7b0ee88e59
+size 18545
diff --git a/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel b/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..4adb580e0a79536f8fed7bfa25f7ce1196fb0d50
--- /dev/null
+++ b/paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c4337ec61722a20b1dca2e5bfaffc313c0592bc89ad6e0d45168224186f6683
+size 1624487
diff --git a/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdiparams b/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..b966cc3e4144cfcaaee65452d6400ca52e1d6b1e
--- /dev/null
+++ b/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ee815e30cff43cb1057d33bf0d94193e4d4f1ae28451cad15b40be830df915
+size 4692937
diff --git a/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdiparams.info b/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..51fd97aa642d80581d096a0d15feb19af6912915
--- /dev/null
+++ b/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e0f29e5fa476126f9efe0dc69f7ca7be960705ad549385d30c9604193463e69
+size 23580
diff --git a/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdmodel b/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..8fcd91ec19e4a8058512ef2e9e18abb425a6ec56
--- /dev/null
+++ b/paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad68ed2768fe6c41166a5bc64680cc9f445390acb6528da449a4db2f7b90e14
+size 166367
diff --git a/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdiparams b/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..11421ed44c3f95dc46947a28c67d5f883301b4ca
--- /dev/null
+++ b/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83676ec730627ab4502f401410a4b6a3ce1c0bb98fa249b71db055b6bddae051
+size 2377917
diff --git a/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdiparams.info b/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..835fb5f0429bc51dc77b82e4916243f943e223ac
--- /dev/null
+++ b/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fe414d9eadf914bf44e3f9ba212988a6f26f364e4f87c6d0af57438ffffc0c4
+size 26392
diff --git a/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdmodel b/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..e1a10eb2579512f7bc48438732e10604d59da41c
--- /dev/null
+++ b/paddleocr/whl/det/en/en_PP-OCRv3_det_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4bfb1b05d9d1d5a760801eaf6d20180ef7e47bcc675fb17d1f3a89da5fef427
+size 1590133
diff --git a/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdiparams b/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..9f0a7cb706275c18ef92ef9873e10ce8a9658fc3
--- /dev/null
+++ b/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6119e639cade15873ac32eb4bee121b63c811c4d007d9f618baf680aef8b857
+size 2377917
diff --git a/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdiparams.info b/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..835fb5f0429bc51dc77b82e4916243f943e223ac
--- /dev/null
+++ b/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fe414d9eadf914bf44e3f9ba212988a6f26f364e4f87c6d0af57438ffffc0c4
+size 26392
diff --git a/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdmodel b/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..780c62fac69ce6c0adcd54f085ce0fd2e7ab07a2
--- /dev/null
+++ b/paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c49f664c75549678deedcaff8d11f8f2b9f1303ef6c72deccdb98a5ea2104381
+size 1441206
diff --git a/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..b9bf142394babbadb54fe2e869e8dc87a297b225
--- /dev/null
+++ b/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b388bf744151963a21b9ee9c60b4624ba90976ae5f1f1b023e594032b16ce40f
+size 7639215
diff --git a/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d44e9172071b9246ff8031aaf8efcf79720a4800
--- /dev/null
+++ b/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:710d9f3d7c503067ae708f4bc3adc6973c0d6391adb7a1470c36eb70da6b5b83
+size 102540
diff --git a/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..6d09bb16ff3d98025b8ddceff05d29dfd294f416
--- /dev/null
+++ b/paddleocr/whl/rec/arabic/arabic_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9a2f2e222937a934c5c23fd95dbba5b62a3f6d822a14f86cf8b3e9b93632c64
+size 169634
diff --git a/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..12d1017e96adf8ccbd7f58a5ac711c1f4de6285d
--- /dev/null
+++ b/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6dbfa63e7ee161688523c954e9e293f77dc24044db81e836ff9c7f103fd191a
+size 10766823
diff --git a/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..fed854ea54ed7dfb23992e31666f78d3415cd29e
--- /dev/null
+++ b/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0706cc669bdce382f582c139906076b170b112c51bf32573fe43438025777f12
+size 30643
diff --git a/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..3fd35faa111a1eee98815342bc6f5188c964295e
--- /dev/null
+++ b/paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf78f3898a004615e69c676259d8171bd7ae99000653b41fd72aaa36ae8bd304
+size 169485
diff --git a/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdiparams b/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..361520266acae34e1f96339cf8e4f6378405154e
--- /dev/null
+++ b/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53cdd14ef365d43a3de5357dea00935c56ae6d8f0f7d464b8aee187df8bdea47
+size 11081578
diff --git a/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d43fe3ecca2d535a4aa80f100168c51fa5e58cf2
--- /dev/null
+++ b/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf79f0b9689b4d6b8094d8bfe2481dc4b4d1699adb622568384695b5f56dc600
+size 21964
diff --git a/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdmodel b/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..da56b4f1b4fa78e8979ca93fd5170faadc66a6af
--- /dev/null
+++ b/paddleocr/whl/rec/chinese_cht/chinese_cht_PP-OCRv3_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:343da1722eae396309e365f9a98bca53f55ca71cf757743abaf83dce9d805bc0
+size 1222540
diff --git a/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdiparams b/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..3265b6d3bb97799cd574234999f0db7f172c302b
--- /dev/null
+++ b/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ef815afbb8970610618561946ce86faf60745ada64cd316ed34bfe34bdbf46f
+size 8934498
diff --git a/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d43fe3ecca2d535a4aa80f100168c51fa5e58cf2
--- /dev/null
+++ b/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf79f0b9689b4d6b8094d8bfe2481dc4b4d1699adb622568384695b5f56dc600
+size 21964
diff --git a/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdmodel b/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..14253fc0fc4694c933ff67bcce805b7b207b1a98
--- /dev/null
+++ b/paddleocr/whl/rec/cyrillic/cyrillic_PP-OCRv3_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:377bee205d0ba2d401cb463c300366ab87b2373b45dd9c8e3b560166b064beee
+size 1020920
diff --git a/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..a2d333e90fba2bac3c7eec84ff4f8125f4acbae1
--- /dev/null
+++ b/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3effa6613d040b79aac36699a3c97fe835fc91791db2010d614cc9ffb08ee415
+size 7642119
diff --git a/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d44e9172071b9246ff8031aaf8efcf79720a4800
--- /dev/null
+++ b/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:710d9f3d7c503067ae708f4bc3adc6973c0d6391adb7a1470c36eb70da6b5b83
+size 102540
diff --git a/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..7d4aa95e33d2a09971fde056aa8541f891122ac2
--- /dev/null
+++ b/paddleocr/whl/rec/devanagari/devanagari_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:854c6d41d186be07588f3cd4ced044270ad96afd7f31ee4f63a70e1d3714a9d1
+size 169634
diff --git a/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..a359b7254b8573ed9ea749fe5627b0874a80eb7f
--- /dev/null
+++ b/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75f64a1ffb70c56b7a25655963ca16f5bf3286202e3f52ac972bee05cdee2f56
+size 7607269
diff --git a/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d44e9172071b9246ff8031aaf8efcf79720a4800
--- /dev/null
+++ b/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:710d9f3d7c503067ae708f4bc3adc6973c0d6391adb7a1470c36eb70da6b5b83
+size 102540
diff --git a/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..ed2bbf17e34265c637455e51a21f0b565da3e556
--- /dev/null
+++ b/paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85b952f05f709af259cfe4254012aa7208bef0998f71f57a15495446f25ccd43
+size 2517366
diff --git a/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..8811a2b8f179391feaabd32ead4a05d3d4c27bb7
--- /dev/null
+++ b/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6e9b7395bb8476e85a15776aa09445c483778c06a512921cbc9ba81c6a2719a
+size 9690407
diff --git a/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d44e9172071b9246ff8031aaf8efcf79720a4800
--- /dev/null
+++ b/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:710d9f3d7c503067ae708f4bc3adc6973c0d6391adb7a1470c36eb70da6b5b83
+size 102540
diff --git a/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..5cf3e7a7d36dae7d2cd8a84a4d71d85964bbd1fb
--- /dev/null
+++ b/paddleocr/whl/rec/japan/japan_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3339a79fa88863f639a00cd57e973cc278fa46c3eb369b88cb24bb06c96b89d
+size 169634
diff --git a/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..261896cee0f2727c1dfe804a70249b23fa462cdc
--- /dev/null
+++ b/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:580fe5bc0a45956569bc63bda99b27f33505f20e519fa43517d1d5b9bcbb029b
+size 7635343
diff --git a/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d44e9172071b9246ff8031aaf8efcf79720a4800
--- /dev/null
+++ b/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:710d9f3d7c503067ae708f4bc3adc6973c0d6391adb7a1470c36eb70da6b5b83
+size 102540
diff --git a/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..3d467b61deb0c3620f3f8784e783982233e227d5
--- /dev/null
+++ b/paddleocr/whl/rec/ka/ka_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb23a7045f9f03c414f727c75752660d51760f02da2023e258bcc014f585f8e8
+size 169634
diff --git a/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..aa2e87db731b79c3eeeb4639757feda0d7d3c786
--- /dev/null
+++ b/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea4f4762582336e9b85653b016c5b1091223c947fafd282161fa3145ecb72d1d
+size 23920621
diff --git a/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..e6ac749d5398cd5a89468df258c07539af4f50b9
--- /dev/null
+++ b/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:533380ead7fe161fc32ac0bdc966e65c18c0d9d1a16088736c4aa3c628701374
+size 95688
diff --git a/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..8efa630b94aa70556d04e83105c343922418c331
--- /dev/null
+++ b/paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:992eed58af98ed6125065027e52fbc69b570e6c2e74610ec240e8a2436151ada
+size 353760
diff --git a/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdiparams b/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..1e73c593f974a5ff2ed51a320fca12655fba12f6
--- /dev/null
+++ b/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa6f22a8b2a669c7d5b4dfd2607491abd4cd2467adf7077c9cc70ce8c47b61d9
+size 8940218
diff --git a/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..d43fe3ecca2d535a4aa80f100168c51fa5e58cf2
--- /dev/null
+++ b/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf79f0b9689b4d6b8094d8bfe2481dc4b4d1699adb622568384695b5f56dc600
+size 21964
diff --git a/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdmodel b/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..204ae30cb8925b0620fac2078bd1e12e78988d41
--- /dev/null
+++ b/paddleocr/whl/rec/latin/latin_PP-OCRv3_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23e11a537d8cd7ebab669a9e48fb12f49c295ca9ecde398c6b5460830bd174b8
+size 1201877
diff --git a/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..d1ad0a2436e5069ec1718ce63d7094ee412455bf
--- /dev/null
+++ b/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0ae170098a2f706de3c60496f5730b5ea4afe1d6e679a67ab6f89aa1018ffde
+size 22197581
diff --git a/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..e6ac749d5398cd5a89468df258c07539af4f50b9
--- /dev/null
+++ b/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:533380ead7fe161fc32ac0bdc966e65c18c0d9d1a16088736c4aa3c628701374
+size 95688
diff --git a/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..8587915fb395a790fcffa86cd2e1cfb091cdf6b4
--- /dev/null
+++ b/paddleocr/whl/rec/ta/ta_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42ddd121b43ab9ef399e59705bcde6c56891a531c6df87a467aa99c5db54926f
+size 353760
diff --git a/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdiparams b/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdiparams
new file mode 100644
index 0000000000000000000000000000000000000000..97025ea77c65340fbacc2e187d3c80db85d37886
--- /dev/null
+++ b/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdiparams
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6efc7c74d4ff86b4187ece2f3b4f6ecd775c3a780b82d27d39a9c506d6e9eb42
+size 22208713
diff --git a/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdiparams.info b/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdiparams.info
new file mode 100644
index 0000000000000000000000000000000000000000..e6ac749d5398cd5a89468df258c07539af4f50b9
--- /dev/null
+++ b/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdiparams.info
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:533380ead7fe161fc32ac0bdc966e65c18c0d9d1a16088736c4aa3c628701374
+size 95688
diff --git a/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdmodel b/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdmodel
new file mode 100644
index 0000000000000000000000000000000000000000..91727e2e098cc364f6202c1c88e265166b8694f5
--- /dev/null
+++ b/paddleocr/whl/rec/te/te_PP-OCRv4_rec_infer/inference.pdmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f07501e6a3e96aa065ed5a656c8aee5bff9694ac8cb51826178dd4f34e884333
+size 353760
diff --git a/resources/fasttext-langdetect/lid.176.ftz b/resources/fasttext-langdetect/lid.176.ftz
new file mode 100644
index 0000000000000000000000000000000000000000..54ad911fadc26c1519c7043b6d596059b4116e66
--- /dev/null
+++ b/resources/fasttext-langdetect/lid.176.ftz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f3472cfe8738a7b6099e8e999c3cbfae0dcd15696aac7d7738a8039db603e83
+size 938013
diff --git a/resources/model_config/UniMERNet/demo.yaml b/resources/model_config/UniMERNet/demo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41349a7a5c1aad0cadc1861b3803eeef2f180fc7
--- /dev/null
+++ b/resources/model_config/UniMERNet/demo.yaml
@@ -0,0 +1,46 @@
+model:
+  arch: unimernet
+  model_type: unimernet
+  model_config:
+    model_name: ./models/unimernet_base
+    max_seq_len: 1536
+
+  load_pretrained: True
+  pretrained: "./models/unimernet_base/pytorch_model.pth"
+  tokenizer_config:
+    path: ./models/unimernet_base
+
+datasets:
+  formula_rec_eval:
+    vis_processor:
+      eval:
+        name: "formula_image_eval"
+        image_size:
+          - 192
+          - 672
+
+run:
+  runner: runner_iter
+  task: unimernet_train
+
+  batch_size_train: 64
+  batch_size_eval: 64
+  num_workers: 1
+
+  iters_per_inner_epoch: 2000
+  max_iters: 60000
+
+  seed: 42
+  output_dir: "../output/demo"
+
+  evaluate: True
+  test_splits: ["eval"]
+
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+  distributed_type: ddp # or fsdp when training an LLM
+
+  generate_cfg:
+    temperature: 0.0
diff --git a/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml b/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b91b99ddb0a9db5b1cbfb521389faff08d278b95
--- /dev/null
+++ b/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml
@@ -0,0 +1,351 @@
+AUG:
+  DETR: true
+CACHE_DIR: ~/cache/huggingface
+CUDNN_BENCHMARK: false
+DATALOADER:
+  ASPECT_RATIO_GROUPING: true
+  FILTER_EMPTY_ANNOTATIONS: false
+  NUM_WORKERS: 4
+  REPEAT_THRESHOLD: 0.0
+  SAMPLER_TRAIN: TrainingSampler
+DATASETS:
+  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
+  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
+  PROPOSAL_FILES_TEST: []
+  PROPOSAL_FILES_TRAIN: []
+  TEST:
+    - scihub_train
+  TRAIN:
+    - scihub_train
+GLOBAL:
+  HACK: 1.0
+ICDAR_DATA_DIR_TEST: ""
+ICDAR_DATA_DIR_TRAIN: ""
+INPUT:
+  CROP:
+    ENABLED: true
+    SIZE:
+      - 384
+      - 600
+    TYPE: absolute_range
+  FORMAT: RGB
+  MASK_FORMAT: polygon
+  MAX_SIZE_TEST: 1333
+  MAX_SIZE_TRAIN: 1333
+  MIN_SIZE_TEST: 800
+  MIN_SIZE_TRAIN:
+    - 480
+    - 512
+    - 544
+    - 576
+    - 608
+    - 640
+    - 672
+    - 704
+    - 736
+    - 768
+    - 800
+  MIN_SIZE_TRAIN_SAMPLING: choice
+  RANDOM_FLIP: horizontal
+MODEL:
+  ANCHOR_GENERATOR:
+    ANGLES:
+      - - -90
+        - 0
+        - 90
+    ASPECT_RATIOS:
+      - - 0.5
+        - 1.0
+        - 2.0
+    NAME: DefaultAnchorGenerator
+    OFFSET: 0.0
+    SIZES:
+      - - 32
+      - - 64
+      - - 128
+      - - 256
+      - - 512
+  BACKBONE:
+    FREEZE_AT: 2
+    NAME: build_vit_fpn_backbone
+  CONFIG_PATH: ""
+  DEVICE: cuda
+  FPN:
+    FUSE_TYPE: sum
+    IN_FEATURES:
+      - layer3
+      - layer5
+      - layer7
+      - layer11
+    NORM: ""
+    OUT_CHANNELS: 256
+  IMAGE_ONLY: true
+  KEYPOINT_ON: false
+  LOAD_PROPOSALS: false
+  MASK_ON: true
+  META_ARCHITECTURE: VLGeneralizedRCNN
+  PANOPTIC_FPN:
+    COMBINE:
+      ENABLED: true
+      INSTANCES_CONFIDENCE_THRESH: 0.5
+      OVERLAP_THRESH: 0.5
+      STUFF_AREA_LIMIT: 4096
+    INSTANCE_LOSS_WEIGHT: 1.0
+  PIXEL_MEAN:
+    - 127.5
+    - 127.5
+    - 127.5
+  PIXEL_STD:
+    - 127.5
+    - 127.5
+    - 127.5
+  PROPOSAL_GENERATOR:
+    MIN_SIZE: 0
+    NAME: RPN
+  RESNETS:
+    DEFORM_MODULATED: false
+    DEFORM_NUM_GROUPS: 1
+    DEFORM_ON_PER_STAGE:
+      - false
+      - false
+      - false
+      - false
+    DEPTH: 50
+    NORM: FrozenBN
+    NUM_GROUPS: 1
+    OUT_FEATURES:
+      - res4
+    RES2_OUT_CHANNELS: 256
+    RES5_DILATION: 1
+    STEM_OUT_CHANNELS: 64
+    STRIDE_IN_1X1: true
+    WIDTH_PER_GROUP: 64
+  RETINANET:
+    BBOX_REG_LOSS_TYPE: smooth_l1
+    BBOX_REG_WEIGHTS:
+      - 1.0
+      - 1.0
+      - 1.0
+      - 1.0
+    FOCAL_LOSS_ALPHA: 0.25
+    FOCAL_LOSS_GAMMA: 2.0
+    IN_FEATURES:
+      - p3
+      - p4
+      - p5
+      - p6
+      - p7
+    IOU_LABELS:
+      - 0
+      - -1
+      - 1
+    IOU_THRESHOLDS:
+      - 0.4
+      - 0.5
+    NMS_THRESH_TEST: 0.5
+    NORM: ""
+    NUM_CLASSES: 10
+    NUM_CONVS: 4
+    PRIOR_PROB: 0.01
+    SCORE_THRESH_TEST: 0.05
+    SMOOTH_L1_LOSS_BETA: 0.1
+    TOPK_CANDIDATES_TEST: 1000
+  ROI_BOX_CASCADE_HEAD:
+    BBOX_REG_WEIGHTS:
+      - - 10.0
+        - 10.0
+        - 5.0
+        - 5.0
+      - - 20.0
+        - 20.0
+        - 10.0
+        - 10.0
+      - - 30.0
+        - 30.0
+        - 15.0
+        - 15.0
+    IOUS:
+      - 0.5
+      - 0.6
+      - 0.7
+  ROI_BOX_HEAD:
+    BBOX_REG_LOSS_TYPE: smooth_l1
+    BBOX_REG_LOSS_WEIGHT: 1.0
+    BBOX_REG_WEIGHTS:
+      - 10.0
+      - 10.0
+      - 5.0
+      - 5.0
+    CLS_AGNOSTIC_BBOX_REG: true
+    CONV_DIM: 256
+    FC_DIM: 1024
+    NAME: FastRCNNConvFCHead
+    NORM: ""
+    NUM_CONV: 0
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+    POOLER_SAMPLING_RATIO: 0
+    POOLER_TYPE: ROIAlignV2
+    SMOOTH_L1_BETA: 0.0
+    TRAIN_ON_PRED_BOXES: false
+  ROI_HEADS:
+    BATCH_SIZE_PER_IMAGE: 512
+    IN_FEATURES:
+      - p2
+      - p3
+      - p4
+      - p5
+    IOU_LABELS:
+      - 0
+      - 1
+    IOU_THRESHOLDS:
+      - 0.5
+    NAME: CascadeROIHeads
+    NMS_THRESH_TEST: 0.5
+    NUM_CLASSES: 10
+    POSITIVE_FRACTION: 0.25
+    PROPOSAL_APPEND_GT: true
+    SCORE_THRESH_TEST: 0.05
+  ROI_KEYPOINT_HEAD:
+    CONV_DIMS:
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+      - 512
+    LOSS_WEIGHT: 1.0
+    MIN_KEYPOINTS_PER_IMAGE: 1
+    NAME: KRCNNConvDeconvUpsampleHead
+    NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
+    NUM_KEYPOINTS: 17
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 0
+    POOLER_TYPE: ROIAlignV2
+  ROI_MASK_HEAD:
+    CLS_AGNOSTIC_MASK: false
+    CONV_DIM: 256
+    NAME: MaskRCNNConvUpsampleHead
+    NORM: ""
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 0
+    POOLER_TYPE: ROIAlignV2
+  RPN:
+    BATCH_SIZE_PER_IMAGE: 256
+    BBOX_REG_LOSS_TYPE: smooth_l1
+    BBOX_REG_LOSS_WEIGHT: 1.0
+    BBOX_REG_WEIGHTS:
+      - 1.0
+      - 1.0
+      - 1.0
+      - 1.0
+    BOUNDARY_THRESH: -1
+    CONV_DIMS:
+      - -1
+    HEAD_NAME: StandardRPNHead
+    IN_FEATURES:
+      - p2
+      - p3
+      - p4
+      - p5
+      - p6
+    IOU_LABELS:
+      - 0
+      - -1
+      - 1
+    IOU_THRESHOLDS:
+      - 0.3
+      - 0.7
+    LOSS_WEIGHT: 1.0
+    NMS_THRESH: 0.7
+    POSITIVE_FRACTION: 0.5
+    POST_NMS_TOPK_TEST: 1000
+    POST_NMS_TOPK_TRAIN: 2000
+    PRE_NMS_TOPK_TEST: 1000
+    PRE_NMS_TOPK_TRAIN: 2000
+    SMOOTH_L1_BETA: 0.0
+  SEM_SEG_HEAD:
+    COMMON_STRIDE: 4
+    CONVS_DIM: 128
+    IGNORE_VALUE: 255
+    IN_FEATURES:
+      - p2
+      - p3
+      - p4
+      - p5
+    LOSS_WEIGHT: 1.0
+    NAME: SemSegFPNHead
+    NORM: GN
+    NUM_CLASSES: 10
+  VIT:
+    DROP_PATH: 0.1
+    IMG_SIZE:
+      - 224
+      - 224
+    NAME: layoutlmv3_base
+    OUT_FEATURES:
+      - layer3
+      - layer5
+      - layer7
+      - layer11
+    POS_TYPE: abs
+  WEIGHTS:
+OUTPUT_DIR:
+SCIHUB_DATA_DIR_TRAIN: ~/publaynet/layout_scihub/train
+SEED: 42
+SOLVER:
+  AMP:
+    ENABLED: true
+  BACKBONE_MULTIPLIER: 1.0
+  BASE_LR: 0.0002
+  BIAS_LR_FACTOR: 1.0
+  CHECKPOINT_PERIOD: 2000
+  CLIP_GRADIENTS:
+    CLIP_TYPE: full_model
+    CLIP_VALUE: 1.0
+    ENABLED: true
+    NORM_TYPE: 2.0
+  GAMMA: 0.1
+  GRADIENT_ACCUMULATION_STEPS: 1
+  IMS_PER_BATCH: 32
+  LR_SCHEDULER_NAME: WarmupCosineLR
+  MAX_ITER: 20000
+  MOMENTUM: 0.9
+  NESTEROV: false
+  OPTIMIZER: ADAMW
+  REFERENCE_WORLD_SIZE: 0
+  STEPS:
+    - 10000
+  WARMUP_FACTOR: 0.01
+  WARMUP_ITERS: 333
+  WARMUP_METHOD: linear
+  WEIGHT_DECAY: 0.05
+  WEIGHT_DECAY_BIAS: null
+  WEIGHT_DECAY_NORM: 0.0
+TEST:
+  AUG:
+    ENABLED: false
+    FLIP: true
+    MAX_SIZE: 4000
+    MIN_SIZES:
+      - 400
+      - 500
+      - 600
+      - 700
+      - 800
+      - 900
+      - 1000
+      - 1100
+      - 1200
+  DETECTIONS_PER_IMAGE: 100
+  EVAL_PERIOD: 1000
+  EXPECTED_RESULTS: []
+  KEYPOINT_OKS_SIGMAS: []
+  PRECISE_BN:
+    ENABLED: false
+    NUM_ITER: 200
+VERSION: 2
+VIS_PERIOD: 0
diff --git a/resources/model_config/model_configs.yaml b/resources/model_config/model_configs.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..05216117758feb0814a8dc9f76ce8878e3607719
--- /dev/null
+++ b/resources/model_config/model_configs.yaml
@@ -0,0 +1,8 @@
+weights:
+  layoutlmv3: Layout/LayoutLMv3/model_final.pth
+  doclayout_yolo: Layout/YOLO/doclayout_yolo_docstructbench_imgsz1280_2501.pt
+  yolo_v8_mfd: MFD/YOLO/yolo_v8_ft.pt
+  unimernet_small: MFR/unimernet_small_2501
+  struct_eqtable: TabRec/StructEqTable
+  tablemaster: TabRec/TableMaster
+  rapid_table: TabRec/RapidTable
diff --git a/resources/yolov11-langdetect/yolo_v11_ft.pt b/resources/yolov11-langdetect/yolo_v11_ft.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6019f20ad967c9ff8ef3943943974df2cf773509
--- /dev/null
+++ b/resources/yolov11-langdetect/yolo_v11_ft.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:755e2572e745f308a611b028a186ef212bc586b5e3a7d8b4acc46abfd556ea16
+size 3204667
diff --git a/utils.py b/utils.py
index fe5e0b4f50a0cb0804541a9dab64f7f4aab9f004..0211a2beeb55be9e1ac3c069da732572225c1b7e 100644
--- a/utils.py
+++ b/utils.py
@@ -47,3 +47,102 @@ def fix_problematic_imports():
     sys.modules[
         "magic_pdf.model.sub_modules.mfr.unimernet.Unimernet"
     ] = fake_unimernet_module
+
+
+def _run_best_effort(command):
+    """Run *command* (an argv list) without a shell and report success.
+
+    Failures are logged rather than raised so start-up can continue with
+    whatever an earlier run already fetched.
+    """
+    import logging
+    import subprocess
+
+    try:
+        subprocess.run(command, check=True)
+    except (OSError, subprocess.CalledProcessError) as err:
+        logging.getLogger(__name__).warning("Command %s failed: %s", command, err)
+        return False
+    return True
+
+
+def _copy_tree_best_effort(source, destination):
+    """Merge directory *source* into *destination* and report success.
+
+    Unlike ``cp -r``, an existing *destination* is merged into instead of
+    receiving a nested copy of *source*, so repeated calls are idempotent.
+    Problems are logged rather than raised.
+    """
+    import logging
+    import shutil
+
+    logger = logging.getLogger(__name__)
+    # Like ``cp``, refuse to invent missing parent directories.
+    if not destination.parent.is_dir():
+        logger.warning("Cannot copy %s: %s is missing", source, destination.parent)
+        return False
+    try:
+        shutil.copytree(source, destination, dirs_exist_ok=True)
+    except OSError as err:  # shutil.Error is an OSError subclass
+        logger.warning("Copying %s to %s failed: %s", source, destination, err)
+        return False
+    return True
+
+
+def prepare_env_mineru(device_mode="cuda"):
+    """Prepare the local environment for MinerU (``magic_pdf``).
+
+    Downloads the NLTK ``punkt_tab`` data, fetches and runs MinerU's model
+    download script, sets ``device-mode`` in ``~/magic-pdf.json`` and copies
+    the bundled ``resources`` and ``paddleocr`` directories into place.
+
+    Args:
+        device_mode: Value written to the ``device-mode`` config key.
+
+    Raises:
+        FileNotFoundError: If ``~/magic-pdf.json`` does not exist.
+    """
+    import json
+    from pathlib import Path
+
+    import nltk
+
+    # download nltk data
+    nltk.download("punkt_tab")
+
+    # download models
+    # NOTE(security): this executes a script fetched from a moving branch;
+    # consider pinning the URL to a commit hash.
+    script_name = "download_models_hf.py"
+    script_url = (
+        "https://github.com/opendatalab/MinerU/raw/"
+        "dev/scripts/download_models_hf.py"
+    )
+    # wget -O may leave an empty file on failure, so only run a fresh download.
+    if _run_best_effort(["wget", script_url, "-O", script_name]):
+        _run_best_effort(["python3", script_name])
+
+    home_path = Path.home()
+    config_path = home_path / "magic-pdf.json"
+
+    try:
+        with open(config_path, encoding="utf-8") as file:
+            data = json.load(file)
+    except FileNotFoundError as err:
+        raise FileNotFoundError(
+            f"{config_path} is missing; it is expected to be created by "
+            f"{script_name}, so the model download probably failed"
+        ) from err
+
+    data["device-mode"] = device_mode
+    with open(config_path, "w", encoding="utf-8") as file:
+        json.dump(data, file, indent=4)
+
+    # NOTE: assumes magic_pdf lives in the Python 3.10 user site-packages.
+    _copy_tree_best_effort(
+        Path("resources"),
+        home_path / ".local/lib/python3.10/site-packages/magic_pdf/resources",
+    )
+
+    # copy OCR model weight
+    _copy_tree_best_effort(Path("paddleocr"), home_path / ".paddleocr")