Commit ffde3b6
Parent(s): 9bed0a8

feat: add outputs

Files changed:
- Dockerfile +1 -1
- README.md +1 -1
- docker-compose.yml +1 -1
- src/app.py +165 -25
- src/samples/default.jpg (binary, deleted)
- src/samples/digidepo_10301438_0017.jpg (binary, added)
- src/samples/digidepo_1287221_00000002.jpg (binary, added)

Dockerfile
CHANGED
@@ -16,4 +16,4 @@ RUN pip install --no-cache-dir -r requirements.txt
 WORKDIR /home/user/app/src
 
 # Launch the app
-CMD ["
+CMD ["gradio", "app.py"]

README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: NDL Kotenseki OCR-Lite Gradio App
 emoji: 👀
 colorFrom: red
 colorTo: blue

docker-compose.yml
CHANGED
@@ -3,7 +3,7 @@ services:
   build:
     context: .
     dockerfile: Dockerfile
-  container_name: ndlkotenocr-lite-app
+  container_name: ndlkotenocr-lite-gradio-app
   volumes:
     - .:/home/user/app
   ports:

src/app.py
CHANGED
@@ -5,17 +5,23 @@ import os
 from rtmdet import RTMDet
 from parseq import PARSEQ
 from yaml import safe_load
+from ndl_parser import convert_to_xml_string3
+from concurrent.futures import ThreadPoolExecutor
+import xml.etree.ElementTree as ET
+from reading_order.xy_cut.eval import eval_xml
 
-
+from xml.dom import minidom
+import re
 
 # Model Heading and Description
-model_heading = "
-description = """
+model_heading = "NDL Kotenseki OCR-Lite Gradio App"
+description = """Upload an image or click an example image to use."""
 
-article = "<p style='text-align: center'
+article = "<p style='text-align: center'><a href=\"https://github.com/ndl-lab/ndlkotenocr-lite\">https://github.com/ndl-lab/ndlkotenocr-lite</a>.</p>"
 
 image_path = [
-    ['samples/
+    ['samples/digidepo_1287221_00000002.jpg'],
+    ['samples/digidepo_10301438_0017.jpg']
 ]
 
 # Functions to load models
@@ -37,8 +43,131 @@ def get_recognizer(weights_path, classes_path, device='cpu'):
         charlist = list(safe_load(f)["model"]["charset_train"])
     return PARSEQ(model_path=weights_path, charlist=charlist, device=device)
 
+def create_txt(recognizer, root, img):
+    alltextlist = []
+
+    targetdflist=[]
+
+    tatelinecnt=0
+    alllinecnt=0
+
+    with ThreadPoolExecutor(max_workers=4, thread_name_prefix="thread") as executor:
+        for lineobj in root.findall(".//LINE"):
+            xmin=int(lineobj.get("X"))
+            ymin=int(lineobj.get("Y"))
+            line_w=int(lineobj.get("WIDTH"))
+            line_h=int(lineobj.get("HEIGHT"))
+            if line_h>line_w:
+                tatelinecnt+=1
+            alllinecnt+=1
+            lineimg=img[ymin:ymin+line_h,xmin:xmin+line_w,:]
+            targetdflist.append(lineimg)
+        resultlines = executor.map(recognizer.read, targetdflist)
+
+        resultlines=list(resultlines)
+        alltextlist.append("\n".join(resultlines))
+
+    alltextstr=""
+    for text in alltextlist:
+        alltextstr+=text+"\n"
+    return alltextstr
+
+
+def create_xml(detections,classeslist,img_w,img_h,imgname, recognizer, img):
+    resultobj=[dict(),dict()]
+    resultobj[0][0]=list()
+    for i in range(16):
+        resultobj[1][i]=[]
+    for det in detections:
+        xmin,ymin,xmax,ymax=det["box"]
+        conf=det["confidence"]
+        if det["class_index"]==0:
+            resultobj[0][0].append([xmin,ymin,xmax,ymax])
+        resultobj[1][det["class_index"]].append([xmin,ymin,xmax,ymax,conf])
+
+    xmlstr=convert_to_xml_string3(img_w, img_h, imgname, classeslist, resultobj,score_thr = 0.3,min_bbox_size= 5,use_block_ad= False)
+    xmlstr="<OCRDATASET>"+xmlstr+"</OCRDATASET>"
+
+    root = ET.fromstring(xmlstr)
+    eval_xml(root, logger=None)
+
+    targetdflist=[]
+
+    tatelinecnt=0
+    alllinecnt=0
+
+    with ThreadPoolExecutor(max_workers=4, thread_name_prefix="thread") as executor:
+        for lineobj in root.findall(".//LINE"):
+            xmin=int(lineobj.get("X"))
+            ymin=int(lineobj.get("Y"))
+            line_w=int(lineobj.get("WIDTH"))
+            line_h=int(lineobj.get("HEIGHT"))
+            if line_h>line_w:
+                tatelinecnt+=1
+            alllinecnt+=1
+            lineimg=img[ymin:ymin+line_h,xmin:xmin+line_w,:]
+            targetdflist.append(lineimg)
+        resultlines = executor.map(recognizer.read, targetdflist)
+        resultlines=list(resultlines)
+
+    for idx,lineobj in enumerate(root.findall(".//LINE")):
+        lineobj.set("STRING",resultlines[idx])
+
+    return root
+
+def create_txt(root):
+    alltextlist=[]
+
+    for lineobj in root.findall(".//LINE"):
+        alltextlist.append(lineobj.get("STRING"))
+
+    return "\n".join(alltextlist)
+
+def create_xmlstr(root):
+    rough_string = ET.tostring(root, 'utf-8')
+    reparsed = minidom.parseString(rough_string)
+    pretty = re.sub(r"[\t ]+\n", "", reparsed.toprettyxml(indent="\t"))  # remove unneeded newlines left after indenting
+    pretty = pretty.replace(">\n\n\t<", ">\n\t<")  # remove unneeded blank lines
+    pretty = re.sub(r"\n\s*\n", "\n", pretty)  # collapse consecutive newlines (including blank lines) into one
+    return pretty
+
+def create_json(root):
+    resjsonarray=[]
+
+    img_w=int(root.find("PAGE").get("WIDTH"))
+    img_h=int(root.find("PAGE").get("HEIGHT"))
+    inputpath=root.find("PAGE").get("IMAGENAME")
+
+    for idx,lineobj in enumerate(root.findall(".//LINE")):
+
+        text = lineobj.get("STRING")
+
+        xmin=int(lineobj.get("X"))
+        ymin=int(lineobj.get("Y"))
+        line_w=int(lineobj.get("WIDTH"))
+        line_h=int(lineobj.get("HEIGHT"))
+        try:
+            conf=float(lineobj.get("CONF"))
+        except:
+            conf=0
+        jsonobj={"boundingBox": [[xmin,ymin],[xmin,ymin+line_h],[xmin+line_w,ymin],[xmin+line_w,ymin+line_h]],
+                 "id": idx,"isVertical": "true","text": text,"isTextline": "true","confidence": conf}
+        resjsonarray.append(jsonobj)
+
+    alljsonobj={
+        "contents":[resjsonarray],
+        "imginfo": {
+            "img_width": img_w,
+            "img_height": img_h,
+            "img_path":inputpath,
+            "img_name":os.path.basename(inputpath)
+        }
+    }
+
+    return alljsonobj
+
 # YOLO Inference Function
-def YOLOv11x_img_inference(image_path: str):
+def process(image_path: str):
     try:
         # Load the models
         detector = get_detector(
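
For reference, the JSON payload assembled by the new create_json() above has the shape sketched below. This example is added for this writeup and is not part of the commit; only the keys mirror the code, and every literal value is invented.

# Hypothetical example of the structure returned by create_json(); all values are made up.
example_json = {
    "contents": [[
        {
            "boundingBox": [[100, 200], [100, 1400], [180, 200], [180, 1400]],
            "id": 0,
            "isVertical": "true",
            "text": "...",
            "isTextline": "true",
            "confidence": 0.98
        }
    ]],
    "imginfo": {
        "img_width": 2000,
        "img_height": 2800,
        "img_path": "digidepo_1287221_00000002.jpg",
        "img_name": "digidepo_1287221_00000002.jpg"
    }
}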
@@ -58,38 +187,49 @@ def YOLOv11x_img_inference(image_path: str):
 
         # Object detection
         detections = detector.detect(npimg)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return result_json
+        classeslist=list(detector.classes.values())
+
+        img_h,img_w=npimg.shape[:2]
+        imgname=os.path.basename(image_path)
+
+        root = create_xml(detections, classeslist, img_w, img_h, imgname, recognizer, npimg)
+
+        alltext = create_txt(root)
+
+        result_json = create_json(root)
+
+        pil_image =detector.draw_detections(npimg, detections=detections)
+
+        return pil_image, alltext, create_xmlstr(root), result_json
     except Exception as e:
-
+
+        return [
+            Image.fromarray(np.zeros((100, 100), dtype=np.uint8)),
+            "Error",
+            "Error",
+            {}
+        ]
 
 # Gradio Inputs and Outputs
 inputs_image = gr.Image(type="filepath", label="Input Image")
-outputs_image =
+outputs_image = [
+    gr.Image(type="pil", label="Output Image"),
+    gr.TextArea(label="Output Text"),
+    gr.TextArea(label="Output XML"),
+    gr.JSON(label="Output JSON")
+]
 
 # Gradio Interface
 demo = gr.Interface(
-    fn=
+    fn=process,
     inputs=inputs_image,
     outputs=outputs_image,
     title=model_heading,
     description=description,
     examples=image_path,
    article=article,
-    cache_examples=False
+    cache_examples=False,
+    flagging_mode="never"
 )
 
 demo.launch(share=False, server_name="0.0.0.0")
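
Since the interface now returns four outputs, the endpoint can also be exercised programmatically. The snippet below is a hedged sketch added for this writeup, not part of the commit: it assumes the container is reachable on Gradio's default port 7860, a recent gradio_client release (with handle_file), and the default "/predict" endpoint name that gr.Interface registers.

# Hedged sketch: calling the new 4-output endpoint from Python.
# The URL, port, and "/predict" endpoint name are assumptions, not from the commit.
from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7860/")
out_image, out_text, out_xml, out_json = client.predict(
    handle_file("src/samples/digidepo_1287221_00000002.jpg"),
    api_name="/predict",
)
print(out_text)        # plain-text transcription, one line per detected LINE
print(out_xml[:300])   # pretty-printed <OCRDATASET> XML
print(out_json)        # per-line bounding boxes, text, and confidence values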

src/samples/default.jpg
DELETED
Binary file (445 kB)

src/samples/digidepo_10301438_0017.jpg
ADDED
Binary file

src/samples/digidepo_1287221_00000002.jpg
ADDED
Binary file