nakamura196 committed on
Commit
ffde3b6
·
1 Parent(s): 9bed0a8

feat: add outputs

Browse files
Dockerfile CHANGED
@@ -16,4 +16,4 @@ RUN pip install --no-cache-dir -r requirements.txt
16
  WORKDIR /home/user/app/src
17
 
18
  # アプリを起動
19
- CMD ["python", "app.py"]
 
16
  WORKDIR /home/user/app/src
17
 
18
  # アプリを起動
19
+ CMD ["gradio", "app.py"]
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Ndlkotenocr Lite
3
  emoji: 👀
4
  colorFrom: red
5
  colorTo: blue
 
1
  ---
2
+ title: NDL Kotenseki OCR-Lite Gradio App
3
  emoji: 👀
4
  colorFrom: red
5
  colorTo: blue
docker-compose.yml CHANGED
@@ -3,7 +3,7 @@ services:
3
  build:
4
  context: .
5
  dockerfile: Dockerfile
6
- container_name: ndlkotenocr-lite-app
7
  volumes:
8
  - .:/home/user/app
9
  ports:
 
3
  build:
4
  context: .
5
  dockerfile: Dockerfile
6
+ container_name: ndlkotenocr-lite-gradio-app
7
  volumes:
8
  - .:/home/user/app
9
  ports:
src/app.py CHANGED
@@ -5,17 +5,23 @@ import os
5
  from rtmdet import RTMDet
6
  from parseq import PARSEQ
7
  from yaml import safe_load
 
 
 
 
8
 
9
-
 
10
 
11
  # Model Heading and Description
12
- model_heading = "YOLOv11x くずし字認識サービス(一文字)"
13
- description = """YOLOv11x くずし字認識サービス(一文字) Gradio demo for classification. Upload an image or click an example image to use."""
14
 
15
- article = "<p style='text-align: center'>YOLOv11x くずし字認識サービス(一文字) is a classification model trained on the <a href=\"https://lab.hi.u-tokyo.ac.jp/datasets/kuzushiji\">東京大学史料編纂所くずし字データセット</a>.</p>"
16
 
17
  image_path = [
18
- ['samples/default.jpg']
 
19
  ]
20
 
21
  # Functions to load models
@@ -37,8 +43,131 @@ def get_recognizer(weights_path, classes_path, device='cpu'):
37
  charlist = list(safe_load(f)["model"]["charset_train"])
38
  return PARSEQ(model_path=weights_path, charlist=charlist, device=device)
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # YOLO Inference Function
41
- def YOLOv11x_img_inference(image_path: str):
42
  try:
43
  # Load the models
44
  detector = get_detector(
@@ -58,38 +187,49 @@ def YOLOv11x_img_inference(image_path: str):
58
 
59
  # Object detection
60
  detections = detector.detect(npimg)
61
- result_json = []
62
-
63
- # Text recognition
64
- for det in detections:
65
- xmin, ymin, xmax, ymax = det["box"]
66
- line_img = npimg[int(ymin):int(ymax), int(xmin):int(xmax)]
67
- text = recognizer.read(line_img)
68
- result_json.append({
69
- "boundingBox": [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]],
70
- "text": text,
71
- "confidence": det["confidence"]
72
- })
73
-
74
- # Return results in JSON format
75
- return result_json
76
  except Exception as e:
77
- return {"error": str(e)}
 
 
 
 
 
 
78
 
79
  # Gradio Inputs and Outputs
80
  inputs_image = gr.Image(type="filepath", label="Input Image")
81
- outputs_image = gr.JSON(label="Output JSON")
 
 
 
 
 
82
 
83
  # Gradio Interface
84
  demo = gr.Interface(
85
- fn=YOLOv11x_img_inference,
86
  inputs=inputs_image,
87
  outputs=outputs_image,
88
  title=model_heading,
89
  description=description,
90
  examples=image_path,
91
  article=article,
92
- cache_examples=False
 
93
  )
94
 
95
  demo.launch(share=False, server_name="0.0.0.0")
 
5
  from rtmdet import RTMDet
6
  from parseq import PARSEQ
7
  from yaml import safe_load
8
+ from ndl_parser import convert_to_xml_string3
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ import xml.etree.ElementTree as ET
11
+ from reading_order.xy_cut.eval import eval_xml
12
 
13
+ from xml.dom import minidom
14
+ import re
15
 
16
  # Model Heading and Description
17
+ model_heading = "NDL Kotenseki OCR-Lite Gradio App"
18
+ description = """Upload an image or click an example image to use."""
19
 
20
+ article = "<p style='text-align: center'><a href=\"https://github.com/ndl-lab/ndlkotenocr-lite\">https://github.com/ndl-lab/ndlkotenocr-lite</a>.</p>"
21
 
22
  image_path = [
23
+ ['samples/digidepo_1287221_00000002.jpg'],
24
+ ['samples/digidepo_10301438_0017.jpg']
25
  ]
26
 
27
  # Functions to load models
 
43
  charlist = list(safe_load(f)["model"]["charset_train"])
44
  return PARSEQ(model_path=weights_path, charlist=charlist, device=device)
45
 
46
+ def create_txt(recognizer, root, img):
47
+ alltextlist = []
48
+
49
+ targetdflist=[]
50
+
51
+ tatelinecnt=0
52
+ alllinecnt=0
53
+
54
+ with ThreadPoolExecutor(max_workers=4, thread_name_prefix="thread") as executor:
55
+ for lineobj in root.findall(".//LINE"):
56
+ xmin=int(lineobj.get("X"))
57
+ ymin=int(lineobj.get("Y"))
58
+ line_w=int(lineobj.get("WIDTH"))
59
+ line_h=int(lineobj.get("HEIGHT"))
60
+ if line_h>line_w:
61
+ tatelinecnt+=1
62
+ alllinecnt+=1
63
+ lineimg=img[ymin:ymin+line_h,xmin:xmin+line_w,:]
64
+ targetdflist.append(lineimg)
65
+ resultlines = executor.map(recognizer.read, targetdflist)
66
+
67
+ resultlines=list(resultlines)
68
+ alltextlist.append("\n".join(resultlines))
69
+
70
+ alltextstr=""
71
+ for text in alltextlist:
72
+ alltextstr+=text+"\n"
73
+ return alltextstr
74
+
75
+
76
+ def create_xml(detections,classeslist,img_w,img_h,imgname, recognizer, img):
77
+ resultobj=[dict(),dict()]
78
+ resultobj[0][0]=list()
79
+ for i in range(16):
80
+ resultobj[1][i]=[]
81
+ for det in detections:
82
+ xmin,ymin,xmax,ymax=det["box"]
83
+ conf=det["confidence"]
84
+ if det["class_index"]==0:
85
+ resultobj[0][0].append([xmin,ymin,xmax,ymax])
86
+ resultobj[1][det["class_index"]].append([xmin,ymin,xmax,ymax,conf])
87
+
88
+ xmlstr=convert_to_xml_string3(img_w, img_h, imgname, classeslist, resultobj,score_thr = 0.3,min_bbox_size= 5,use_block_ad= False)
89
+ xmlstr="<OCRDATASET>"+xmlstr+"</OCRDATASET>"
90
+
91
+ root = ET.fromstring(xmlstr)
92
+ eval_xml(root, logger=None)
93
+
94
+ targetdflist=[]
95
+
96
+ tatelinecnt=0
97
+ alllinecnt=0
98
+
99
+ with ThreadPoolExecutor(max_workers=4, thread_name_prefix="thread") as executor:
100
+ for lineobj in root.findall(".//LINE"):
101
+ xmin=int(lineobj.get("X"))
102
+ ymin=int(lineobj.get("Y"))
103
+ line_w=int(lineobj.get("WIDTH"))
104
+ line_h=int(lineobj.get("HEIGHT"))
105
+ if line_h>line_w:
106
+ tatelinecnt+=1
107
+ alllinecnt+=1
108
+ lineimg=img[ymin:ymin+line_h,xmin:xmin+line_w,:]
109
+ targetdflist.append(lineimg)
110
+ resultlines = executor.map(recognizer.read, targetdflist)
111
+ resultlines=list(resultlines)
112
+
113
+ for idx,lineobj in enumerate(root.findall(".//LINE")):
114
+ lineobj.set("STRING",resultlines[idx])
115
+
116
+ return root
117
+
118
+ def create_txt(root):
119
+ alltextlist=[]
120
+
121
+ for lineobj in root.findall(".//LINE"):
122
+ alltextlist.append(lineobj.get("STRING"))
123
+
124
+ return "\n".join(alltextlist)
125
+
126
+ def create_xmlstr(root):
127
+ rough_string = ET.tostring(root, 'utf-8')
128
+ reparsed = minidom.parseString(rough_string)
129
+ pretty = re.sub(r"[\t ]+\n", "", reparsed.toprettyxml(indent="\t")) # インデント後の不要な改行を削除
130
+ pretty = pretty.replace(">\n\n\t<", ">\n\t<") # 不要な空行を削除
131
+ pretty = re.sub(r"\n\s*\n", "\n", pretty) # 連続した改行(空白行を含む)を単一の改行に置換
132
+ return pretty
133
+
134
+ def create_json(root):
135
+ resjsonarray=[]
136
+
137
+ img_w=int(root.find("PAGE").get("WIDTH"))
138
+ img_h=int(root.find("PAGE").get("HEIGHT"))
139
+ inputpath=root.find("PAGE").get("IMAGENAME")
140
+
141
+ for idx,lineobj in enumerate(root.findall(".//LINE")):
142
+
143
+ text = lineobj.get("STRING")
144
+
145
+ xmin=int(lineobj.get("X"))
146
+ ymin=int(lineobj.get("Y"))
147
+ line_w=int(lineobj.get("WIDTH"))
148
+ line_h=int(lineobj.get("HEIGHT"))
149
+ try:
150
+ conf=float(lineobj.get("CONF"))
151
+ except:
152
+ conf=0
153
+ jsonobj={"boundingBox": [[xmin,ymin],[xmin,ymin+line_h],[xmin+line_w,ymin],[xmin+line_w,ymin+line_h]],
154
+ "id": idx,"isVertical": "true","text": text,"isTextline": "true","confidence": conf}
155
+ resjsonarray.append(jsonobj)
156
+
157
+ alljsonobj={
158
+ "contents":[resjsonarray],
159
+ "imginfo": {
160
+ "img_width": img_w,
161
+ "img_height": img_h,
162
+ "img_path":inputpath,
163
+ "img_name":os.path.basename(inputpath)
164
+ }
165
+ }
166
+
167
+ return alljsonobj
168
+
169
  # YOLO Inference Function
170
+ def process(image_path: str):
171
  try:
172
  # Load the models
173
  detector = get_detector(
 
187
 
188
  # Object detection
189
  detections = detector.detect(npimg)
190
+ classeslist=list(detector.classes.values())
191
+
192
+ img_h,img_w=npimg.shape[:2]
193
+ imgname=os.path.basename(image_path)
194
+
195
+ root = create_xml(detections, classeslist, img_w, img_h, imgname, recognizer, npimg)
196
+
197
+ alltext = create_txt(root)
198
+
199
+ result_json = create_json(root)
200
+
201
+ pil_image =detector.draw_detections(npimg, detections=detections)
202
+
203
+ return pil_image, alltext, create_xmlstr(root), result_json
 
204
  except Exception as e:
205
+
206
+ return [
207
+ Image.fromarray(np.zeros((100, 100), dtype=np.uint8)),
208
+ "Error",
209
+ "Error",
210
+ {}
211
+ ]
212
 
213
  # Gradio Inputs and Outputs
214
  inputs_image = gr.Image(type="filepath", label="Input Image")
215
+ outputs_image = [
216
+ gr.Image(type="pil", label="Output Image"),
217
+ gr.TextArea(label="Output Text"),
218
+ gr.TextArea(label="Output XML"),
219
+ gr.JSON(label="Output JSON")
220
+ ]
221
 
222
  # Gradio Interface
223
  demo = gr.Interface(
224
+ fn=process,
225
  inputs=inputs_image,
226
  outputs=outputs_image,
227
  title=model_heading,
228
  description=description,
229
  examples=image_path,
230
  article=article,
231
+ cache_examples=False,
232
+ flagging_mode="never"
233
  )
234
 
235
  demo.launch(share=False, server_name="0.0.0.0")
src/samples/default.jpg DELETED
Binary file (445 kB)
 
src/samples/digidepo_10301438_0017.jpg ADDED
src/samples/digidepo_1287221_00000002.jpg ADDED