nakamura196 committed on
Commit
ffde3b6
·
1 Parent(s): 9bed0a8

feat: add outputs

Browse files
Dockerfile CHANGED
@@ -16,4 +16,4 @@ RUN pip install --no-cache-dir -r requirements.txt
16
  WORKDIR /home/user/app/src
17
 
18
  # アプリを起動
19
- CMD ["python", "app.py"]
 
16
  WORKDIR /home/user/app/src
17
 
18
  # アプリを起動
19
+ CMD ["gradio", "app.py"]
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Ndlkotenocr Lite
3
  emoji: 👀
4
  colorFrom: red
5
  colorTo: blue
 
1
  ---
2
+ title: NDL Kotenseki OCR-Lite Gradio App
3
  emoji: 👀
4
  colorFrom: red
5
  colorTo: blue
docker-compose.yml CHANGED
@@ -3,7 +3,7 @@ services:
3
  build:
4
  context: .
5
  dockerfile: Dockerfile
6
- container_name: ndlkotenocr-lite-app
7
  volumes:
8
  - .:/home/user/app
9
  ports:
 
3
  build:
4
  context: .
5
  dockerfile: Dockerfile
6
+ container_name: ndlkotenocr-lite-gradio-app
7
  volumes:
8
  - .:/home/user/app
9
  ports:
src/app.py CHANGED
@@ -5,17 +5,23 @@ import os
5
  from rtmdet import RTMDet
6
  from parseq import PARSEQ
7
  from yaml import safe_load
 
 
 
 
8
 
9
-
 
10
 
11
  # Model Heading and Description
12
- model_heading = "YOLOv11x くずし字認識サービス(一文字)"
13
- description = """YOLOv11x くずし字認識サービス(一文字) Gradio demo for classification. Upload an image or click an example image to use."""
14
 
15
- article = "<p style='text-align: center'>YOLOv11x くずし字認識サービス(一文字) is a classification model trained on the <a href=\"https://lab.hi.u-tokyo.ac.jp/datasets/kuzushiji\">東京大学史料編纂所くずし字データセット</a>.</p>"
16
 
17
  image_path = [
18
- ['samples/default.jpg']
 
19
  ]
20
 
21
  # Functions to load models
@@ -37,8 +43,131 @@ def get_recognizer(weights_path, classes_path, device='cpu'):
37
  charlist = list(safe_load(f)["model"]["charset_train"])
38
  return PARSEQ(model_path=weights_path, charlist=charlist, device=device)
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # YOLO Inference Function
41
- def YOLOv11x_img_inference(image_path: str):
42
  try:
43
  # Load the models
44
  detector = get_detector(
@@ -58,38 +187,49 @@ def YOLOv11x_img_inference(image_path: str):
58
 
59
  # Object detection
60
  detections = detector.detect(npimg)
61
- result_json = []
62
-
63
- # Text recognition
64
- for det in detections:
65
- xmin, ymin, xmax, ymax = det["box"]
66
- line_img = npimg[int(ymin):int(ymax), int(xmin):int(xmax)]
67
- text = recognizer.read(line_img)
68
- result_json.append({
69
- "boundingBox": [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]],
70
- "text": text,
71
- "confidence": det["confidence"]
72
- })
73
-
74
- # Return results in JSON format
75
- return result_json
76
  except Exception as e:
77
- return {"error": str(e)}
 
 
 
 
 
 
78
 
79
  # Gradio Inputs and Outputs
80
  inputs_image = gr.Image(type="filepath", label="Input Image")
81
- outputs_image = gr.JSON(label="Output JSON")
 
 
 
 
 
82
 
83
  # Gradio Interface
84
  demo = gr.Interface(
85
- fn=YOLOv11x_img_inference,
86
  inputs=inputs_image,
87
  outputs=outputs_image,
88
  title=model_heading,
89
  description=description,
90
  examples=image_path,
91
  article=article,
92
- cache_examples=False
 
93
  )
94
 
95
  demo.launch(share=False, server_name="0.0.0.0")
 
5
  from rtmdet import RTMDet
6
  from parseq import PARSEQ
7
  from yaml import safe_load
8
+ from ndl_parser import convert_to_xml_string3
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ import xml.etree.ElementTree as ET
11
+ from reading_order.xy_cut.eval import eval_xml
12
 
13
+ from xml.dom import minidom
14
+ import re
15
 
16
  # Model Heading and Description
17
+ model_heading = "NDL Kotenseki OCR-Lite Gradio App"
18
+ description = """Upload an image or click an example image to use."""
19
 
20
+ article = "<p style='text-align: center'><a href=\"https://github.com/ndl-lab/ndlkotenocr-lite\">https://github.com/ndl-lab/ndlkotenocr-lite</a>.</p>"
21
 
22
  image_path = [
23
+ ['samples/digidepo_1287221_00000002.jpg'],
24
+ ['samples/digidepo_10301438_0017.jpg']
25
  ]
26
 
27
  # Functions to load models
 
43
  charlist = list(safe_load(f)["model"]["charset_train"])
44
  return PARSEQ(model_path=weights_path, charlist=charlist, device=device)
45
 
46
+ def create_txt(recognizer, root, img):
47
+ alltextlist = []
48
+
49
+ targetdflist=[]
50
+
51
+ tatelinecnt=0
52
+ alllinecnt=0
53
+
54
+ with ThreadPoolExecutor(max_workers=4, thread_name_prefix="thread") as executor:
55
+ for lineobj in root.findall(".//LINE"):
56
+ xmin=int(lineobj.get("X"))
57
+ ymin=int(lineobj.get("Y"))
58
+ line_w=int(lineobj.get("WIDTH"))
59
+ line_h=int(lineobj.get("HEIGHT"))
60
+ if line_h>line_w:
61
+ tatelinecnt+=1
62
+ alllinecnt+=1
63
+ lineimg=img[ymin:ymin+line_h,xmin:xmin+line_w,:]
64
+ targetdflist.append(lineimg)
65
+ resultlines = executor.map(recognizer.read, targetdflist)
66
+
67
+ resultlines=list(resultlines)
68
+ alltextlist.append("\n".join(resultlines))
69
+
70
+ alltextstr=""
71
+ for text in alltextlist:
72
+ alltextstr+=text+"\n"
73
+ return alltextstr
74
+
75
+
76
+ def create_xml(detections,classeslist,img_w,img_h,imgname, recognizer, img):
77
+ resultobj=[dict(),dict()]
78
+ resultobj[0][0]=list()
79
+ for i in range(16):
80
+ resultobj[1][i]=[]
81
+ for det in detections:
82
+ xmin,ymin,xmax,ymax=det["box"]
83
+ conf=det["confidence"]
84
+ if det["class_index"]==0:
85
+ resultobj[0][0].append([xmin,ymin,xmax,ymax])
86
+ resultobj[1][det["class_index"]].append([xmin,ymin,xmax,ymax,conf])
87
+
88
+ xmlstr=convert_to_xml_string3(img_w, img_h, imgname, classeslist, resultobj,score_thr = 0.3,min_bbox_size= 5,use_block_ad= False)
89
+ xmlstr="<OCRDATASET>"+xmlstr+"</OCRDATASET>"
90
+
91
+ root = ET.fromstring(xmlstr)
92
+ eval_xml(root, logger=None)
93
+
94
+ targetdflist=[]
95
+
96
+ tatelinecnt=0
97
+ alllinecnt=0
98
+
99
+ with ThreadPoolExecutor(max_workers=4, thread_name_prefix="thread") as executor:
100
+ for lineobj in root.findall(".//LINE"):
101
+ xmin=int(lineobj.get("X"))
102
+ ymin=int(lineobj.get("Y"))
103
+ line_w=int(lineobj.get("WIDTH"))
104
+ line_h=int(lineobj.get("HEIGHT"))
105
+ if line_h>line_w:
106
+ tatelinecnt+=1
107
+ alllinecnt+=1
108
+ lineimg=img[ymin:ymin+line_h,xmin:xmin+line_w,:]
109
+ targetdflist.append(lineimg)
110
+ resultlines = executor.map(recognizer.read, targetdflist)
111
+ resultlines=list(resultlines)
112
+
113
+ for idx,lineobj in enumerate(root.findall(".//LINE")):
114
+ lineobj.set("STRING",resultlines[idx])
115
+
116
+ return root
117
+
118
+ def create_txt(root):
119
+ alltextlist=[]
120
+
121
+ for lineobj in root.findall(".//LINE"):
122
+ alltextlist.append(lineobj.get("STRING"))
123
+
124
+ return "\n".join(alltextlist)
125
+
126
+ def create_xmlstr(root):
127
+ rough_string = ET.tostring(root, 'utf-8')
128
+ reparsed = minidom.parseString(rough_string)
129
+ pretty = re.sub(r"[\t ]+\n", "", reparsed.toprettyxml(indent="\t")) # インデント後の不要な改行を削除
130
+ pretty = pretty.replace(">\n\n\t<", ">\n\t<") # 不要な空行を削除
131
+ pretty = re.sub(r"\n\s*\n", "\n", pretty) # 連続した改行(空白行を含む)を単一の改行に置換
132
+ return pretty
133
+
134
+ def create_json(root):
135
+ resjsonarray=[]
136
+
137
+ img_w=int(root.find("PAGE").get("WIDTH"))
138
+ img_h=int(root.find("PAGE").get("HEIGHT"))
139
+ inputpath=root.find("PAGE").get("IMAGENAME")
140
+
141
+ for idx,lineobj in enumerate(root.findall(".//LINE")):
142
+
143
+ text = lineobj.get("STRING")
144
+
145
+ xmin=int(lineobj.get("X"))
146
+ ymin=int(lineobj.get("Y"))
147
+ line_w=int(lineobj.get("WIDTH"))
148
+ line_h=int(lineobj.get("HEIGHT"))
149
+ try:
150
+ conf=float(lineobj.get("CONF"))
151
+ except:
152
+ conf=0
153
+ jsonobj={"boundingBox": [[xmin,ymin],[xmin,ymin+line_h],[xmin+line_w,ymin],[xmin+line_w,ymin+line_h]],
154
+ "id": idx,"isVertical": "true","text": text,"isTextline": "true","confidence": conf}
155
+ resjsonarray.append(jsonobj)
156
+
157
+ alljsonobj={
158
+ "contents":[resjsonarray],
159
+ "imginfo": {
160
+ "img_width": img_w,
161
+ "img_height": img_h,
162
+ "img_path":inputpath,
163
+ "img_name":os.path.basename(inputpath)
164
+ }
165
+ }
166
+
167
+ return alljsonobj
168
+
169
  # YOLO Inference Function
170
+ def process(image_path: str):
171
  try:
172
  # Load the models
173
  detector = get_detector(
 
187
 
188
  # Object detection
189
  detections = detector.detect(npimg)
190
+ classeslist=list(detector.classes.values())
191
+
192
+ img_h,img_w=npimg.shape[:2]
193
+ imgname=os.path.basename(image_path)
194
+
195
+ root = create_xml(detections, classeslist, img_w, img_h, imgname, recognizer, npimg)
196
+
197
+ alltext = create_txt(root)
198
+
199
+ result_json = create_json(root)
200
+
201
+ pil_image =detector.draw_detections(npimg, detections=detections)
202
+
203
+ return pil_image, alltext, create_xmlstr(root), result_json
 
204
  except Exception as e:
205
+
206
+ return [
207
+ Image.fromarray(np.zeros((100, 100), dtype=np.uint8)),
208
+ "Error",
209
+ "Error",
210
+ {}
211
+ ]
212
 
213
  # Gradio Inputs and Outputs
214
  inputs_image = gr.Image(type="filepath", label="Input Image")
215
+ outputs_image = [
216
+ gr.Image(type="pil", label="Output Image"),
217
+ gr.TextArea(label="Output Text"),
218
+ gr.TextArea(label="Output XML"),
219
+ gr.JSON(label="Output JSON")
220
+ ]
221
 
222
  # Gradio Interface
223
  demo = gr.Interface(
224
+ fn=process,
225
  inputs=inputs_image,
226
  outputs=outputs_image,
227
  title=model_heading,
228
  description=description,
229
  examples=image_path,
230
  article=article,
231
+ cache_examples=False,
232
+ flagging_mode="never"
233
  )
234
 
235
  demo.launch(share=False, server_name="0.0.0.0")
src/samples/default.jpg DELETED
Binary file (445 kB)
 
src/samples/digidepo_10301438_0017.jpg ADDED
src/samples/digidepo_1287221_00000002.jpg ADDED