Spaces:

neoguojing
/

ai

Running

App Files Files Community

neoguojing committed on Jun 17, 2024

Commit

ac510cd

1 Parent(s): fdd016a

finish ocr

Browse files

Files changed (4) hide show

app.py +15 -12
ocr.py +61 -0
requirements.txt +4 -1
sam_everything.py +2 -3

app.py CHANGED Viewed

@@ -1,18 +1,11 @@
-import json
-from functools import partial
-from pathlib import Path
 import gradio as gr
-from PIL import Image
-import torch
 import numpy as np
 from gradio_image_prompter import ImagePrompter
-import sys
-sys.path.append("..")
 from inference import ModelFactory
 from face import FaceAlgo
 from sam_everything import SamAnything
 components = {}
@@ -113,8 +106,8 @@ def create_ui():
         with gr.Tab("OCR"):
             with gr.Row():
                 with gr.Column(scale=2):
-                        components["algo_type"] = gr.Dropdown(
-                                        ["OCR","DoNut"],value="DoNut",
                                         label="算法类别",interactive=True
                                 )
                 with gr.Column(scale=2):
@@ -124,11 +117,14 @@ def create_ui():
                 with gr.Column(scale=2):
                     with gr.Row(elem_id=''):
                         with gr.Group():
-                            components["ocr_input"] = gr.Gallery(elem_id='ocr-input',label='输入',columns=2,type="pil")
                 with gr.Column(scale=2):
                     with gr.Row():
                         with gr.Group():
-                            components["ocr_output"] = gr.Gallery(elem_id='ocr_output',label='输出',columns=2,interactive=False)
         create_event_handlers()
     return demo
@@ -172,6 +168,10 @@ def create_event_handlers():
         do_sam_everything,gradio('sam_input'),gradio("sam_output")
     )
 def do_refernce(algo_type,input_image):
 # def do_refernce():
     print("input image",input_image)
@@ -243,6 +243,9 @@ def do_sam_everything(im):
     return images
 def point_to_mask(pil_image):
     # 遍历每个像素
     width, height = pil_image.size

 import gradio as gr
 import numpy as np
 from gradio_image_prompter import ImagePrompter
 from inference import ModelFactory
 from face import FaceAlgo
 from sam_everything import SamAnything
+from ocr import do_ocr
 components = {}
         with gr.Tab("OCR"):
             with gr.Row():
                 with gr.Column(scale=2):
+                        components["ocr_type"] = gr.Dropdown(
+                                        ["OCR","Easy"],value="Easy",
                                         label="算法类别",interactive=True
                                 )
                 with gr.Column(scale=2):
                 with gr.Column(scale=2):
                     with gr.Row(elem_id=''):
                         with gr.Group():
+                            components["ocr_input"] = gr.Image(elem_id='ocr-input',label='输入',type="pil")
                 with gr.Column(scale=2):
                     with gr.Row():
                         with gr.Group():
+                            components["ocr_output"] = gr.Image(elem_id='ocr_output',label='输出',interactive=False,type="pil")
+            with gr.Row():
+                with gr.Group():
+                    components["ocr_json_output"] = gr.JSON(label="推理结果")
         create_event_handlers()
     return demo
         do_sam_everything,gradio('sam_input'),gradio("sam_output")
     )
+    components["submit_ocr_btn"].click(
+        do_ocr,gradio('ocr_type','ocr_input'),gradio("ocr_output","ocr_json_output")
+    )
 def do_refernce(algo_type,input_image):
 # def do_refernce():
     print("input image",input_image)
     return images
 def point_to_mask(pil_image):
     # 遍历每个像素
     width, height = pil_image.size

ocr.py ADDED Viewed

	@@ -0,0 +1,61 @@

+from detectron2.data.detection_utils import read_image,pil_image_to_numpy
+from detectron2.utils.visualizer import Visualizer
+from sam_everything import visimage_to_pil
+import numpy as np
# OCR engines are expensive to construct (they load model weights), so each
# one is built once on first use and reused by later requests.
_OCR_ENGINE_CACHE = {}


def _get_ocr_engine(ocr_type):
    """Return the cached engine for ``ocr_type``, creating it on first use."""
    engine = _OCR_ENGINE_CACHE.get(ocr_type)
    if engine is None:
        if ocr_type == "OCR":
            from paddleocr import PaddleOCR
            engine = PaddleOCR(lang='ch', use_angle_cls=True)
        else:
            import easyocr
            # Initialise EasyOCR with the languages it should recognise
            # (English and Simplified Chinese).
            engine = easyocr.Reader(['en', 'ch_sim'])
        _OCR_ENGINE_CACHE[ocr_type] = engine
    return engine


def do_ocr(ocr_type, input):
    """Run OCR on an image and return the annotated image plus detections.

    Args:
        ocr_type: ``"OCR"`` to use PaddleOCR or ``"Easy"`` to use EasyOCR.
        input: The image to analyse (the UI supplies a PIL image), or
            ``None`` when no image was provided.

    Returns:
        A ``(image, detections)`` tuple. ``image`` is the input with every
        detected text polygon outlined and ``detections`` is a list of
        ``{'box', 'text', 'confidence'}`` dicts. When ``input`` is ``None``
        the result is ``(None, [])``.

    Raises:
        ValueError: If ``ocr_type`` is not one of the supported engines.
    """
    print(ocr_type)
    # Nothing to analyse: return empty outputs instead of failing inside the
    # image conversion.
    if input is None:
        return None, []
    # Reject unknown engines up front; otherwise ``result`` would stay None
    # and the drawing loop below would fail with an opaque TypeError.
    if ocr_type not in ("OCR", "Easy"):
        raise ValueError(f"Unsupported ocr_type: {ocr_type!r}")

    np_image = pil_image_to_numpy(input)
    engine = _get_ocr_engine(ocr_type)
    if ocr_type == "OCR":
        result = engine.ocr(np_image)
        print(result)
        result = parse_paddle_result(result)
    else:
        result = engine.readtext(np_image)
        result = parse_esay_result(result)

    view = Visualizer(np_image)
    for item in result:
        polygon = np.array(item['box'])
        # "k" is matplotlib's shorthand for black.
        view.draw_polygon(polygon, "k")
    vis_image = view.get_output()
    pil_images = visimage_to_pil([vis_image])
    return pil_images[0], result
def parse_esay_result(data):
    """Convert EasyOCR ``readtext`` output into JSON-friendly detection dicts.

    Args:
        data: Iterable of ``(box, text, confidence)`` entries, where ``box``
            is a sequence of ``[x, y]`` corner points. ``None`` is treated
            as "no detections".

    Returns:
        A list of ``{'box', 'text', 'confidence'}`` dicts. Box coordinates
        and the confidence are converted to builtin Python numbers so the
        result can be serialised as JSON.
    """
    def _to_builtin(value):
        # numpy scalars expose .item(), which yields the equivalent builtin
        # int/float; plain Python numbers are passed through untouched.
        return value.item() if hasattr(value, 'item') else value

    if data is None:
        return []

    results = []
    for entry in data:
        box = entry[0]
        text = entry[1]
        confidence = entry[2]
        results.append({
            'box': [[_to_builtin(coord) for coord in point] for point in box],
            'text': text,
            'confidence': float(confidence),
        })
    return results
def parse_paddle_result(data):
    """Flatten a single-image PaddleOCR result into detection dicts.

    Args:
        data: The value returned by ``PaddleOCR.ocr`` for one image: a list
            whose first element holds ``[box, (text, confidence)]`` entries.

    Returns:
        A list of ``{'box', 'text', 'confidence'}`` dicts, empty when no
        text was detected.
    """
    # PaddleOCR reports "no text detected" as [None]; treat that, along with
    # a missing or empty result, as zero detections instead of raising.
    if data is None or len(data) == 0 or data[0] is None:
        return []
    return [
        {
            'box': entry[0],
            'text': entry[1][0],
            'confidence': entry[1][1],
        }
        for entry in data[0]
    ]

requirements.txt CHANGED Viewed

@@ -11,4 +11,7 @@ omegaconf==2.3.0
 pycocotools==2.0.7
 gradio_image_prompter==0.1.0
 cloudpickle==2.2.1
-segment_anything @ git+https://github.com/facebookresearch/segment-anything.git

 pycocotools==2.0.7
 gradio_image_prompter==0.1.0
 cloudpickle==2.2.1
+segment_anything @ git+https://github.com/facebookresearch/segment-anything.git
+paddlepaddle==2.6.1
+paddleocr==2.7.3
+easyocr==1.7.1

sam_everything.py CHANGED Viewed

@@ -89,11 +89,10 @@ def bitmask_to_polygon(mask):
     return contour
 # VIS图片转换为pil
-def visimage_to_pil(visimages,need_save=True,idx=0):
     pil_images = []
     for i,visimage in enumerate(visimages):
-        visualized_image = visimage.get_image()
-        # [:, :, ::-1]
         pil_image = Image.fromarray(visualized_image)
         if need_save:
             pil_image.save(f"{idx}_{i}.jpg")

     return contour
 # VIS图片转换为pil
+def visimage_to_pil(visimages,need_save=False,idx=0):
     pil_images = []
     for i,visimage in enumerate(visimages):
+        visualized_image = visimage.get_image()[:, :, ::-1]
         pil_image = Image.fromarray(visualized_image)
         if need_save:
             pil_image.save(f"{idx}_{i}.jpg")