Spaces:
Build error
Build error
File size: 2,376 Bytes
59f9119 9236c5b 59f9119 1e86052 59f9119 3b01a7f 59f9119 47a4e45 59f9119 b7703b3 59f9119 b7703b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import hashlib
import io
import os
from enum import Enum

import numpy as np
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.staticfiles import StaticFiles
from paddleocr import PaddleOCR, PPStructure, save_structure_res
from PIL import Image
# FastAPI application; docs_url='/' serves the interactive API docs at the site root.
app = FastAPI(docs_url='/')
# Passed to get_ocr() by the /ocr endpoint to select GPU or CPU inference.
use_gpu = False
# Directory the /ocr_table endpoint hands to save_structure_res() for its output.
output_dir = 'output'
class LangEnum(str, Enum):
    """Languages a client may request from the OCR endpoints.

    Members are also plain strings; each member's value is the language
    code that the endpoints forward to the PaddleOCR engines.
    """

    ch = "ch"
    en = "en"
    ja = "japan"
    ko = "korean"
# PaddleOCR engines that have already been built, keyed by language.
ocr_cache = {}


def get_ocr(lang, use_gpu=False):
    """Return the PaddleOCR engine for ``lang``, building it on first use.

    Args:
        lang: Language code; also the key under which the engine is cached.
        use_gpu: Forwarded to ``PaddleOCR`` only when a new engine is built.
            An engine already cached for ``lang`` is returned as-is,
            whatever value is passed here.

    Returns:
        The cached engine for ``lang``.
    """
    engine = ocr_cache.get(lang)
    if not engine:
        engine = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=use_gpu)
        ocr_cache[lang] = engine
    return engine
@app.post("/ocr")
async def create_upload_file(
    file: UploadFile = File(...),
    lang: LangEnum = LangEnum.ch,
):
    """Run text detection and recognition on an uploaded image.

    Args:
        file: The uploaded image.
        lang: Recognition language; defaults to ``LangEnum.ch``.

    Returns:
        A list with one dict per recognised text line, holding ``boxes``
        (the line's detection box), ``txt`` (the recognised text) and
        ``score`` (the recognition score). The list is empty when the
        engine reports nothing for the image.

    Raises:
        HTTPException: Status 400 when the upload cannot be decoded as an
            image (previously this surfaced as an unhandled server error).
    """
    contents = await file.read()
    try:
        # PIL decodes lazily; np.array() forces the full decode, so both
        # unrecognised and truncated files fail inside this block.
        img2np = np.array(Image.open(io.BytesIO(contents)))
    except OSError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image",
        ) from exc

    # GPU use is fixed by the module-level ``use_gpu`` flag, not per request.
    ocr = get_ocr(lang=lang, use_gpu=use_gpu)
    # A single image is submitted, so only the first result entry is used.
    result = ocr.ocr(img2np, cls=True)[0]
    if not result:
        return []

    # Recognition results: each line is indexed as [box, (text, score)].
    return [
        dict(boxes=line[0], txt=line[1][0], score=line[1][1])
        for line in result
    ]
# PPStructure engines keyed by language, so the models are loaded once per
# language instead of on every request.
_table_engine_cache = {}


def _get_table_engine(lang):
    """Return the PPStructure engine for ``lang``, building it on first use."""
    engine = _table_engine_cache.get(lang)
    if engine is None:
        engine = PPStructure(show_log=True, table=True, lang=lang)
        _table_engine_cache[lang] = engine
    return engine


@app.post("/ocr_table")
async def create_upload_file(
    file: UploadFile = File(...),
    lang: LangEnum = LangEnum.ch,
):
    """Run structure/table analysis on an uploaded image.

    The analysis result is also written out through ``save_structure_res``
    under ``output_dir``, named after the SHA-256 hash of the upload.

    Args:
        file: The uploaded image.
        lang: Recognition language; defaults to ``LangEnum.ch``.

    Returns:
        A dict with parallel lists ``htmls``, ``bboxes`` and ``types`` (one
        entry per detected region) plus ``hash``, the hex SHA-256 digest of
        the uploaded bytes. ``htmls`` holds an empty string for regions
        that carry no HTML.

    Raises:
        HTTPException: Status 400 when the upload cannot be decoded as an
            image.
    """
    contents = await file.read()
    # Hash of the file contents; used to name the saved results.
    file_hash = hashlib.sha256(contents).hexdigest()
    try:
        # PIL decodes lazily; np.array() forces the full decode, so both
        # unrecognised and truncated files fail inside this block.
        img2np = np.array(Image.open(io.BytesIO(contents)))
    except OSError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image",
        ) from exc

    table_engine = _get_table_engine(lang)
    result = table_engine(img2np)
    save_structure_res(result, output_dir, file_hash)

    htmls = []
    types = []
    bboxes = []
    for item in result:
        item_res = item.get('res', {})
        # Per the PP-Structure docs only table regions carry a dict with an
        # 'html' entry; other regions carry a list of OCR lines, on which
        # .get() would raise AttributeError.
        html = item_res.get('html', '') if isinstance(item_res, dict) else ''
        htmls.append(html)
        types.append(item.get('type', ''))
        bboxes.append(item.get('bbox', ''))
    return {
        'htmls': htmls,
        'hash': file_hash,
        'bboxes': bboxes,
        'types': types,
    }
# StaticFiles raises RuntimeError at construction when its directory is
# missing, and nothing in this module creates output_dir before the first
# /ocr_table request, so make sure it exists before mounting.
os.makedirs(output_dir, exist_ok=True)
# Expose the saved analysis results over HTTP under /output.
app.mount("/output", StaticFiles(directory=output_dir, follow_symlink=True, html=True), name="output")

if __name__ == '__main__':
    # Start the app with uvicorn's default settings when run as a script.
    uvicorn.run(app=app)