Genzo1010 committed on
Commit
e832cbb
·
verified ·
1 Parent(s): f3f4390

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -18
app.py CHANGED
@@ -1,6 +1,3 @@
1
- import os
2
- import asyncio
3
- from concurrent.futures import ThreadPoolExecutor
4
  from fastapi import FastAPI, File, UploadFile
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from fastapi.middleware.gzip import GZipMiddleware
@@ -19,21 +16,15 @@ app.add_middleware(
19
  allow_methods=["*"],
20
  allow_headers=["*"]
21
  )
22
- app.add_middleware(GZipMiddleware, minimum_size=1000)
23
 
24
  # Initialize models once at startup
25
  ocr_model = ocr_predictor(pretrained=True)
26
- paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True, use_gpu=True)
27
-
28
- # Get the number of available CPUs
29
- num_cpus = os.cpu_count()
30
-
31
- # Initialize ThreadPoolExecutor with 2 workers
32
- executor = ThreadPoolExecutor(max_workers=num_cpus)
33
 
34
  def ocr_with_doctr(file):
35
  text_output = ''
36
  doc = DocumentFile.from_pdf(file)
 
37
  result = ocr_model(doc)
38
  for page in result.pages:
39
  for block in page.blocks:
@@ -52,20 +43,16 @@ def ocr_with_paddle(img):
52
  def generate_text_from_image(img):
53
  return ocr_with_paddle(img)
54
 
55
- async def run_blocking_func(func, *args):
56
- loop = asyncio.get_event_loop()
57
- return await loop.run_in_executor(executor, func, *args)
58
-
59
  @app.post("/ocr/")
60
  async def perform_ocr(file: UploadFile = File(...)):
61
  file_bytes = await file.read()
62
  if file.filename.endswith('.pdf'):
63
- text_output = await run_blocking_func(ocr_with_doctr, io.BytesIO(file_bytes))
64
  else:
65
  img = np.array(Image.open(io.BytesIO(file_bytes)))
66
- text_output = await run_blocking_func(generate_text_from_image, img)
67
  return {"ocr_text": text_output}
68
 
69
  @app.get("/test/")
70
  async def test_call():
71
- return {"message": "Hi. I'm running"}
 
 
 
 
1
  from fastapi import FastAPI, File, UploadFile
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.middleware.gzip import GZipMiddleware
 
16
  allow_methods=["*"],
17
  allow_headers=["*"]
18
  )
 
19
 
20
  # Initialize models once at startup
21
  ocr_model = ocr_predictor(pretrained=True)
22
+ paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True)
 
 
 
 
 
 
23
 
24
  def ocr_with_doctr(file):
25
  text_output = ''
26
  doc = DocumentFile.from_pdf(file)
27
+ ocr_model = ocr_predictor(pretrained=True)
28
  result = ocr_model(doc)
29
  for page in result.pages:
30
  for block in page.blocks:
 
43
  def generate_text_from_image(img):
44
  return ocr_with_paddle(img)
45
 
 
 
 
 
46
  @app.post("/ocr/")
47
  async def perform_ocr(file: UploadFile = File(...)):
48
  file_bytes = await file.read()
49
  if file.filename.endswith('.pdf'):
50
+ text_output = ocr_with_doctr(io.BytesIO(file_bytes))
51
  else:
52
  img = np.array(Image.open(io.BytesIO(file_bytes)))
53
+ text_output = generate_text_from_image(img)
54
  return {"ocr_text": text_output}
55
 
56
  @app.get("/test/")
57
  async def test_call():
58
+ return {"message": "Hi. I'm running"}