Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -4,6 +4,7 @@ from fastapi import FastAPI, File, UploadFile, Form
|
|
4 |
from fastapi.responses import JSONResponse
|
5 |
from transformers import pipeline
|
6 |
from pytesseract import pytesseract
|
|
|
7 |
|
8 |
app = FastAPI()
|
9 |
|
@@ -21,6 +22,13 @@ This API extracts text from an uploaded image using OCR and performs document qu
|
|
21 |
|
22 |
app = FastAPI(docs_url="/", description=description)
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
@app.post("/uploadfile/", description=description)
|
25 |
async def perform_document_qa(
|
26 |
file: UploadFile = File(...),
|
@@ -30,11 +38,7 @@ async def perform_document_qa(
|
|
30 |
# Read the uploaded file
|
31 |
contents = await file.read()
|
32 |
|
33 |
-
|
34 |
-
image = Image.open(BytesIO(contents))
|
35 |
-
|
36 |
-
# Perform OCR to extract text from the image
|
37 |
-
text_content = pytesseract.image_to_string(image)
|
38 |
|
39 |
# Split the questions string into a list
|
40 |
question_list = [q.strip() for q in questions.split(',')]
|
|
|
4 |
from fastapi.responses import JSONResponse
|
5 |
from transformers import pipeline
|
6 |
from pytesseract import pytesseract
|
7 |
+
import base64
|
8 |
|
9 |
app = FastAPI()
|
10 |
|
|
|
22 |
|
23 |
app = FastAPI(docs_url="/", description=description)
|
24 |
|
25 |
+
def get_image_content(contents):
|
26 |
+
# Convert binary content to image
|
27 |
+
image = Image.open(BytesIO(contents))
|
28 |
+
# Perform OCR to extract text from the image
|
29 |
+
text_content = pytesseract.image_to_string(image)
|
30 |
+
return text_content
|
31 |
+
|
32 |
@app.post("/uploadfile/", description=description)
|
33 |
async def perform_document_qa(
|
34 |
file: UploadFile = File(...),
|
|
|
38 |
# Read the uploaded file
|
39 |
contents = await file.read()
|
40 |
|
41 |
+
text_content = get_image_content(contents)
|
|
|
|
|
|
|
|
|
42 |
|
43 |
# Split the questions string into a list
|
44 |
question_list = [q.strip() for q in questions.split(',')]
|