Spaces:
Sleeping
Sleeping
File size: 1,333 Bytes
44df236 78bc6bc 44df236 78bc6bc 44df236 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import magic_pdf
import tempfile
import os
import json
# Application instance; the route handler below registers itself on it
# through the @app.post decorator.
app = FastAPI()
@app.post("/extract")
async def extract(file: UploadFile = File(...)):
    """Extract per-page text and tables from an uploaded PDF.

    The upload is written to a temporary ``.pdf`` file, parsed with
    ``magic_pdf``, and flattened into a JSON-serialisable dictionary. The
    temporary file is removed whether or not parsing succeeds.

    Args:
        file: The uploaded file (required; declared with ``File(...)``).

    Returns:
        On success, ``{"result": {"pages": [...]}}`` where each page entry
        holds ``page_num``, ``text`` (the page's text blocks joined with
        newlines) and ``tables`` (the result of ``table.to_markdown()`` for
        each table on the page). If writing, parsing or converting fails, a
        ``JSONResponse`` with status 500 and an ``{"error": <message>}``
        body.
    """
    content = await file.read()
    # Stays None until the temp file exists, so the cleanup below knows
    # whether there is anything to remove.
    temp_pdf_path = None
    try:
        # delete=False because the parser reopens the file by path after
        # this handle has been closed.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(content)

        # NOTE(review): assumes magic_pdf exposes PDF(path).parse() returning
        # an object with .pages -- confirm against the installed version.
        result = magic_pdf.PDF(temp_pdf_path).parse()

        output = {
            "pages": [
                {
                    "page_num": page.page_num,
                    "text": "\n".join(block.text for block in page.text_blocks),
                    "tables": [table.to_markdown() for table in page.tables],
                }
                for page in result.pages
            ]
        }
        return {"result": output}
    except Exception as e:
        # Endpoint boundary: report the failure to the client instead of
        # letting it propagate.
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Runs on success and failure alike, so a failed parse no longer
        # leaves the temp file behind.
        if temp_pdf_path is not None:
            try:
                os.unlink(temp_pdf_path)
            except OSError:
                # Best-effort cleanup; a failed delete must not replace the
                # response already chosen above.
                pass