Spaces:
Sleeping
Sleeping
import json
import os
import tempfile
from contextlib import suppress

import magic_pdf
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
# Module-level FastAPI application instance.
app = FastAPI()
async def extract(file: UploadFile = File(...)):
    """Parse an uploaded PDF and return its per-page text and tables.

    The upload is written to a temporary ``.pdf`` file, handed to
    ``magic_pdf.PDF(path).parse()``, and the parsed pages are flattened into
    plain JSON-serialisable structures.

    Args:
        file: The uploaded PDF, supplied as multipart form data.

    Returns:
        On success, ``{"result": {"pages": [...]}}`` where each page entry has
        ``"page_num"``, ``"text"`` (the page's text blocks joined with
        newlines) and ``"tables"`` (one Markdown string per table).
        On any failure while writing, parsing or converting, a
        ``JSONResponse`` with status 500 and ``{"error": <message>}``.
    """
    # NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on
    # this function in the reviewed text -- confirm it is registered on ``app``.
    content = await file.read()

    temp_pdf_path = None
    try:
        # delete=False keeps the file on disk after the ``with`` block closes
        # it, so the parser can reopen it by path; removal happens in
        # ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(content)

        # Assumes the parse result exposes ``pages``, each with ``page_num``,
        # ``text_blocks`` (items with ``.text``) and ``tables`` (items with
        # ``.to_markdown()``), as used below.
        result = magic_pdf.PDF(temp_pdf_path).parse()

        pages = [
            {
                "page_num": page.page_num,
                "text": "\n".join(block.text for block in page.text_blocks),
                "tables": [table.to_markdown() for table in page.tables],
            }
            for page in result.pages
        ]
        return {"result": {"pages": pages}}
    except Exception as e:
        # Top-level request boundary: report the failure to the client
        # instead of letting it propagate.
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Always remove the temporary file, including when parsing fails;
        # previously it was only removed on the success path and leaked on
        # every error. Cleanup is best-effort so that a failed unlink cannot
        # replace the response being returned.
        if temp_pdf_path is not None:
            with suppress(OSError):
                os.unlink(temp_pdf_path)