docker_mineru / app.py
marcosremar2's picture
Fix: Replace magic_pdf.api import with direct magic_pdf usage
78bc6bc
raw
history blame
1.33 kB
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import magic_pdf
import tempfile
import os
import json
# FastAPI application instance; the /extract route is registered on it.
app = FastAPI()
@app.post("/extract")
async def extract(file: UploadFile = File(...)):
    """Extract per-page text and tables from an uploaded PDF.

    The upload is spooled to a temporary ``.pdf`` file on disk, parsed with
    ``magic_pdf``, and flattened into a JSON-serialisable dictionary.

    Args:
        file: The uploaded file taken from the multipart form field ``file``.

    Returns:
        On success, ``{"result": {"pages": [...]}}`` where each page entry
        holds ``page_num``, ``text`` (the page's text blocks joined with
        newlines) and ``tables`` (one Markdown string per table).
        On any failure while saving, parsing or serialising, a
        ``JSONResponse`` with status 500 and ``{"error": <message>}``.
    """
    content = await file.read()
    temp_pdf_path = None
    try:
        # delete=False so the file survives the ``with`` block and can be
        # reopened by path; removal is handled in ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            # Record the path before writing so that a failed write still
            # gets cleaned up.
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(content)

        # NOTE(review): assumes the installed magic_pdf exposes a ``PDF``
        # class with ``parse()`` returning an object with ``.pages`` --
        # confirm against the pinned magic_pdf version.
        # NOTE(review): this is a plain synchronous call inside a coroutine,
        # so the event loop is blocked while it runs.
        result = magic_pdf.PDF(temp_pdf_path).parse()

        output = {
            "pages": [
                {
                    "page_num": page.page_num,
                    "text": "\n".join([block.text for block in page.text_blocks]),
                    "tables": [table.to_markdown() for table in page.tables],
                }
                for page in result.pages
            ]
        }
        return {"result": output}
    except Exception as e:
        # Top-level request boundary: report the failure to the client as a
        # JSON 500 instead of letting it propagate.
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Always remove the temporary file, including when parsing or
        # serialisation failed (previously it was only removed on success).
        if temp_pdf_path is not None:
            try:
                os.unlink(temp_pdf_path)
            except OSError:
                # Best-effort cleanup: a failed unlink must not replace the
                # response that is already being returned.
                pass