File size: 1,333 Bytes
44df236
 
78bc6bc
 
 
 
44df236
 
 
 
 
 
 
78bc6bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44df236
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import magic_pdf
import tempfile
import os
import json

# Application instance; the route handlers in this module register on it
# through its decorator methods (e.g. ``@app.post``).
app = FastAPI()

@app.post("/extract")
async def extract(file: UploadFile = File(...)):
    """Extract per-page text and tables from an uploaded PDF.

    The upload is written to a temporary ``.pdf`` file, parsed with
    ``magic_pdf.PDF(path).parse()``, and flattened into a plain dict.
    The temporary file is removed whether or not processing succeeds.

    Args:
        file: The uploaded PDF, sent as multipart form data.

    Returns:
        ``{"result": {"pages": [...]}}`` on success. Each page entry has
        ``page_num``, ``text`` (the page's text blocks joined with
        newlines) and ``tables`` (one ``to_markdown()`` result per table).
        If anything fails during processing, a ``JSONResponse`` with
        status 500 and body ``{"error": "<exception message>"}``.
    """
    content = await file.read()
    temp_pdf_path = None
    try:
        # delete=False keeps the file on disk after the handle is closed so
        # it can be handed to the parser by path; cleanup happens in finally.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            # Record the path before writing so a failed write is still
            # cleaned up.
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(content)

        # NOTE(review): parse() runs synchronously inside an async handler;
        # if it is slow it will block the event loop for other requests.
        # Consider offloading it to a worker thread - confirm with profiling.
        result = magic_pdf.PDF(temp_pdf_path).parse()

        pages = [
            {
                "page_num": page.page_num,
                "text": "\n".join(block.text for block in page.text_blocks),
                "tables": [table.to_markdown() for table in page.tables],
            }
            for page in result.pages
        ]
        return {"result": {"pages": pages}}
    except Exception as e:
        # Top-level request boundary: report the failure as a JSON 500.
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        if temp_pdf_path is not None:
            try:
                os.unlink(temp_pdf_path)
            except OSError:
                # Best-effort cleanup; a failure to remove the temp file
                # must not replace the response already being returned.
                pass