Arafath10's picture
Update main.py
9c62372 verified
raw
history blame
973 Bytes
def _installed_packages():
    """Return one descriptive line per installed distribution.

    pip's ``freeze`` operation is used when pip can be imported (trying the
    ``pip._internal`` location first, then the pre-10.0 ``pip.operations``
    location). If pip is not importable at all, the list is built from
    ``importlib.metadata`` instead, so that a missing pip does not stop the
    module from importing just because a diagnostic listing was unavailable.

    Returns:
        A list of strings, one per distribution.
    """
    try:
        from pip._internal.operations import freeze
    except ImportError:
        try:
            from pip.operations import freeze  # pip < 10.0
        except ImportError:
            # pip itself is unavailable: fall back to the standard library.
            from importlib import metadata

            found = {
                f"{dist.metadata['Name']}=={dist.version}"
                for dist in metadata.distributions()
                if dist.metadata["Name"]  # skip entries with no usable name
            }
            return sorted(found, key=str.lower)
    # Materialise the generator so `pkgs` stays usable after the loop below.
    return list(freeze.freeze())


# Startup diagnostic: print every installed package when the module loads.
pkgs = _installed_packages()
for pkg in pkgs:
    print(pkg)
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
# Application instance; the route(s) defined in this module register on it.
app = FastAPI()

# CORS: accept requests from any origin, with any method and any header.
# Credentials are NOT allowed: a wildcard origin combined with
# allow_credentials=True is an invalid/unsafe CORS configuration (any website
# could issue credentialed cross-origin requests). If cookie/authorization
# based access is ever required, list the trusted origins explicitly in
# allow_origins and only then enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
def _extract_pdf_text(stream):
    """Return the stripped, concatenated text of every page in a PDF stream.

    Args:
        stream: Binary file-like object handed to ``PdfReader``.

    Returns:
        The per-page results of ``page.extract_text()`` joined in page order
        with no separator, with leading/trailing whitespace stripped.
    """
    reader = PdfReader(stream)
    # `or ""` guards against a page yielding None instead of a string, which
    # would otherwise raise TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages).strip()


@app.post("/get_ocr_data/")
async def get_data(pdf: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and return it as JSON.

    Args:
        pdf: The uploaded file (required multipart form field ``pdf``).

    Returns:
        ``{"text": <extracted text>}``.

    Raises:
        HTTPException: status 500 with the underlying error message when the
            file cannot be read or parsed.

    NOTE(review): despite the route name, no OCR engine is invoked here; text
    comes from PyPDF2's ``extract_text``. Presumably image-only (scanned) PDFs
    yield empty text -- confirm.
    """
    try:
        # Parsing is synchronous, CPU-bound work; run it in the threadpool so
        # the event loop stays free to serve other requests meanwhile.
        text = await run_in_threadpool(_extract_pdf_text, pdf.file)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing PDF: {str(e)}"
        ) from e
    return {"text": text}