"""HTTP API that downloads a PDF from S3 and runs it through PdfToSectionConverter."""

import logging
import os
from uuid import uuid4

import boto3
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from helper import PdfToSectionConverter

# Load environment variables
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Fetch AWS credentials from environment
s3_access_key_id = os.getenv("S3_ACCESS_KEY_ID")
s3_secret_key = os.getenv("S3_SECRET_KEY")
aws_region = os.getenv("AWS_REGION")

# Validate environment variables: fail at import time rather than on the
# first request, so a misconfigured deployment is noticed immediately.
if not all([s3_access_key_id, s3_secret_key, aws_region]):
    logger.error("Missing AWS S3 credentials in environment variables.")
    raise ValueError("AWS credentials not set properly.")

# Initialize FastAPI app
app = FastAPI()

# Configure S3 client
s3_client = boto3.client(
    "s3",
    aws_access_key_id=s3_access_key_id,
    aws_secret_access_key=s3_secret_key,
    region_name=aws_region,
)

# URI scheme every incoming ``s3_file_path`` must start with.
S3_URI_PREFIX = "s3://"

# Directory that holds the downloaded PDFs while they are being converted.
TEMP_DIR = "/tmp"


class PdfRequest(BaseModel):
    """Request body accepted by ``POST /convert_pdf``."""

    # Location of the PDF, in the form ``s3://<bucket>/<key>``.
    s3_file_path: str
    # Forwarded unchanged to the converter as ``file_title``.
    file_title: str
    # Forwarded unchanged to the converter as ``doc_id``.
    doc_id: str
    # Forwarded to the converter as ``start_page_no`` / ``end_page_no``.
    # NOTE(review): the meaning of the default 0 is defined by
    # PdfToSectionConverter, which is not visible here -- confirm.
    start_page: int = 0
    end_page: int = 0


@app.get("/")
async def start():
    """Return a static readiness message."""
    return {"message": "Parser API is Ready"}


def _split_s3_uri(s3_uri: str) -> tuple:
    """Split ``s3://bucket/key`` into a ``(bucket, key)`` pair.

    Raises:
        HTTPException: 400 if the scheme prefix is missing, or if either the
            bucket or the key part is empty.
    """
    if not s3_uri.startswith(S3_URI_PREFIX):
        raise HTTPException(
            status_code=400,
            detail="Invalid S3 file path. Must start with 's3://'",
        )

    # Strip only the leading prefix; a blanket ``replace`` would also eat any
    # "s3://" that happens to occur inside the object key.
    bucket_name, _, object_key = s3_uri[len(S3_URI_PREFIX):].partition("/")
    if not bucket_name or not object_key:
        raise HTTPException(status_code=400, detail="Invalid S3 file path format.")
    return bucket_name, object_key


def _remove_temp_file(path: str) -> None:
    """Delete *path*, tolerating a missing file and logging other failures."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # The download failed before the file was created; nothing to clean.
        pass
    except OSError as exc:
        # Cleanup problems must not replace the response of the request.
        logger.warning("Could not remove temporary file %s: %s", path, exc)


@app.post("/convert_pdf")
async def convert_pdf(request: PdfRequest):
    """Download the requested PDF from S3, convert it and return the result.

    Args:
        request: Validated request body; see ``PdfRequest``.

    Returns:
        ``{"status": "success", "data": <value returned by the converter>}``.

    Raises:
        HTTPException: 400 for a malformed ``s3_file_path``; 500 if the
            download fails or any other unexpected error occurs.
    """
    # NOTE(review): the S3 download and the conversion below are blocking
    # calls inside an ``async def`` handler, so they stall the event loop
    # while they run. Moving them to a thread pool is the usual remedy, but
    # confirm PdfToSectionConverter is safe to run concurrently first.
    bucket_name, object_key = _split_s3_uri(request.s3_file_path)

    # A per-request file name keeps parallel workers that share TEMP_DIR from
    # overwriting (or deleting) each other's download.
    local_pdf_path = os.path.join(TEMP_DIR, f"{uuid4().hex}.pdf")

    try:
        os.makedirs(TEMP_DIR, exist_ok=True)

        logger.info(
            "Downloading %s from S3 bucket %s...",
            request.s3_file_path,
            bucket_name,
        )
        try:
            s3_client.download_file(bucket_name, object_key, local_pdf_path)
        except Exception as exc:
            logger.error("Failed to download file from S3: %s", exc)
            raise HTTPException(
                status_code=500,
                detail="Error downloading file from S3.",
            ) from exc

        # Initialize and run the converter
        converter = PdfToSectionConverter()
        output = converter.convert(
            downloaded_pdf_path=local_pdf_path,
            file_title=request.file_title,
            doc_id=request.doc_id,
            start_page_no=request.start_page,
            end_page_no=request.end_page,
        )
        return {"status": "success", "data": output}
    except HTTPException:
        # Already carries the intended status code and detail.
        raise
    except Exception as exc:
        logger.exception("Unexpected error: %s", exc)
        raise HTTPException(status_code=500, detail="Internal Server Error.") from exc
    finally:
        # Runs on success and on every failure path, so the download never
        # outlives the request.
        _remove_temp_file(local_pdf_path)


def start_server():
    """Run the API with uvicorn on 0.0.0.0:8000 with auto-reload enabled."""
    logger.info("Starting Server...")
    # NOTE(review): "app:app" only resolves if this module is named app.py,
    # and reload=True is a development setting -- confirm for deployment.
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)


if __name__ == "__main__":
    start_server()