# doclink_api / app.py
# Gopal2002's picture
# Upload 4 files
# a467a2d verified
from fastapi import FastAPI, HTTPException
from dotenv import load_dotenv
import boto3
import os
import uvicorn
import logging
from uuid import uuid4
from pydantic import BaseModel
from helper import PdfToSectionConverter
# --- Environment ------------------------------------------------------------
# Load environment variables before anything below reads them.
load_dotenv()

# --- Logging ----------------------------------------------------------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- AWS settings -----------------------------------------------------------
s3_access_key_id = os.getenv("S3_ACCESS_KEY_ID")
s3_secret_key = os.getenv("S3_SECRET_KEY")
aws_region = os.getenv("AWS_REGION")

# Refuse to start when any of the three settings is unset or empty.
if not (s3_access_key_id and s3_secret_key and aws_region):
    logger.error("Missing AWS S3 credentials in environment variables.")
    raise ValueError("AWS credentials not set properly.")

# --- Application objects ----------------------------------------------------
app = FastAPI()

# Single S3 client shared by the request handlers in this module.
s3_client = boto3.client(
    "s3",
    region_name=aws_region,
    aws_access_key_id=s3_access_key_id,
    aws_secret_access_key=s3_secret_key,
)
class PdfRequest(BaseModel):
    # Request body accepted by the /convert_pdf endpoint.

    # Location of the PDF; the endpoint rejects values not starting with "s3://".
    s3_file_path: str
    # Title handed to the converter as `file_title`.
    file_title: str
    # Identifier handed to the converter as `doc_id`.
    doc_id: str
    # Page bounds handed to the converter as `start_page_no` / `end_page_no`;
    # both default to 0 when the client omits them.
    start_page: int = 0
    end_page: int = 0
@app.get("/")
async def start():
    # Root endpoint: returns a fixed readiness message.
    ready_payload = {"message": "Parser API is Ready"}
    return ready_payload
@app.post("/convert_pdf")
async def convert_pdf(request: PdfRequest):
    """Download a PDF from S3 and run it through ``PdfToSectionConverter``.

    Args:
        request: Request body carrying the ``s3://bucket/key`` location of the
            PDF, its title, the document id and the page bounds that are
            forwarded to the converter.

    Returns:
        ``{"status": "success", "data": <converter output>}``.

    Raises:
        HTTPException: 400 when ``s3_file_path`` is not a well-formed
            ``s3://bucket/key`` URI; 500 when the download or the conversion
            fails.
    """
    bucket_name, object_key = _split_s3_uri(request.s3_file_path)

    output_dir = "/tmp"
    # A unique name per request keeps concurrent conversions (e.g. several
    # worker processes sharing /tmp) from overwriting each other's download.
    output_path = os.path.join(output_dir, f"{uuid4().hex}.pdf")

    # NOTE(review): download_file() and convert() are called synchronously
    # inside an async handler, so the event loop is blocked while they run.
    try:
        os.makedirs(output_dir, exist_ok=True)

        logger.info(
            "Downloading %s from S3 bucket %s...",
            request.s3_file_path,
            bucket_name,
        )
        try:
            s3_client.download_file(bucket_name, object_key, output_path)
        except Exception as e:
            logger.error("Failed to download file from S3: %s", e)
            raise HTTPException(
                status_code=500, detail="Error downloading file from S3."
            ) from e

        converter = PdfToSectionConverter()
        output = converter.convert(
            downloaded_pdf_path=output_path,
            file_title=request.file_title,
            doc_id=request.doc_id,
            start_page_no=request.start_page,
            end_page_no=request.end_page,
        )
        return {"status": "success", "data": output}
    except HTTPException:
        # Already carries the status code and detail meant for the client.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which the client never sees.
        logger.exception("Unexpected error: %s", e)
        raise HTTPException(status_code=500, detail="Internal Server Error.") from e
    finally:
        # Remove the download on every path, including failed conversions.
        try:
            os.remove(output_path)
        except FileNotFoundError:
            # The download never produced a file; nothing to clean up.
            pass
        except OSError as cleanup_error:
            logger.warning(
                "Could not remove temporary file %s: %s", output_path, cleanup_error
            )


def _split_s3_uri(s3_uri):
    """Split ``s3://bucket/key`` into ``(bucket, key)``; raise HTTP 400 if malformed."""
    prefix = "s3://"
    if not s3_uri.startswith(prefix):
        raise HTTPException(status_code=400, detail="Invalid S3 file path. Must start with 's3://'")
    # Strip only the leading scheme: str.replace would also delete any
    # "s3://" that happens to occur inside the object key.
    bucket_name, _, object_key = s3_uri[len(prefix):].partition("/")
    if not bucket_name or not object_key:
        raise HTTPException(status_code=400, detail="Invalid S3 file path format.")
    return bucket_name, object_key
def start_server(host: str = "0.0.0.0", port: int = 8000, reload: bool = True) -> None:
    """Run the ``app:app`` application with uvicorn.

    The defaults reproduce the previously hard-coded settings, so calling
    ``start_server()`` with no arguments behaves as before.

    Args:
        host: Address passed to uvicorn as ``host``.
        port: Port passed to uvicorn as ``port``.
        reload: Value passed to uvicorn as ``reload``.
            NOTE(review): auto-reload is normally a development setting;
            confirm whether deployments should pass ``reload=False``.
    """
    logger.info("Starting Server...")
    uvicorn.run("app:app", host=host, port=port, reload=reload)
# Launch the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    start_server()