Bot_Development / service /aws_loader.py
dsmultimedika's picture
fix : improve uploader
661a3cb
raw
history blame
5.04 kB
import os
import boto3
import tempfile
import fitz
from io import BytesIO
from fastapi.responses import JSONResponse
import logging
class Loader:
    """Thin service wrapper around an AWS S3 client for uploading and
    renaming book (PDF) and image objects.

    Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` environment variables. Public methods return
    ``None`` on success and a 500 ``JSONResponse`` on failure (kept for
    backward compatibility with existing callers).
    """

    def __init__(self, bucket_name="multimedika", region_name="us-west-2"):
        """Create the S3 client.

        Args:
            bucket_name: Target S3 bucket. Defaults to the original
                hard-coded bucket so existing callers are unaffected.
            region_name: AWS region for the client. Defaults to the
                original hard-coded region.
        """
        self.bucket_name = bucket_name
        self.s3_client = boto3.client(
            "s3",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=region_name,
        )

    def upload_to_s3(self, file_stream: BytesIO, object_name, folder_name="summarizer"):
        """Split a PDF into single-page PDFs and upload each page to S3.

        Pages are stored under ``<folder_name>/<object_name>/<page>.pdf``
        (1-based page numbers). A failure on one page is logged and the
        remaining pages are still uploaded; a failure opening the source
        PDF returns a 500 ``JSONResponse``.

        Args:
            file_stream: In-memory PDF contents.
            object_name: Base key (per-page keys are derived from it).
            folder_name: Optional key prefix; falsy values mean no prefix.
        """
        try:
            if folder_name:
                object_name = f"{folder_name}/{object_name}"
            pdf_document = fitz.open(stream=file_stream.getvalue(), filetype="pdf")
            try:
                # Message text kept verbatim from the original ("page count").
                logging.info("Jumlah halaman :  %s", pdf_document.page_count)
                for page_num in range(pdf_document.page_count):
                    try:
                        self._upload_single_page(pdf_document, page_num, object_name)
                    except Exception as page_error:
                        # Best-effort per page: log and continue with the rest.
                        logging.error(f"Error uploading page {page_num + 1}: {page_error}")
            finally:
                # Always release the PyMuPDF document (previously leaked).
                pdf_document.close()
        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error uploading to AWS: {e}")

    def _upload_single_page(self, pdf_document, page_num, object_name):
        """Serialize page *page_num* of *pdf_document* as a standalone PDF
        and upload it to ``<object_name>/<page_num + 1>.pdf``.
        """
        page_stream = BytesIO()
        single_page_pdf = fitz.open()  # new, empty PDF to receive one page
        try:
            single_page_pdf.insert_pdf(pdf_document, from_page=page_num, to_page=page_num)
            single_page_pdf.save(page_stream)
        finally:
            # Close even if insert/save raises (previously leaked on error).
            single_page_pdf.close()
        page_stream.seek(0)  # rewind so upload_fileobj reads from the start
        page_object_name = f"{object_name}/{page_num + 1}.pdf"
        self.s3_client.upload_fileobj(page_stream, self.bucket_name, page_object_name)
        logging.info(
            f"Page {page_num + 1} of '{object_name}' successfully uploaded as "
            f"'{page_object_name}' to bucket '{self.bucket_name}'."
        )

    def upload_image_to_s3(self, file, custom_name, folder_name="summarizer"):
        """Upload an image to S3 under ``<folder_name>/<custom_name>``.

        Args:
            file: An object exposing ``.file.read()`` (e.g. a FastAPI
                ``UploadFile``) — assumed from usage; confirm with callers.
            custom_name: Object key (prefixed with *folder_name* if set).
            folder_name: Optional key prefix; falsy values mean no prefix.
        """
        try:
            object_name = f"{folder_name}/{custom_name}" if folder_name else custom_name
            # Buffer the upload body in memory; BytesIO(data) starts at pos 0.
            image_stream = BytesIO(file.file.read())
            self.s3_client.upload_fileobj(image_stream, self.bucket_name, object_name)
            logging.info(f"Image '{object_name}' successfully uploaded to bucket '{self.bucket_name}'.")
        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error uploading image to AWS: {e}")

    def _rename_object(self, current_object_name, new_object_name, folder_name):
        """Rename an S3 object via copy-then-delete (S3 has no native rename).

        Returns the fully-prefixed (old_key, new_key) pair for logging.
        Shared by the book and image rename methods, which previously
        duplicated this logic verbatim.
        """
        if folder_name:
            current_object_name = f"{folder_name}/{current_object_name}"
            new_object_name = f"{folder_name}/{new_object_name}"
        copy_source = {'Bucket': self.bucket_name, 'Key': current_object_name}
        self.s3_client.copy(copy_source, self.bucket_name, new_object_name)
        self.s3_client.delete_object(Bucket=self.bucket_name, Key=current_object_name)
        return current_object_name, new_object_name

    def change_name_of_book(self, current_object_name, new_object_name, folder_name="summarizer"):
        """Rename a book object; returns a 500 ``JSONResponse`` on failure."""
        try:
            old_key, new_key = self._rename_object(current_object_name, new_object_name, folder_name)
            logging.info(f"Renamed '{old_key}' to '{new_key}'.")
        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error renaming book: {e}")

    def change_name_of_image(self, current_object_name, new_object_name, folder_name="summarizer"):
        """Rename an image object; returns a 500 ``JSONResponse`` on failure."""
        try:
            old_key, new_key = self._rename_object(current_object_name, new_object_name, folder_name)
            logging.info(f"Renamed image '{old_key}' to '{new_key}' in bucket '{self.bucket_name}'.")
        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error renaming image: {e}")