# NOTE: This file was captured from a Hugging Face Spaces file viewer.
# The viewer's page chrome (Space status, "File size: 5,015 Bytes", the
# per-line commit-hash column, and the 1-116 line-number gutter) is not part
# of the module and has been reduced to this comment.
import os
import boto3
import tempfile
import fitz
from io import BytesIO
from fastapi.responses import JSONResponse
import logging
class Loader:
    """Upload PDF pages and images to an S3 bucket and rename stored objects.

    Every public method returns ``None`` on success and a ``JSONResponse``
    with status code 500 when the operation as a whole raises.
    """

    def __init__(self, bucket_name="multimedika", region_name="us-west-2"):
        """Create the S3 client used by all other methods.

        Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_SECRET_ACCESS_KEY`` environment variables.

        Args:
            bucket_name: Bucket that every operation targets.
            region_name: AWS region passed to the S3 client.
        """
        self.bucket_name = bucket_name
        self.s3_client = boto3.client(
            "s3",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=region_name,
        )

    @staticmethod
    def _build_key(name, folder_name):
        """Return ``name`` prefixed with ``folder_name/`` when a folder is given."""
        return f"{folder_name}/{name}" if folder_name else name

    def _list_keys(self, prefix):
        """Return every object key in the bucket that starts with ``prefix``."""
        paginator = self.s3_client.get_paginator("list_objects_v2")
        keys = []
        for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix):
            # "Contents" is absent from the response when nothing matches.
            keys.extend(item["Key"] for item in page.get("Contents", []))
        return keys

    def _move_object(self, source_key, target_key):
        """Copy ``source_key`` to ``target_key``, then delete the original."""
        copy_source = {"Bucket": self.bucket_name, "Key": source_key}
        self.s3_client.copy(copy_source, self.bucket_name, target_key)
        # Delete only after the copy succeeded so a failed copy loses nothing.
        self.s3_client.delete_object(Bucket=self.bucket_name, Key=source_key)

    def _upload_page(self, pdf_document, page_num, object_name):
        """Upload page ``page_num`` (0-based) as ``<object_name>/<page_num + 1>.pdf``."""
        page_object_name = f"{object_name}/{page_num + 1}.pdf"
        page_stream = BytesIO()
        single_page_pdf = fitz.open()  # New, empty PDF that receives one page.
        try:
            single_page_pdf.insert_pdf(
                pdf_document, from_page=page_num, to_page=page_num
            )
            single_page_pdf.save(page_stream)
        finally:
            # Release the temporary document even if extraction/saving failed.
            single_page_pdf.close()
        # Rewind so the upload reads the page from its first byte.
        page_stream.seek(0)
        self.s3_client.upload_fileobj(page_stream, self.bucket_name, page_object_name)
        print(f"Page {page_num + 1} of '{object_name}' successfully uploaded as '{page_object_name}' to bucket '{self.bucket_name}'.")

    def upload_to_s3(self, file, object_name, folder_name="summarizer"):
        """Split a PDF into single-page PDFs and upload each page to S3.

        Pages are stored as ``<folder_name>/<object_name>/<page>.pdf`` with
        1-based page numbers. A page that fails is logged and skipped; the
        remaining pages are still attempted.

        Args:
            file: Upload object whose ``file`` attribute is a readable binary
                stream containing the PDF.
            object_name: Name under which the pages are grouped.
            folder_name: Optional key prefix; a falsy value disables it.

        Returns:
            ``None`` when the document was processed (even if individual
            pages failed), or a ``JSONResponse`` with status 500 when the
            PDF could not be read or opened.
        """
        try:
            object_name = self._build_key(object_name, folder_name)
            pdf_document = fitz.open(stream=file.file.read(), filetype="pdf")
            try:
                # "Jumlah halaman" is Indonesian for "number of pages".
                print("Jumlah halaman : ", pdf_document.page_count)
                for page_num in range(pdf_document.page_count):
                    try:
                        self._upload_page(pdf_document, page_num, object_name)
                    except Exception as page_error:
                        # Best effort: log the failure and move on to the next page.
                        logging.error(f"Error uploading page {page_num + 1}: {page_error}")
            finally:
                # The source document was previously never closed.
                pdf_document.close()
        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error uploading to AWS: {e}")

    def upload_image_to_s3(self, file, custom_name, folder_name="summarizer"):
        """Upload an image to S3 as ``<folder_name>/<custom_name>``.

        Args:
            file: Upload object whose ``file`` attribute is a readable binary
                stream containing the image.
            custom_name: Object name to store the image under.
            folder_name: Optional key prefix; a falsy value disables it.

        Returns:
            ``None`` on success, or a ``JSONResponse`` with status 500 on error.
        """
        try:
            object_name = self._build_key(custom_name, folder_name)
            # A BytesIO built from bytes already starts at position 0.
            image_stream = BytesIO(file.file.read())
            self.s3_client.upload_fileobj(image_stream, self.bucket_name, object_name)
            print(f"Image '{object_name}' successfully uploaded to bucket '{self.bucket_name}'.")
        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error uploading image to AWS: {e}")

    def change_name_of_book(self, current_object_name, new_object_name, folder_name="summarizer"):
        """Rename a book stored in S3.

        ``upload_to_s3`` stores a book as one object per page under the
        ``<folder_name>/<name>/`` prefix, so every object under that prefix
        is moved to the new prefix. When nothing exists under the prefix,
        the name is treated as a single object key and moved as-is.

        Args:
            current_object_name: Existing book name (without the folder).
            new_object_name: Replacement book name (without the folder).
            folder_name: Optional key prefix; a falsy value disables it.

        Returns:
            ``None`` on success, or a ``JSONResponse`` with status 500 on error.
        """
        try:
            current_object_name = self._build_key(current_object_name, folder_name)
            new_object_name = self._build_key(new_object_name, folder_name)
            # The trailing slash keeps "book" from also matching "book2/...".
            old_prefix = f"{current_object_name}/"
            # List everything first so objects created by the move itself are
            # never picked up by the listing.
            page_keys = self._list_keys(old_prefix)
            if page_keys:
                for key in page_keys:
                    suffix = key[len(old_prefix):]
                    self._move_object(key, f"{new_object_name}/{suffix}")
            else:
                self._move_object(current_object_name, new_object_name)
            print(f"Renamed '{current_object_name}' to '{new_object_name}'.")
        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error renaming book: {e}")

    def change_name_of_image(self, current_object_name, new_object_name, folder_name="summarizer"):
        """Rename a single image object in S3 (copy, then delete the original).

        Args:
            current_object_name: Existing object name (without the folder).
            new_object_name: Replacement object name (without the folder).
            folder_name: Optional key prefix; a falsy value disables it.

        Returns:
            ``None`` on success, or a ``JSONResponse`` with status 500 on error.
        """
        try:
            current_object_name = self._build_key(current_object_name, folder_name)
            new_object_name = self._build_key(new_object_name, folder_name)
            self._move_object(current_object_name, new_object_name)
            print(f"Renamed image '{current_object_name}' to '{new_object_name}' in bucket '{self.bucket_name}'.")
        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error renaming image: {e}")
# (end of module; stray "|" left by the web viewer removed)