File size: 4,827 Bytes
9002555
 
 
 
 
 
d57efd6
 
9002555
 
 
 
 
 
 
 
 
 
 
 
0767396
 
9002555
0767396
9002555
0767396
9002555
0743bb0
661a3cb
d57efd6
0767396
 
 
 
 
 
 
 
 
 
 
 
 
0743bb0
 
d57efd6
 
 
 
 
 
 
 
 
 
0743bb0
d57efd6
 
0743bb0
d57efd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9002555
d57efd6
 
 
9002555
d57efd6
 
 
 
 
 
 
 
 
9002555
d57efd6
 
 
 
 
 
 
 
 
 
 
 
 
9002555
d57efd6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import boto3
import tempfile
import fitz
from io import BytesIO

from fastapi.responses import JSONResponse
import logging


class Loader:
    """Upload PDF books to S3 and rename stored objects.

    All operations target a single bucket (``self.bucket_name``) through one
    boto3 S3 client (``self.s3_client``).  Public methods return ``None`` on
    success and a ``JSONResponse`` with status 500 when the operation fails.
    """

    def __init__(self, bucket_name="multimedika", region_name="us-west-2"):
        """Create the S3 client.

        Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_SECRET_ACCESS_KEY`` environment variables.

        Args:
            bucket_name: Bucket every operation of this loader targets.
            region_name: AWS region the S3 client is created for.
        """
        self.bucket_name = bucket_name
        self.s3_client = boto3.client(
            "s3",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=region_name,
        )

    @staticmethod
    def _object_key(folder_name, name):
        """Return *name* prefixed with ``folder_name/`` when a folder is given."""
        return f"{folder_name}/{name}" if folder_name else name

    def upload_to_s3(self, file_stream: BytesIO, title, folder_name="summarizer"):
        """Upload a PDF as one full document plus one single-page PDF per page.

        The full document is stored at ``<folder>/full_book/<title>.pdf`` and
        page ``n`` (1-based) at ``<folder>/<title>/<n>.pdf``.  When
        ``folder_name`` is empty or ``None`` the ``<folder>/`` prefix is
        omitted.

        Args:
            file_stream: In-memory stream holding the PDF bytes.
            title: Book title, used to build the object keys.
            folder_name: Optional key prefix inside the bucket.

        Returns:
            ``None`` on success.  A ``JSONResponse`` with status 500 when the
            PDF cannot be opened or the full document cannot be uploaded.
            A failure on an individual page is logged and that page is
            skipped; it does not change the return value.
        """
        try:
            # Build both keys up front so a missing folder never leaves a
            # name undefined or produces a "None/..." key.
            object_name = self._object_key(folder_name, title)
            full_pdf_object_name = self._object_key(
                folder_name, f"full_book/{title}.pdf"
            )

            # Open the PDF with PyMuPDF (fitz)
            pdf_document = fitz.open(stream=file_stream.getvalue(), filetype="pdf")
            try:
                # Runtime message is Indonesian for "number of pages".
                print("Jumlah halaman : ", pdf_document.page_count)

                # Serialize the whole document into a fresh stream and upload it.
                full_pdf_stream = BytesIO()
                pdf_document.save(full_pdf_stream)
                full_pdf_stream.seek(0)  # upload_fileobj reads from the current position
                self.s3_client.upload_fileobj(full_pdf_stream, self.bucket_name, full_pdf_object_name)
                print(f"Full PDF '{title}.pdf' successfully uploaded as '{full_pdf_object_name}' to bucket '{self.bucket_name}'.")

                for page_num in range(pdf_document.page_count):
                    try:
                        self._upload_page(pdf_document, page_num, object_name)
                    except Exception as page_error:
                        # Best effort: log the failure and carry on with the next page.
                        logging.error(f"Error uploading page {page_num + 1}: {page_error}")
            finally:
                # Always release the document, even when an upload fails.
                pdf_document.close()

        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error uploading to AWS: {e}")

    def _upload_page(self, pdf_document, page_num, object_name):
        """Upload page *page_num* (0-based) of *pdf_document* as a one-page PDF.

        The object is stored at ``<object_name>/<page_num + 1>.pdf``.
        """
        page_stream = BytesIO()
        single_page_pdf = fitz.open()  # Create a new, empty PDF
        try:
            single_page_pdf.insert_pdf(pdf_document, from_page=page_num, to_page=page_num)
            single_page_pdf.save(page_stream)
        finally:
            # Close the temporary document even if copying or saving raised.
            single_page_pdf.close()

        page_stream.seek(0)  # rewind before handing the stream to the uploader
        page_object_name = f"{object_name}/{page_num + 1}.pdf"
        self.s3_client.upload_fileobj(page_stream, self.bucket_name, page_object_name)

        print(f"Page {page_num + 1} of '{object_name}' successfully uploaded as '{page_object_name}' to bucket '{self.bucket_name}'.")

    def _rename_object(self, current_object_name, new_object_name, folder_name):
        """Copy one object to a new key, delete the old key, return both keys.

        Both names receive the ``folder_name/`` prefix when a folder is
        given.  If the resulting keys are identical nothing is done:
        copying a key onto itself and then deleting it would remove the
        object.

        Returns:
            Tuple ``(old_key, new_key)`` of the full keys that were used.
        """
        old_key = self._object_key(folder_name, current_object_name)
        new_key = self._object_key(folder_name, new_object_name)

        if old_key != new_key:
            copy_source = {'Bucket': self.bucket_name, 'Key': old_key}
            self.s3_client.copy(copy_source, self.bucket_name, new_key)
            # Delete only after the copy call has returned without raising.
            self.s3_client.delete_object(Bucket=self.bucket_name, Key=old_key)

        return old_key, new_key

    def change_name_of_book(self, current_object_name, new_object_name, folder_name="summarizer"):
        """Rename a single stored object by copying it and deleting the original.

        Args:
            current_object_name: Existing key, relative to ``folder_name``.
            new_object_name: Desired key, relative to ``folder_name``.
            folder_name: Optional key prefix applied to both names.

        Returns:
            ``None`` on success, or a ``JSONResponse`` with status 500 on error.
        """
        # NOTE(review): upload_to_s3 stores a book as many objects (one per
        # page plus the full PDF); this renames exactly one key. Confirm
        # callers pass a complete object key rather than a book title.
        try:
            current_object_name, new_object_name = self._rename_object(
                current_object_name, new_object_name, folder_name
            )
            print(f"Renamed '{current_object_name}' to '{new_object_name}'.")

        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error renaming book: {e}")

    def change_name_of_image(self, current_object_name, new_object_name, folder_name="summarizer"):
        """Rename a stored image object by copying it and deleting the original.

        Args:
            current_object_name: Existing key, relative to ``folder_name``.
            new_object_name: Desired key, relative to ``folder_name``.
            folder_name: Optional key prefix applied to both names.

        Returns:
            ``None`` on success, or a ``JSONResponse`` with status 500 on error.
        """
        try:
            current_object_name, new_object_name = self._rename_object(
                current_object_name, new_object_name, folder_name
            )
            print(f"Renamed image '{current_object_name}' to '{new_object_name}' in bucket '{self.bucket_name}'.")

        except Exception as e:
            return JSONResponse(status_code=500, content=f"Error renaming image: {e}")