Spaces:
Sleeping
Sleeping
import os
import tempfile
from io import BytesIO

import boto3
import fitz
from fastapi import HTTPException
class Loader:
    """Upload files to, and fetch PDF files from, a fixed S3 bucket.

    The bucket name and region are hard-coded; AWS credentials are read from
    the ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment
    variables when the instance is created.
    """

    def __init__(self):
        """Create the S3 client used by every other method."""
        self.bucket_name = "multimedika"
        self.s3_client = boto3.client(
            "s3",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name="us-west-2",
        )

    def upload_to_s3(self, file, object_name, folder_name="summarizer"):
        """Upload the contents of ``file`` to the bucket.

        Args:
            file: Object exposing a binary file-like ``.file`` attribute with
                a ``read()`` method (presumably a FastAPI ``UploadFile`` --
                confirm against callers).
            object_name: Key (file name) to store the object under.
            folder_name: Optional key prefix. When truthy, the object is
                stored as ``"<folder_name>/<object_name>"``.

        Raises:
            HTTPException: Status 400 if reading the file or the upload fails.
        """
        try:
            if folder_name:
                object_name = f"{folder_name}/{object_name}"

            # Upload from an in-memory copy so the caller's file handle is
            # never handed to (and possibly closed by) the S3 transfer layer.
            with BytesIO(file.file.read()) as file_stream:
                self.s3_client.upload_fileobj(file_stream, self.bucket_name, object_name)

            print(f"File '{object_name}' successfully uploaded to bucket '{self.bucket_name}'.")
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Error uploading to AWS: {e}") from e

    def get_file_aws(self, object_name, local_file_name=None):
        """Download a PDF from S3, open it with PyMuPDF and print its text.

        The file is downloaded into a temporary directory that is removed
        before this method returns, whether or not an error occurred.

        Args:
            object_name: Key of the object to download from the bucket.
            local_file_name: File name to use inside the temporary directory.
                Only its final path component is used, so the download can
                never land outside the temporary directory. Defaults to
                ``"downloaded_pdf_file.pdf"`` when omitted or empty.

        Raises:
            HTTPException: Status 400 if the download or PDF parsing fails.
        """
        # Strip directory components: an absolute or "../" name would
        # otherwise escape the temporary directory via os.path.join.
        file_name = os.path.basename(local_file_name or "") or "downloaded_pdf_file.pdf"

        try:
            # TemporaryDirectory removes the directory and the downloaded file
            # on exit, including when an exception propagates.
            with tempfile.TemporaryDirectory() as temp_dir:
                file_path = os.path.join(temp_dir, file_name)

                with open(file_path, "wb") as temp_file:
                    self.s3_client.download_fileobj(
                        self.bucket_name, object_name, temp_file
                    )

                doc = fitz.open(file_path)
                try:
                    print(f"Number of pages: {doc.page_count}")
                    for page in doc:
                        print(page.get_text())
                finally:
                    # Close before the directory is removed so the file is not
                    # still held open during cleanup.
                    doc.close()
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Error get file file in aws: {e}") from e