luanpoppe
feat: adicionando OCR em casos de PDFs com problema
edd5b40
raw
history blame contribute delete
859 Bytes
import os
from google.cloud import storage
# Google Cloud project ID handed to the storage client in upload_to_gcs.
GCP_PROJECT: str = "gen-lang-client-0350149082"

# Cloud region; not referenced in the visible part of this module.
GCP_REGION: str = "us-central1"

# NOTE(review): presumably the ID of a document-processing API resource
# (e.g. an OCR processor) -- not used in the visible code; confirm with callers.
DOCUMENT_API_ID: str = "b34a20d22dee16bb"

# Name of the GCS bucket that upload_to_gcs writes into.
GCS_BUCKET_NAME: str = "vella-pdfs"
def upload_to_gcs(LOCAL_PDF_PATH: str) -> str:
    """Upload a local file to the configured GCS bucket and return its URI.

    The object is stored under the ``gemini_uploads/`` prefix using only the
    base name of the local path, so two local files that share a base name
    map to the same object name in the bucket.

    Errors raised by the storage client (or by reading the local file) are
    not caught here and propagate to the caller.

    Args:
        LOCAL_PDF_PATH: Path of the local file to upload.

    Returns:
        The ``gs://<bucket>/<object>`` URI of the uploaded object.
    """
    # Object name inside the bucket: fixed prefix + file name without directories.
    destination_blob_name = "gemini_uploads/" + os.path.basename(LOCAL_PDF_PATH)
    # Build the URI once; it is used both for progress output and as the result.
    gcs_uri = f"gs://{GCS_BUCKET_NAME}/{destination_blob_name}"

    storage_client = storage.Client(project=GCP_PROJECT)
    blob = storage_client.bucket(GCS_BUCKET_NAME).blob(destination_blob_name)

    print(f"Uploading {LOCAL_PDF_PATH} to {gcs_uri}...")
    blob.upload_from_filename(LOCAL_PDF_PATH)
    print(f"File uploaded to {gcs_uri}")

    return gcs_uri