Spaces:
Runtime error
Runtime error
"""Loading logic for loading documents from a GCS file.""" | |
import tempfile | |
from typing import List | |
from langchain.docstore.document import Document | |
from langchain.document_loaders.base import BaseLoader | |
from langchain.document_loaders.unstructured import UnstructuredFileLoader | |
class GCSFileLoader(BaseLoader): | |
"""Loading logic for loading documents from GCS.""" | |
def __init__(self, project_name: str, bucket: str, blob: str): | |
"""Initialize with bucket and key name.""" | |
self.bucket = bucket | |
self.blob = blob | |
self.project_name = project_name | |
def load(self) -> List[Document]: | |
"""Load documents.""" | |
try: | |
from google.cloud import storage | |
except ImportError: | |
raise ValueError( | |
"Could not import google-cloud-storage python package. " | |
"Please it install it with `pip install google-cloud-storage`." | |
) | |
# Initialise a client | |
storage_client = storage.Client(self.project_name) | |
# Create a bucket object for our bucket | |
bucket = storage_client.get_bucket(self.bucket) | |
# Create a blob object from the filepath | |
blob = bucket.blob(self.blob) | |
with tempfile.TemporaryDirectory() as temp_dir: | |
file_path = f"{temp_dir}/{self.blob}" | |
# Download the file to a destination | |
blob.download_to_filename(file_path) | |
loader = UnstructuredFileLoader(file_path) | |
return loader.load() | |