Spaces:
Running
Running
The Google Drive implementation is done and working. One thing is still missing: a function that deletes a document's chunks when that document is removed from Google Drive.
aee2bfd
# src/utils/google_drive_service.py | |
from google.oauth2 import service_account | |
from googleapiclient.discovery import build | |
from googleapiclient.http import MediaIoBaseDownload | |
import io | |
import os | |
class GoogleDriveService:
    """Thin wrapper around the Drive v3 API for listing and fetching files.

    The service is built with the ``drive.readonly`` scope, so it can list,
    download and export files but not modify them.
    """

    def __init__(self, credentials_path: str):
        """
        Initialize Google Drive service

        Args:
            credentials_path (str): Path to service account credentials file
        """
        self.credentials = service_account.Credentials.from_service_account_file(
            credentials_path,
            scopes=['https://www.googleapis.com/auth/drive.readonly']
        )
        self.service = build('drive', 'v3', credentials=self.credentials)

    def get_folder_contents(self, folder_id: str) -> list:
        """
        Get contents of a Drive folder

        All result pages are fetched, so the returned list is complete even
        when the folder holds more items than a single ``files.list``
        response carries.

        Args:
            folder_id (str): ID of the folder to process

        Returns:
            List[Dict]: List of file metadata (id, name, mimeType,
            modifiedTime) for every non-trashed direct child of the folder
        """
        # Backslashes and single quotes must be escaped inside a quoted
        # value of the Drive query language, otherwise the query is invalid.
        escaped_id = folder_id.replace("\\", "\\\\").replace("'", "\\'")
        query = f"'{escaped_id}' in parents and trashed=false"

        files = []
        page_token = None  # None requests the first page
        while True:
            response = self.service.files().list(
                q=query,
                # nextPageToken has to be requested explicitly, otherwise
                # the partial-response filter strips it from the result.
                fields="nextPageToken, files(id, name, mimeType,modifiedTime)",
                supportsAllDrives=True,
                includeItemsFromAllDrives=True,
                pageToken=page_token,
            ).execute()
            files.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return files

    def _fetch_media(self, request) -> bytes:
        """Run a media request chunk by chunk and return the collected bytes."""
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, request)
        done = False
        while not done:
            # next_chunk() returns (status, done); only completion matters.
            _, done = downloader.next_chunk()
        return buffer.getvalue()

    def download_file(self, file_id: str) -> bytes:
        """
        Download a file from Drive

        Args:
            file_id (str): ID of the file to download

        Returns:
            bytes: File content
        """
        request = self.service.files().get_media(fileId=file_id)
        return self._fetch_media(request)

    def export_file(self, file_id: str, mime_type: str) -> bytes:
        """
        Export a Google Workspace file to a different format

        Args:
            file_id (str): ID of the file to export
            mime_type (str): MIME type to export to

        Returns:
            bytes: Exported file content
        """
        request = self.service.files().export_media(
            fileId=file_id,
            mimeType=mime_type
        )
        return self._fetch_media(request)