Spaces:
Running
Running
# src/utils/google_drive_service.py
from google.oauth2 import service_account | |
from googleapiclient.discovery import build | |
from googleapiclient.http import MediaIoBaseDownload | |
import io | |
from typing import List, Dict, Any | |
import logging | |
class GoogleDriveService:
    """Read-only helper around the Google Drive v3 API.

    Authenticates with a service-account credentials file and offers
    folder listing (optionally recursive), raw file download and export
    of Google Workspace documents.
    """

    # MIME type Drive assigns to folders.
    _FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'

    # Upper bound on items requested per listing call; remaining items
    # are fetched by following nextPageToken.
    _LIST_PAGE_SIZE = 1000

    def __init__(self, credentials_path: str):
        """
        Initialize Google Drive service

        Args:
            credentials_path (str): Path to service account credentials file
        """
        self.credentials = service_account.Credentials.from_service_account_file(
            credentials_path,
            scopes=['https://www.googleapis.com/auth/drive.readonly']
        )
        self.service = build('drive', 'v3', credentials=self.credentials)

    def get_folder_contents(self, folder_id: str, include_subfolders: bool = False) -> List[Dict[str, Any]]:
        """
        Get contents of a Drive folder including subfolders if specified

        Every returned item carries a 'folder_path' key: a list of
        {'id', 'name'} dicts for the subfolders between ``folder_id`` and
        the item, outermost first (empty for direct children). When
        ``include_subfolders`` is True the folders themselves are replaced
        by their contents; otherwise folders are returned as ordinary items.

        Args:
            folder_id (str): ID of the folder to process
            include_subfolders (bool): Whether to include contents of subfolders (default: False)

        Returns:
            List[Dict]: List of file metadata. An empty list is returned
            (and the error logged) if listing ``folder_id`` fails.
        """
        all_files: List[Dict[str, Any]] = []
        try:
            for item in self._list_children(folder_id):
                is_folder = item['mimeType'] == self._FOLDER_MIME_TYPE
                if not (is_folder and include_subfolders):
                    # For backward compatibility, maintain original structure
                    # but add folder path information
                    item['folder_path'] = []
                    all_files.append(item)
                    continue

                # Recursively get contents of subfolder; a failure here only
                # skips this subfolder, the rest of the listing is kept.
                try:
                    subfolder_files = self.get_folder_contents(
                        item['id'],
                        include_subfolders=True
                    )
                    # Prepend this folder so the path reads outermost-first.
                    for entry in subfolder_files:
                        path = entry.get('folder_path') or []
                        path.insert(0, {
                            'id': item['id'],
                            'name': item['name']
                        })
                        entry['folder_path'] = path
                    all_files.extend(subfolder_files)
                except Exception as e:
                    logging.error(f"Error processing subfolder {item['name']}: {str(e)}")
            return all_files
        except Exception as e:
            logging.error(f"Error getting folder contents for folder {folder_id}: {str(e)}")
            return []  # Return empty list for backward compatibility

    def _list_children(self, folder_id: str) -> List[Dict[str, Any]]:
        """Return metadata for every non-trashed direct child of ``folder_id``.

        Follows ``nextPageToken`` until the listing is exhausted, so folders
        with more entries than fit in one response page are not truncated.
        """
        # Backslashes and single quotes must be escaped inside a Drive
        # query string literal.
        escaped_id = folder_id.replace('\\', '\\\\').replace("'", "\\'")
        query = f"'{escaped_id}' in parents and trashed=false"

        children: List[Dict[str, Any]] = []
        page_token = None  # None requests the first page
        while True:
            response = self.service.files().list(
                q=query,
                fields="nextPageToken, files(id, name, mimeType, modifiedTime, parents)",
                pageSize=self._LIST_PAGE_SIZE,
                pageToken=page_token,
                supportsAllDrives=True,
                includeItemsFromAllDrives=True
            ).execute()
            children.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return children

    @staticmethod
    def _read_all(request) -> bytes:
        """Run a media request to completion and return the downloaded bytes."""
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, request)
        done = False
        while not done:
            # next_chunk() returns (status, done); only completion matters here.
            _, done = downloader.next_chunk()
        return buffer.getvalue()

    def download_file(self, file_id: str) -> bytes:
        """
        Download a file from Drive

        Args:
            file_id (str): ID of the file to download

        Returns:
            bytes: File content
        """
        request = self.service.files().get_media(fileId=file_id)
        return self._read_all(request)

    def export_file(self, file_id: str, mime_type: str) -> bytes:
        """
        Export a Google Workspace file to a different format

        Args:
            file_id (str): ID of the file to export
            mime_type (str): MIME type to export to

        Returns:
            bytes: Exported file content
        """
        request = self.service.files().export_media(
            fileId=file_id,
            mimeType=mime_type
        )
        return self._read_all(request)