# src/utils/google_drive_service.py
"""Read-only helpers for listing and downloading files from Google Drive."""

import io
import logging
from typing import Any, Dict, List

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload


class GoogleDriveService:
    """Thin read-only wrapper around the Google Drive v3 API.

    Authenticates with a service account and exposes folder listing,
    binary download and Google Workspace export.
    """

    # MIME type Drive assigns to folders.
    FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'

    # Metadata requested per file. ``nextPageToken`` must be requested
    # explicitly, otherwise the response never reports further pages.
    _LIST_FIELDS = "nextPageToken, files(id, name, mimeType, modifiedTime, parents)"

    # Items requested per page when listing a folder.
    _PAGE_SIZE = 1000

    def __init__(self, credentials_path: str):
        """
        Initialize Google Drive service

        Args:
            credentials_path (str): Path to service account credentials file
        """
        self.credentials = service_account.Credentials.from_service_account_file(
            credentials_path,
            scopes=['https://www.googleapis.com/auth/drive.readonly'],
        )
        self.service = build('drive', 'v3', credentials=self.credentials)

    def _list_children(self, folder_id: str) -> List[Dict[str, Any]]:
        """
        List every non-trashed direct child of a folder, following all pages

        Args:
            folder_id (str): ID of the folder to list

        Returns:
            List[Dict]: Metadata of each direct child (files and folders)
        """
        query = f"'{folder_id}' in parents and trashed=false"
        children: List[Dict[str, Any]] = []
        page_token = None  # None requests the first page

        while True:
            response = self.service.files().list(
                q=query,
                fields=self._LIST_FIELDS,
                pageSize=self._PAGE_SIZE,
                pageToken=page_token,
                supportsAllDrives=True,
                includeItemsFromAllDrives=True,
            ).execute()
            children.extend(response.get('files', []))

            # A missing token means the last page has been consumed.
            page_token = response.get('nextPageToken')
            if not page_token:
                return children

    def get_folder_contents(self, folder_id: str, include_subfolders: bool = False) -> List[Dict[str, Any]]:
        """
        Get contents of a Drive folder including subfolders if specified

        Every returned item carries a ``folder_path`` key: a list of
        ``{'id', 'name'}`` dicts for the subfolders between ``folder_id``
        and the item, outermost first (empty for direct children). When
        ``include_subfolders`` is True, folders are expanded into their
        contents and are not themselves returned; otherwise folders are
        returned like any other item.

        Args:
            folder_id (str): ID of the folder to process
            include_subfolders (bool): Whether to include contents of subfolders (default: False)

        Returns:
            List[Dict]: List of file metadata. An empty list is returned
                (and the error is logged) if listing the folder fails.
        """
        all_files: List[Dict[str, Any]] = []

        try:
            for item in self._list_children(folder_id):
                is_folder = item['mimeType'] == self.FOLDER_MIME_TYPE

                if not (is_folder and include_subfolders):
                    # Direct child: no intermediate folders on its path.
                    item['folder_path'] = []
                    all_files.append(item)
                    continue

                # A failing subfolder is logged and skipped so that the
                # rest of the tree is still returned.
                try:
                    subfolder_files = self.get_folder_contents(
                        item['id'],
                        include_subfolders=True,
                    )
                    ancestor = {'id': item['id'], 'name': item['name']}
                    for entry in subfolder_files:
                        if not entry.get('folder_path'):
                            entry['folder_path'] = []
                        # Prepend so the path reads outermost-first. Each
                        # entry gets its own dict, as callers may mutate it.
                        entry['folder_path'].insert(0, dict(ancestor))
                    all_files.extend(subfolder_files)
                except Exception as e:
                    logging.error("Error processing subfolder %s: %s", item['name'], e)

            return all_files

        except Exception as e:
            logging.error("Error getting folder contents for folder %s: %s", folder_id, e)
            return []  # Return empty list for backward compatibility

    @staticmethod
    def _download(request) -> bytes:
        """
        Run a media request to completion and return the downloaded bytes

        Args:
            request: Media request built by ``get_media`` or ``export_media``

        Returns:
            bytes: Complete downloaded content
        """
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, request)

        done = False
        while not done:
            # next_chunk() returns (status, done); only completion matters here.
            _, done = downloader.next_chunk()

        return buffer.getvalue()

    def download_file(self, file_id: str) -> bytes:
        """
        Download a file from Drive

        Args:
            file_id (str): ID of the file to download

        Returns:
            bytes: File content
        """
        request = self.service.files().get_media(fileId=file_id)
        return self._download(request)

    def export_file(self, file_id: str, mime_type: str) -> bytes:
        """
        Export a Google Workspace file to a different format

        Args:
            file_id (str): ID of the file to export
            mime_type (str): MIME type to export to

        Returns:
            bytes: Exported file content
        """
        request = self.service.files().export_media(
            fileId=file_id,
            mimeType=mime_type,
        )
        return self._download(request)