File size: 4,544 Bytes
aee2bfd
 
 
 
 
be32fd8
 
aee2bfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be32fd8
aee2bfd
be32fd8
aee2bfd
 
 
be32fd8
aee2bfd
 
 
 
be32fd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aee2bfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# src/utils/google_drive_service.py
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io
from typing import List, Dict, Any
import logging

class GoogleDriveService:
    """Read-only helper around the Google Drive v3 API.

    Wraps a service-account authenticated Drive client and offers folder
    listing (optionally recursive) plus raw download / export of files.
    """

    # MIME type Drive assigns to folders.
    _FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'
    # `nextPageToken` must be requested explicitly, otherwise the partial
    # response omits it and pagination cannot be followed.
    _LIST_FIELDS = "nextPageToken, files(id, name, mimeType, modifiedTime, parents)"

    def __init__(self, credentials_path: str):
        """
        Initialize Google Drive service

        Args:
            credentials_path (str): Path to service account credentials file
        """
        self.credentials = service_account.Credentials.from_service_account_file(
            credentials_path,
            scopes=['https://www.googleapis.com/auth/drive.readonly']
        )
        self.service = build('drive', 'v3', credentials=self.credentials)

    def _list_children(self, folder_id: str) -> List[Dict[str, Any]]:
        """
        List every non-trashed direct child of a folder, following pagination

        Args:
            folder_id (str): ID of the folder whose children are listed

        Returns:
            List[Dict]: Metadata dicts of all direct children, across all pages
        """
        query = f"'{folder_id}' in parents and trashed=false"
        children: List[Dict[str, Any]] = []
        page_token = None

        while True:
            params: Dict[str, Any] = {
                'q': query,
                'fields': self._LIST_FIELDS,
                'supportsAllDrives': True,
                'includeItemsFromAllDrives': True,
            }
            # Only send pageToken once we actually have one (first request
            # must start from the beginning of the result set).
            if page_token:
                params['pageToken'] = page_token

            response = self.service.files().list(**params).execute()
            children.extend(response.get('files', []))

            page_token = response.get('nextPageToken')
            if not page_token:
                return children

    def get_folder_contents(self, folder_id: str, include_subfolders: bool = False) -> List[Dict[str, Any]]:
        """
        Get contents of a Drive folder including subfolders if specified

        All result pages are fetched, so large folders are returned in full.
        Every returned item carries a 'folder_path' list: empty for direct
        children, otherwise the chain of {'id', 'name'} subfolder entries
        (outermost first) leading from `folder_id` down to the item.

        When `include_subfolders` is True, subfolders are descended into and
        are not themselves part of the result; when False, subfolders are
        returned as ordinary items.

        Args:
            folder_id (str): ID of the folder to process
            include_subfolders (bool): Whether to include contents of subfolders (default: False)

        Returns:
            List[Dict]: List of file metadata; an empty list if listing
            `folder_id` fails (the error is logged, not raised)
        """
        all_files: List[Dict[str, Any]] = []
        try:
            for item in self._list_children(folder_id):
                is_folder = item['mimeType'] == self._FOLDER_MIME_TYPE

                if not (is_folder and include_subfolders):
                    # Direct child: no intermediate folders on its path.
                    item['folder_path'] = []
                    all_files.append(item)
                    continue

                # Recursively get contents of subfolder; a failure in one
                # subfolder must not discard what was collected elsewhere.
                try:
                    subfolder_files = self.get_folder_contents(
                        item['id'],
                        include_subfolders=True
                    )
                    # Prepend this subfolder so the path reads outermost-first.
                    for file in subfolder_files:
                        if not file.get('folder_path'):
                            file['folder_path'] = []
                        file['folder_path'].insert(0, {
                            'id': item['id'],
                            'name': item['name']
                        })
                    all_files.extend(subfolder_files)
                except Exception as e:
                    logging.error(f"Error processing subfolder {item['name']}: {str(e)}")
                    continue

            return all_files

        except Exception as e:
            logging.error(f"Error getting folder contents for folder {folder_id}: {str(e)}")
            return []  # Return empty list for backward compatibility

    @staticmethod
    def _fetch_media(request) -> bytes:
        """
        Run a media request to completion and return the downloaded bytes

        Args:
            request: Media request object accepted by MediaIoBaseDownload

        Returns:
            bytes: Everything written to the in-memory buffer by the download
        """
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, request)

        # next_chunk() returns (status, done); keep pulling until done.
        done = False
        while not done:
            _, done = downloader.next_chunk()

        return buffer.getvalue()

    def download_file(self, file_id: str) -> bytes:
        """
        Download a file from Drive

        Args:
            file_id (str): ID of the file to download

        Returns:
            bytes: File content
        """
        request = self.service.files().get_media(fileId=file_id)
        return self._fetch_media(request)

    def export_file(self, file_id: str, mime_type: str) -> bytes:
        """
        Export a Google Workspace file to a different format

        Args:
            file_id (str): ID of the file to export
            mime_type (str): MIME type to export to

        Returns:
            bytes: Exported file content
        """
        request = self.service.files().export_media(
            fileId=file_id,
            mimeType=mime_type
        )
        return self._fetch_media(request)