chatbot-backend / src /utils /google_drive_service.py
TalatMasood's picture
Changes to be committed:
be32fd8
raw
history blame
4.54 kB
# src/utils/google_drive_service.py
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io
from typing import List, Dict, Any
import logging
class GoogleDriveService:
    """
    Read-only helper around the Google Drive v3 API.

    Authenticates with a service account restricted to the
    ``drive.readonly`` scope and exposes folder listing, binary download
    and Google Workspace export.
    """

    # MIME type Drive assigns to folders.
    FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'

    # Largest page size accepted by files.list; fewer round trips per folder.
    _PAGE_SIZE = 1000

    def __init__(self, credentials_path: str):
        """
        Initialize Google Drive service

        Args:
            credentials_path (str): Path to service account credentials file
        """
        self.credentials = service_account.Credentials.from_service_account_file(
            credentials_path,
            scopes=['https://www.googleapis.com/auth/drive.readonly']
        )
        self.service = build('drive', 'v3', credentials=self.credentials)

    def _list_children(self, folder_id: str) -> List[Dict[str, Any]]:
        """
        List every non-trashed direct child of a folder, following pagination

        Args:
            folder_id (str): ID of the folder whose children are listed

        Returns:
            List[Dict]: File metadata (id, name, mimeType, modifiedTime, parents)
        """
        # The ID is embedded in a quoted query literal, so backslashes and
        # single quotes must be escaped to keep the query well-formed.
        escaped_id = folder_id.replace('\\', '\\\\').replace("'", "\\'")
        query = f"'{escaped_id}' in parents and trashed=false"

        children = []
        page_token = None
        while True:
            # nextPageToken must be requested explicitly in the fields mask,
            # otherwise only the first page of results is ever visible.
            response = self.service.files().list(
                q=query,
                fields="nextPageToken, files(id, name, mimeType, modifiedTime, parents)",
                pageSize=self._PAGE_SIZE,
                pageToken=page_token,
                supportsAllDrives=True,
                includeItemsFromAllDrives=True
            ).execute()
            children.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return children

    def get_folder_contents(self, folder_id: str, include_subfolders: bool = False) -> List[Dict[str, Any]]:
        """
        Get contents of a Drive folder including subfolders if specified

        Every returned item carries a 'folder_path' key: a list of
        {'id', 'name'} dicts for the subfolders between the requested
        folder and the item, outermost first (empty for direct children).
        When include_subfolders is True, folders are replaced by their
        contents; otherwise folders are returned as ordinary items.

        Args:
            folder_id (str): ID of the folder to process
            include_subfolders (bool): Whether to include contents of subfolders (default: False)

        Returns:
            List[Dict]: List of file metadata; empty if the listing fails
        """
        all_files = []
        try:
            for item in self._list_children(folder_id):
                is_folder = item['mimeType'] == self.FOLDER_MIME_TYPE
                if not (is_folder and include_subfolders):
                    # For backward compatibility, maintain original structure
                    # but add folder path information
                    item['folder_path'] = []
                    all_files.append(item)
                    continue

                # Recursively get contents of subfolder; a failure here is
                # logged and skipped so sibling items are still returned.
                try:
                    subfolder_files = self.get_folder_contents(
                        item['id'],
                        include_subfolders=True
                    )
                    # Prepend this folder to the path of everything below it
                    for child in subfolder_files:
                        if not child.get('folder_path'):
                            child['folder_path'] = []
                        child['folder_path'].insert(0, {
                            'id': item['id'],
                            'name': item['name']
                        })
                    all_files.extend(subfolder_files)
                except Exception as e:
                    logging.error(f"Error processing subfolder {item['name']}: {str(e)}")
                    continue
            return all_files
        except Exception as e:
            logging.error(f"Error getting folder contents for folder {folder_id}: {str(e)}")
            return []  # Return empty list for backward compatibility

    def _fetch_media(self, request) -> bytes:
        """
        Run a media request to completion and return the downloaded bytes

        Args:
            request: Media request built by files().get_media() or files().export_media()

        Returns:
            bytes: Full response body
        """
        content = io.BytesIO()
        downloader = MediaIoBaseDownload(content, request)
        done = False
        while not done:
            # next_chunk() returns (status, done); only completion matters here
            _, done = downloader.next_chunk()
        return content.getvalue()

    def download_file(self, file_id: str) -> bytes:
        """
        Download a file from Drive

        Args:
            file_id (str): ID of the file to download

        Returns:
            bytes: File content
        """
        return self._fetch_media(self.service.files().get_media(fileId=file_id))

    def export_file(self, file_id: str, mime_type: str) -> bytes:
        """
        Export a Google Workspace file to a different format

        Args:
            file_id (str): ID of the file to export
            mime_type (str): MIME type to export to

        Returns:
            bytes: Exported file content
        """
        return self._fetch_media(
            self.service.files().export_media(
                fileId=file_id,
                mimeType=mime_type
            )
        )