# carbonnnnn's picture
# update app
# 4e9b3f7
# ADDING GOOGLE DRIVE SUPPORT
import io
import os
import csv
import PyPDF2
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
from driveapi.service import get_credentials
# Module-level Drive client setup. NOTE: this runs at import time.
# get_credentials() supplies the service-account info consumed below.
credentials_info = get_credentials()
# NOTE(review): no OAuth scopes are passed here — confirm the Drive API
# accepts these credentials as-is in the deployment environment.
credentials = service_account.Credentials.from_service_account_info(credentials_info)
# Shared Drive v3 client used by upload_chat_to_drive() and drive_content().
service = build('drive', 'v3', credentials=credentials)
# ID of the Drive folder used as the parent of uploaded chat logs; read from
# the LOGS_ID environment variable (None when the variable is unset).
logs_id = os.environ.get('LOGS_ID')
def upload_chat_to_drive(chat_history, file_name):
    """Upload a chat history to Google Drive as a spreadsheet.

    Every row of ``chat_history`` is written as one fully-quoted CSV row.
    The CSV is uploaded with a Google Sheets target MIME type into the
    folder identified by the module-level ``logs_id``.

    Args:
        chat_history: Iterable of rows; each row is an iterable of items,
            which are converted to text with ``str``.
        file_name: Name given to the file created on Drive.

    Returns:
        The ID of the created Drive file, or ``None`` if the upload failed
        (the error is printed, not raised).
    """
    # Convert chat history to CSV
    csv_text = io.StringIO(newline='')
    writer = csv.writer(csv_text, quoting=csv.QUOTE_ALL)
    for row in chat_history:
        # Round-trip each item through UTF-8 with errors='replace' so that
        # characters that cannot be encoded become '?' here instead of
        # failing when the payload is encoded for upload.
        encoded_row = [
            str(item).encode('utf-8', errors='replace').decode('utf-8')
            for item in row
        ]
        writer.writerow(encoded_row)

    # MediaIoBaseUpload reads *bytes* from its stream and sizes the upload
    # with seek/tell, so hand it a binary buffer rather than a text one.
    csv_bytes = io.BytesIO(csv_text.getvalue().encode('utf-8'))

    # File metadata
    file_metadata = {
        'name': file_name,
        'mimeType': 'application/vnd.google-apps.spreadsheet',
        'parents': [logs_id],
    }

    try:
        # Upload file
        media = MediaIoBaseUpload(csv_bytes, mimetype='text/csv', resumable=True)
        file = service.files().create(
            body=file_metadata, media_body=media, fields='id'
        ).execute()
        print(f'File ID: {file.get("id")}')
        return file.get('id')
    except Exception as e:
        # Best-effort logging upload: report the failure and signal it with
        # None rather than interrupting the caller.
        print(f"An error occurred while uploading to Google Drive: {e}")
        return None
## Read PDF files
def download_file(file_id):
    """Fetch the content of a Drive file into an in-memory buffer.

    Args:
        file_id: ID of the Drive file to download.

    Returns:
        An ``io.BytesIO`` holding the downloaded bytes, rewound to the
        start so it can be read immediately.
    """
    # A fresh Drive client is built for each download (as in the original).
    drive = build('drive', 'v3', credentials=credentials)
    media_request = drive.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, media_request)

    # Pull chunks until the downloader reports completion.
    finished = False
    while not finished:
        _progress, finished = downloader.next_chunk()

    buffer.seek(0)
    return buffer
# Function to process a PDF file
def process_pdf(file_stream):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_stream: Either a dict describing an uploaded file (its
            ``'name'`` entry is passed to the PDF reader), or an object
            that ``PyPDF2.PdfReader`` can open directly, such as the
            buffer returned by ``download_file``.

    Returns:
        The text of all pages concatenated in page order, with no
        separator between pages.
    """
    # A dict means the PDF came from drag and drop rather than a Drive link.
    # Use 'path' for local testing and 'name' for Gradio.
    if isinstance(file_stream, dict):
        source = file_stream['name']
    else:
        source = file_stream

    pdf_reader = PyPDF2.PdfReader(source)
    # Iterate the pages directly and join once instead of index-based
    # access with repeated string concatenation.
    return "".join(page.extract_text() for page in pdf_reader.pages)
def drive_content(shared_folder_id):
    """Return the concatenated text of all files in a Drive folder.

    Lists the files whose parent is ``shared_folder_id``, downloads each
    one with ``download_file`` and extracts its text with ``process_pdf``.

    Args:
        shared_folder_id: ID of the Drive folder to read.

    Returns:
        The extracted text of every listed file, concatenated in listing
        order (an empty string when the folder has no files).
    """
    # List files in the folder. The listing is paginated, so follow
    # nextPageToken until it is absent; otherwise only the first page of
    # results would be processed.
    query = f"'{shared_folder_id}' in parents"
    items = []
    page_token = None
    while True:
        results = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token,
        ).execute()
        items.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    texts = []
    for item in items:
        print(f"Processing file: {item['name']}")
        file_stream = download_file(item['id'])
        texts.append(str(process_pdf(file_stream)))
    return ''.join(texts)