File size: 2,857 Bytes
2927735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e9b3f7
 
 
 
 
 
 
2927735
 
 
 
 
 
 
 
 
4e9b3f7
 
 
 
 
 
 
 
 
 
2927735
 
 
 
 
 
 
 
 
 
 
 
 
 
de36b7a
 
 
 
 
2927735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# ADDING GOOGLE DRIVE SUPPORT

import io
import os
import csv
import PyPDF2

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
from driveapi.service import get_credentials

# Build one module-level Drive v3 client at import time.
# get_credentials() comes from the project's driveapi.service module; its result
# is passed to from_service_account_info, so it is presumably the parsed
# service-account key mapping -- TODO confirm against driveapi.service.
credentials_info = get_credentials()
credentials = service_account.Credentials.from_service_account_info(credentials_info)
service = build('drive', 'v3', credentials=credentials)

# Value of the LOGS_ID environment variable (None when unset); used below as
# the parent of uploaded chat logs.
logs_id = os.environ.get('LOGS_ID')

def upload_chat_to_drive(chat_history, file_name):
    """Upload a chat history to Google Drive as a spreadsheet.

    Every entry of ``chat_history`` is written as one fully quoted CSV row.
    The CSV is uploaded as ``text/csv`` while the file metadata requests the
    Google Sheets MIME type, and the file is created under ``logs_id``.

    Args:
        chat_history: Iterable of rows; each row is an iterable of items.
            Items are converted with ``str()`` before being written.
        file_name: Name given to the file created in Drive.

    Returns:
        The ID of the created Drive file, or ``None`` if the upload failed
        (the error is printed, not raised).
    """
    csv_output = io.StringIO()
    writer = csv.writer(csv_output, quoting=csv.QUOTE_ALL)

    for row in chat_history:
        # Round-trip through UTF-8 with errors='replace' so characters that
        # cannot be encoded (e.g. lone surrogates) become '?' instead of
        # failing later when the payload is turned into bytes.
        writer.writerow(
            [str(item).encode('utf-8', errors='replace').decode('utf-8') for item in row]
        )

    # The uploader works on bytes: it derives the content length from the
    # stream position, which on a text stream counts characters rather than
    # encoded bytes. Encode once and hand over a real byte stream.
    csv_bytes = io.BytesIO(csv_output.getvalue().encode('utf-8'))

    file_metadata = {
        'name': file_name,
        'mimeType': 'application/vnd.google-apps.spreadsheet',
        'parents': [logs_id]
    }

    try:
        media = MediaIoBaseUpload(csv_bytes, mimetype='text/csv', resumable=True)
        file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
        print(f'File ID: {file.get("id")}')
        return file.get('id')
    except Exception as e:
        # Logging is best-effort: report the failure and let the caller go on.
        print(f"An error occurred while uploading to Google Drive: {e}")
        return None
    
## Read PDF files
def download_file(file_id):
    """Download a Drive file's content into memory.

    Args:
        file_id: ID of the Drive file to fetch.

    Returns:
        An ``io.BytesIO`` holding the downloaded content, rewound to the start.
    """
    # A separate Drive client is built for each call rather than reusing the
    # module-level one.
    drive = build('drive', 'v3', credentials=credentials)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, drive.files().get_media(fileId=file_id))

    # Pull chunks until the downloader reports completion.
    while True:
        _, finished = downloader.next_chunk()
        if finished:
            break

    buffer.seek(0)
    return buffer

# Function to process a PDF file
def process_pdf(file_stream):
    """Extract the text of every page of a PDF.

    Args:
        file_stream: Either a dict describing an uploaded file, whose
            ``'name'`` entry (or ``'path'`` when ``'name'`` is absent) is
            handed to the PDF reader, or any object ``PyPDF2.PdfReader``
            accepts directly, such as an in-memory byte stream.

    Returns:
        The text of all pages concatenated in page order.

    Raises:
        KeyError: If ``file_stream`` is a dict with neither ``'name'`` nor
            ``'path'``.
    """
    if isinstance(file_stream, dict):
        # Drag-and-drop uploads arrive as a dict. Gradio uses 'name', local
        # testing uses 'path'; accept both so no code edit is needed to switch.
        try:
            pdf_source = file_stream['name']
        except KeyError:
            pdf_source = file_stream['path']
    else:
        pdf_source = file_stream

    pdf_reader = PyPDF2.PdfReader(pdf_source)
    # `or ""` guards against a page for which extraction yields nothing, so
    # one empty page cannot break the concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

def drive_content(shared_folder_id):
    """Return the concatenated PDF text of the files in a Drive folder.

    Lists the direct children of ``shared_folder_id``, downloads each
    downloadable file and extracts its text with ``process_pdf``.

    Args:
        shared_folder_id: ID of the Drive folder to read.

    Returns:
        The extracted text of all processed files joined together, in the
        order the API listed them; an empty string if nothing was processed.
    """
    query = f"'{shared_folder_id}' in parents"

    # files().list is paginated: keep requesting pages until the response no
    # longer carries a nextPageToken, otherwise large folders are truncated.
    items = []
    page_token = None
    while True:
        results = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token,
        ).execute()
        items.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    parts = []
    for item in items:
        # Google-native items (sub-folders, Docs, Sheets, ...) have no binary
        # content to fetch with get_media; skip them instead of aborting.
        if item.get('mimeType', '').startswith('application/vnd.google-apps'):
            print(f"Skipping non-downloadable item: {item['name']}")
            continue
        print(f"Processing file: {item['name']}")
        file_stream = download_file(item['id'])
        parts.append(str(process_pdf(file_stream)))

    return ''.join(parts)