File size: 2,764 Bytes
2190187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import base64
from collections import defaultdict

import PyPDF2

async def extract_text_from_pdf(pdf_path):
    """
    Return the text of every page of a PDF as a single string.

    The file at ``pdf_path`` is opened in binary mode and read with
    ``PyPDF2.PdfReader``. Each page's extracted text is separated from the
    next by a newline, and leading/trailing whitespace is stripped from
    the combined result.

    Args:
        pdf_path (str): Path of the PDF file to read

    Returns:
        str: The combined, stripped text of all pages
    """
    with open(pdf_path, "rb") as handle:
        # Extraction must happen while the underlying file is still open.
        page_texts = [page.extract_text() for page in PyPDF2.PdfReader(handle).pages]
        combined = "\n".join(page_texts)
    return combined.strip()

async def handle_attachments(session_id, conversation, remove_content=True):
    """
    Save base64 attachments to disk and record their paths on each message.

    For every message that carries a non-empty "attachments" list, each
    attachment's base64 "content" is decoded and written to
    ``temp_attachments/<session_id>/<name>``. The resulting path is stored
    on the attachment under "file_path". An attachment whose name matches
    one in the same message that already has a "file_path" reuses that
    path instead of being written again, so the function can safely be
    run more than once over the same conversation.

    Saving is best-effort: if decoding or writing fails, the error is
    printed, no "file_path" is recorded and the attachment's "content" is
    left untouched so the data is not lost.

    Args:
        session_id (str): The unique identifier for the session; used as
            the folder name under ``temp_attachments``
        conversation (list): List of message dicts, each optionally holding
            an "attachments" list of dicts with "name" and "content" keys
        remove_content (bool): When True, drop the "content" key from every
            attachment that ends up with a "file_path"

    Returns:
        list: The same ``conversation`` object, modified in place
    """
    for msg in conversation:
        attachments = msg.get("attachments")
        if not attachments:
            continue

        # Create a temporary folder for this session if it doesn't exist
        session_folder = os.path.join("temp_attachments", session_id)
        os.makedirs(session_folder, exist_ok=True)

        for attachment in attachments:
            attachment_name = attachment.get("name", "unknown_file")
            attachment_content = attachment.get("content")

            # Reuse the path of an attachment with the same name that was
            # already saved (this includes the attachment itself on a re-run).
            file_path = next(
                (
                    existing.get("file_path")
                    for existing in attachments
                    if existing.get("name") == attachment_name
                    and existing.get("file_path")
                ),
                None,
            )

            # Only decode and save if it's a new attachment
            if file_path is None and attachment_content:
                # The name comes from the client: keep only its final path
                # component so it cannot escape the session folder.
                safe_name = os.path.basename(
                    str(attachment_name or "unknown_file").replace("\\", "/")
                )
                if safe_name in ("", ".", ".."):
                    safe_name = "unknown_file"
                target_path = os.path.join(session_folder, safe_name)

                try:
                    # Decode before opening so invalid base64 does not leave
                    # an empty file behind.
                    decoded = base64.b64decode(attachment_content)
                    with open(target_path, "wb") as f:
                        f.write(decoded)
                except Exception as e:
                    # Best-effort: report and carry on with other attachments.
                    print(f"Error saving attachment: {str(e)}")
                else:
                    file_path = target_path

            # Add file_path to the original attachment dict
            if file_path:
                if remove_content:
                    # "content" may already be gone on a repeated run.
                    attachment.pop("content", None)
                attachment["file_path"] = file_path

    return conversation