File size: 2,563 Bytes
d6cf17e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gdown
import os
from urllib.parse import urlparse, parse_qs

def download_pdf_from_gdrive(url, output_path=None):
    """
    Download a PDF file from Google Drive using the provided sharing URL.

    Two URL shapes are recognised: ``.../file/d/<id>`` (optionally followed
    by a path such as ``/view``, a query string, or a fragment) and any URL
    that carries the file ID in an ``id=<id>`` query parameter.

    Parameters:
    url (str): The Google Drive sharing URL of the PDF file
    output_path (str, optional): The path where the PDF should be saved.
                                If not provided, 'downloaded_file.pdf' in the
                                current directory is used. A '.pdf' suffix is
                                appended when missing, and the parent
                                directory is created when it does not exist.

    Returns:
    str: Path to the downloaded file if successful, None if failed

    Note:
    This function does not propagate exceptions. Every failure (empty or
    non-Drive URL, missing file ID, failed download, filesystem error) is
    caught, reported with print(), and signalled by returning None.
    """
    try:
        # Check if URL is provided
        if not url:
            raise ValueError("URL cannot be empty")

        # Only Google Drive links are handled
        if 'drive.google.com' not in url:
            raise ValueError("Not a valid Google Drive URL")

        # Extract file ID from the URL
        file_id = None
        if '/file/d/' in url:
            file_id = url.split('/file/d/', 1)[1]
            # The ID ends at the next path separator, query string or
            # fragment; links like ".../file/d/<id>?usp=sharing" have no
            # trailing "/view", so cutting on "/" alone is not enough.
            for delimiter in '/?#':
                file_id = file_id.split(delimiter, 1)[0]
        elif 'id=' in url:
            # "id=" may appear without being a real query key (e.g. "uid=");
            # .get() lets that case fall through to the explicit error below
            # instead of surfacing as a bare KeyError.
            file_id = parse_qs(urlparse(url).query).get('id', [None])[0]

        if not file_id:
            raise ValueError("Could not extract file ID from the URL")

        # Set default output path if none provided
        if not output_path:
            output_path = 'downloaded_file.pdf'

        # Ensure the output path ends with .pdf
        if not output_path.lower().endswith('.pdf'):
            output_path += '.pdf'

        # Create the target directory if needed ('.' when the path is bare)
        os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

        # Download the file
        output = gdown.download(id=file_id, output=output_path, quiet=False)

        if output is None:
            raise ValueError("Download failed - file might be inaccessible or not exist")

        return output

    except Exception as e:
        # Deliberate best-effort boundary: report and return None.
        print(f"Error downloading PDF: {str(e)}")
        return None

def merge_strings_with_prefix(strings, prefix="•"):
    """Group a flat list of strings into items that each begin at a prefix.

    A string that starts with ``prefix`` opens a new item; every following
    string that does not start with ``prefix`` is appended to that item
    verbatim (no separator is inserted). Strings that appear before the
    first prefixed string are merged together into their own leading item.
    Items that end up empty are dropped.

    Args:
    strings: A list of strings.
    prefix: The marker that starts a new item. Defaults to the bullet
        character "•". A tuple of markers is also accepted, as with
        ``str.startswith``.

    Returns:
    A new list of merged strings.
    """

    result = []
    # Collect the pieces of the current item and join once per item rather
    # than growing a string with repeated "+=".
    pieces = []

    for string in strings:
        if string.startswith(prefix):
            merged = "".join(pieces)
            if merged:
                result.append(merged)
            pieces = [string]
        else:
            pieces.append(string)

    merged = "".join(pieces)
    if merged:
        result.append(merged)

    return result