File size: 5,175 Bytes
9002555
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69beac6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9002555
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d57efd6
 
 
 
 
 
 
 
 
 
25f9481
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import re


def parse_topics_to_dict(text):
    """
    Parse a numbered outline into a dictionary of topics and their sub-topics.

    A line of the form ``"<digits>. <title>"`` starts a new topic; a line of
    the form ``"* <item>"`` adds a sub-topic to the most recent topic. Lines
    are stripped before matching, so indentation is irrelevant. Lines matching
    neither form, and sub-topic lines that appear before any topic, are
    ignored. A repeated topic title restarts that topic with an empty list.

    :param text: Outline text with one entry per line.
    :return: Dict mapping each topic title to the list of its sub-topics.
    """
    topic_pattern = re.compile(r"^\d+\.\s+(.*)$")
    sub_topic_pattern = re.compile(r"^\*\s+(.*)$")

    topics = {}
    current_topic = None

    for raw_line in text.strip().split("\n"):
        line = raw_line.strip()

        # Match each pattern once and reuse the match object.
        topic_match = topic_pattern.match(line)
        if topic_match:
            current_topic = topic_match.group(1)
            topics[current_topic] = []
            continue

        sub_topic_match = sub_topic_pattern.match(line)
        # Bullets seen before the first numbered topic have no owner; drop them.
        if sub_topic_match and current_topic:
            topics[current_topic].append(sub_topic_match.group(1))

    return topics


def remove_all_sources(text):
    """
    Strip every ``Source <n>:`` section from the text.

    A section starts at a literal ``Source <digits>:`` label (matching is
    case-sensitive) and runs up to the next such label or the end of the
    text. Sections may span several lines.

    :param text: Text that may contain source sections.
    :return: The text without source sections, with outer whitespace trimmed.
    """
    # DOTALL lets '.' cross line breaks so a multi-line section is removed whole.
    source_section = re.compile(r"Source \d+:(.*?)(?=Source \d+:|$)", re.DOTALL)
    return source_section.sub("", text).strip()


def clean_text(text):
    """
    Normalise whitespace and comma/semicolon spacing in a piece of text.

    The substitutions are applied in order. Because the last one collapses
    every whitespace run (newlines included) to a single space, the returned
    string is always a single line.

    :param text: Text to clean.
    :return: The cleaned, single-line text with outer whitespace trimmed.
    """
    substitutions = (
        # Collapse runs of two or more whitespace characters.
        (r"\s{2,}", " "),
        # Turn newlines into spaces unless a digit follows (optionally after whitespace).
        (r"\n(?!\s*\d)", " "),
        # Drop a semicolon that is immediately followed by a non-space character.
        (r";(?=\S)", ""),
        # Put exactly one space after each comma/semicolon and none before it.
        (r"\s*([,;])\s*", r"\1 "),
        # Final pass: any remaining whitespace run becomes one space.
        (r"\s+", " "),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    return text.strip()


def update_response(text):
    """
    Renumber bracketed references such as ``[3]`` so they run from 1 upwards.

    The distinct reference numbers found in the text are ranked in ascending
    order and each reference is replaced by its rank, e.g. ``[3] [7] [3]``
    becomes ``[1] [2] [1]``. A token whose digits are not the plain decimal
    form of its number (for example ``[01]``) still counts towards the
    ranking but is left unchanged in the text.

    :param text: Text containing bracketed numeric references.
    :return: The text with references renumbered.
    """
    reference = re.compile(r"\[\d+\]")

    # Rank the distinct reference numbers in ascending order, starting at 1.
    distinct = {int(token.strip("[]")) for token in reference.findall(text)}
    rank_of = {}
    for rank, number in enumerate(sorted(distinct), start=1):
        rank_of[number] = rank

    def _renumber(hit):
        token = hit.group(0)
        digits = token[1:-1]
        number = int(digits)
        # Only the plain decimal spelling of a number is rewritten.
        if str(number) != digits:
            return token
        return f"[{rank_of[number]}]"

    return reference.sub(_renumber, text)


def renumber_sources(source_list):
    """
    Relabel sources as ``source 1: ...``, ``source 2: ...`` in list order.

    Everything after the first ``": "`` of each entry is kept as its content.

    :param source_list: List of strings, each containing ``": "``.
    :return: New list of relabelled source strings.
    :raises IndexError: If an entry does not contain ``": "``.
    """
    # Keep only what follows the first ": " of each entry.
    contents = [entry.split(": ", 1)[1] for entry in source_list]
    return [
        f"source {number}: {content}"
        for number, content in enumerate(contents, start=1)
    ]


def sort_and_renumber_sources(source_list):
    """
    This function takes a list of sources, sorts them based on the source number,
    and renumbers them sequentially starting from 1.

    Only the first ``Source <n>`` label of each entry is rewritten, so any
    later "Source <n>" mention inside the entry's content is left as written.
    Entries without a label are sorted after all labelled entries and are
    returned unchanged. The input list is not modified.

    :param source_list: List of strings containing source information.
    :return: Sorted and renumbered list of sources.
    """
    label_pattern = re.compile(r"Source (\d+)")

    # Function to extract source number
    def extract_source_number(source):
        match = label_pattern.search(source)
        # Unlabelled entries sort last.
        return int(match.group(1)) if match else float('inf')

    # Sort sources based on the source number
    sorted_sources = sorted(source_list, key=extract_source_number)

    # count=1 limits the rewrite to the first label, the same one used as sort key.
    return [
        label_pattern.sub(f"Source {idx}", source, count=1)
        for idx, source in enumerate(sorted_sources, 1)
    ]

def seperate_to_list(text):
    """
    Break text into a flat list of trimmed, non-empty fragments.

    Each line has its ``Source <n>:`` labels removed and is then split around
    every run of uppercase letters and/or whitespace that ends in ``.``, ``!``
    or ``?``. The matched runs are kept as fragments of their own.

    :param text: Text to split, with lines separated by line breaks.
    :return: List of fragments in order of appearance.
    """
    fragments = []
    for raw_line in text.split("\n"):
        # Remove the "Source <n>:" labels before splitting.
        unlabeled = re.sub(r"Source \d+\:", "", raw_line)
        # The capture group makes re.split keep the matched runs in its output.
        for piece in re.split(r"([A-Z\s]+[.!?])", unlabeled):
            trimmed = piece.strip()
            if trimmed:
                fragments.append(trimmed)

    return fragments

def join_list(items):
    """
    Join items into a phrase such as ``"a, b and c"``.

    :param items: Sequence of items to join.
    :return: ``""`` for an empty sequence, the sole item itself for one item,
        ``"a and b"`` for two, and comma-separated items with a final
        ``" and "`` before the last one otherwise.
    """
    if not items:
        return ""

    count = len(items)
    if count == 1:
        return items[0]
    if count == 2:
        return f"{items[0]} and {items[1]}"

    leading = ", ".join(items[:-1])
    return leading + " and " + items[-1]

def redesign_structure_message(message, metadata):
    """
    Replace each ``[n]`` citation in the message with ``[*title*]``, where the
    title is read from ``metadata[n-1]["title"]``.

    Citations whose number is outside ``1..len(metadata)`` are left as they
    are. If metadata is empty or None, the message is returned unchanged.

    :param message: Text containing citations in the form ``[n]``.
    :param metadata: Sequence of mappings, each with a ``"title"`` key.
    :return: The message with in-range citations replaced by titles.
    """
    # Nothing to look titles up in: hand the message back untouched.
    if not metadata:
        return message

    entry_count = len(metadata)

    def _cite_title(hit):
        position = int(hit.group(1))
        # Out-of-range numbers (including 0) keep their original "[n]" text.
        if position < 1 or position > entry_count:
            return hit.group(0)
        entry = metadata[position - 1]
        return f"[*{entry['title']}*]"

    return re.sub(r'\[(\d+)\]', _cite_title, message)