# Spaces: Multimedika / Bot_Development (Sleeping) - File size: 5,175 Bytes

import re


def parse_topics_to_dict(text):
    """Parse a numbered outline into a mapping of topic -> sub-topics.

    A topic line has the form ``1. Topic name`` and a sub-topic line has the
    form ``* Sub-topic name``. Every line is stripped before matching, so
    indentation is ignored. A sub-topic is attached to the most recent topic
    line; sub-topics that appear before any topic, and lines matching neither
    form, are skipped. Nothing is written to stdout.

    :param text: Outline text with one entry per line.
    :return: Dict mapping each topic title to the list of its sub-topic
        titles in order of appearance. A topic title that occurs again
        restarts with an empty list.
    """
    topic_pattern = re.compile(r"^\d+\.\s+(.*)$")
    sub_topic_pattern = re.compile(r"^\*\s+(.*)$")

    topics = {}
    current_topic = None

    for raw_line in text.strip().split("\n"):
        line = raw_line.strip()

        # Match once and reuse the match object instead of matching twice.
        topic_match = topic_pattern.match(line)
        if topic_match:
            current_topic = topic_match.group(1)
            topics[current_topic] = []
            continue

        sub_topic_match = sub_topic_pattern.match(line)
        if sub_topic_match and current_topic:
            topics[current_topic].append(sub_topic_match.group(1))

    return topics


def remove_all_sources(text):
    """Strip every ``Source N: ...`` section from *text*.

    A section starts at a ``Source <number>:`` label and runs up to the next
    such label or the end of the text. Labels are matched case-insensitively,
    so ``source 1:`` and ``SOURCE 1:`` are removed as well.

    :param text: Text that may contain appended source sections.
    :return: The text with all source sections removed and surrounding
        whitespace stripped.
    """
    # \b keeps the case-insensitive match from firing inside longer words
    # such as "resource 2:".
    pattern = r"\bSource \d+:.*?(?=\bSource \d+:|$)"

    # re.DOTALL lets a section span several lines; re.IGNORECASE also catches
    # lowercase/uppercase variants of the label.
    updated_text = re.sub(pattern, "", text, flags=re.DOTALL | re.IGNORECASE)

    return updated_text.strip()


def clean_text(text):
    """Normalize whitespace and comma/semicolon spacing in *text*.

    The substitutions below are applied in order. Note that the last pass
    collapses every remaining whitespace run, including the newlines that the
    second pass leaves in front of digits, so the result is a single line.

    :param text: Text to clean.
    :return: The cleaned text with leading/trailing whitespace removed.
    """
    substitutions = (
        # Runs of two or more whitespace characters become one space.
        (r"\s{2,}", " "),
        # Newlines not followed by (optional whitespace and) a digit become spaces.
        (r"\n(?!\s*\d)", " "),
        # Drop semicolons that are immediately followed by a non-space character.
        (r";(?=\S)", ""),
        # Commas/semicolons: no whitespace before, exactly one space after.
        (r"\s*([,;])\s*", r"\1 "),
        # Finally collapse any remaining whitespace run to a single space.
        (r"\s+", " "),
    )

    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    return text.strip()


def update_response(text):
    """Renumber bracketed citations such as ``[3]`` so they run 1, 2, 3, ...

    The distinct citation numbers found in *text* are ranked in ascending
    order and each citation is rewritten to its rank. Citations whose digits
    are not in canonical form (for example ``[07]``) contribute their numeric
    value to the ranking but are themselves left untouched.

    :param text: Text containing citations in the form ``[n]``.
    :return: The text with citations renumbered.
    """
    citation = r"\[\d+\]"

    # Distinct cited numbers, taken from the digits between the brackets.
    cited_numbers = {int(tag[1:-1]) for tag in re.findall(citation, text)}

    # Map the canonical spelling of each old citation to its replacement.
    renumbered = {
        f"[{old}]": f"[{rank}]"
        for rank, old in enumerate(sorted(cited_numbers), start=1)
    }

    def swap(hit):
        found = hit.group(0)
        return renumbered.get(found, found)

    return re.sub(citation, swap, text)


def renumber_sources(source_list):
    """Relabel each source as ``source 1: ...``, ``source 2: ...`` in list order.

    The content of an entry is everything after its first ``": "``. An entry
    that contains no ``": "`` is kept whole as the content instead of raising
    ``IndexError``.

    :param source_list: List of source strings, typically ``"<label>: <content>"``.
    :return: New list of strings of the form ``"source <n>: <content>"``.
    """
    new_sources = []
    for number, source in enumerate(source_list, start=1):
        # partition never raises; the separator comes back empty when absent.
        _label, separator, content = source.partition(": ")
        if not separator:
            content = source
        new_sources.append(f"source {number}: {content}")
    return new_sources


def sort_and_renumber_sources(source_list):
    """
    Sort sources by their ``Source N`` number and renumber them from 1.

    The number is taken from the first ``Source <number>`` occurrence in each
    entry. Entries without one sort last and are returned unchanged, although
    they still occupy a position in the numbering. Only the first label in an
    entry is rewritten, so references to other sources inside the entry body
    are left as they are.

    :param source_list: List of strings containing source information.
    :return: Sorted and renumbered list of sources.
    """
    label_pattern = re.compile(r"Source (\d+)")

    # Sort key: the source number, or infinity when there is no label.
    def extract_source_number(source):
        match = label_pattern.search(source)
        return int(match.group(1)) if match else float("inf")

    sorted_sources = sorted(source_list, key=extract_source_number)

    # count=1 restricts the rewrite to the leading label.
    return [
        label_pattern.sub(f"Source {idx}", source, count=1)
        for idx, source in enumerate(sorted_sources, 1)
    ]

def seperate_to_list(text):
    """Split *text* into a flat list of trimmed, non-empty fragments.

    Each line has its ``Source <number>:`` labels removed and is then split
    around every run of uppercase ASCII letters and/or whitespace that is
    immediately followed by ``.``, ``!`` or ``?``. The matched run is kept as
    its own fragment.

    :param text: Text to split, with lines separated by ``\\n``.
    :return: List of stripped fragments in order; empty fragments are dropped.
    """
    fragments = []
    for raw_line in text.split("\n"):
        # Drop the "Source N:" labels before splitting.
        unlabeled = re.sub(r"Source \d+\:", "", raw_line)

        # The capture group makes re.split keep the matched runs.
        for piece in re.split(r"([A-Z\s]+[.!?])", unlabeled):
            trimmed = piece.strip()
            if trimmed:
                fragments.append(trimmed)

    return fragments

def join_list(items):
    """Join *items* into a phrase such as ``"a, b and c"``.

    :param items: Sequence of items to join.
    :return: ``""`` for an empty or falsy input, the sole item itself for one
        item, ``"a and b"`` for two, otherwise the leading items separated by
        ``", "`` followed by ``" and "`` and the last item.
    """
    if not items:
        return ""

    count = len(items)
    if count == 1:
        return items[0]
    if count == 2:
        return f"{items[0]} and {items[1]}"

    leading = ", ".join(items[:-1])
    return leading + " and " + items[-1]

def redesign_structure_message(message, metadata):
    """
    Replace each ``[n]`` citation in *message* with ``[*<title>*]``, where the
    title is read from ``metadata[n-1]["title"]``.

    :param message: Text containing citations in the form ``[n]``.
    :param metadata: Sequence of mappings with a ``"title"`` key; when it is
        empty or otherwise falsy the message is returned unchanged.
    :return: The message with in-range citations replaced by titles.
        Citations whose number falls outside ``1..len(metadata)`` are kept.
    """
    if not metadata:
        return message

    entry_count = len(metadata)

    def title_for(hit):
        # Citations are 1-based; metadata is indexed from 0.
        index = int(hit.group(1)) - 1
        if index < 0 or index >= entry_count:
            return hit.group(0)
        return f"[*{metadata[index]['title']}*]"

    return re.sub(r'\[(\d+)\]', title_for, message)