Spaces:
Sleeping
Sleeping
File size: 5,175 Bytes
9002555 69beac6 9002555 d57efd6 25f9481 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import re
def parse_topics_to_dict(text):
    """Parse a numbered outline into a mapping of topic -> sub-topics.

    A line shaped like ``1. Title`` opens a new topic (re-opening an existing
    title resets its list). A line shaped like ``* item`` is appended to the
    most recently opened topic. Any other line, and any bullet that appears
    before the first topic, is ignored.

    The resulting dict is also printed to stdout before being returned.

    :param text: Outline text, one entry per line.
    :return: Dict keyed by topic title; each value is the list of sub-topics.
    """
    heading_re = re.compile(r"^\d+\.\s+(.*)$")
    bullet_re = re.compile(r"^\*\s+(.*)$")

    outline = {}
    active = None
    for raw in text.strip().split("\n"):
        entry = raw.strip()

        heading = heading_re.match(entry)
        if heading:
            active = heading.group(1)
            outline[active] = []
            continue

        bullet = bullet_re.match(entry)
        if bullet and active:
            outline[active].append(bullet.group(1))

    print(outline)
    return outline
def remove_all_sources(text):
    """Strip every ``Source <n>: ...`` segment from *text*.

    A segment starts at a ``Source <n>:`` label and runs up to the next such
    label or the end of the text. DOTALL lets a segment span several lines.
    Matching is case-sensitive, so a lowercase ``source 1:`` is left alone.

    :param text: Text that may contain source segments.
    :return: The remaining text with surrounding whitespace stripped.
    """
    source_segment = re.compile(r"Source \d+:(.*?)(?=Source \d+:|$)", re.DOTALL)
    return source_segment.sub("", text).strip()
def clean_text(text):
    """Collapse *text* into a single, tidily punctuated line.

    The substitutions run in order:

    1. collapse runs of two or more whitespace characters to one space;
    2. turn newlines not followed by (optional whitespace and) a digit into
       spaces;
    3. drop a semicolon that is immediately followed by a non-space character;
    4. remove whitespace before ``,``/``;`` and leave exactly one space after;
    5. collapse all remaining whitespace runs to one space.

    Note that step 5 also flattens the newlines step 2 left in place, so the
    result never contains a line break.

    :param text: Raw text to normalise.
    :return: The cleaned, stripped text.
    """
    passes = (
        (r"\s{2,}", " "),
        (r"\n(?!\s*\d)", " "),
        (r";(?=\S)", ""),
        (r"\s*([,;])\s*", r"\1 "),
        (r"\s+", " "),
    )
    for pattern, replacement in passes:
        text = re.sub(pattern, replacement, text)
    return text.strip()
def update_response(text):
    """Renumber bracketed citations in *text* so they run 1..N without gaps.

    The distinct citation numbers (``[3]``, ``[7]``, ...) are ranked in
    ascending order and each citation is rewritten to its rank, so
    ``"[3] ... [7] ... [3]"`` becomes ``"[1] ... [2] ... [1]"``.

    All citations are rewritten in a single pass. Replacing one number at a
    time would let an already-renumbered citation be picked up again by a
    later replacement (``[0]`` -> ``[1]`` followed by ``[1]`` -> ``[2]``), and
    would miss zero-padded forms such as ``[01]``.

    :param text: Text containing ``[n]`` style citations.
    :return: The text with citations renumbered; unchanged if there are none.
    """
    citation_re = re.compile(r"\[(\d+)\]")

    # Rank the distinct citation numbers, smallest first.
    distinct = sorted({int(number) for number in citation_re.findall(text)})
    remap = {old: new for new, old in enumerate(distinct, start=1)}

    def renumber(match):
        return f"[{remap[int(match.group(1))]}]"

    return citation_re.sub(renumber, text)
def renumber_sources(source_list):
    """Relabel sources sequentially as ``source 1: ...``, ``source 2: ...``.

    Each entry's existing label (everything up to and including the first
    ``": "``) is dropped and replaced by a new label based on the entry's
    position in the list. Order is preserved.

    An entry that contains no ``": "`` has no label to drop, so it is kept
    whole as the content instead of raising ``IndexError``.

    :param source_list: List of source strings, normally ``"<label>: <content>"``.
    :return: New list of relabelled source strings.
    """
    renumbered = []
    for position, source in enumerate(source_list, start=1):
        _label, separator, content = source.partition(": ")
        if not separator:
            # No "<label>: " prefix found; treat the whole entry as content.
            content = source
        renumbered.append(f"source {position}: {content}")
    return renumbered
def sort_and_renumber_sources(source_list):
    """
    Sort sources by their ``Source <n>`` number and renumber them from 1.

    The sort key is the first ``Source <n>`` found in each entry; entries
    without one sort last and are returned unchanged. Only that first
    occurrence is rewritten, so a later ``Source <n>`` mentioned inside an
    entry's content is not overwritten with the entry's own new number.

    :param source_list: List of strings containing source information.
    :return: New sorted and renumbered list of sources.
    """
    label_re = re.compile(r"Source (\d+)")

    def source_number(source):
        found = label_re.search(source)
        return int(found.group(1)) if found else float("inf")

    ordered = sorted(source_list, key=source_number)

    # count=1: touch only the leading label that the sort key was read from.
    return [
        label_re.sub(f"Source {position}", source, count=1)
        for position, source in enumerate(ordered, start=1)
    ]
def seperate_to_list(text):
    """Break *text* into a flat list of trimmed, non-empty fragments.

    For each line of the input:

    * every ``Source <n>:`` label is removed;
    * the line is split around each run of uppercase letters and/or
      whitespace that ends in ``.``, ``!`` or ``?`` -- the matched run is
      kept as its own fragment, and the text on either side stays intact.

    Fragments are stripped, and empty ones are discarded.

    :param text: Multi-line text to split.
    :return: List of fragments in their original order.
    """
    label_re = re.compile(r"Source \d+\:")
    uppercase_run_re = re.compile(r"([A-Z\s]+[.!?])")

    pieces = []
    for row in text.split("\n"):
        unlabeled = label_re.sub("", row)
        for fragment in uppercase_run_re.split(unlabeled):
            trimmed = fragment.strip()
            if trimmed:
                pieces.append(trimmed)
    return pieces
def join_list(items):
    """Join *items* into a phrase such as ``"a, b and c"``.

    * no items (or a falsy value)  -> ``""``
    * one item                     -> that item, returned as-is
    * two items                    -> ``"a and b"``
    * three or more                -> comma-separated, with ``" and "``
      before the last item

    :param items: Sequence of items to join (strings for three or more).
    :return: The joined phrase.
    """
    if not items:
        return ""
    if len(items) == 1:
        return items[0]
    if len(items) == 2:
        first, second = items[0], items[1]
        return f"{first} and {second}"

    leading = ", ".join(items[:-1])
    return leading + " and " + items[-1]
def redesign_structure_message(message, metadata):
    """
    Swap each ``[n]`` citation in *message* for the matching title.

    Citation ``[n]`` becomes ``[*<title>*]`` using ``metadata[n-1]["title"]``.
    Citations whose number falls outside ``1..len(metadata)`` are left as
    they are, and the message is returned untouched when *metadata* is
    empty or ``None``.

    :param message: Text containing ``[n]`` style citations.
    :param metadata: Sequence of mappings, each with a ``"title"`` entry.
    :return: The message with in-range citations replaced by titles.
    """
    if not metadata:
        return message

    total = len(metadata)

    def title_for(match):
        number = int(match.group(1))
        if number < 1 or number > total:
            # Out of range: keep the citation exactly as written.
            return match.group(0)
        title = metadata[number - 1]['title']
        return f"[*{title}*]"

    return re.sub(r'\[(\d+)\]', title_for, message)