Spaces:

Multimedika
/

Bot_Development

Runtime error

App Files Files Community

Bot_Development / core /parser.py

dsmultimedika

Improve the code bot development

d57efd6 9 months ago

raw

history blame

3.34 kB

	import re


	def parse_topics_to_dict(text):
	    """Parse a numbered outline into a ``{topic: [sub_topics]}`` dictionary.

	    A line of the form ``<number>. <title>`` starts a new topic. Each
	    following line of the form ``* <item>`` is appended to the most recent
	    topic. Lines are stripped before matching, so indented bullets are
	    recognised too. Any other line is ignored, as are bullets that appear
	    before the first topic. A repeated topic title resets its list.

	    Args:
	        text: The outline text, one entry per line.

	    Returns:
	        A dict mapping each topic title to the list of its sub-topic strings
	        (an empty list when the topic has no bullets).
	    """
	    topic_pattern = re.compile(r"^\d+\.\s+(.*)$")
	    # The bullet marker is a literal asterisk, hence the escape.
	    sub_topic_pattern = re.compile(r"^\*\s+(.*)$")

	    topics = {}
	    current_topic = None

	    for raw_line in text.strip().split("\n"):
	        line = raw_line.strip()

	        topic_match = topic_pattern.match(line)
	        if topic_match:
	            current_topic = topic_match.group(1)
	            topics[current_topic] = []
	            continue

	        sub_topic_match = sub_topic_pattern.match(line)
	        # Bullets seen before any numbered topic have nowhere to go.
	        if sub_topic_match and current_topic is not None:
	            topics[current_topic].append(sub_topic_match.group(1))

	    return topics


	def remove_all_sources(text):
	    """Strip every ``Source <n>: ...`` section from *text*.

	    A section starts at ``Source <n>:`` and runs up to (but not including)
	    the next ``Source <n>:`` marker or the end of the text. Matching is
	    case-sensitive, so only a capitalised ``Source`` marker is recognised.

	    Args:
	        text: Text that may contain source sections.

	    Returns:
	        The text with all source sections removed and surrounding whitespace
	        stripped.
	    """
	    # The alternation in the lookahead must be an unescaped "|": the section
	    # ends either at the next marker or at the end of the text.
	    pattern = r"Source \d+:(.*?)(?=Source \d+:|$)"

	    # re.DOTALL lets "." span newlines so multi-line sections are removed whole.
	    updated_text = re.sub(pattern, "", text, flags=re.DOTALL)

	    return updated_text.strip()


	def clean_text(text):
	    """Flatten *text* to a single line with tidied spacing and punctuation.

	    The substitutions below are applied strictly in the listed order; the
	    result is stripped of leading and trailing whitespace.
	    """
	    substitutions = (
	        # Collapse any run of two or more whitespace characters.
	        (r"\s{2,}", " "),
	        # Turn a newline into a space unless digits follow (after optional
	        # whitespace).
	        (r"\n(?!\s*\d)", " "),
	        # Drop a semicolon that is immediately followed by a non-space
	        # character.
	        (r";(?=\S)", ""),
	        # Remove the whitespace in front of a comma or semicolon that has
	        # whitespace on both sides.
	        (r"\s([,;])\s", r"\1 "),
	        # Squeeze all remaining whitespace, including the newlines kept
	        # above, down to single spaces.
	        (r"\s+", " "),
	    )

	    for pattern, replacement in substitutions:
	        text = re.sub(pattern, replacement, text)

	    return text.strip()


	def update_response(text):
	    """Renumber bracketed references such as ``[3]`` consecutively from 1.

	    The distinct reference numbers found in *text* are ranked in ascending
	    order and each ``[old]`` token is rewritten to ``[rank]``.
	    """
	    # Distinct reference numbers appearing anywhere in the text.
	    seen_numbers = {int(token[1:-1]) for token in re.findall(r"\[\d+\]", text)}

	    # Walking the numbers in ascending order keeps the rewrite collision-free:
	    # a new number is never larger than the old one it replaces.
	    for rank, number in enumerate(sorted(seen_numbers), start=1):
	        text = text.replace(f"[{number}]", f"[{rank}]")

	    return text


	def renumber_sources(source_list):
	    """Relabel each source as ``source <n>: <content>``, numbering from 1.

	    The content of an entry is everything after its first ``": "``. An entry
	    that contains no ``": "`` separator is used in full as the content
	    instead of raising ``IndexError``.

	    Args:
	        source_list: Source strings, typically ``"<label>: <content>"``.

	    Returns:
	        A new list of relabelled source strings in the original order.
	    """
	    new_sources = []
	    for number, source in enumerate(source_list, start=1):
	        # partition never raises; an empty separator means no label was found.
	        _label, separator, remainder = source.partition(": ")
	        content = remainder if separator else source
	        new_sources.append(f"source {number}: {content}")
	    return new_sources


	def seperate_to_list(text):
	    """Break *text* into a flat list of trimmed, non-empty fragments.

	    Each line has any ``Source <n>:`` label removed. The line is then split
	    around every run of uppercase letters and/or whitespace that ends in
	    ``.``, ``!`` or ``?``. The matched runs are kept as fragments of their
	    own.
	    """
	    source_label = re.compile(r"Source \d+\:")
	    # The capturing group makes re.split return the matched runs as well.
	    upper_run = re.compile(r"([A-Z\s]+[.!?])")

	    fragments = []
	    for raw_line in text.split("\n"):
	        unlabelled = source_label.sub("", raw_line)
	        for piece in upper_run.split(unlabelled):
	            trimmed = piece.strip()
	            if trimmed:
	                fragments.append(trimmed)

	    return fragments

	def join_list(items):
	    """Join *items* into a phrase such as ``a, b and c``.

	    Returns an empty string for an empty (or falsy) input and the sole
	    element itself when there is exactly one item.
	    """
	    if not items:
	        return ""

	    if len(items) == 1:
	        return items[0]

	    leading, last = items[:-1], items[-1]
	    if len(leading) == 1:
	        # Exactly two items: no comma, just the conjunction.
	        return f"{leading[0]} and {last}"

	    return ", ".join(leading) + " and " + last