Spaces:
Running
Running
import re | |
def find_entity_spans(entity, text):
    """Find the spans of whole-word occurrences of ``entity`` in ``text``.

    An occurrence counts as a "whole word" when it is neither immediately
    preceded nor immediately followed by a word character (``\\w``), so
    "win" is found in "win big" but not inside "winger" or "twin".

    Args:
        entity: The entity string to search for. It is matched literally;
            regex metacharacters in it carry no special meaning.
        text: The text to search within.

    Returns:
        A list of ``(start, end)`` tuples, one per occurrence, in the order
        they appear in ``text``. ``end`` is exclusive, so
        ``text[start:end] == entity``. Returns an empty list when nothing
        is found or when ``entity`` is empty.
    """
    # An empty pattern would otherwise "match" with zero length at many
    # positions, which is never a meaningful entity span.
    if not entity:
        return []

    # Lookarounds are used instead of r"\b" on purpose: r"\b" demands a
    # word/non-word transition, so it fails for entities that start or end
    # with a non-word character (e.g. "C++" or "$5"). For ordinary word
    # entities the two forms are equivalent.
    pattern = r"(?<!\w)" + re.escape(entity) + r"(?!\w)"
    return [(match.start(), match.end()) for match in re.finditer(pattern, text)]
# Example usage:
# Each row is (text to search, entity dictionary); the trailing comment shows
# the list printed for that row. Adjust the dictionaries as needed.
examples = [
    ("win winger winning", {"key": "win"}),  # [(0, 3)] - only the standalone "win"
    ("The quick brown fox jumps over the lazy dog.", {"key": "fox"}),  # [(16, 19)]
    ("foxes fox foxing", {"key": "fox"}),  # [(6, 9)] - "foxes"/"foxing" are skipped
    ("winger win winning", {"key": "win"}),  # [(7, 10)]
    ("winger win winning", {"key": "winger"}),  # [(0, 6)]
    ("winger win winning", {"key": "winning"}),  # [(11, 18)]
]

for temp_text, entity in examples:
    spans = find_entity_spans(entity["key"], temp_text)
    print(spans)