import difflib


def compare_sentences(sentence_1: str, sentence_2: str, *, autojunk: bool = False) -> list:
    """Find the common substrings of two sentences and report their positions.

    Matching is done character by character (case-sensitive) with
    ``difflib.SequenceMatcher``; each non-empty matching block becomes one
    entry in the result, so a "phrase" may be as short as a single character.

    Args:
        sentence_1: The first sentence.
        sentence_2: The second sentence.
        autojunk: Forwarded to ``difflib.SequenceMatcher``. Defaults to
            ``False`` because the automatic junk heuristic, which kicks in
            when ``sentence_2`` has 200 or more characters, stops frequent
            characters (spaces, common letters) from anchoring matches and
            silently drops genuine common substrings. Pass ``True`` to trade
            accuracy for speed on very long inputs.

    Returns:
        A list of dictionaries ordered by position in ``sentence_1``, each with:
          - "phrase": the common substring.
          - "start_1": start index of the substring in ``sentence_1``.
          - "end_1": end index in ``sentence_1`` (exclusive, slice-style).
          - "start_2": start index of the substring in ``sentence_2``.
          - "end_2": end index in ``sentence_2`` (exclusive, slice-style).
        An empty list is returned when either input is empty or when the
        sentences share no characters.
    """
    # Nothing can match against an empty string; skip building the matcher.
    if not sentence_1 or not sentence_2:
        return []

    matcher = difflib.SequenceMatcher(
        None, sentence_1, sentence_2, autojunk=autojunk
    )

    # get_matching_blocks() always ends with a zero-length sentinel block,
    # so zero-size entries are filtered out.
    return [
        {
            # Same text as sentence_2[block.b:block.b + block.size].
            "phrase": sentence_1[block.a:block.a + block.size],
            "start_1": block.a,
            "end_1": block.a + block.size,
            "start_2": block.b,
            "end_2": block.b + block.size,
        }
        for block in matcher.get_matching_blocks()
        if block.size > 0
    ]


# Example usage (only runs when the file is executed as a script):
if __name__ == "__main__":
    # Adjacent literals are concatenated; the explicit "\n" preserves the
    # paragraph break that the original text contained.
    sentence_1 = (
        " Muzzamil Hussain was in 3rd-grade school when the first bombs fell "
        "on the playground outside of his classroom in Kargil, a mountain "
        "city in India. While the violent onset of the 1998 Kargil war "
        "between Pakistan and India unfolded around him, Hussain and his "
        "family escaped south to the remote Suru Valley. After India claimed "
        "victory later that year and displaced families returned home, "
        "Hussain listened as his bedridden grandfather asked the family to "
        "visit an old property, initially built by Hussain's "
        "great-grandfather, near Kargil's bazaar to make sure it had "
        "survived the war. \n"
        "When Hussain's uncles cracked through an old rusty latch and peered "
        "through the hand-carved wooden doors, they discovered wooden crates "
        "stamped with names of cities worldwide. Making space on the dusty "
        "floor, the family began to lay out silks from China, silver "
        "cookware from Afghanistan, rugs from Persia, turquoise from Tibet, "
        "saddles from Mongolia, and luxury soaps and salves from London, "
        "New York, and Munich. "
    )
    sentence_2 = "A quick brown fox jumps over a lazy cat."

    common_phrases = compare_sentences(sentence_1, sentence_2)

    if common_phrases:
        for phrase_data in common_phrases:
            print(phrase_data)
    else:
        print("No common phrases found.")