from difflib import SequenceMatcher
import string

# Translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def _normalized_tokens(text):
    """Return the comparison tokens of *text* and their original positions.

    The text is lower-cased and split on whitespace; ASCII punctuation is
    then stripped from each word. Words that consist only of punctuation
    are dropped from the token list.

    Returns:
        A ``(tokens, positions)`` pair of equally long lists, where
        ``positions[k]`` is the index in ``text.split()`` of the word that
        produced ``tokens[k]``.
    """
    tokens = []
    positions = []
    for position, word in enumerate(text.lower().split()):
        token = word.translate(_PUNCTUATION_TABLE)
        if token:
            tokens.append(token)
            positions.append(position)
    return tokens, positions


def extract_equal_text(text1, text2):
    """Find the word ranges that two texts have in common.

    Both texts are compared word by word, ignoring case and ASCII
    punctuation. Every matching run is printed and recorded.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        A pair ``(equal_idx_1, equal_idx_2)`` of lists with one entry per
        matching run. Each entry is a dict ``{"start": s, "end": e}`` such
        that ``text.split()[s:e]`` yields the matching words of the
        respective text (``e`` is exclusive).
    """
    words1 = text1.split()
    words2 = text2.split()
    tokens1, positions1 = _normalized_tokens(text1)
    tokens2, positions2 = _normalized_tokens(text2)

    matcher = SequenceMatcher(None, tokens1, tokens2)

    equal_idx_1 = []
    equal_idx_2 = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag != 'equal':
            continue

        # The opcode indices refer to the normalized token lists. Map them
        # back to positions in the original word lists, so that words made
        # up only of punctuation (which have no token) cannot shift the
        # ranges. 'equal' runs are never empty, so i2 - 1 / j2 - 1 are valid.
        start1, end1 = positions1[i1], positions1[i2 - 1] + 1
        start2, end2 = positions2[j1], positions2[j2 - 1] + 1

        equal_idx_1.append({"start": start1, "end": end1})
        equal_idx_2.append({"start": start2, "end": end2})

        subtext_1 = " ".join(words1[start1:end1])
        subtext_2 = " ".join(words2[start2:end2])
        print(f'{tag:7} a[{start1:2}:{end1:2}] --> b[{start2:2}:{end2:2}] {subtext_1!r:>55} --> {subtext_2!r}')

    return equal_idx_1, equal_idx_2


# Demo input. Only the whitespace-separated words matter to the comparison.
text1 = """
Miguel Almiron has permanently rejoined Atlanta United from Newcastle United for £8m.
Almiron made 223 appearances for Newcastle, scoring 30 goals, but recently struggled for a starting place under Eddie Howe.
He made a substitute appearance and waved farewell to fans in Newcastle's recent win against Southampton.
Almiron played a key role in Newcastle reaching the Carabao Cup final and their Premier League top-four finish in 2022-23, and scored against Paris St-Germain in the Champions League.
"""

text2 = """
Newcastle United winger Miguel Almiron has rejoined Atlanta United on a permanent deal for £8m.
Almiron has made 223 appearances for Newcastle, scoring 30 goals, but has struggled recently to gain a place in manager Eddie Howe's starting line-up.
Last weekend he came on as a substitute in Newcastle's 3-1 win against Southampton and waved farewell to the travelling supporters.
Almiron played a significant role in Newcastle reaching the Carabao Cup final and finishing fourth in the Premier League in 2022-23.
"""

idx_1, idx_2 = extract_equal_text(text1, text2)

# Example of turning the returned ranges back into words:
# text1_split = text1.split()
# for idx in idx_1:
#     print(text1_split[idx["start"]:idx["end"]])