File size: 3,483 Bytes
f3f1c83
cf8bc10
f3f1c83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e39a3
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import json
import re

def diff(str1, str2):
    """Return character ranges of the words in which ``str2`` differs from ``str1``.

    Both strings are split on whitespace and compared word by word, position
    by position. Offsets are computed as if the words were joined by single
    spaces starting at offset 0.

    Args:
        str1: Reference sentence. For positions present in both sentences the
            offsets are measured with the word lengths of ``str1``.
        str2: Sentence compared against ``str1``.

    Returns:
        A list of ``(start, end)`` tuples where ``end`` is ``start`` plus the
        length of the word. Words that exist only at the tail of ``str2`` are
        reported too, laid out one after another past the end of ``str1``'s
        words. Words that exist only at the tail of ``str1`` are not reported.
    """
    words1 = str1.split()
    words2 = str2.split()
    indices = []

    # Running offset of the current word; replaces re-summing the prefix of
    # words1 for every mismatch.
    offset = 0
    for word1, word2 in zip(words1, words2):
        if word1 != word2:
            indices.append((offset, offset + len(word1)))
        offset += len(word1) + 1  # +1 for the space between words

    # Extra words only exist when str2 is longer, in which case the loop above
    # consumed all of words1 and `offset` already sits just past them.
    # Advance the offset after each extra word so the ranges do not overlap.
    for extra_word in words2[len(words1):]:
        indices.append((offset, offset + len(extra_word)))
        offset += len(extra_word) + 1

    return indices


def format_predicted_sentence(original_sentence, corrected_word_indices):
    """Wrap the given character ranges of a sentence in red ``<span>`` markup.

    Args:
        original_sentence: The text to decorate.
        corrected_word_indices: Iterable of ``(start, end)`` pairs. ``end`` is
            treated as inclusive here: the character at ``end`` is placed
            inside the span and copying resumes at ``end + 1``.

    Returns:
        The sentence with every listed range enclosed in
        ``<span style="color:red">...</span>``; text outside the ranges is
        copied through unchanged.
    """
    open_tag = '<span style="color:red">'
    close_tag = "</span>"

    pieces = []
    cursor = 0
    for first, last in corrected_word_indices:
        resume_at = last + 1
        # Untouched text between the previous range and this one.
        pieces.append(original_sentence[cursor:first])
        # The highlighted range itself.
        pieces.append(open_tag + original_sentence[first:resume_at] + close_tag)
        cursor = resume_at

    # Whatever follows the final range.
    pieces.append(original_sentence[cursor:])
    return "".join(pieces)


def processInputAndResults(inputSentence, predictedSentence):
    """Return ``predictedSentence`` with its changed words marked up.

    The ranges reported by ``diff(inputSentence, predictedSentence)`` are
    passed to ``format_predicted_sentence`` together with
    ``predictedSentence``, and the formatted string is returned.
    """
    return format_predicted_sentence(
        predictedSentence,
        diff(inputSentence, predictedSentence),
    )


def ensure_space_around_punctuation(sentence):
    """Separate punctuation marks from their neighbours with a space.

    A single space is inserted between two adjacent characters when at least
    one of them is a punctuation mark and neither of them is already a space.
    Nothing is added before the first character or after the last one, and
    existing spaces are left as they are.

    Args:
        sentence: The text to process.

    Returns:
        The text with the spaces inserted.
    """
    punctuation_marks = ("।", ",", ".", "!", "?", ":", ";", '"', "'", "(", ")")

    pieces = []
    previous = None  # no left-hand neighbour for the first character
    for char in sentence:
        if previous is not None:
            touches_mark = char in punctuation_marks or previous in punctuation_marks
            if touches_mark and previous != " " and char != " ":
                pieces.append(" ")
        pieces.append(char)
        previous = char

    return "".join(pieces)


def check_and_insert_space(sentence):
    """Insert a space before the first "।" if it directly follows a non-space.

    Only the first occurrence of "।" is examined. The sentence is returned
    unchanged when there is no "।", when it is the very first character, or
    when it is already preceded by a space.
    """
    before, danda, after = sentence.partition("।")
    if not danda or not before or before.endswith(" "):
        return sentence
    return before + " " + danda + after

def processPunctuation(sentence):
    """Pad every punctuation mark with spaces and collapse whitespace runs.

    Two regex passes are applied:

    1. Each punctuation mark matched by the character class below is replaced
       by itself with one space on either side.
    2. Every run of whitespace characters is replaced by a single space.

    The result is not stripped, so a mark at the very start or end of the
    input leaves a leading or trailing space.
    """
    punctuation_pattern = r'([।,\.!\?:;"\'\(\)])'

    padded = re.sub(punctuation_pattern, r' \1 ', sentence)
    return re.sub(r'\s+', ' ', padded)