import textdistance
import pickle

# Maps a word as it appears in incorrect.txt to the word found at the same
# line and word position in correct.txt.
correction_dict = {}

# Load both files fully; iterating a text file yields its lines, so list()
# gives the same result as readlines().
with open('correct.txt', 'r') as correct_file, open('incorrect.txt', 'r') as incorrect_file:
    correct_lines = list(correct_file)
    incorrect_lines = list(incorrect_file)

# Pair lines by index, then words by position within each line. zip() stops at
# the shorter input, so surplus lines or words on either side are ignored.
# If the same incorrect word appears more than once, the last pairing wins.
for reference_line, noisy_line in zip(correct_lines, incorrect_lines):
    # split() with no arguments already discards leading/trailing whitespace.
    correction_dict.update(zip(noisy_line.split(), reference_line.split()))

def predict_correction(incorrect_word, correction_dict):
    """Return the best available correction for *incorrect_word*.

    If the word is a key of ``correction_dict`` its stored correction is
    returned with distance 0. Otherwise the key with the smallest Levenshtein
    distance to the word is located and the correction stored for that key is
    returned (not the key itself, which is a known misspelling).

    Args:
        incorrect_word: The word to look up.
        correction_dict: Mapping from a known incorrect word to its
            correction.

    Returns:
        A ``(correction, distance)`` tuple. ``distance`` is 0 for an exact
        key match, otherwise the Levenshtein distance between
        ``incorrect_word`` and the nearest key. When ``correction_dict`` is
        empty the result is ``(None, float('inf'))``.
    """
    if incorrect_word in correction_dict:
        return correction_dict[incorrect_word], 0

    nearest_key = None
    best_distance = float('inf')

    # Linear scan over every known misspelling; on ties the first key in
    # dictionary order is kept because the comparison is strict.
    for known_word in correction_dict:
        distance = textdistance.levenshtein(incorrect_word, known_word)
        if distance < best_distance:
            best_distance = distance
            nearest_key = known_word

    if nearest_key is None:
        # Empty dictionary: there is nothing to suggest.
        return None, best_distance

    # Translate the nearest known misspelling into its correction.
    return correction_dict[nearest_key], best_distance

def correct_text(input_text, correction_dict):
    """Correct every whitespace-separated word of *input_text*.

    Each word is passed to ``predict_correction``; a truthy suggestion
    replaces the word, otherwise the original word is kept. The resulting
    words are joined with single spaces, so the original spacing and line
    breaks are not preserved.

    Args:
        input_text: The text to correct.
        correction_dict: Mapping forwarded to ``predict_correction``.

    Returns:
        The corrected text as a single space-joined string.
    """
    def _fix(token):
        # Only the suggestion is used; the reported distance is ignored.
        suggestion, _distance = predict_correction(token, correction_dict)
        return suggestion or token

    return ' '.join(map(_fix, input_text.split()))

# Serialize the correction mapping to correction_model.pkl using pickle.
# NOTE: pickle files must only be loaded from trusted sources.
with open('correction_model.pkl', 'wb') as pickle_out:
    pickle.dump(correction_dict, pickle_out)