Erkadaflown
/

Mongolian-spell-correction

Text2Text Generation

Model card Files Files and versions Community

Mongolian-spell-correction / worddistance.py

Erkadaflown's picture

Mongolian text correction

fc08cd7 over 1 year ago

history blame contribute delete

1.55 kB

	import textdistance
	import pickle

	# Lookup table built from two parallel files: line i of 'incorrect.txt' is
	# paired with line i of 'correct.txt', and within a line pair the j-th
	# whitespace-separated word of one is paired with the j-th word of the other.
	# Keys are the words from 'incorrect.txt', values the words from 'correct.txt'.
	correction_dict = {}

	# Decode explicitly as UTF-8: without an `encoding` argument open() uses the
	# platform locale encoding, which is not UTF-8 everywhere and would mangle or
	# reject non-ASCII text.
	# NOTE(review): assumes both corpus files are stored as UTF-8 - confirm.
	with open('correct.txt', 'r', encoding='utf-8') as correct_file, \
	        open('incorrect.txt', 'r', encoding='utf-8') as incorrect_file:
	    # zip() stops at the shorter input, so surplus lines in either file (and
	    # surplus words in either line) are ignored.
	    for correct_line, incorrect_line in zip(correct_file, incorrect_file):
	        correct_words = correct_line.split()
	        incorrect_words = incorrect_line.split()

	        # A word that occurs more than once keeps the mapping seen last.
	        correction_dict.update(zip(incorrect_words, correct_words))

	def predict_correction(incorrect_word, correction_dict):
	    """Return the suggested correction for *incorrect_word* and its distance.

	    Args:
	        incorrect_word: The word to look up.
	        correction_dict: Mapping whose keys are known words and whose values
	            are the corrected forms of those keys.

	    Returns:
	        A ``(correction, distance)`` tuple:

	        * If ``incorrect_word`` is a key of ``correction_dict``, its mapped
	          value and ``0``.
	        * Otherwise, the value mapped to the key with the smallest
	          Levenshtein distance to ``incorrect_word`` (the first such key in
	          iteration order on ties) and that distance.
	        * ``(None, float('inf'))`` when ``correction_dict`` is empty.
	    """
	    if incorrect_word in correction_dict:
	        return correction_dict[incorrect_word], 0

	    nearest_key = None
	    best_distance = float('inf')

	    for known_word in correction_dict:
	        distance = textdistance.levenshtein(incorrect_word, known_word)
	        # Strict '<' keeps the first key encountered among equal distances.
	        if distance < best_distance:
	            best_distance = distance
	            nearest_key = known_word

	    if nearest_key is None:
	        # Nothing to compare against (empty dictionary).
	        return None, best_distance

	    # The keys are the words being corrected, not the corrections themselves,
	    # so translate the nearest key into its corrected form, as the
	    # exact-match branch does.
	    return correction_dict[nearest_key], best_distance

	def correct_text(input_text, correction_dict):
	    """Rewrite *input_text* word by word using ``predict_correction``.

	    The text is split on whitespace. Each token is replaced by the first
	    element of ``predict_correction(token, correction_dict)``; when that
	    element is falsy (for example ``None``), the token is kept unchanged.
	    The reported distance is not used.

	    Args:
	        input_text: The text to process.
	        correction_dict: Lookup table forwarded to ``predict_correction``.

	    Returns:
	        The resulting tokens joined by single spaces. Runs of whitespace in
	        the input are therefore collapsed, and an empty or whitespace-only
	        input yields ``''``.
	    """
	    return ' '.join(
	        predict_correction(token, correction_dict)[0] or token
	        for token in input_text.split()
	    )

	# Persist the lookup table to 'correction_model.pkl' with the default pickle
	# protocol. This runs whenever the module is executed or imported.
	with open('correction_model.pkl', 'wb') as sink:
	    pickle.Pickler(sink).dump(correction_dict)