|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import nltk |
|
import numpy |
|
import xlrd |
|
import openpyxl |
|
import re |
|
import sys |
|
|
|
|
|
|
|
|
|
# Script: read a UTF-8 text file, strip a fixed set of punctuation marks,
# replace every key found in column 1 of a spreadsheet with the word in
# column 2, and write the result to a UTF-8 output file.
#
# Command line: INPUT_TXT MAPPING_XLSX OUTPUT_TXT

# Ordered (old, new) substitutions applied to the text before the spreadsheet
# mapping. The order is significant and mirrors the original statement order:
# the ' – ' rule only matches after the deletions listed before it have run,
# and must run before '-' is turned into a space.
# NOTE(review): the (' ', ' ') entry is a no-op as visible here; the original
# may have targeted a double or non-breaking space - confirm against the
# original file. The ',' entry likewise appears twice as shown.
PUNCTUATION_REPLACEMENTS = (
    ('?', ''),
    (' ', ' '),
    (';', ''),
    (')', ''),
    ('(', ''),
    ('!', ''),
    (' – ', ' '),
    ('-', ' '),
    ('।', ''),
    ('&', ''),
    ('’', ''),
    ('‘', ''),
    (':', ''),
    (',', ''),
    ('/', ''),
    (',', ''),
    ('.', ''),
    ('|', ''),
)


def strip_punctuation(text):
    """Return *text* with every PUNCTUATION_REPLACEMENTS rule applied in order."""
    for old, new in PUNCTUATION_REPLACEMENTS:
        text = text.replace(old, new)
    return text


def apply_word_map(text, rows):
    """Replace each key in *text* with its paired word and return the result.

    Args:
        text: The string to rewrite.
        rows: Iterable of ``(key, word)`` pairs, e.g. the values of the first
            two spreadsheet columns. Both are converted with ``str()``.

    Rows whose key or word is ``None`` (empty cells) or whose key is an empty
    string are skipped: previously a ``None`` word raised ``TypeError`` and a
    ``None`` key replaced the literal text ``'None'``.
    """
    # NOTE(review): replacements are plain substring matches applied one row
    # after another, so a key such as "1" also matches inside "10", and a
    # later row can rewrite the output of an earlier one - confirm intended.
    for key, word in rows:
        if key is None or word is None:
            continue
        needle = str(key)
        if not needle:
            # str.replace('', word) would insert word between every character.
            continue
        text = text.replace(needle, str(word))
    return text


def main(argv=None):
    """Run the conversion using *argv* (defaults to ``sys.argv``).

    ``argv[1]`` is the input text path, ``argv[2]`` the mapping workbook
    path, and ``argv[3]`` the output text path. Exits with a usage message
    when fewer than three paths are supplied.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 4:
        sys.exit("usage: script INPUT_TXT MAPPING_XLSX OUTPUT_TXT")

    # The input is only read, so open it read-only; the context manager
    # closes the handle even if read() raises.
    with open(argv[1], "r", encoding='utf-8') as source:
        data = source.read()

    wb_obj = openpyxl.load_workbook(argv[2])
    sheet_obj = wb_obj.active

    # Column 1 holds the key to look for, column 2 the replacement word.
    rows = (
        (
            sheet_obj.cell(row=row, column=1).value,
            sheet_obj.cell(row=row, column=2).value,
        )
        for row in range(1, sheet_obj.max_row + 1)
    )

    line = apply_word_map(strip_punctuation(data), rows)

    with open(argv[3], "w", encoding='utf-8') as target:
        target.write(line)


if __name__ == "__main__":
    main()
|
|
|
|