khulnasoft committed on
Commit
467b421
·
verified ·
1 Parent(s): 9a3880d

Create preprocessing.py

Browse files
Files changed (1) hide show
  1. preprocessing.py +17 -0
preprocessing.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def preprocess_text(text, *, remove_digits=False):
    """Replace non-word characters with spaces and normalise whitespace.

    Every character matched by the regex class ``\\W`` (anything that is not
    a letter, a digit, or an underscore) is replaced by a space. Runs of
    whitespace are then collapsed to a single space and leading/trailing
    whitespace is stripped.

    Note that, by default, digits and underscores are *kept*: ``\\W`` does
    not match them. Pass ``remove_digits=True`` to blank out digits as well.

    Args:
        text: The string to clean.
        remove_digits: Keyword-only. When true, digits are replaced by
            spaces in addition to the non-word characters. Defaults to
            ``False``, which keeps digits.

    Returns:
        The cleaned string; empty if ``text`` contained nothing but
        characters that were replaced or whitespace.
    """
    # Digits are word characters, so they need their own `\d` alternative
    # when the caller wants them gone.
    pattern = r'[\W\d]' if remove_digits else r'\W'
    text = re.sub(pattern, ' ', text)
    # The substitution above can leave several spaces in a row; squeeze them.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
8
+
9
# Read the raw input, one entry per line. The encoding is pinned to UTF-8 so
# the result does not depend on the platform's locale default.
with open("data.txt", "r", encoding="utf-8") as file:
    data = file.readlines()

# Clean every line; the trailing newline of each line is stripped by
# preprocess_text, so blank input lines become empty strings.
cleaned_data = [preprocess_text(line) for line in data]

# Save the cleaned data, one entry per line (empty entries are kept so the
# output stays line-aligned with the input).
with open("cleaned_data.txt", "w", encoding="utf-8") as file:
    file.writelines(entry + "\n" for entry in cleaned_data)