lsy641 committed on
Commit
c74a961
·
1 Parent(s): 2c7cea9
Files changed (1) hide show
  1. tokenizer_13a.py +1 -3
tokenizer_13a.py CHANGED
@@ -67,8 +67,7 @@ class TokenizerRegexp(BaseTokenizer):
67
  # no leading or trailing spaces, single space within words
68
  # return ' '.join(line.split())
69
  # This line is changed with regards to the original tokenizer (seen above) to return individual words
70
- print(line)
71
- print("1:", line.split())
72
  return line.split()
73
 
74
 
@@ -98,7 +97,6 @@ class Tokenizer13a(BaseTokenizer):
98
  line = line.replace("&amp;", "&")
99
  line = line.replace("&lt;", "<")
100
  line = line.replace("&gt;", ">")
101
- print(line)
102
 
103
  return self._post_tokenizer(f" {line} ")
104
 
 
67
  # no leading or trailing spaces, single space within words
68
  # return ' '.join(line.split())
69
  # This line is changed with regards to the original tokenizer (seen above) to return individual words
70
+
 
71
  return line.split()
72
 
73
 
 
97
  line = line.replace("&amp;", "&")
98
  line = line.replace("&lt;", "<")
99
  line = line.replace("&gt;", ">")
 
100
 
101
  return self._post_tokenizer(f" {line} ")
102