Larisa Kolesnichenko
committed on
Commit
·
099a2f3
1
Parent(s):
a0be2a1
Make processing of punctuation consistent with the training data: surround each punctuation symbol with spaces
Browse files: model_wrapper.py +5 -1
model_wrapper.py
CHANGED
@@ -4,6 +4,7 @@ import tempfile
|
|
4 |
import sys
|
5 |
import datetime
|
6 |
import re
|
|
|
7 |
sys.path.append('mtool')
|
8 |
|
9 |
import torch
|
@@ -78,7 +79,10 @@ class PredictionModel:
|
|
78 |
|
79 |
|
80 |
def clean_texts(self, texts):
|
81 |
-
|
|
|
|
|
|
|
82 |
|
83 |
|
84 |
def _predict_to_mrp(self, texts, graph_mode='labeled-edge'):
|
|
|
4 |
import sys
|
5 |
import datetime
|
6 |
import re
|
7 |
+
import string
|
8 |
sys.path.append('mtool')
|
9 |
|
10 |
import torch
|
|
|
79 |
|
80 |
|
81 |
def clean_texts(self, texts):
|
82 |
+
punctuation = ''.join([f'\\{s}' for s in string.punctuation])
|
83 |
+
texts = [re.sub(f'([{punctuation}])', ' \\1 ', t) for t in texts]
|
84 |
+
texts = [re.sub(r' +', ' ', t) for t in texts]
|
85 |
+
return texts
|
86 |
|
87 |
|
88 |
def _predict_to_mrp(self, texts, graph_mode='labeled-edge'):
|