Spaces:
Build error
Build error
freemt
committed on
Commit
·
52a9494
1
Parent(s):
4c04f50
Update pyc
Browse files- img/plt.png +0 -0
- radiobee/__main__.py +65 -7
- radiobee/align_sents.py +7 -2
- radiobee/align_sents.pyc +0 -0
- radiobee/error_msg.py +2 -2
- radiobee/gradiobee.py +68 -5
- radiobee/paras2sents.pyc +0 -0
- radiobee/shuffle_sents.pyc +0 -0
- run-pydocstle.bat → run-pydocstyle.bat +0 -0
- tests/test_paras2sents.py +8 -2
img/plt.png
CHANGED
![]() |
![]() |
radiobee/__main__.py
CHANGED
@@ -4,6 +4,8 @@ from typing import Any, Tuple, Optional, Union # noqa
|
|
4 |
|
5 |
import sys
|
6 |
from pathlib import Path # noqa
|
|
|
|
|
7 |
import platform
|
8 |
import signal
|
9 |
from random import randint
|
@@ -41,7 +43,10 @@ from radiobee.process_upload import process_upload
|
|
41 |
from radiobee.gradiobee import gradiobee
|
42 |
|
43 |
ic_install()
|
44 |
-
ic.configureOutput(
|
|
|
|
|
|
|
45 |
ic.enable()
|
46 |
# ic.disenable() # to turn off
|
47 |
|
@@ -105,6 +110,12 @@ if __name__ == "__main__":
|
|
105 |
debug = False
|
106 |
debug = True
|
107 |
share = True
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
else:
|
109 |
server_name = "127.0.0.1"
|
110 |
share = False
|
@@ -128,7 +139,6 @@ if __name__ == "__main__":
|
|
128 |
gr.inputs.File(label="file 2", optional=True),
|
129 |
]
|
130 |
|
131 |
-
# modi 1
|
132 |
_ = """
|
133 |
tf_type: Literal[linear, sqrt, log, binary] = 'linear'
|
134 |
idf_type: Optional[Literal[standard, smooth, bm25]] = None
|
@@ -148,10 +158,13 @@ if __name__ == "__main__":
|
|
148 |
) # ditto
|
149 |
input_norm_type = gr.inputs.Radio(["None", "l1", "l2"], default="None") # ditto
|
150 |
|
151 |
-
inputs
|
|
|
|
|
|
|
152 |
gr.inputs.File(label="file 1"),
|
153 |
gr.inputs.File(label="file 2", optional=True),
|
154 |
-
input_tf_type, # modi inputs
|
155 |
input_idf_type,
|
156 |
input_dl_type,
|
157 |
input_norm_type,
|
@@ -167,6 +180,7 @@ if __name__ == "__main__":
|
|
167 |
step=1,
|
168 |
default=6,
|
169 |
),
|
|
|
170 |
]
|
171 |
|
172 |
examples = [
|
@@ -179,16 +193,40 @@ if __name__ == "__main__":
|
|
179 |
"None",
|
180 |
10,
|
181 |
6,
|
|
|
182 |
],
|
183 |
[
|
|
|
184 |
"data/test_en.txt",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
"data/test_zh.txt",
|
|
|
186 |
"linear",
|
187 |
"None",
|
188 |
"None",
|
189 |
"None",
|
190 |
10,
|
191 |
6,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
],
|
193 |
[
|
194 |
"data/shakespeare_zh500.txt",
|
@@ -199,6 +237,7 @@ if __name__ == "__main__":
|
|
199 |
"None",
|
200 |
10,
|
201 |
6,
|
|
|
202 |
],
|
203 |
[
|
204 |
"data/shakespeare_en500.txt",
|
@@ -209,6 +248,7 @@ if __name__ == "__main__":
|
|
209 |
"None",
|
210 |
10,
|
211 |
6,
|
|
|
212 |
],
|
213 |
[
|
214 |
"data/hlm-ch1-zh.txt",
|
@@ -219,6 +259,7 @@ if __name__ == "__main__":
|
|
219 |
"None",
|
220 |
10,
|
221 |
6,
|
|
|
222 |
],
|
223 |
[
|
224 |
"data/hlm-ch1-en.txt",
|
@@ -229,6 +270,7 @@ if __name__ == "__main__":
|
|
229 |
"None",
|
230 |
10,
|
231 |
6,
|
|
|
232 |
],
|
233 |
[
|
234 |
"data/ps-cn.txt",
|
@@ -239,6 +281,7 @@ if __name__ == "__main__":
|
|
239 |
"None",
|
240 |
10,
|
241 |
4,
|
|
|
242 |
],
|
243 |
[
|
244 |
"data/test-dual.txt",
|
@@ -249,6 +292,7 @@ if __name__ == "__main__":
|
|
249 |
"None",
|
250 |
10,
|
251 |
6,
|
|
|
252 |
],
|
253 |
[
|
254 |
"data/英译中国现代散文选1(汉外对照丛书).txt",
|
@@ -259,6 +303,7 @@ if __name__ == "__main__":
|
|
259 |
"None",
|
260 |
10,
|
261 |
6,
|
|
|
262 |
],
|
263 |
[
|
264 |
"data/test-zh-ja.txt",
|
@@ -269,6 +314,7 @@ if __name__ == "__main__":
|
|
269 |
"None",
|
270 |
10,
|
271 |
6,
|
|
|
272 |
],
|
273 |
[
|
274 |
"data/xiyouji-ch1-zh.txt",
|
@@ -279,6 +325,7 @@ if __name__ == "__main__":
|
|
279 |
"None",
|
280 |
10,
|
281 |
6,
|
|
|
282 |
],
|
283 |
[
|
284 |
"data/demian-hesse-de.txt",
|
@@ -289,6 +336,7 @@ if __name__ == "__main__":
|
|
289 |
"None",
|
290 |
10,
|
291 |
6,
|
|
|
292 |
],
|
293 |
[
|
294 |
"data/catcher-in-the-rye-shixianrong-zh.txt",
|
@@ -299,6 +347,7 @@ if __name__ == "__main__":
|
|
299 |
"None",
|
300 |
10,
|
301 |
6,
|
|
|
302 |
],
|
303 |
]
|
304 |
|
@@ -329,14 +378,23 @@ if __name__ == "__main__":
|
|
329 |
out_file_dl_excel = gr.outputs.File(
|
330 |
label="Click to download xlsx",
|
331 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
|
333 |
-
# modi outputs
|
334 |
-
outputs = [
|
335 |
out_df,
|
336 |
-
# "plot",
|
337 |
gr.outputs.Image(label="plot"),
|
338 |
out_file_dl,
|
339 |
out_file_dl_excel,
|
|
|
|
|
340 |
out_df_aligned,
|
341 |
gr.outputs.HTML(),
|
342 |
]
|
|
|
4 |
|
5 |
import sys
|
6 |
from pathlib import Path # noqa
|
7 |
+
import subprocess as sp
|
8 |
+
import shlex
|
9 |
import platform
|
10 |
import signal
|
11 |
from random import randint
|
|
|
43 |
from radiobee.gradiobee import gradiobee
|
44 |
|
45 |
ic_install()
|
46 |
+
ic.configureOutput(
|
47 |
+
includeContext=True,
|
48 |
+
outputFunction=logger.info,
|
49 |
+
)
|
50 |
ic.enable()
|
51 |
# ic.disenable() # to turn off
|
52 |
|
|
|
110 |
debug = False
|
111 |
debug = True
|
112 |
share = True
|
113 |
+
|
114 |
+
# set UTC+8; probably won't work in HF Spaces (no permission)
|
115 |
+
try:
|
116 |
+
sp.check_output(shlex.split("ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime"))
|
117 |
+
except Exception as exc:
|
118 |
+
logger.error(" set timezonef failed: %s", exc)
|
119 |
else:
|
120 |
server_name = "127.0.0.1"
|
121 |
share = False
|
|
|
139 |
gr.inputs.File(label="file 2", optional=True),
|
140 |
]
|
141 |
|
|
|
142 |
_ = """
|
143 |
tf_type: Literal[linear, sqrt, log, binary] = 'linear'
|
144 |
idf_type: Optional[Literal[standard, smooth, bm25]] = None
|
|
|
158 |
) # ditto
|
159 |
input_norm_type = gr.inputs.Radio(["None", "l1", "l2"], default="None") # ditto
|
160 |
|
161 |
+
# modi inputs 1, definitions
|
162 |
+
sent_ali_algo = gr.inputs.Radio(["None", "fast", "slow"], default="None")
|
163 |
+
|
164 |
+
inputs = [ # tot. 9, need to modify input of gradio & examples
|
165 |
gr.inputs.File(label="file 1"),
|
166 |
gr.inputs.File(label="file 2", optional=True),
|
167 |
+
input_tf_type, # modi inputs 2
|
168 |
input_idf_type,
|
169 |
input_dl_type,
|
170 |
input_norm_type,
|
|
|
180 |
step=1,
|
181 |
default=6,
|
182 |
),
|
183 |
+
sent_ali_algo,
|
184 |
]
|
185 |
|
186 |
examples = [
|
|
|
193 |
"None",
|
194 |
10,
|
195 |
6,
|
196 |
+
"None",
|
197 |
],
|
198 |
[
|
199 |
+
"data/test_zh.txt",
|
200 |
"data/test_en.txt",
|
201 |
+
"linear",
|
202 |
+
"None",
|
203 |
+
"None",
|
204 |
+
"None",
|
205 |
+
10,
|
206 |
+
6,
|
207 |
+
"fast",
|
208 |
+
],
|
209 |
+
[
|
210 |
"data/test_zh.txt",
|
211 |
+
"data/test_en.txt",
|
212 |
"linear",
|
213 |
"None",
|
214 |
"None",
|
215 |
"None",
|
216 |
10,
|
217 |
6,
|
218 |
+
"slow",
|
219 |
+
],
|
220 |
+
[
|
221 |
+
"data/test_en.txt",
|
222 |
+
"data/test_zh.txt",
|
223 |
+
"linear",
|
224 |
+
"None",
|
225 |
+
"None",
|
226 |
+
"None",
|
227 |
+
10,
|
228 |
+
6,
|
229 |
+
"None",
|
230 |
],
|
231 |
[
|
232 |
"data/shakespeare_zh500.txt",
|
|
|
237 |
"None",
|
238 |
10,
|
239 |
6,
|
240 |
+
"None",
|
241 |
],
|
242 |
[
|
243 |
"data/shakespeare_en500.txt",
|
|
|
248 |
"None",
|
249 |
10,
|
250 |
6,
|
251 |
+
"None",
|
252 |
],
|
253 |
[
|
254 |
"data/hlm-ch1-zh.txt",
|
|
|
259 |
"None",
|
260 |
10,
|
261 |
6,
|
262 |
+
"None",
|
263 |
],
|
264 |
[
|
265 |
"data/hlm-ch1-en.txt",
|
|
|
270 |
"None",
|
271 |
10,
|
272 |
6,
|
273 |
+
"None",
|
274 |
],
|
275 |
[
|
276 |
"data/ps-cn.txt",
|
|
|
281 |
"None",
|
282 |
10,
|
283 |
4,
|
284 |
+
"None",
|
285 |
],
|
286 |
[
|
287 |
"data/test-dual.txt",
|
|
|
292 |
"None",
|
293 |
10,
|
294 |
6,
|
295 |
+
"None",
|
296 |
],
|
297 |
[
|
298 |
"data/英译中国现代散文选1(汉外对照丛书).txt",
|
|
|
303 |
"None",
|
304 |
10,
|
305 |
6,
|
306 |
+
"None",
|
307 |
],
|
308 |
[
|
309 |
"data/test-zh-ja.txt",
|
|
|
314 |
"None",
|
315 |
10,
|
316 |
6,
|
317 |
+
"None",
|
318 |
],
|
319 |
[
|
320 |
"data/xiyouji-ch1-zh.txt",
|
|
|
325 |
"None",
|
326 |
10,
|
327 |
6,
|
328 |
+
"None",
|
329 |
],
|
330 |
[
|
331 |
"data/demian-hesse-de.txt",
|
|
|
336 |
"None",
|
337 |
10,
|
338 |
6,
|
339 |
+
"None",
|
340 |
],
|
341 |
[
|
342 |
"data/catcher-in-the-rye-shixianrong-zh.txt",
|
|
|
347 |
"None",
|
348 |
10,
|
349 |
6,
|
350 |
+
"None",
|
351 |
],
|
352 |
]
|
353 |
|
|
|
378 |
out_file_dl_excel = gr.outputs.File(
|
379 |
label="Click to download xlsx",
|
380 |
)
|
381 |
+
out_sents_dl = gr.outputs.File(
|
382 |
+
label="Click to download sents csv",
|
383 |
+
)
|
384 |
+
out_sents_dl_excel = gr.outputs.File(
|
385 |
+
label="Click to download sents xlsx",
|
386 |
+
)
|
387 |
+
|
388 |
+
# modi outputs 1, definitions
|
389 |
|
390 |
+
# modi outputs 2, need to modify gradio error_msg
|
391 |
+
outputs = [ # tot. 8
|
392 |
out_df,
|
|
|
393 |
gr.outputs.Image(label="plot"),
|
394 |
out_file_dl,
|
395 |
out_file_dl_excel,
|
396 |
+
out_sents_dl,
|
397 |
+
out_sents_dl_excel,
|
398 |
out_df_aligned,
|
399 |
gr.outputs.HTML(),
|
400 |
]
|
radiobee/align_sents.py
CHANGED
@@ -67,6 +67,11 @@ def align_sents(lst1: List[str], lst2: List[str]) -> List[Tuple[str, str]]:
|
|
67 |
|
68 |
texts.append(tuple(_))
|
69 |
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
-
|
|
|
67 |
|
68 |
texts.append(tuple(_))
|
69 |
|
70 |
+
_ = """
|
71 |
+
_ = []
|
72 |
+
for elm in texts:
|
73 |
+
_.extend(elm)
|
74 |
+
return _
|
75 |
+
"""
|
76 |
|
77 |
+
return texts
|
radiobee/align_sents.pyc
ADDED
Binary file (1.42 kB). View file
|
|
radiobee/error_msg.py
CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
|
|
8 |
def error_msg(
|
9 |
msg: Optional[Union[str, Exception]],
|
10 |
title: str = "error message",
|
11 |
-
) -> Tuple[Union[pd.DataFrame, None], None, None, None, None, None]:
|
12 |
"""Prepare an error message for gradiobee outputs."""
|
13 |
if msg is None:
|
14 |
msg = "none..."
|
@@ -21,4 +21,4 @@ def error_msg(
|
|
21 |
df = pd.DataFrame([msg], columns=[title])
|
22 |
|
23 |
# return df, *((None,) * 4) # pyright complains
|
24 |
-
return df, None, None, None, None, None
|
|
|
8 |
def error_msg(
|
9 |
msg: Optional[Union[str, Exception]],
|
10 |
title: str = "error message",
|
11 |
+
) -> Tuple[Union[pd.DataFrame, None], None, None, None, None, None, None, None]:
|
12 |
"""Prepare an error message for gradiobee outputs."""
|
13 |
if msg is None:
|
14 |
msg = "none..."
|
|
|
21 |
df = pd.DataFrame([msg], columns=[title])
|
22 |
|
23 |
# return df, *((None,) * 4) # pyright complains
|
24 |
+
return df, None, None, None, None, None, None, None
|
radiobee/gradiobee.py
CHANGED
@@ -30,6 +30,10 @@ from radiobee.trim_df import trim_df
|
|
30 |
from radiobee.error_msg import error_msg
|
31 |
from radiobee.text2lists import text2lists
|
32 |
|
|
|
|
|
|
|
|
|
33 |
uname = platform.uname()
|
34 |
HFSPACES = False
|
35 |
if "amzn2" in uname.release: # on hf spaces
|
@@ -43,7 +47,7 @@ debug = False
|
|
43 |
debug = True
|
44 |
|
45 |
|
46 |
-
def gradiobee(
|
47 |
file1,
|
48 |
file2,
|
49 |
tf_type,
|
@@ -53,6 +57,7 @@ def gradiobee(
|
|
53 |
eps,
|
54 |
min_samples,
|
55 |
# debug=False,
|
|
|
56 |
):
|
57 |
"""Process inputs and return outputs."""
|
58 |
logger.debug(" *debug* ")
|
@@ -382,7 +387,7 @@ def gradiobee(
|
|
382 |
df_aligned = df_aligned[["text2", "text1", "likelihood"]]
|
383 |
df_aligned.columns = ["text1", "text2", "likelihood"]
|
384 |
|
385 |
-
ic(df_aligned.head())
|
386 |
|
387 |
# round the last column to 2
|
388 |
# df_aligned.likelihood = df_aligned.likelihood.round(2)
|
@@ -434,8 +439,66 @@ def gradiobee(
|
|
434 |
# return df_trimmed, output_plot, file_dl, file_dl_xlsx, df_aligned
|
435 |
# return df_trimmed, output_plot, file_dl, file_dl_xlsx, styled, df_html # gradio cant handle style
|
436 |
|
437 |
-
ic("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
|
439 |
-
|
|
|
440 |
|
441 |
-
#
|
|
|
|
30 |
from radiobee.error_msg import error_msg
|
31 |
from radiobee.text2lists import text2lists
|
32 |
|
33 |
+
from radiobee.align_sents import align_sents
|
34 |
+
from radiobee.shuffle_sents import shuffle_sents # type: ignore
|
35 |
+
from radiobee.paras2sents import paras2sents # type: ignore
|
36 |
+
|
37 |
uname = platform.uname()
|
38 |
HFSPACES = False
|
39 |
if "amzn2" in uname.release: # on hf spaces
|
|
|
47 |
debug = True
|
48 |
|
49 |
|
50 |
+
def gradiobee( # noqa
|
51 |
file1,
|
52 |
file2,
|
53 |
tf_type,
|
|
|
57 |
eps,
|
58 |
min_samples,
|
59 |
# debug=False,
|
60 |
+
sent_ali_algo,
|
61 |
):
|
62 |
"""Process inputs and return outputs."""
|
63 |
logger.debug(" *debug* ")
|
|
|
387 |
df_aligned = df_aligned[["text2", "text1", "likelihood"]]
|
388 |
df_aligned.columns = ["text1", "text2", "likelihood"]
|
389 |
|
390 |
+
ic("paras aligned: ", df_aligned.head(10))
|
391 |
|
392 |
# round the last column to 2
|
393 |
# df_aligned.likelihood = df_aligned.likelihood.round(2)
|
|
|
439 |
# return df_trimmed, output_plot, file_dl, file_dl_xlsx, df_aligned
|
440 |
# return df_trimmed, output_plot, file_dl, file_dl_xlsx, styled, df_html # gradio cant handle style
|
441 |
|
442 |
+
ic("sent-ali-algo: ", sent_ali_algo)
|
443 |
+
|
444 |
+
# ### sent-ali-algo is None: para align
|
445 |
+
if sent_ali_algo in ["None"]:
|
446 |
+
ic("returning para-ali outputs")
|
447 |
+
return df_trimmed, output_plot, file_dl, file_dl_xlsx, None, None, df_aligned, df_html
|
448 |
+
|
449 |
+
# ### proceed with sent align
|
450 |
+
if sent_ali_algo in ["fast"]:
|
451 |
+
ic(sent_ali_algo)
|
452 |
+
align_func = align_sents
|
453 |
+
|
454 |
+
ic(df_aligned.shape, df_aligned.columns)
|
455 |
+
|
456 |
+
aligned_sents = paras2sents(df_aligned, align_func)
|
457 |
+
|
458 |
+
# ic(pd.DataFrame(aligned_sents).shape, aligned_sents)
|
459 |
+
ic(pd.DataFrame(aligned_sents).shape)
|
460 |
+
|
461 |
+
df_aligned_sents = pd.DataFrame(aligned_sents, columns=["text1", "text2"])
|
462 |
+
else: # ["slow"]
|
463 |
+
ic(sent_ali_algo)
|
464 |
+
align_func = shuffle_sents
|
465 |
+
aligned_sents = paras2sents(df_aligned, align_func, lang1, lang2)
|
466 |
+
|
467 |
+
# add extra entry if necessary
|
468 |
+
aligned_sents = [list(sent) + [""] if len(sent) == 2 else list(sent) for sent in aligned_sents]
|
469 |
+
|
470 |
+
df_aligned_sents = pd.DataFrame(aligned_sents, columns=["text1", "text2", "likelihood"])
|
471 |
+
|
472 |
+
# prepare sents downloads
|
473 |
+
file_dl_sents = Path(f"{file_dl.stem}-sents{file_dl.suffix}")
|
474 |
+
file_dl_xlsx_sents = Path(f"{file_dl_xlsx.stem}-sents{file_dl_xlsx.suffix}")
|
475 |
+
_ = df_aligned_sents.to_csv(index=False)
|
476 |
+
file_dl_sents.write_text(_, encoding="utf8")
|
477 |
+
|
478 |
+
df_aligned_sents.to_excel(file_dl_xlsx_sents)
|
479 |
+
|
480 |
+
# prepare html output
|
481 |
+
if len(df_aligned_sents) > 200:
|
482 |
+
df_html = None
|
483 |
+
else: # show a one-batch table in html
|
484 |
+
# style
|
485 |
+
styled = df_aligned_sents.style.set_properties(
|
486 |
+
**{
|
487 |
+
"font-size": "10pt",
|
488 |
+
"border-color": "black",
|
489 |
+
"border": "1px black solid !important"
|
490 |
+
}
|
491 |
+
# border-color="black",
|
492 |
+
).set_table_styles([{
|
493 |
+
"selector": "", # noqs
|
494 |
+
"props": [("border", "2px black solid !important")]}] # noqs
|
495 |
+
).format(
|
496 |
+
precision=2
|
497 |
+
)
|
498 |
+
df_html = styled.to_html()
|
499 |
|
500 |
+
# aligned sents outputs
|
501 |
+
ic("aligned sents outputs")
|
502 |
|
503 |
+
# return df_trimmed, output_plot, file_dl, file_dl_xlsx, None, None, df_aligned, df_html
|
504 |
+
return df_trimmed, output_plot, file_dl, file_dl_xlsx, file_dl_sents, file_dl_xlsx_sents, df_aligned_sents, df_html
|
radiobee/paras2sents.pyc
ADDED
Binary file (2.57 kB). View file
|
|
radiobee/shuffle_sents.pyc
ADDED
Binary file (2.02 kB). View file
|
|
run-pydocstle.bat → run-pydocstyle.bat
RENAMED
File without changes
|
tests/test_paras2sents.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
"""Test paras2sents."""
|
2 |
# pylint: disable=invalid-name
|
3 |
|
|
|
4 |
import pandas as pd
|
5 |
from radiobee.paras2sents import paras2sents
|
6 |
from radiobee.shuffle_sents import shuffle_sents
|
@@ -14,15 +15,20 @@ def test_paras2sents_dual():
|
|
14 |
"""Test paras2sents_dual."""
|
15 |
sents = paras2sents(paras)
|
16 |
|
|
|
|
|
17 |
assert len(sents) > 202 # 208
|
18 |
# assert not sents
|
19 |
|
20 |
|
21 |
def test_paras2sents_dual_model_s():
|
22 |
"""Test paras2sents_dual_model_s."""
|
23 |
-
|
|
|
|
|
|
|
24 |
|
25 |
-
assert len(
|
26 |
# assert not sents
|
27 |
|
28 |
|
|
|
1 |
"""Test paras2sents."""
|
2 |
# pylint: disable=invalid-name
|
3 |
|
4 |
+
import numpy as np
|
5 |
import pandas as pd
|
6 |
from radiobee.paras2sents import paras2sents
|
7 |
from radiobee.shuffle_sents import shuffle_sents
|
|
|
15 |
"""Test paras2sents_dual."""
|
16 |
sents = paras2sents(paras)
|
17 |
|
18 |
+
assert np.array(sents).shape.__len__() > 1
|
19 |
+
|
20 |
assert len(sents) > 202 # 208
|
21 |
# assert not sents
|
22 |
|
23 |
|
24 |
def test_paras2sents_dual_model_s():
|
25 |
"""Test paras2sents_dual_model_s."""
|
26 |
+
sents1 = paras2sents(paras, shuffle_sents)
|
27 |
+
|
28 |
+
# assert np.array(sents1).shape.__len__() > 1
|
29 |
+
assert pd.DataFrame(sents1).shape.__len__() > 1
|
30 |
|
31 |
+
assert len(sents1) > 201 # 207
|
32 |
# assert not sents
|
33 |
|
34 |
|