Spaces:
Running
Running
victormiller
committed on
Commit
•
31dc904
1
Parent(s):
5dd5e15
Update curated.py
Browse files - curated.py +9 -9
curated.py
CHANGED
@@ -674,15 +674,15 @@ filtering_process = Div(
|
|
674 |
P("The dataset was downloaded from:", A("https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/", href="https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/"), " based on the year."),
|
675 |
P("During extraction, the logs were cleaned using following functions:"),
|
676 |
#D_code("""
|
677 |
-
def exclude_system(x):
|
678 |
-
|
679 |
-
|
680 |
-
def exclude_select_system(x):
|
681 |
-
return '\n'.join(line for line in x.split('\n') if not (line.startswith('===') and any(term in line for term in ['has joined #', 'has left #', 'Topic for #', "Topic (#", "is now known as"]) ))
|
682 |
-
|
683 |
-
def clean(x):
|
684 |
-
return '\n'.join('* ' + line[4:] if line.startswith('===') else line[8:] for line in x.split('\n'))
|
685 |
-
""", block="block", language="python" ),
|
686 |
H4("Filtering"),
|
687 |
Ol(
|
688 |
Li("Language Filter: English"),
|
|
|
674 |
P("The dataset was downloaded from:", A("https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/", href="https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/"), " based on the year."),
|
675 |
P("During extraction, the logs were cleaned using following functions:"),
|
676 |
#D_code("""
|
677 |
+
#def exclude_system(x):
|
678 |
+
# return '\n'.join(line for line in x.split('\n') if not line.startswith('==='))
|
679 |
+
#
|
680 |
+
# def exclude_select_system(x):
|
681 |
+
# return '\n'.join(line for line in x.split('\n') if not (line.startswith('===') and any(term in line for term in ['has joined #', 'has left #', 'Topic for #', "Topic (#", "is now known as"]) ))
|
682 |
+
#
|
683 |
+
# def clean(x):
|
684 |
+
# return '\n'.join('* ' + line[4:] if line.startswith('===') else line[8:] for line in x.split('\n'))
|
685 |
+
# """, block="block", language="python" ),
|
686 |
H4("Filtering"),
|
687 |
Ol(
|
688 |
Li("Language Filter: English"),
|