victormiller
committed on
Commit
•
7cc1892
1
Parent(s):
6e60fe2
Update curated.py
Browse files: curated.py +7 -7
curated.py
CHANGED
@@ -440,7 +440,7 @@ table_div_phil = Div(NotStr(table_html_phil), style="margin: 40px;")
|
|
440 |
|
441 |
filtering_process = Div(
|
442 |
Section(
|
443 |
-
|
444 |
),
|
445 |
Section(
|
446 |
Div(
|
@@ -965,7 +965,7 @@ for dataset in df.columns[1:]:
|
|
965 |
# Update the layout
|
966 |
fig.update_layout(
|
967 |
barmode='stack',
|
968 |
-
title='
|
969 |
xaxis_title='Filter',
|
970 |
yaxis_title='Number of Lines',
|
971 |
legend_title='Dataset',
|
@@ -1123,14 +1123,14 @@ def curated(request):
|
|
1123 |
copyright_disclaimer,
|
1124 |
plotly2fasthtml(treemap_chart),
|
1125 |
data_preprocessing_div,
|
1126 |
-
plotly2fasthtml(diff2_stacked_bar),
|
1127 |
H2("Curated Sources Processing"),
|
1128 |
-
|
|
|
1129 |
filtering_process,
|
1130 |
data_preparation_div,
|
1131 |
-
H2("Local Deduplication"),
|
1132 |
-
local_dedup_text,
|
1133 |
-
table_div_data_pipe,
|
1134 |
id="inner-text",
|
1135 |
)
|
1136 |
|
|
|
440 |
|
441 |
filtering_process = Div(
|
442 |
Section(
|
443 |
+
H3("This section contains the specific filtering steps taken for all 14 curated datasets."),
|
444 |
),
|
445 |
Section(
|
446 |
Div(
|
|
|
965 |
# Update the layout
|
966 |
fig.update_layout(
|
967 |
barmode='stack',
|
968 |
+
title='Document Reduction by Filter for Each Dataset',
|
969 |
xaxis_title='Filter',
|
970 |
yaxis_title='Number of Lines',
|
971 |
legend_title='Dataset',
|
|
|
1123 |
copyright_disclaimer,
|
1124 |
plotly2fasthtml(treemap_chart),
|
1125 |
data_preprocessing_div,
|
|
|
1126 |
H2("Curated Sources Processing"),
|
1127 |
+
plotly2fasthtml(diff2_stacked_bar),
|
1128 |
+
P("The figure above provides a global view of the document filtering results. ~8% of documents were removed during these three steps."),
|
1129 |
filtering_process,
|
1130 |
data_preparation_div,
|
1131 |
+
#H2("Local Deduplication"), are these numbers even right?
|
1132 |
+
#local_dedup_text,
|
1133 |
+
#table_div_data_pipe,
|
1134 |
id="inner-text",
|
1135 |
)
|
1136 |
|