CarisMu committed on
Commit
c206db1
·
verified ·
1 Parent(s): 98e5fb8

fix errors

Browse files
Files changed (1) hide show
  1. common.py +2 -12
common.py CHANGED
@@ -104,16 +104,6 @@ def dedup_pairs_bands():
104
  }
105
  ).to_html(index=False, border=0)
106
 
107
- # Get the HTML table
108
- table_html_data = dedup_pairs_bands()
109
-
110
- # Wrap the table in a Div for styling
111
- table_div_data = Div(
112
- text=table_html_data,
113
- style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;"
114
- )
115
-
116
-
117
  def dup_docs_count_graph():
118
  dup_docs_count = {
119
  "80": 382164413,
@@ -288,7 +278,7 @@ pii_table = pd.DataFrame(
288
  )
289
 
290
  table_html_pii = pii_table.to_html(index=False, border=0)
291
- table_div_pii = Div(NotStr(table_html_pii), style="margin: 40px;")
292
 
293
  global_div = Div(
294
  Section(
@@ -374,7 +364,7 @@ global_div = Div(
374
  P(
375
  "There is a high chance that duplicates from different bands will have the same pairs in the same horizontal partition. Performing the Bloom filter step reduces the number of pairs by nearly ninefold."
376
  ),
377
- Div(NotStr(dedup_pairs_bands()), style="margin: 40px;"),
378
  P(
379
  "The resulting unique pairs are then used to identify clusters of near-duplicates by finding connected components in a graph, where the vertices represent documents and the edges represent matches."
380
  ),
 
104
  }
105
  ).to_html(index=False, border=0)
106
 
 
 
 
 
 
 
 
 
 
 
107
  def dup_docs_count_graph():
108
  dup_docs_count = {
109
  "80": 382164413,
 
278
  )
279
 
280
  table_html_pii = pii_table.to_html(index=False, border=0)
281
+ table_div_pii = Div(NotStr(table_html_pii), style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;")
282
 
283
  global_div = Div(
284
  Section(
 
364
  P(
365
  "There is a high chance that duplicates from different bands will have the same pairs in the same horizontal partition. Performing the Bloom filter step reduces the number of pairs by nearly ninefold."
366
  ),
367
+ Div(NotStr(dedup_pairs_bands()), style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;"),
368
  P(
369
  "The resulting unique pairs are then used to identify clusters of near-duplicates by finding connected components in a graph, where the vertices represent documents and the edges represent matches."
370
  ),