Konstantin
committed on
Commit
·
89b186a
1
Parent(s):
bd8327d
Remove subtoken indicators ('##') in token attribution
Browse files
app.py
CHANGED
@@ -62,11 +62,17 @@ toxicity_pipeline, cls_explainer = load_pipeline()
|
|
62 |
# Auxiliary functions
|
63 |
def format_explainer_html(html_string):
|
64 |
"""Extract tokens with attribution-based background color."""
|
|
|
65 |
soup = BeautifulSoup(html_string, 'html.parser')
|
66 |
p = soup.new_tag('p')
|
|
|
67 |
# Select token elements and remove model specific tokens
|
68 |
for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
|
69 |
-
|
|
|
|
|
|
|
|
|
70 |
return p.prettify()
|
71 |
|
72 |
|
|
|
62 |
# Auxiliary functions
|
63 |
def format_explainer_html(html_string):
    """Collect the explainer's token marks into a single paragraph.

    Parses ``html_string``, takes the ``<mark>`` elements found in the last
    ``<td>`` cell (skipping the first and the last mark), strips a leading
    ``'##'`` from each token's text, and moves the marks into a new ``<p>``
    whose content is wrapped in a black ``<font>`` tag.

    Returns the prettified HTML of that paragraph as a string.
    """
    subword_marker = '##'
    soup = BeautifulSoup(html_string, 'html.parser')

    # Build <p><font color="black"></font></p> as the container for the tokens.
    paragraph = soup.new_tag('p')
    wrapper = soup.new_tag('font', attrs={'color': 'black'})
    paragraph.append(wrapper)

    # Select token elements and remove model specific tokens
    # (the first and last mark of the last table cell are dropped).
    last_cell = soup.find_all('td')[-1]
    marks = last_cell.find_all('mark')[1:-1]

    for mark in marks:
        label = mark.font.text.strip()
        if label.startswith(subword_marker):
            label = label[len(subword_marker):]
        # Assigning .string replaces the mark's children with the cleaned text.
        mark.string = label
        wrapper.append(mark)

    return paragraph.prettify()
|
77 |
|
78 |
|