Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -130,7 +130,7 @@ STYLE = """
     margin-top: -5px;
     transform: rotate(315deg);
 }
-.
+.tree li a {
     border: 1px solid var(--body-text-color);
     padding: 5px;
     border-radius: 5px;
@@ -141,19 +141,18 @@ STYLE = """
     align-items: center;
     justify-content: space-between;
     overflow: hidden;
-    cursor: pointer;
 }
-.
+.tree li a span {
     padding: 5px;
     font-size: 12px;
     letter-spacing: 1px;
     font-weight: 500;
 }
 /*Hover-Section*/
-.
+.tree li a:hover, .tree li a:hover+ul li a {
     background: var(--primary-500);
 }
-.
+.tree li a:hover+ul li::after, .tree li a:hover+ul li::before, .tree li a:hover+ul::before, .tree li a:hover+ul ul::before, .tree li a:hover+ul a::before {
     border-color: var(--primary-500);
 }
 .chosen-token {
@@ -175,9 +174,6 @@ STYLE = """
 .nonselected-sequence {
     background-color: var(--primary-500);
 }
-.nomargin {
-    padding-left: 0!important;
-}
 """
 
 
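The rewritten selectors above (`.tree li a`, `.tree li a span`, and the `:hover` chain) only match markup rendered as nested `<ul>`/`<li>` items whose labels are `<a><span>` pairs, which is what the `generate_nodes`/`generate_html` changes below produce. A minimal sketch of that markup shape, with made-up tokens and scores rather than anything taken from this commit:

```python
# Sketch only: the shape of the HTML that the `.tree li a` rules target.
# Tokens, nesting depth and scores below are illustrative, not output of this app.
EXAMPLE_TREE_HTML = """
<div class="custom-container">
  <div class="tree">
    <ul>
      <li><a href='#' id='root'><span><b>Conclusion: thanks a lot.</b></span></a>
        <ul>
          <li><a href='#' class='nonfinal child'><span><b> That</b></span></a>
            <ul>
              <li><a href='#' class='end-of-text child selected-sequence'>
                <span><b>'s</b><br>Total score: -1.23</span></a></li>
            </ul>
          </li>
        </ul>
      </li>
    </ul>
  </div>
</div>
"""
```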
@@ -220,14 +216,14 @@ def generate_nodes(node, step):
             selected_class = "selected-sequence"
         else:
             selected_class = "nonselected-sequence"
-        return f"<li> <
+        return f"<li> <a href='#' class='end-of-text child {selected_class}'> <span> <b>{clean(token)}</b> <br>Total score: {node.total_score:.2f}</span> </a> </li>"
 
     html_content = (
-        f"<li> <
+        f"<li> <a href='#' class='nonfinal child'> <span> <b>{clean(token)}</b> </span>"
     )
     if node.table is not None:
         html_content += node.table
-    html_content += "</
+    html_content += "</a>"
 
     if len(node.children.keys()) > 0:
         html_content += "<ul> "
@@ -241,15 +237,16 @@ def generate_nodes(node, step):
 
 def generate_html(start_sentence, original_tree):
     html_output = f"""<div class="custom-container">
-    <div class="tree">
-    <
+    <div class="tree">
+    <ul> <li> <a href='#' id='root'> <span> <b>{start_sentence}</b> </span> {original_tree.table} </a>"""
     html_output += "<ul> "
     for subnode in original_tree.children.values():
         html_output += generate_nodes(subnode, step=1)
     html_output += "</ul>"
     html_output += """
-    </li
-
+    </li> </ul>
+    </div>
+    </body>
 """
     return html_output
 
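`generate_html` returns a plain HTML string, so the Space presumably displays it through a Gradio HTML component with `STYLE` applied. The snippet below is a hypothetical wiring sketch, not code from this commit: component names, slider ranges and default values are assumptions; only `STYLE`, `get_beam_search_html` and its five input parameters come from app.py.

```python
import gradio as gr

# Hypothetical hookup of the pieces edited in this diff.
with gr.Blocks(css=STYLE) as demo:  # STYLE is the CSS string edited above
    text = gr.Textbox(label="Sentence to decode from")
    n_steps = gr.Slider(1, 12, value=6, step=1, label="Number of steps")
    n_beams = gr.Slider(1, 4, value=3, step=1, label="Number of beams")
    length_penalty = gr.Slider(-3.0, 3.0, value=1.0, label="Length penalty")
    n_return = gr.Slider(1, 4, value=3, step=1, label="Number of return sequences")
    button = gr.Button("Run beam search")
    tree_html = gr.HTML()       # receives the tree produced by generate_html
    tables_md = gr.Markdown()   # receives the per-sequence score tables
    button.click(
        get_beam_search_html,
        inputs=[text, n_steps, n_beams, length_penalty, n_return],
        outputs=[tree_html, tables_md],
    )

demo.launch()
```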
@@ -272,7 +269,7 @@ class BeamNode:
     is_selected_sequence: bool
 
 
-def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
+def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, beam_indexes_source):
     input_length = len(tokenizer([start_sentence], return_tensors="pt"))
     original_tree = BeamNode(
         cumulative_score=0,
@@ -289,6 +286,8 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
     beam_trees = [original_tree] * n_beams
 
     for step, step_scores in enumerate(scores):
+
+        # Gather all possible descendants for each beam
         (
             top_token_indexes,
             top_cumulative_scores,
@@ -296,7 +295,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
             current_sequence,
             top_tokens,
         ) = ([], [], [], [], [])
-        for beam_ix in range(n_beams):
+        for beam_ix in range(n_beams):
             current_beam = beam_trees[beam_ix]
 
             # skip if the beam is already final
@@ -316,7 +315,6 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
             current_sequence += [beam_trees[beam_ix].current_sequence] * n_beams
             top_tokens += [tokenizer.decode([el]) for el in current_top_token_indexes]
 
-
         top_df = pd.DataFrame.from_dict(
             {
                 "token_index": top_token_indexes,
@@ -336,6 +334,9 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
         top_df_selected = top_df.sort_values("cumulative_score", ascending=False).iloc[
             :n_beams
         ]
+        if any(["you enjoyed" in el for el in top_df["current_sequence"]]):
+            print("Displaying debug info:::")
+            display(top_df_selected)
 
         # Write the scores table - one per beam source
         for beam_ix in reversed(list(range(n_beams))):
@@ -352,14 +353,13 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
             )
             beam_trees[beam_ix].table = markdown_table
 
-        # Add new children
+        # Add new children to each beam
         cumulative_scores = [beam.cumulative_score for beam in beam_trees]
-        for
-            current_token_choice_ix = top_df_selected.iloc[beam_ix]["token_index"]
-            current_token_choice = tokenizer.decode([current_token_choice_ix])
-
+        for _, row in top_df_selected.iterrows():
             # Update the source tree
-            source_beam_ix = int(
+            source_beam_ix = int(row["beam_index"])
+            current_token_choice_ix = row["token_index"]
+            current_token_choice = tokenizer.decode([current_token_choice_ix])
 
             cumulative_score = (
                 cumulative_scores[source_beam_ix]
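The rewritten loop walks the rows of `top_df_selected` instead of indexing with `beam_ix`, so a single source beam can contribute several of the kept candidates. A self-contained toy version of this selection step, with invented scores, shows the mechanics:

```python
import pandas as pd

# Each row is one candidate continuation: the beam it extends ("beam_index"),
# the token it would append ("token_index"), and the cumulative log-probability
# of the resulting sequence. All numbers here are made up.
n_beams = 2
top_df = pd.DataFrame.from_dict(
    {
        "token_index": [340, 13, 198, 262],
        "cumulative_score": [-1.1, -2.7, -1.9, -3.4],
        "beam_index": [0, 0, 1, 1],
        "current_sequence": ["thanks a lot", "thanks a lot", "thanks.", "thanks."],
    }
)

# Keep the n_beams best candidates overall, like top_df_selected above.
top_df_selected = top_df.sort_values("cumulative_score", ascending=False).iloc[:n_beams]

# Walk the selected rows: both survivors may legitimately come from the same beam.
for _, row in top_df_selected.iterrows():
    source_beam_ix = int(row["beam_index"])
    print(f"beam {source_beam_ix} extends with token {int(row['token_index'])}")
```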
@@ -368,6 +368,9 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
             current_sequence = (
                 beam_trees[source_beam_ix].current_sequence + current_token_choice
             )
+            if current_token_choice_ix == 340:
+                print("Found info:")
+                print(f"We generate token '{current_token_choice}', and the total sequence is '{current_sequence}'")
             beam_trees[source_beam_ix].children[current_token_choice_ix] = BeamNode(
                 current_token_ix=current_token_choice_ix,
                 table=None,
@@ -387,7 +390,8 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
             ),
         )
 
-
+
+        # Swap all beams by descending cumul score, so that n°1 has the highest cumulative score, and so on
         beam_trees = [
             beam_trees[int(top_df_selected.iloc[beam_ix]["beam_index"])]
             for beam_ix in range(n_beams)
@@ -400,7 +404,6 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
 
     return original_tree
 
-
 @spaces.GPU
 def get_beam_search_html(
     input_text, number_steps, number_beams, length_penalty, num_return_sequences
@@ -432,6 +435,7 @@ def get_beam_search_html(
         outputs.scores[:],
         length_penalty,
         decoded_sequences,
+        outputs.beam_indices,
     )
     html = generate_html(input_text, original_tree)
     return html, markdown
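Passing `outputs.beam_indices` through to `generate_beams` relies on the fact that `model.generate` returns per-step scores and beam indices when beam search is run with `return_dict_in_generate=True` and `output_scores=True`. A rough sketch of that upstream call (the model name and generation settings are placeholders; the actual call inside `get_beam_search_html` is not shown in this hunk):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder model; the Space's own checkpoint may differ.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer(["Conclusion: thanks a lot. That's all for today"], return_tensors="pt")
outputs = model.generate(
    **inputs,
    num_beams=3,
    num_return_sequences=3,
    max_new_tokens=5,
    length_penalty=1.0,
    return_dict_in_generate=True,  # structured output instead of bare token ids
    output_scores=True,            # required for per-step scores and beam_indices
)

decoded_sequences = tokenizer.batch_decode(outputs.sequences)

# These are the values handed to generate_beams(...) in the hunk above:
#   outputs.scores        -> one tensor of candidate scores per generation step
#   outputs.beam_indices  -> for each returned sequence, its source beam at every step
original_tree = generate_beams(
    "Conclusion: thanks a lot. That's all for today",
    outputs.scores[:],
    1.0,
    decoded_sequences,
    outputs.beam_indices,
)
```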
@@ -466,7 +470,7 @@ This parameter will not impact the beam search paths, but only influence the cho
     )
     text = gr.Textbox(
         label="Sentence to decode from",
-        value="Conclusion: thanks a lot.
+        value="Conclusion: thanks a lot. That's all for today",
     )
     with gr.Row():
         n_steps = gr.Slider(