Spaces:

gabrielanicole
/

MNV-beam_search

Sleeping

App Files Files Community

Gabriela Nicole Gonzalez Saez committed on Feb 18, 2024

Commit

e4bccbf

1 Parent(s): 9e85aff

topk

Browse files

Files changed (2) hide show

app.py +35 -7
plotsjs.js +140 -4

app.py CHANGED Viewed

@@ -16,8 +16,6 @@ from functools import partial
 from transformers import AutoTokenizer, MarianTokenizer, AutoModel, AutoModelForSeq2SeqLM, MarianMTModel
 model_es = "Helsinki-NLP/opus-mt-en-es"
 model_fr = "Helsinki-NLP/opus-mt-en-fr"
 model_zh = "Helsinki-NLP/opus-mt-en-zh"
@@ -75,6 +73,28 @@ contrastive_examples = [
 	]
 def split_token_from_sequences(sequences, model) -> dict :
 	n_sentences = len(sequences)
@@ -138,7 +158,8 @@ def split_token_from_sequences(sequences, model) -> dict :
 	return dict_parent
-import gradio as gr
 html = """
 <html>
@@ -149,9 +170,13 @@ html = """
     <p id="viz"></p>
     <p id="demo2"></p>
-    <div id="d3_beam_search"></div>
   </body>
 </html>
@@ -175,16 +200,19 @@ def sentence_maker(w1, model, var2={}):
   beam_dict = split_token_from_sequences(translated.sequences,model )
   tgt_text = dict_tokenizer_tr[model].decode(translated.sequences[0], skip_special_tokens=True)
-  return [tgt_text,beam_dict]
 def sentence_maker2(w1,j2):
-  #  json_value = {'one':1}
-  #  return f"{w1['two']} in sentence22..."
    print(w1,j2)
    return "in sentence22..."
 with gr.Blocks(js="plotsjs.js") as demo:
 	gr.Markdown(
 	"""

 from transformers import AutoTokenizer, MarianTokenizer, AutoModel, AutoModelForSeq2SeqLM, MarianMTModel
 model_es = "Helsinki-NLP/opus-mt-en-es"
 model_fr = "Helsinki-NLP/opus-mt-en-fr"
 model_zh = "Helsinki-NLP/opus-mt-en-zh"
 	]
def get_k_prob_tokens(transition_scores, result, model, k_values=5):
	"""Collect the ``k_values`` highest-scoring candidate tokens at each generation step.

	Only row 0 of every per-step score tensor in ``result.scores`` is inspected.

	Args:
		transition_scores: Per-sequence transition scores. Only the length of
			its first row is used, to bound the number of steps visited.
		result: Generation output exposing ``sequences`` and ``scores``.
		model: Key into the module-level ``dict_tokenizer_tr`` mapping.
		k_values: Number of candidates kept per step.

	Returns:
		A list with one ``[token_ids, exp_scores, decoded_tokens]`` entry per
		step: two NumPy arrays followed by a list of decoded strings.
	"""
	tokenizer_tr = dict_tokenizer_tr[model]
	# First beam only...
	# NOTE(review): under beam search, row 0 of scores[i_step] may not be the
	# hypothesis that ends up as sequences[0] -- confirm against beam_indices.
	bs = 0
	# Visit as many steps as the shorter of the generated tokens (first position
	# dropped) and the transition scores, exactly as zipping the two would.
	n_steps = min(len(result.sequences[:, 1:][bs]), len(transition_scores[bs]))
	result_output = []
	for i_step in range(n_steps):
		# One top-k per step; indices and values come from the same call.
		top = result.scores[i_step][bs].topk(k_values)
		bs_alt = [tokenizer_tr.decode(tok_id) for tok_id in top.indices]
		bs_alt_scores = np.exp(top.values)
		result_output.append([np.array(top.indices), np.array(bs_alt_scores), bs_alt])
	return result_output
 def split_token_from_sequences(sequences, model) -> dict :
 	n_sentences = len(sequences)
 	return dict_parent
 html = """
 <html>
     <p id="viz"></p>
     <p id="demo2"></p>
+    <h4> Exploring top-k probable tokens </h4>
+    <div id="d3_text_grid">... top 10 tokens generated at each step ...</div>
+    <h4> Exploring the Beam Search sequence generation</h4>
+    <div id="d3_beam_search">... top 4 generated sequences using Beam Search...</div>
   </body>
 </html>
   beam_dict = split_token_from_sequences(translated.sequences,model )
   tgt_text = dict_tokenizer_tr[model].decode(translated.sequences[0], skip_special_tokens=True)
+  transition_scores = dict_models_tr[model].compute_transition_scores(
+	translated.sequences, translated.scores, translated.beam_indices  , normalize_logits=True
+	)
+  prob_tokens = get_k_prob_tokens(transition_scores, translated, model, k_values=10)
+  return [tgt_text,[beam_dict,prob_tokens]]
 def sentence_maker2(w1,j2):
    """Print both arguments and return a fixed placeholder string."""
    print(w1,j2)
    return "in sentence22..."
 with gr.Blocks(js="plotsjs.js") as demo:
 	gr.Markdown(
 	"""

plotsjs.js CHANGED Viewed

@@ -41,20 +41,24 @@ async () => {
 	  globalThis.testFn_out_json = (data) => {
-		const idMapping = data.reduce((acc, el, i) => {
 		acc[el.id] = i;
 		return acc;
 		}, {});
 		let root;
-		data.forEach(el => {
 		// Handle the root element
 		if (el.parentId === null) {
 			root = el;
 			return;
 		}
-		// Use our mapping to locate the parent element in our data array
-		const parentEl = data[idMapping[el.parentId]];
 		// Add our current el to its parent's `children` array
 		parentEl.children = [...(parentEl.children || []), el];
 		});
@@ -63,6 +67,14 @@ async () => {
 		// document.getElementById('d3_beam_search').innerHTML = Tree(root)
 		d3.select('#d3_beam_search').html("");
 		d3.select('#d3_beam_search').append(function(){return  Tree(root);});
 		// $('#d3_beam_search').html(Tree(root)) ;
 		return(['string', {}])
@@ -206,6 +218,130 @@ function Tree(data, { // data is either tabular (array of objects) or hierarchy
 	return svg.node();
   }

 	  globalThis.testFn_out_json = (data) => {
+		console.log(data);
+		data_beam = data[0];
+		data_probs = data[1];
+		const idMapping = data_beam.reduce((acc, el, i) => {
 		acc[el.id] = i;
 		return acc;
 		}, {});
 		let root;
+		data_beam.forEach(el => {
 		// Handle the root element
 		if (el.parentId === null) {
 			root = el;
 			return;
 		}
+		// Use our mapping to locate the parent element in our data_beam array
+		const parentEl = data_beam[idMapping[el.parentId]];
 		// Add our current el to its parent's `children` array
 		parentEl.children = [...(parentEl.children || []), el];
 		});
 		// document.getElementById('d3_beam_search').innerHTML = Tree(root)
 		d3.select('#d3_beam_search').html("");
 		d3.select('#d3_beam_search').append(function(){return  Tree(root);});
+		//probabilities;
+		//
+		d3.select('#d3_text_grid').html("");
+		d3.select('#d3_text_grid').append(function(){return  TextGrid(data_probs);});
+		// $('#d3_text_grid').html(TextGrid(data)) ;
 		// $('#d3_beam_search').html(Tree(root)) ;
 		return(['string', {}])
 	return svg.node();
   }
+  function TextGrid(data, div_name, {
+	width = 640, // outer width, in pixels
+	height , // outer height, in pixels
+	r = 3, // radius of nodes
+	padding = 1, // horizontal padding for first and last column
+	// text = d => d[2],
+} = {}){
+	// console.log("TextGrid", data);
+	// Compute the layout.
+	const dx = 10;
+	const dy = 10;  //width / (root.height + padding);
+	const marginTop = 20;
+	const marginRight = 20;
+	const marginBottom = 30;
+	const marginLeft = 30;
+	// Center the tree.
+	let x0 = Infinity;
+	let x1 = -x0;
+	topk = 10;
+	word_length = 20;
+	const rectWidth = 60;
+	const rectTotal = 70;
+	wval = 0
+	const realWidth = rectTotal * data.length
+	const totalWidth = (realWidth > width) ? realWidth : width;
+	// root.each(d => {
+	// 	if (d.x > x1) x1 = d.x;
+	// 	if (d.x < x0) x0 = d.x;
+	// });
+	// Compute the default height.
+	// if (height === undefined) height = x1 - x0 + dx * 2;
+	if (height === undefined) height = topk * word_length + 10;
+	const parent = d3.create("div");
+	// parent.append("svg")
+    //   .attr("width", width)
+    //   .attr("height", height)
+    //   .style("position", "absolute")
+    //   .style("pointer-events", "none")
+    //   .style("z-index", 1);
+	// const svg = d3.create("svg")
+	// // svg = parent.append("svg")
+	// 	.attr("viewBox", [-dy * padding / 2, x0 - dx, width, height])
+	// 	.attr("width", width)
+	// 	.attr("height", height)
+	// 	.attr("style", "max-width: 100%; height: auto; height: intrinsic;")
+	// 	.attr("font-family", "sans-serif")
+	// 	.attr("font-size", 10);
+	// div.data([1, 2, 4, 8, 16, 32], d => d);
+	// div.enter().append("div").text(d => d);
+	const body = parent.append("div")
+	.style("overflow-x", "scroll")
+	.style("-webkit-overflow-scrolling", "touch");
+	const svg = body.append("svg")
+		.attr("width", totalWidth)
+		.attr("height", height)
+		.style("display", "block")
+		.attr("font-family", "sans-serif")
+		.attr("font-size", 10);
+		data.forEach(words_list => {
+			// console.log(wval, words_list);
+			words = words_list[2]; // {'t': words_list[2], 'p': words_list[1]};
+			scores =  words_list[1];
+			words_score = words.map( (x,i) => {return {t: x, p: scores[i]}})
+			// console.log(words_score);
+			// svg.selectAll("text").enter()
+			// 	.data(words)
+			// 	.join("text")
+			// 	.text((d,i) => (d))
+			// 	.attr("x", wval)
+			// 	.attr("y", ((d,i) => (20 + i*20)))
+			var probs = svg.selectAll("text").enter()
+				.data(words_score).join('g');
+			  probs.append("rect")
+				// .data(words)
+				.attr("x", wval)
+				.attr("y", ((d,i) => ( 10+ i*20)))
+				.attr('width', rectWidth)
+				.attr('height', 15)
+				.attr("color", 'gray')
+				.attr("fill", "gray")
+				// .attr("fill-opacity", "0.2")
+				.attr("fill-opacity", (d) => (d.p))
+				.attr("stroke-opacity", 0.8)
+				.append("svg:title")
+				.text(function(d){return d.t+":"+d.p;});
+			probs.append("text")
+				// .data(words)
+				.text((d,i) => (d.t))
+				.attr("x", wval)
+				.attr("y", ((d,i) => (20 + i*20)))
+				// .attr("fill", 'white')
+				.attr("font-weight", 700);
+			wval = wval + rectTotal;
+			});
+	body.node().scrollBy(totalWidth, 0);
+	// return svg.node();
+	return parent.node();
+  }