Spaces:
Build error
Build error
danseith
committed on
Commit
•
616c8c6
1
Parent(s):
a95bc58
Added warning to single edit and added list of words to not substitute.
Browse files
app.py
CHANGED
@@ -28,7 +28,7 @@ tab_one_examples = [['A crustless _ made from two slices of baked bread.'],
|
|
28 |
]
|
29 |
|
30 |
|
31 |
-
def add_mask(text
|
32 |
split_text = text.split()
|
33 |
|
34 |
# If the user supplies a mask, don't add more
|
@@ -36,13 +36,20 @@ def add_mask(text, size=1):
|
|
36 |
u_pos = [i for i, s in enumerate(split_text) if '_' in s][0]
|
37 |
split_text[u_pos] = '[MASK]'
|
38 |
return ' '.join(split_text), '[MASK]'
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
masked_output = ' '.join(split_text)
|
45 |
-
return masked_output,
|
46 |
|
47 |
|
48 |
class TempScalePipe(FillMaskPipeline):
|
@@ -154,8 +161,7 @@ def sample_output(out, sampling):
|
|
154 |
|
155 |
|
156 |
def unmask_single(text, temp=1):
|
157 |
-
|
158 |
-
masked_text, masked = tp[0], tp[1]
|
159 |
res = scrambler(masked_text, temp=temp, top_k=10)
|
160 |
out = {item["token_str"]: item["score"] for item in res}
|
161 |
return out
|
@@ -164,21 +170,20 @@ def unmask_single(text, temp=1):
|
|
164 |
def unmask(text, temp, rounds):
|
165 |
sampling = 'multi'
|
166 |
for _ in range(rounds):
|
167 |
-
|
168 |
-
masked_text, masked = tp[0], tp[1]
|
169 |
split_text = masked_text.split()
|
170 |
res = scrambler(masked_text, temp=temp, top_k=15)
|
171 |
mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
|
172 |
out = {item["token_str"]: item["score"] for item in res}
|
173 |
new_token = sample_output(out, sampling)
|
174 |
unsuccessful_iters = 0
|
175 |
-
while new_token == masked
|
176 |
if unsuccessful_iters > 5:
|
177 |
break
|
178 |
print('skipped', new_token)
|
179 |
new_token = sample_output(out, sampling=sampling)
|
180 |
unsuccessful_iters += 1
|
181 |
-
if new_token == masked
|
182 |
split_text[mask_pos] = new_token
|
183 |
else:
|
184 |
split_text[mask_pos] = '*' + new_token + '*'
|
@@ -188,6 +193,7 @@ def unmask(text, temp, rounds):
|
|
188 |
text[0] = text[0].upper()
|
189 |
return ''.join(text)
|
190 |
|
|
|
191 |
textbox1 = gr.Textbox(label="Input Sentence", lines=5)
|
192 |
output_textbox1 = gr.Textbox(placeholder="Output will appear here", lines=4)
|
193 |
|
@@ -197,13 +203,15 @@ temp_slider2 = gr.Slider(1.0, 3.0, value=1.0, label='Creativity')
|
|
197 |
edit_slider2 = gr.Slider(1, 20, step=1, value=1.0, label='Number of edits')
|
198 |
|
199 |
title1 = "Patent-BERT Sentence Remix-er: Single Edit"
|
200 |
-
description1 = """<p>Try inserting a '_' where you want the model to generate a list of likely words.
|
|
|
201 |
<br/>
|
202 |
<p/>"""
|
203 |
title2 = "Patent-BERT Sentence Remix-er: Multiple Edits"
|
204 |
description2 = """<p>Try typing in a sentence for the model to remix. Adjust the 'creativity' scale bar to change the
|
205 |
the model's confidence in its likely substitutions and the 'number of edits' for the number of edits you want
|
206 |
-
the model to attempt to make.
|
|
|
207 |
|
208 |
demo1 = gr.Interface(
|
209 |
fn=unmask_single,
|
|
|
28 |
]
|
29 |
|
30 |
|
31 |
+
def add_mask(text):
|
32 |
split_text = text.split()
|
33 |
|
34 |
# If the user supplies a mask, don't add more
|
|
|
36 |
u_pos = [i for i, s in enumerate(split_text) if '_' in s][0]
|
37 |
split_text[u_pos] = '[MASK]'
|
38 |
return ' '.join(split_text), '[MASK]'
|
39 |
+
|
40 |
+
idx = np.random.randint(len(split_text), size=1).astype(int)[0]
|
41 |
+
# Don't mask certain words
|
42 |
+
num_iters = 0
|
43 |
+
while split_text[idx].lower() in ['a', 'an', 'the', 'is', 'and', 'or']:
|
44 |
+
num_iters += 1
|
45 |
+
idx = np.random.randint(len(split_text), size=1).astype(int)[0]
|
46 |
+
if num_iters > 10:
|
47 |
+
break
|
48 |
+
|
49 |
+
masked_string = split_text[idx]
|
50 |
+
split_text[idx] = '[MASK]'
|
51 |
masked_output = ' '.join(split_text)
|
52 |
+
return masked_output, masked_string
|
53 |
|
54 |
|
55 |
class TempScalePipe(FillMaskPipeline):
|
|
|
161 |
|
162 |
|
163 |
def unmask_single(text, temp=1):
|
164 |
+
masked_text, _ = add_mask(text)
|
|
|
165 |
res = scrambler(masked_text, temp=temp, top_k=10)
|
166 |
out = {item["token_str"]: item["score"] for item in res}
|
167 |
return out
|
|
|
170 |
def unmask(text, temp, rounds):
|
171 |
sampling = 'multi'
|
172 |
for _ in range(rounds):
|
173 |
+
masked_text, masked = add_mask(text)
|
|
|
174 |
split_text = masked_text.split()
|
175 |
res = scrambler(masked_text, temp=temp, top_k=15)
|
176 |
mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
|
177 |
out = {item["token_str"]: item["score"] for item in res}
|
178 |
new_token = sample_output(out, sampling)
|
179 |
unsuccessful_iters = 0
|
180 |
+
while new_token == masked:
|
181 |
if unsuccessful_iters > 5:
|
182 |
break
|
183 |
print('skipped', new_token)
|
184 |
new_token = sample_output(out, sampling=sampling)
|
185 |
unsuccessful_iters += 1
|
186 |
+
if new_token == masked:
|
187 |
split_text[mask_pos] = new_token
|
188 |
else:
|
189 |
split_text[mask_pos] = '*' + new_token + '*'
|
|
|
193 |
text[0] = text[0].upper()
|
194 |
return ''.join(text)
|
195 |
|
196 |
+
|
197 |
textbox1 = gr.Textbox(label="Input Sentence", lines=5)
|
198 |
output_textbox1 = gr.Textbox(placeholder="Output will appear here", lines=4)
|
199 |
|
|
|
203 |
edit_slider2 = gr.Slider(1, 20, step=1, value=1.0, label='Number of edits')
|
204 |
|
205 |
title1 = "Patent-BERT Sentence Remix-er: Single Edit"
|
206 |
+
description1 = """<p>Try inserting a '_' where you want the model to generate a list of likely words.
|
207 |
+
<strong>Note:</strong> You can only add one '_' per submission.
|
208 |
<br/>
|
209 |
<p/>"""
|
210 |
title2 = "Patent-BERT Sentence Remix-er: Multiple Edits"
|
211 |
description2 = """<p>Try typing in a sentence for the model to remix. Adjust the 'creativity' scale bar to change the
|
212 |
the model's confidence in its likely substitutions and the 'number of edits' for the number of edits you want
|
213 |
+
the model to attempt to make. The words substituted in the output sentence will be enclosed in asterisks (e.g., *word*).
|
214 |
+
<br/> <p/> """
|
215 |
|
216 |
demo1 = gr.Interface(
|
217 |
fn=unmask_single,
|