Spaces:

B-patents
/

patent-bert

Build error

App Files Files Community

danseith committed on Feb 26, 2023

Commit

616c8c6

1 Parent(s): a95bc58

Added warning to single edit and added list of words to not substitute.

Browse files

Files changed (1) hide show

app.py +23 -15

app.py CHANGED Viewed

@@ -28,7 +28,7 @@ tab_one_examples = [['A crustless _ made from two slices of baked bread.'],
                     ]
-def add_mask(text, size=1):
     split_text = text.split()
     # If the user supplies a mask, don't add more
@@ -36,13 +36,20 @@ def add_mask(text, size=1):
         u_pos = [i for i, s in enumerate(split_text) if '_' in s][0]
         split_text[u_pos] = '[MASK]'
         return ' '.join(split_text), '[MASK]'
-    idx = np.random.randint(len(split_text), size=size)
-    masked_strings = []
-    for i in idx:
-        masked_strings.append(split_text[i])
-        split_text[i] = '[MASK]'
     masked_output = ' '.join(split_text)
-    return masked_output, masked_strings
 class TempScalePipe(FillMaskPipeline):
@@ -154,8 +161,7 @@ def sample_output(out, sampling):
 def unmask_single(text, temp=1):
-    tp = add_mask(text, size=1)
-    masked_text, masked = tp[0], tp[1]
     res = scrambler(masked_text, temp=temp, top_k=10)
     out = {item["token_str"]: item["score"] for item in res}
     return out
@@ -164,21 +170,20 @@ def unmask_single(text, temp=1):
 def unmask(text, temp, rounds):
     sampling = 'multi'
     for _ in range(rounds):
-        tp = add_mask(text, size=1)
-        masked_text, masked = tp[0], tp[1]
         split_text = masked_text.split()
         res = scrambler(masked_text, temp=temp, top_k=15)
         mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
         out = {item["token_str"]: item["score"] for item in res}
         new_token = sample_output(out, sampling)
         unsuccessful_iters = 0
-        while new_token == masked[0]:
             if unsuccessful_iters > 5:
                 break
             print('skipped', new_token)
             new_token = sample_output(out, sampling=sampling)
             unsuccessful_iters += 1
-        if new_token == masked[0]:
             split_text[mask_pos] = new_token
         else:
             split_text[mask_pos] = '*' + new_token + '*'
@@ -188,6 +193,7 @@ def unmask(text, temp, rounds):
     text[0] = text[0].upper()
     return ''.join(text)
 textbox1 = gr.Textbox(label="Input Sentence", lines=5)
 output_textbox1 = gr.Textbox(placeholder="Output will appear here", lines=4)
@@ -197,13 +203,15 @@ temp_slider2 = gr.Slider(1.0, 3.0, value=1.0, label='Creativity')
 edit_slider2 = gr.Slider(1, 20, step=1, value=1.0, label='Number of edits')
 title1 = "Patent-BERT Sentence Remix-er: Single Edit"
-description1 = """<p>Try inserting a '_' where you want the model to generate a list of likely words.
 <br/>
 <p/>"""
 title2 = "Patent-BERT Sentence Remix-er: Multiple Edits"
 description2 = """<p>Try typing in a sentence for the model to remix. Adjust the 'creativity' scale bar to change the
 the model's confidence in its likely substitutions and the 'number of edits' for the number of edits you want
-the model to attempt to make. <br/> <p/> """
 demo1 = gr.Interface(
     fn=unmask_single,

                     ]
+def add_mask(text):
     split_text = text.split()
     # If the user supplies a mask, don't add more
         u_pos = [i for i, s in enumerate(split_text) if '_' in s][0]
         split_text[u_pos] = '[MASK]'
         return ' '.join(split_text), '[MASK]'
+    idx = np.random.randint(len(split_text), size=1).astype(int)[0]
+    # Don't mask certain words
+    num_iters = 0
+    while split_text[idx].lower() in ['a', 'an', 'the', 'is', 'and', 'or']:
+        num_iters += 1
+        idx = np.random.randint(len(split_text), size=1).astype(int)[0]
+        if num_iters > 10:
+            break
+    masked_string = split_text[idx]
+    split_text[idx] = '[MASK]'
     masked_output = ' '.join(split_text)
+    return masked_output, masked_string
 class TempScalePipe(FillMaskPipeline):
 def unmask_single(text, temp=1):
+    masked_text, _ = add_mask(text)
     res = scrambler(masked_text, temp=temp, top_k=10)
     out = {item["token_str"]: item["score"] for item in res}
     return out
 def unmask(text, temp, rounds):
     sampling = 'multi'
     for _ in range(rounds):
+        masked_text, masked = add_mask(text)
         split_text = masked_text.split()
         res = scrambler(masked_text, temp=temp, top_k=15)
         mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
         out = {item["token_str"]: item["score"] for item in res}
         new_token = sample_output(out, sampling)
         unsuccessful_iters = 0
+        while new_token == masked:
             if unsuccessful_iters > 5:
                 break
             print('skipped', new_token)
             new_token = sample_output(out, sampling=sampling)
             unsuccessful_iters += 1
+        if new_token == masked:
             split_text[mask_pos] = new_token
         else:
             split_text[mask_pos] = '*' + new_token + '*'
     text[0] = text[0].upper()
     return ''.join(text)
 textbox1 = gr.Textbox(label="Input Sentence", lines=5)
 output_textbox1 = gr.Textbox(placeholder="Output will appear here", lines=4)
 edit_slider2 = gr.Slider(1, 20, step=1, value=1.0, label='Number of edits')
 title1 = "Patent-BERT Sentence Remix-er: Single Edit"
+description1 = """<p>Try inserting a '_' where you want the model to generate a list of likely words.
+<strong>Note:</strong> You can only add one '_' per submission.
 <br/>
 <p/>"""
 title2 = "Patent-BERT Sentence Remix-er: Multiple Edits"
 description2 = """<p>Try typing in a sentence for the model to remix. Adjust the 'creativity' scale bar to change the
 the model's confidence in its likely substitutions and the 'number of edits' for the number of edits you want
+the model to attempt to make. The words substituted in the output sentence will be enclosed in asterisks (e.g., *word*).
+<br/> <p/> """
 demo1 = gr.Interface(
     fn=unmask_single,