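"""Gradio demo for the AIISC Watermarking Model.

Given a user prompt, the app paraphrases it, filters the paraphrases with an
entailment check, masks and re-samples words in the accepted paraphrases, and
displays the results as highlighted text and per-sentence tree plots.
"""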
# Standard library
import itertools
import random
import re
import textwrap
from pprint import pprint

# Third-party
import gradio as gr
import ipywidgets as widgets
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import requests
from IPython.display import display, HTML
from matplotlib.colors import ListedColormap, rgb2hex
from PIL import Image
from tenacity import retry
from termcolor import colored
from tqdm import tqdm

# NLP and model libraries
import nltk
from nltk.corpus import stopwords
from nltk.translate.bleu_score import sentence_bleu
from transformers import (
    AutoModelForMaskedLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BertModel,
    BertTokenizer,
    GPT2LMHeadModel,
    pipeline,
)

# Make the stopword corpus available before importing the local masking helpers
nltk.download('stopwords')

# Local modules
from tree import generate_subplot
from paraphraser import generate_paraphrase
from lcs import find_common_subsequences
from highlighter import highlight_common_words, highlight_common_words_dict
from entailment import analyze_entailment
from masking_methods import mask_non_stopword, mask_non_stopword_pseudorandom, high_entropy_words
from sampling_methods import sample_word


# Function for the Gradio interface
def model(prompt):
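    """Run the watermarking pipeline for a single user prompt.

    The prompt is paraphrased and the paraphrases are filtered with an
    entailment check (threshold 0.7). Common subsequences shared between the
    prompt and the accepted paraphrases are located, each accepted sentence is
    masked with three strategies, and every masked sentence is re-filled with
    four sampling techniques. Returns highlighted HTML for the prompt and the
    accepted and discarded paraphrases, plus tree plots for the accepted
    sentences (padded or trimmed to the three tree tabs in the interface).
    """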
    user_prompt = prompt
    paraphrased_sentences = generate_paraphrase(user_prompt)
    analyzed_paraphrased_sentences, selected_sentences, discarded_sentences = analyze_entailment(user_prompt, paraphrased_sentences, 0.7)
    length_accepted_sentences = len(selected_sentences)
    common_grams = find_common_subsequences(user_prompt, selected_sentences)

    masked_sentences = []
    masked_words = []
    masked_logits = []
    selected_sentences_list = list(selected_sentences.keys())

    # Create three masked variants of each accepted sentence, one per masking strategy
    for sentence in selected_sentences_list:
        # Mask non-stopword
        masked_sent, logits, words = mask_non_stopword(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)
        
        # Mask non-stopword pseudorandom
        masked_sent, logits, words = mask_non_stopword_pseudorandom(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)
        
        # High entropy words
        masked_sent, logits, words = high_entropy_words(sentence, common_grams)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

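    # Re-fill each masked sentence with four sampling techniques, so every
    # accepted sentence ends up with 12 sampled variants (3 masked x 4 techniques)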
    sampled_sentences = []
    for masked_sent, words, logits in zip(masked_sentences, masked_words, masked_logits):
        sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique='inverse_transform', temperature=1.0))
        sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique='exponential_minimum', temperature=1.0))
        sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique='temperature', temperature=1.0))
        sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique='greedy', temperature=1.0))

    # Predefined set of colors that are visible on a white background, excluding black
    colors = ["red", "blue", "brown", "green"]

    # Function to generate color from predefined set
    def select_color():
        return random.choice(colors)

    # Create highlight_info with selected colors
    highlight_info = [(word, select_color()) for _, word in common_grams]


    highlighted_user_prompt = highlight_common_words(common_grams, [user_prompt], "User Prompt (Highlighted and Numbered)")
    highlighted_accepted_sentences = highlight_common_words_dict(common_grams, selected_sentences, "Paraphrased Sentences")
    highlighted_discarded_sentences = highlight_common_words_dict(common_grams, discarded_sentences, "Discarded Sentences")

    # Initialize empty list to hold the trees
    trees = []

    # Initialize the indices for masked and sampled sentences
    masked_index = 0
    sampled_index = 0

    for sentence in selected_sentences_list:
        # Each accepted sentence produced 3 masked variants and 12 sampled variants,
        # so take the corresponding slices for this sentence
        next_masked_sentences = masked_sentences[masked_index:masked_index + 3]
        next_sampled_sentences = sampled_sentences[sampled_index:sampled_index + 12]
        
        # Create the tree for the current sentence
        tree = generate_subplot(sentence, next_masked_sentences, next_sampled_sentences, highlight_info)
        trees.append(tree)
        
        # Update the indices for the next iteration
        masked_index += 3
        sampled_index += 12


    # The interface defines exactly three tree tabs, so pad or trim the tree list
    # to three before returning all the outputs together
    trees = (trees + [None] * 3)[:3]
    return [highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + trees


with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# **AIISC Watermarking Model**")

    with gr.Row():
        user_input = gr.Textbox(label="User Prompt")

    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    with gr.Row():
        highlighted_user_prompt = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            with gr.TabItem("Paraphrased Sentences"):
                highlighted_accepted_sentences = gr.HTML()
            with gr.TabItem("Discarded Sentences"):
                highlighted_discarded_sentences = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            tree_tabs = []
            for i in range(3):  # One tab per tree; model() pads or trims its trees to match
                with gr.TabItem(f"Tree {i+1}"):
                    tree = gr.Plot()
                    tree_tabs.append(tree)

    submit_button.click(model, inputs=user_input, outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs)
    clear_button.click(lambda: "", inputs=None, outputs=user_input)
    # The clear handler must return one value per output component
    clear_button.click(
        lambda: ["", "", ""] + [None] * len(tree_tabs),
        inputs=None,
        outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs,
    )

# Launch the demo
demo.launch(share=True)