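"""Gradio demo for the AIISC Watermarking Model.

Given a user prompt, the app paraphrases it, filters the paraphrases with an
entailment check, masks and re-samples words in the accepted paraphrases, and
displays the results as highlighted text and per-sentence tree plots.
"""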
# Standard library
import itertools
import random
import re
import textwrap
from pprint import pprint

# Third-party
import gradio as gr
import ipywidgets as widgets
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import requests
from IPython.display import display, HTML
from matplotlib.colors import ListedColormap, rgb2hex
from PIL import Image
from tenacity import retry
from termcolor import colored
from tqdm import tqdm

# NLP and model libraries
import nltk
from nltk.corpus import stopwords
from nltk.translate.bleu_score import sentence_bleu
from transformers import (
    AutoModelForMaskedLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BertModel,
    BertTokenizer,
    GPT2LMHeadModel,
    pipeline,
)

# Make the stopword corpus available before importing the local masking helpers
nltk.download('stopwords')

# Local modules
from tree import generate_subplot
from paraphraser import generate_paraphrase
from lcs import find_common_subsequences
from highlighter import highlight_common_words, highlight_common_words_dict
from entailment import analyze_entailment
from masking_methods import mask_non_stopword, mask_non_stopword_pseudorandom, high_entropy_words
from sampling_methods import sample_word


# Function for the Gradio interface
def model(prompt):
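    """Run the watermarking pipeline for a single user prompt.

    The prompt is paraphrased and the paraphrases are filtered with an
    entailment check (threshold 0.7). Common subsequences shared between the
    prompt and the accepted paraphrases are located, each accepted sentence is
    masked with three strategies, and every masked sentence is re-filled with
    four sampling techniques. Returns highlighted HTML for the prompt and the
    accepted and discarded paraphrases, plus tree plots for the accepted
    sentences (padded or trimmed to the three tree tabs in the interface).
    """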
    user_prompt = prompt
    paraphrased_sentences = generate_paraphrase(user_prompt)
    analyzed_paraphrased_sentences, selected_sentences, discarded_sentences = analyze_entailment(user_prompt, paraphrased_sentences, 0.7)
    length_accepted_sentences = len(selected_sentences)
    common_grams = find_common_subsequences(user_prompt, selected_sentences)

    masked_sentences = []
    masked_words = []
    masked_logits = []
    selected_sentences_list = list(selected_sentences.keys())

    # Create three masked variants of each accepted sentence, one per masking strategy
    for sentence in selected_sentences_list:
        # Mask non-stopword
        masked_sent, logits, words = mask_non_stopword(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)
        
        # Mask non-stopword pseudorandom
        masked_sent, logits, words = mask_non_stopword_pseudorandom(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)
        
        # High entropy words
        masked_sent, logits, words = high_entropy_words(sentence, common_grams)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

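    # Re-fill each masked sentence with four sampling techniques, so every
    # accepted sentence ends up with 12 sampled variants (3 masked x 4 techniques)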
    sampled_sentences = []
    for masked_sent, words, logits in zip(masked_sentences, masked_words, masked_logits):
        sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique='inverse_transform', temperature=1.0))
        sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique='exponential_minimum', temperature=1.0))
        sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique='temperature', temperature=1.0))
        sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique='greedy', temperature=1.0))

    # Predefined set of colors that are visible on a white background, excluding black
    colors = ["red", "blue", "brown", "green"]

    # Function to generate color from predefined set
    def select_color():
        return random.choice(colors)

    # Create highlight_info with selected colors
    highlight_info = [(word, select_color()) for _, word in common_grams]


    highlighted_user_prompt = highlight_common_words(common_grams, [user_prompt], "User Prompt (Highlighted and Numbered)")
    highlighted_accepted_sentences = highlight_common_words_dict(common_grams, selected_sentences, "Paraphrased Sentences")
    highlighted_discarded_sentences = highlight_common_words_dict(common_grams, discarded_sentences, "Discarded Sentences")

    # Initialize empty list to hold the trees
    trees = []

    # Initialize the indices for masked and sampled sentences
    masked_index = 0
    sampled_index = 0

    for sentence in selected_sentences_list:
        # Each accepted sentence produced 3 masked variants and 12 sampled variants,
        # so take the corresponding slices for this sentence
        next_masked_sentences = masked_sentences[masked_index:masked_index + 3]
        next_sampled_sentences = sampled_sentences[sampled_index:sampled_index + 12]
        
        # Create the tree for the current sentence
        tree = generate_subplot(sentence, next_masked_sentences, next_sampled_sentences, highlight_info)
        trees.append(tree)
        
        # Update the indices for the next iteration
        masked_index += 3
        sampled_index += 12


    # The interface defines exactly three tree tabs, so pad or trim the tree list
    # to three before returning all the outputs together
    trees = (trees + [None] * 3)[:3]
    return [highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + trees


with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# **AIISC Watermarking Model**")

    with gr.Row():
        user_input = gr.Textbox(label="User Prompt")

    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    with gr.Row():
        highlighted_user_prompt = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            with gr.TabItem("Paraphrased Sentences"):
                highlighted_accepted_sentences = gr.HTML()
            with gr.TabItem("Discarded Sentences"):
                highlighted_discarded_sentences = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            tree_tabs = []
            for i in range(3):  # One tab per tree; model() pads or trims its trees to match
                with gr.TabItem(f"Tree {i+1}"):
                    tree = gr.Plot()
                    tree_tabs.append(tree)

    submit_button.click(model, inputs=user_input, outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs)
    clear_button.click(lambda: "", inputs=None, outputs=user_input)
    # The clear handler must return one value per output component
    clear_button.click(
        lambda: ["", "", ""] + [None] * len(tree_tabs),
        inputs=None,
        outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs,
    )

# Launch the demo
demo.launch(share=True)