import nltk
import random
import gradio as gr

from tree import generate_subplot
from paraphraser import generate_paraphrase
from lcs import find_common_subsequences
from highlighter import highlight_common_words, highlight_common_words_dict
from entailment import analyze_entailment
from masking_methods import mask_non_stopword, mask_non_stopword_pseudorandom, high_entropy_words
from sampling_methods import sample_word

# The stopword corpus is required by the masking helpers.
nltk.download('stopwords')

# Predefined set of colors that are visible on a white background, excluding black
COLORS = ["red", "blue", "brown", "green"]


def select_color():
    """Pick a highlight color from the predefined set."""
    return random.choice(COLORS)


def model(prompt):
    """Run the watermarking pipeline for the Gradio interface."""
    user_prompt = prompt

    # Paraphrase the prompt, then keep only paraphrases whose entailment
    # score against the original clears the 0.7 threshold.
    paraphrased_sentences = generate_paraphrase(user_prompt)
    analyzed_paraphrased_sentences, selected_sentences, discarded_sentences = analyze_entailment(
        user_prompt, paraphrased_sentences, 0.7
    )

    common_grams = find_common_subsequences(user_prompt, selected_sentences)

    # Apply the three masking strategies to every accepted sentence.
    masked_sentences = []
    masked_words = []
    masked_logits = []
    for sentence in selected_sentences:
        # Mask non-stopword
        masked_sent, logits, words = mask_non_stopword(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

        # Mask non-stopword pseudorandom
        masked_sent, logits, words = mask_non_stopword_pseudorandom(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

        # High entropy words
        masked_sent, logits, words = high_entropy_words(sentence, common_grams)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

    # Fill each masked sentence with all four sampling strategies.
    sampled_sentences = []
    for masked_sent, words, logits in zip(masked_sentences, masked_words, masked_logits):
        for technique in ('inverse_transform', 'exponential_minimum', 'temperature', 'greedy'):
            sampled_sentences.append(
                sample_word(masked_sent, words, logits, sampling_technique=technique, temperature=1.0)
            )

    # Assign a color to every common n-gram for highlighting.
    highlight_info = [(word, select_color()) for _, word in common_grams]

    highlighted_user_prompt = highlight_common_words(
        common_grams, [user_prompt], "User Prompt (Highlighted and Numbered)"
    )
    highlighted_accepted_sentences = highlight_common_words_dict(
        common_grams, selected_sentences, "Paraphrased Sentences"
    )
    highlighted_discarded_sentences = highlight_common_words_dict(
        common_grams, discarded_sentences, "Discarded Sentences"
    )

    # Build one tree plot per accepted sentence: each consumes the next
    # 3 masked sentences and the 12 samples derived from them.
    trees = []
    masked_index = 0
    sampled_index = 0
    for sentence in selected_sentences:
        next_masked_sentences = masked_sentences[masked_index:masked_index + 3]
        next_sampled_sentences = sampled_sentences[sampled_index:sampled_index + 12]
        tree = generate_subplot(sentence, next_masked_sentences, next_sampled_sentences, highlight_info)
        trees.append(tree)
        masked_index += 3
        sampled_index += 12

    # The interface exposes exactly three tree tabs, so pad (or trim) the
    # tree list to match the number of output components.
    trees = (trees + [None] * 3)[:3]

    # Return all the outputs together
    return [highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + trees


with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# **AIISC Watermarking Model**")

    with gr.Row():
        user_input = gr.Textbox(label="User Prompt")

    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    with gr.Row():
        highlighted_user_prompt = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            with gr.TabItem("Paraphrased Sentences"):
                highlighted_accepted_sentences = gr.HTML()
            with gr.TabItem("Discarded Sentences"):
                highlighted_discarded_sentences = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            tree_tabs = []
            for i in range(3):  # Adjust this range according to the number of trees
                with gr.TabItem(f"Tree {i + 1}"):
                    tree = gr.Plot()
                    tree_tabs.append(tree)

    submit_button.click(
        model,
        inputs=user_input,
        outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs,
    )

    clear_button.click(lambda: "", inputs=None, outputs=user_input)
    # Clearing must return one value per output component, not a single string:
    # empty HTML for the three text panes and None for each plot.
    clear_button.click(
        lambda: ["", "", ""] + [None] * len(tree_tabs),
        inputs=None,
        outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs,
    )

# Launch the demo
demo.launch(share=True)
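
# A minimal smoke test of the pipeline without the UI: a hypothetical usage
# sketch (the sample prompt is illustrative). `model` returns three HTML
# strings followed by the tree plots. Uncomment to run it in place of the
# interface above.
#
# outputs = model("The quick brown fox jumps over the lazy dog")
# highlighted_prompt_html = outputs[0]  # annotated user prompt
# tree_plots = outputs[3:]              # up to three tree figures
# print(highlighted_prompt_html)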