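"""Gradio demo for the AIISC Watermarking Model.

The pipeline paraphrases a user prompt, keeps the paraphrases that entail the
original (entailment threshold 0.7), masks words in each accepted sentence
with three strategies, re-fills the masks with four sampling techniques, and
renders highlighted HTML plus one tree plot per accepted sentence.
"""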
import random

import gradio as gr
import nltk

nltk.download('stopwords')

from tree import generate_subplot
from paraphraser import generate_paraphrase
from lcs import find_common_subsequences
from highlighter import highlight_common_words, highlight_common_words_dict
from entailment import analyze_entailment
from masking_methods import mask_non_stopword, mask_non_stopword_pseudorandom, high_entropy_words
from sampling_methods import sample_word
# Function for the Gradio interface
def model(prompt):
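    """Run the full watermarking pipeline on `prompt`.

    Returns a list of three HTML strings (highlighted prompt, accepted
    paraphrases, discarded paraphrases) followed by three tree plots,
    matching the output components wired up in the Gradio UI below.
    """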
    user_prompt = prompt

    # Paraphrase the prompt, then keep only the paraphrases whose entailment
    # score against the original is at least 0.7.
    paraphrased_sentences = generate_paraphrase(user_prompt)
    analyzed_paraphrased_sentences, selected_sentences, discarded_sentences = analyze_entailment(user_prompt, paraphrased_sentences, 0.7)

    # Word sequences shared between the prompt and the accepted paraphrases.
    common_grams = find_common_subsequences(user_prompt, selected_sentences)
    # Mask each accepted sentence with three strategies, recording the masked
    # sentence, the candidate words, and their logits for the sampling step.
    masked_sentences = []
    masked_words = []
    masked_logits = []
    selected_sentences_list = list(selected_sentences.keys())
    for sentence in selected_sentences_list:
        # 1. Mask a non-stopword
        masked_sent, logits, words = mask_non_stopword(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

        # 2. Mask a non-stopword chosen pseudorandomly
        masked_sent, logits, words = mask_non_stopword_pseudorandom(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

        # 3. Mask high-entropy words, given the common n-grams
        masked_sent, logits, words = high_entropy_words(sentence, common_grams)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)
    # Fill the masks of every masked sentence with each of the four sampling
    # techniques: 3 maskings x 4 samplers = 12 sampled sentences per sentence.
    sampling_techniques = ('inverse_transform', 'exponential_minimum', 'temperature', 'greedy')
    sampled_sentences = []
    for masked_sent, words, logits in zip(masked_sentences, masked_words, masked_logits):
        for technique in sampling_techniques:
            sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique=technique, temperature=1.0))
    # Colors that stay visible on a white background (black excluded); each
    # common n-gram gets one at random for highlighting.
    colors = ["red", "blue", "brown", "green"]

    def select_color():
        return random.choice(colors)

    highlight_info = [(word, select_color()) for _, word in common_grams]

    highlighted_user_prompt = highlight_common_words(common_grams, [user_prompt], "User Prompt (Highlighted and Numbered)")
    highlighted_accepted_sentences = highlight_common_words_dict(common_grams, selected_sentences, "Paraphrased Sentences")
    highlighted_discarded_sentences = highlight_common_words_dict(common_grams, discarded_sentences, "Discarded Sentences")
    # Build one tree plot per accepted sentence, consuming the 3 masked and
    # 12 sampled sentences that belong to it.
    trees = []
    masked_index = 0
    sampled_index = 0
    for sentence in selected_sentences:
        next_masked_sentences = masked_sentences[masked_index:masked_index + 3]
        next_sampled_sentences = sampled_sentences[sampled_index:sampled_index + 12]
        trees.append(generate_subplot(sentence, next_masked_sentences, next_sampled_sentences, highlight_info))
        masked_index += 3
        sampled_index += 12

    # The UI below exposes exactly three tree tabs, so pad (or trim) the list
    # to three entries; None leaves the corresponding plot empty.
    trees = (trees + [None] * 3)[:3]

    # Return all the outputs together
    return [highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + trees
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# **AIISC Watermarking Model**")

    with gr.Row():
        user_input = gr.Textbox(label="User Prompt")

    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    with gr.Row():
        highlighted_user_prompt = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            with gr.TabItem("Paraphrased Sentences"):
                highlighted_accepted_sentences = gr.HTML()
            with gr.TabItem("Discarded Sentences"):
                highlighted_discarded_sentences = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            tree_tabs = []
            for i in range(3):  # One tab per tree; keep in sync with the padding in model()
                with gr.TabItem(f"Tree {i+1}"):
                    tree_tabs.append(gr.Plot())

    submit_button.click(
        model,
        inputs=user_input,
        outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs,
    )
    clear_button.click(lambda: "", inputs=None, outputs=user_input)
    # A handler must return one value per output component, so clear all six.
    clear_button.click(
        lambda: ["", "", "", None, None, None],
        inputs=None,
        outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs,
    )
# Launch the demo
demo.launch(share=True)
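# share=True additionally requests a temporary public gradio.live link; a plain
# demo.launch() is enough when the app is hosted on Hugging Face Spaces.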