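"""Gradio demo for the AIISC Watermarking Model.

The pipeline paraphrases a user prompt, keeps the paraphrases that entail the
original (entailment threshold 0.7), masks words in each accepted sentence
with three strategies, re-fills the masks with four sampling techniques, and
renders highlighted HTML plus one tree plot per accepted sentence.
"""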
import random

import gradio as gr
import nltk

nltk.download('stopwords')

from tree import generate_subplot
from paraphraser import generate_paraphrase
from lcs import find_common_subsequences
from highlighter import highlight_common_words, highlight_common_words_dict
from entailment import analyze_entailment
from masking_methods import mask_non_stopword, mask_non_stopword_pseudorandom, high_entropy_words
from sampling_methods import sample_word
# Function for the Gradio interface
def model(prompt):
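    """Run the full watermarking pipeline on `prompt`.

    Returns a list of three HTML strings (highlighted prompt, accepted
    paraphrases, discarded paraphrases) followed by three tree plots,
    matching the output components wired up in the Gradio UI below.
    """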
    user_prompt = prompt

    # Paraphrase the prompt, then keep only the paraphrases whose entailment
    # score against the original is at least 0.7.
    paraphrased_sentences = generate_paraphrase(user_prompt)
    analyzed_paraphrased_sentences, selected_sentences, discarded_sentences = analyze_entailment(user_prompt, paraphrased_sentences, 0.7)

    # Word sequences shared between the prompt and the accepted paraphrases.
    common_grams = find_common_subsequences(user_prompt, selected_sentences)
    # Mask each accepted sentence with three strategies, recording the masked
    # sentence, the candidate words, and their logits for the sampling step.
    masked_sentences = []
    masked_words = []
    masked_logits = []
    selected_sentences_list = list(selected_sentences.keys())
    for sentence in selected_sentences_list:
        # 1. Mask a non-stopword
        masked_sent, logits, words = mask_non_stopword(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

        # 2. Mask a non-stopword chosen pseudorandomly
        masked_sent, logits, words = mask_non_stopword_pseudorandom(sentence)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)

        # 3. Mask high-entropy words, given the common n-grams
        masked_sent, logits, words = high_entropy_words(sentence, common_grams)
        masked_sentences.append(masked_sent)
        masked_words.append(words)
        masked_logits.append(logits)
    # Fill the masks of every masked sentence with each of the four sampling
    # techniques: 3 maskings x 4 samplers = 12 sampled sentences per sentence.
    sampling_techniques = ('inverse_transform', 'exponential_minimum', 'temperature', 'greedy')
    sampled_sentences = []
    for masked_sent, words, logits in zip(masked_sentences, masked_words, masked_logits):
        for technique in sampling_techniques:
            sampled_sentences.append(sample_word(masked_sent, words, logits, sampling_technique=technique, temperature=1.0))
    # Colors that stay visible on a white background (black excluded); each
    # common n-gram gets one at random for highlighting.
    colors = ["red", "blue", "brown", "green"]

    def select_color():
        return random.choice(colors)

    highlight_info = [(word, select_color()) for _, word in common_grams]

    highlighted_user_prompt = highlight_common_words(common_grams, [user_prompt], "User Prompt (Highlighted and Numbered)")
    highlighted_accepted_sentences = highlight_common_words_dict(common_grams, selected_sentences, "Paraphrased Sentences")
    highlighted_discarded_sentences = highlight_common_words_dict(common_grams, discarded_sentences, "Discarded Sentences")
    # Build one tree plot per accepted sentence, consuming the 3 masked and
    # 12 sampled sentences that belong to it.
    trees = []
    masked_index = 0
    sampled_index = 0
    for sentence in selected_sentences:
        next_masked_sentences = masked_sentences[masked_index:masked_index + 3]
        next_sampled_sentences = sampled_sentences[sampled_index:sampled_index + 12]
        trees.append(generate_subplot(sentence, next_masked_sentences, next_sampled_sentences, highlight_info))
        masked_index += 3
        sampled_index += 12

    # The UI below exposes exactly three tree tabs, so pad (or trim) the list
    # to three entries; None leaves the corresponding plot empty.
    trees = (trees + [None] * 3)[:3]

    # Return all the outputs together
    return [highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + trees
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# **AIISC Watermarking Model**")

    with gr.Row():
        user_input = gr.Textbox(label="User Prompt")

    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    with gr.Row():
        highlighted_user_prompt = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            with gr.TabItem("Paraphrased Sentences"):
                highlighted_accepted_sentences = gr.HTML()
            with gr.TabItem("Discarded Sentences"):
                highlighted_discarded_sentences = gr.HTML()

    with gr.Row():
        with gr.Tabs():
            tree_tabs = []
            for i in range(3):  # One tab per tree; keep in sync with the padding in model()
                with gr.TabItem(f"Tree {i+1}"):
                    tree_tabs.append(gr.Plot())

    submit_button.click(
        model,
        inputs=user_input,
        outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs,
    )
    clear_button.click(lambda: "", inputs=None, outputs=user_input)
    # A handler must return one value per output component, so clear all six.
    clear_button.click(
        lambda: ["", "", "", None, None, None],
        inputs=None,
        outputs=[highlighted_user_prompt, highlighted_accepted_sentences, highlighted_discarded_sentences] + tree_tabs,
    )
# Launch the demo
demo.launch(share=True)
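# share=True additionally requests a temporary public gradio.live link; a plain
# demo.launch() is enough when the app is hosted on Hugging Face Spaces.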