Spaces:

WordLift
/

synthID

Sleeping

App Files Files Community

synthID / app.py

cyberandy

Update app.py

a9e6964 verified 9 months ago

raw

history blame

5.24 kB

	import gradio as gr
	import torch
	import os
	from transformers import (
	AutoModelForCausalLM,
	AutoTokenizer,
	SynthIDTextWatermarkingConfig,
	)
	from huggingface_hub import login

	def initialize_model(hf_token):
	    """Load the model, its tokenizer, and a SynthID watermarking config.

	    The token is passed directly to the ``from_pretrained`` calls. It is
	    deliberately NOT handed to ``huggingface_hub.login``, because that call
	    saves the token on the host machine, which would contradict the UI's
	    statement that the token is never stored.

	    Args:
	        hf_token: Hugging Face access token used to download the model.
	            Leading and trailing whitespace is ignored.

	    Returns:
	        A 4-tuple ``(model, tokenizer, watermarking_config, message)``.
	        On failure the first three items are ``None`` and ``message``
	        starts with ``"Error initializing model:"``.
	    """
	    # Reject missing/blank tokens up front instead of attempting a download
	    # that can only fail with a less helpful error.
	    token = hf_token.strip() if isinstance(hf_token, str) else ""
	    if not token:
	        return None, None, None, "Error initializing model: Hugging Face token is required."

	    try:
	        model_name = "google/gemma-2b"
	        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
	        model = AutoModelForCausalLM.from_pretrained(
	            model_name,
	            token=token,
	            device_map="auto",  # place the model on a GPU automatically if one is available
	        )

	        # Demo watermarking keys; production deployments should use their
	        # own secret keys.
	        watermark_keys = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
	        # SynthIDTextWatermarkingConfig has no ``gamma`` parameter; passing
	        # one raises TypeError, so only the supported arguments are given.
	        watermarking_config = SynthIDTextWatermarkingConfig(
	            keys=watermark_keys,
	            ngram_len=5,
	        )
	    except Exception as e:
	        # Top-level boundary for the UI: report the failure as a status
	        # message rather than crashing the callback.
	        return None, None, None, f"Error initializing model: {e}"

	    return model, tokenizer, watermarking_config, "Model initialized successfully!"

	class SynthIDApp:
	    """Hold the loaded model state and provide the UI callback methods.

	    The model, tokenizer and watermarking config start as ``None`` and are
	    filled in by :meth:`login`.
	    """

	    # Prompts longer than this many tokens are truncated before generation.
	    MAX_INPUT_TOKENS = 512
	    # Number of tokens generated after the prompt.
	    MAX_NEW_TOKENS = 100

	    def __init__(self):
	        self.model = None
	        self.tokenizer = None
	        self.watermarking_config = None

	    def login(self, hf_token):
	        """Initialize the model with ``hf_token`` and return a status message.

	        The model, tokenizer and watermarking config are replaced by whatever
	        ``initialize_model`` returns, including ``None`` values on failure.
	        """
	        (
	            self.model,
	            self.tokenizer,
	            self.watermarking_config,
	            message,
	        ) = initialize_model(hf_token)
	        return message

	    def _is_ready(self):
	        """Return True when model, tokenizer and config have all been set."""
	        # Explicit ``is None`` checks: truthiness of arbitrary objects can be
	        # False for a loaded object (e.g. one defining ``__len__``).
	        return (
	            self.model is not None
	            and self.tokenizer is not None
	            and self.watermarking_config is not None
	        )

	    def apply_watermark(self, text):
	        """Generate a watermarked continuation of ``text``.

	        Args:
	            text: Prompt text; it is truncated to ``MAX_INPUT_TOKENS`` tokens.

	        Returns:
	            A tuple ``(output_text, status_message)``. ``output_text`` is the
	            decoded model output on success and the unchanged input ``text``
	            on any error.
	        """
	        if not self._is_ready():
	            return text, "Error: Model not initialized. Please login first."

	        # Nothing to continue from: report it instead of sampling from an
	        # empty prompt.
	        if not isinstance(text, str) or not text.strip():
	            return text, "Error: Please enter some text to watermark."

	        try:
	            encoded = self.tokenizer(
	                text,
	                return_tensors="pt",
	                truncation=True,
	                max_length=self.MAX_INPUT_TOKENS,
	            )
	            # Move every input tensor to the device the model lives on.
	            encoded = {
	                name: tensor.to(self.model.device)
	                for name, tensor in encoded.items()
	            }

	            with torch.no_grad():
	                outputs = self.model.generate(
	                    **encoded,
	                    watermarking_config=self.watermarking_config,
	                    do_sample=True,
	                    # Same budget as "prompt length + 100" but stated directly.
	                    max_new_tokens=self.MAX_NEW_TOKENS,
	                    pad_token_id=self.tokenizer.eos_token_id,
	                    temperature=0.7,
	                    top_p=0.9,
	                )

	            watermarked_text = self.tokenizer.decode(
	                outputs[0], skip_special_tokens=True
	            )
	        except Exception as e:
	            # UI boundary: surface the failure as a status message.
	            return text, f"Error applying watermark: {e}"

	        return watermarked_text, "Watermark applied successfully!"

	    def analyze_text(self, text):
	        """Return a short report with the word count and average word length.

	        Words are whitespace-separated runs of characters. ``None`` is
	        treated like an empty string. Any other input without a ``split``
	        method yields an ``"Error analyzing text: ..."`` message.
	        """
	        try:
	            words = (text or "").split()
	        except AttributeError as e:
	            return f"Error analyzing text: {e}"

	        total_words = len(words)
	        avg_word_length = (
	            sum(len(word) for word in words) / total_words if total_words else 0
	        )

	        # Joined explicitly so the report text does not depend on how this
	        # source file happens to be indented.
	        return "\n".join(
	            [
	                "Text Analysis:",
	                f"- Total words: {total_words}",
	                f"- Average word length: {avg_word_length:.2f}",
	                "",
	                "Note: This is a basic analysis. The official SynthID detector "
	                "is not yet available in the public transformers package.",
	            ]
	        )

	# Create Gradio interface
	# One SynthIDApp instance is created at import time; its bound methods are
	# the callbacks for every button below, so all callbacks share its state.
	app_instance = SynthIDApp()

	# NOTE(review): indentation was not preserved in the copy this was edited
	# from; which components sit inside each Row/Tab below is reconstructed
	# from the usual Gradio pattern -- confirm against the rendered layout.
	with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
	    gr.Markdown("# SynthID Text Watermarking Tool")

	    # Login section
	    with gr.Row():
	        hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
	        login_status = gr.Textbox(label="Login Status")
	    login_btn = gr.Button("Login")
	    # The message returned by SynthIDApp.login is shown in "Login Status".
	    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])

	    with gr.Tab("Apply Watermark"):
	        with gr.Row():
	            input_text = gr.Textbox(label="Input Text", lines=5)
	            output_text = gr.Textbox(label="Watermarked Text", lines=5)
	        status = gr.Textbox(label="Status")
	        apply_btn = gr.Button("Apply Watermark")
	        # apply_watermark returns (text, status message), matching the two outputs.
	        apply_btn.click(app_instance.apply_watermark, inputs=[input_text], outputs=[output_text, status])

	    with gr.Tab("Analyze Text"):
	        with gr.Row():
	            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
	            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
	        analyze_btn = gr.Button("Analyze Text")
	        # analyze_text returns a single report string.
	        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])

	    # Static usage instructions and caveats shown below the tabs.
	    gr.Markdown("""
	### Instructions:
	1. Enter your Hugging Face token and click Login
	2. Wait for the model to initialize
	3. Use the tabs to apply watermarks or analyze text

	### Notes:
	- The watermark is designed to be imperceptible to humans
	- This demo only implements watermark application
	- The official detector will be available in future releases
	- For production use, use your own secure watermark keys
	- Your token is never stored and is only used for model access
	""")

	# Launch the app
	# Only start the server when this file is run as a script, not on import.
	if __name__ == "__main__":
	    app.launch()