"""Gradio demo that generates text with a SynthID watermark using a Gemma model."""

import logging
import os  # noqa: F401  (kept: file-level import from the original module)

import gradio as gr
import torch
# `login` is intentionally no longer called (see initialize_model); the import
# is kept so the module's importable names are unchanged.
from huggingface_hub import login  # noqa: F401
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    SynthIDTextWatermarkingConfig,
)

logger = logging.getLogger(__name__)

DEFAULT_MODEL_NAME = "google/gemma-2b"

# Demo keys only: the UI notes below tell users to supply their own secure
# keys for production use.
WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
WATERMARK_NGRAM_LEN = 5

MAX_INPUT_TOKENS = 512  # prompts longer than this are truncated by the tokenizer
MAX_NEW_TOKENS = 100  # upper bound on tokens generated per request

INSTRUCTIONS_MD = """
### Instructions:
1. Enter your Hugging Face token and click Login
2. Wait for the model to initialize
3. Use the tabs to apply watermarks or analyze text

### Notes:
- The watermark is designed to be imperceptible to humans
- This demo only implements watermark application
- The official detector will be available in future releases
- For production use, use your own secure watermark keys
- Your token is never stored and is only used for model access
"""


def initialize_model(hf_token, model_name=DEFAULT_MODEL_NAME):
    """Load the tokenizer, model and watermarking config.

    Args:
        hf_token: Hugging Face access token, passed directly to the
            ``from_pretrained`` calls.
        model_name: Hub id of the causal LM to load.

    Returns:
        A ``(model, tokenizer, watermarking_config, message)`` tuple. On
        failure the first three items are ``None`` and ``message`` describes
        the error; this function does not raise.
    """
    if not hf_token or not hf_token.strip():
        return None, None, None, "Error initializing model: Hugging Face token is required."
    hf_token = hf_token.strip()

    try:
        # Build the config first: it is cheap, so a bad configuration is
        # reported before the expensive model download/load starts.
        watermarking_config = SynthIDTextWatermarkingConfig(
            keys=WATERMARK_KEYS,
            ngram_len=WATERMARK_NGRAM_LEN,
        )

        # The token is handed to each call explicitly instead of going through
        # huggingface_hub.login(), which would save it on this machine and
        # contradict the "never stored" promise shown in the UI.
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=hf_token,
            device_map="auto",  # let the library place weights on a GPU if available
        )
    except Exception as e:  # UI boundary: report the failure instead of crashing
        logger.exception("Model initialization failed")
        return None, None, None, f"Error initializing model: {e}"

    return model, tokenizer, watermarking_config, "Model initialized successfully!"


class SynthIDApp:
    """Holds the loaded model state and exposes the Gradio callbacks."""

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.watermarking_config = None

    def login(self, hf_token):
        """Initialize the model with ``hf_token`` and return a status message."""
        (
            self.model,
            self.tokenizer,
            self.watermarking_config,
            message,
        ) = initialize_model(hf_token)
        return message

    def apply_watermark(self, text):
        """Generate from ``text`` with the SynthID watermarking config enabled.

        Returns:
            A ``(output_text, status_message)`` tuple. On any error the
            original ``text`` is returned unchanged together with an error
            message. On success ``output_text`` is the decoded generation
            output (for causal LMs this is normally the prompt followed by
            up to ``MAX_NEW_TOKENS`` sampled tokens).
        """
        components = (self.model, self.tokenizer, self.watermarking_config)
        # Explicit None checks: truthiness of tokenizer/model objects is not a
        # reliable "is loaded" signal (e.g. tokenizers define __len__).
        if any(component is None for component in components):
            return text, "Error: Model not initialized. Please login first."

        if not text or not text.strip():
            return text, "Error: Please enter some text to watermark."

        try:
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=MAX_INPUT_TOKENS,
            )
            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    watermarking_config=self.watermarking_config,
                    do_sample=True,
                    max_new_tokens=MAX_NEW_TOKENS,
                    pad_token_id=self.tokenizer.eos_token_id,
                    temperature=0.7,
                    top_p=0.9,
                )

            watermarked_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:  # UI boundary: surface the error in the status box
            logger.exception("Watermark generation failed")
            return text, f"Error applying watermark: {e}"

        return watermarked_text, "Watermark applied successfully!"

    def analyze_text(self, text):
        """Return a short report with the word count and average word length."""
        try:
            words = (text or "").split()
            total_words = len(words)
            avg_word_length = sum(map(len, words)) / total_words if total_words else 0

            return (
                "Text Analysis:\n"
                f"- Total words: {total_words}\n"
                f"- Average word length: {avg_word_length:.2f}\n"
                "\n"
                "Note: This is a basic analysis. \n"
                "The official SynthID detector is not yet available in the public transformers package."
            )
        except Exception as e:  # UI boundary: never raise into the Gradio handler
            logger.exception("Text analysis failed")
            return f"Error analyzing text: {e}"


# Create Gradio interface
app_instance = SynthIDApp()

with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")

    # Login section
    with gr.Row():
        hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])

    with gr.Tab("Apply Watermark"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", lines=5)
            output_text = gr.Textbox(label="Watermarked Text", lines=5)
        status = gr.Textbox(label="Status")
        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            app_instance.apply_watermark,
            inputs=[input_text],
            outputs=[output_text, status],
        )

    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(
            app_instance.analyze_text,
            inputs=[analyze_input],
            outputs=[analyze_result],
        )

    gr.Markdown(INSTRUCTIONS_MD)

# Launch the app
if __name__ == "__main__":
    app.launch()