Spaces:

WordLift
/

synthID

Sleeping

App Files Community

cyberandy committed on Oct 25, 2024

Commit

a9e6964

verified ·

1 Parent(s): a30b4fd

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -51

app.py CHANGED Viewed

@@ -1,72 +1,107 @@
 import gradio as gr
 import torch
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
     SynthIDTextWatermarkingConfig,
 )
-# Initialize model and tokenizer
-MODEL_NAME = "google/gemma-2-2b"  # You can change this to your preferred model
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-# Configure watermarking
-WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]  # Example keys
-watermarking_config = SynthIDTextWatermarkingConfig(
-    keys=WATERMARK_KEYS,
-    ngram_len=5,
-    gamma=0.5,  # Additional parameter to control watermark strength
-)
-def apply_watermark(text):
-    """Apply SynthID watermark to input text."""
     try:
-        # Tokenize input
-        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
-        # Generate with watermark
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                watermarking_config=watermarking_config,
-                do_sample=True,
-                max_length=len(inputs["input_ids"][0]) + 100,  # Add some extra tokens
-                pad_token_id=tokenizer.eos_token_id,
-                temperature=0.7,  # Add some randomness to generation
-                top_p=0.9
-            )
-        # Decode output
-        watermarked_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return watermarked_text, "Watermark applied successfully!"
     except Exception as e:
-        return text, f"Error applying watermark: {str(e)}"
-def analyze_text(text):
-    """Analyze text characteristics that might indicate watermarking."""
-    try:
-        # Basic text analysis (since we don't have access to the detector yet)
-        total_words = len(text.split())
-        avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
-        # Create analysis report
-        analysis = f"""Text Analysis:
 - Total words: {total_words}
 - Average word length: {avg_word_length:.2f}
-Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.
-For proper watermark detection, please refer to the official Google DeepMind implementation when it becomes available."""
-        return analysis
-    except Exception as e:
-        return f"Error analyzing text: {str(e)}"
 # Create Gradio interface
 with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     gr.Markdown("# SynthID Text Watermarking Tool")
-    gr.Markdown("""This demo shows how to apply SynthID watermarks to text.
-                Note: The official detector is not yet publicly available.""")
     with gr.Tab("Apply Watermark"):
         with gr.Row():
@@ -74,21 +109,27 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
             output_text = gr.Textbox(label="Watermarked Text", lines=5)
             status = gr.Textbox(label="Status")
         apply_btn = gr.Button("Apply Watermark")
-        apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
     with gr.Tab("Analyze Text"):
         with gr.Row():
             analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
             analyze_result = gr.Textbox(label="Analysis Result", lines=5)
         analyze_btn = gr.Button("Analyze Text")
-        analyze_btn.click(analyze_text, inputs=[analyze_input], outputs=[analyze_result])
     gr.Markdown("""
     ### Notes:
     - The watermark is designed to be imperceptible to humans
     - This demo only implements watermark application
     - The official detector will be available in future releases
     - For production use, use your own secure watermark keys
     """)
 # Launch the app

 import gradio as gr
 import torch
+import os
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
     SynthIDTextWatermarkingConfig,
 )
+from huggingface_hub import login
+def initialize_model(hf_token):
+    """Initialize the model and tokenizer with authentication."""
     try:
+        # Login to Hugging Face
+        login(token=hf_token)
+        # Initialize model and tokenizer with auth token
+        MODEL_NAME = "google/gemma-2b"
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            token=hf_token,
+            device_map="auto"  # This will automatically handle GPU if available
+        )
+        # Configure watermarking
+        WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
+        watermarking_config = SynthIDTextWatermarkingConfig(
+            keys=WATERMARK_KEYS,
+            ngram_len=5,
+            gamma=0.5,
+        )
+        return model, tokenizer, watermarking_config, "Model initialized successfully!"
     except Exception as e:
+        return None, None, None, f"Error initializing model: {str(e)}"
+class SynthIDApp:
+    def __init__(self):
+        self.model = None
+        self.tokenizer = None
+        self.watermarking_config = None
+    def login(self, hf_token):
+        """Login and initialize the model."""
+        self.model, self.tokenizer, self.watermarking_config, message = initialize_model(hf_token)
+        return message
+    def apply_watermark(self, text):
+        """Apply SynthID watermark to input text."""
+        if not all([self.model, self.tokenizer, self.watermarking_config]):
+            return text, "Error: Model not initialized. Please login first."
+        try:
+            # Tokenize input
+            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+            # Generate with watermark
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    **inputs,
+                    watermarking_config=self.watermarking_config,
+                    do_sample=True,
+                    max_length=len(inputs["input_ids"][0]) + 100,
+                    pad_token_id=self.tokenizer.eos_token_id,
+                    temperature=0.7,
+                    top_p=0.9
+                )
+            # Decode output
+            watermarked_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            return watermarked_text, "Watermark applied successfully!"
+        except Exception as e:
+            return text, f"Error applying watermark: {str(e)}"
+    def analyze_text(self, text):
+        """Analyze text characteristics."""
+        try:
+            total_words = len(text.split())
+            avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
+            analysis = f"""Text Analysis:
 - Total words: {total_words}
 - Average word length: {avg_word_length:.2f}
+Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package."""
+            return analysis
+        except Exception as e:
+            return f"Error analyzing text: {str(e)}"
 # Create Gradio interface
+app_instance = SynthIDApp()
 with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     gr.Markdown("# SynthID Text Watermarking Tool")
+    # Login section
+    with gr.Row():
+        hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
+        login_status = gr.Textbox(label="Login Status")
+    login_btn = gr.Button("Login")
+    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])
     with gr.Tab("Apply Watermark"):
         with gr.Row():
             output_text = gr.Textbox(label="Watermarked Text", lines=5)
             status = gr.Textbox(label="Status")
         apply_btn = gr.Button("Apply Watermark")
+        apply_btn.click(app_instance.apply_watermark, inputs=[input_text], outputs=[output_text, status])
     with gr.Tab("Analyze Text"):
         with gr.Row():
             analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
             analyze_result = gr.Textbox(label="Analysis Result", lines=5)
         analyze_btn = gr.Button("Analyze Text")
+        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])
     gr.Markdown("""
+    ### Instructions:
+    1. Enter your Hugging Face token and click Login
+    2. Wait for the model to initialize
+    3. Use the tabs to apply watermarks or analyze text
     ### Notes:
     - The watermark is designed to be imperceptible to humans
     - This demo only implements watermark application
     - The official detector will be available in future releases
     - For production use, use your own secure watermark keys
+    - Your token is never stored and is only used for model access
     """)
 # Launch the app