# Hugging Face Space: WordLift / synthID
# Status: Sleeping
# File size: 5,241 Bytes

import gradio as gr
import torch
import os
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    SynthIDTextWatermarkingConfig,
)
from huggingface_hub import login

def initialize_model(hf_token):
    """Load the Gemma model, its tokenizer and a SynthID watermarking config.

    Args:
        hf_token: Hugging Face access token used to download the
            ``google/gemma-2b`` checkpoint. Surrounding whitespace is ignored.

    Returns:
        A 4-tuple ``(model, tokenizer, watermarking_config, message)``.
        On failure the first three items are ``None`` and ``message``
        describes the problem; this function does not raise.
    """
    # Reject a missing or blank token before attempting any Hub access so the
    # user gets a clear message instead of an opaque authentication failure.
    token = hf_token.strip() if isinstance(hf_token, str) else ""
    if not token:
        return None, None, None, "Error initializing model: Hugging Face token is required."

    try:
        model_name = "google/gemma-2b"

        # The token is passed explicitly to each download call instead of
        # calling huggingface_hub.login(): login() persists the token on the
        # host, which would contradict the UI's promise that the token is
        # never stored, and it is redundant when token= is supplied here.
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=token,
            device_map="auto",  # let the library place weights on GPU if present
        )

        # Demo watermark keys only; production deployments should use their
        # own secret keys.
        watermark_keys = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
        # NOTE: SynthIDTextWatermarkingConfig has no ``gamma`` parameter;
        # passing one raises TypeError, so only keys and ngram_len are set.
        watermarking_config = SynthIDTextWatermarkingConfig(
            keys=watermark_keys,
            ngram_len=5,
        )

        return model, tokenizer, watermarking_config, "Model initialized successfully!"
    except Exception as e:
        # UI boundary: report the failure as a status string rather than
        # crashing the Gradio callback.
        return None, None, None, f"Error initializing model: {str(e)}"

class SynthIDApp:
    """Hold the loaded model state and expose the callbacks used by the UI.

    ``model``, ``tokenizer`` and ``watermarking_config`` are ``None`` until
    :meth:`login` has completed successfully.
    """

    # Upper bound on prompt tokens fed to the model (longer input is truncated).
    MAX_PROMPT_TOKENS = 512
    # Number of tokens generated after the prompt.
    MAX_NEW_TOKENS = 100

    def __init__(self):
        # Filled in by login(); apply_watermark() refuses to run while any
        # of these is still None.
        self.model = None
        self.tokenizer = None
        self.watermarking_config = None

    def login(self, hf_token):
        """Initialize the model with ``hf_token`` and return a status message.

        The three state attributes are overwritten with whatever
        ``initialize_model`` returns, so a failed attempt resets them to
        ``None``.
        """
        (
            self.model,
            self.tokenizer,
            self.watermarking_config,
            message,
        ) = initialize_model(hf_token)
        return message

    def apply_watermark(self, text):
        """Generate a SynthID-watermarked continuation of ``text``.

        The input is used as a prompt; up to ``MAX_NEW_TOKENS`` tokens are
        sampled with the watermarking config applied, and the decoded output
        sequence is returned. Only sampled tokens can carry the watermark;
        the prompt itself is not modified.

        Args:
            text: Prompt text from the UI.

        Returns:
            ``(output_text, status_message)``. On any error the original
            ``text`` is returned unchanged together with an error message.
        """
        # Explicit None checks: truthiness of model/tokenizer objects is not
        # a reliable "is it loaded" signal.
        state = (self.model, self.tokenizer, self.watermarking_config)
        if any(part is None for part in state):
            return text, "Error: Model not initialized. Please login first."

        # An empty prompt would make the model generate unrelated text.
        if not isinstance(text, str) or not text.strip():
            return text, "Error: Please enter some text to watermark."

        try:
            encoded = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=self.MAX_PROMPT_TOKENS,
            )
            # Move every input tensor to the device the model lives on.
            encoded = {name: tensor.to(self.model.device) for name, tensor in encoded.items()}

            # Sampling must be enabled for the watermark to influence tokens.
            with torch.no_grad():
                outputs = self.model.generate(
                    **encoded,
                    watermarking_config=self.watermarking_config,
                    do_sample=True,
                    # Same budget as "prompt length + 100" via max_length.
                    max_new_tokens=self.MAX_NEW_TOKENS,
                    pad_token_id=self.tokenizer.eos_token_id,
                    temperature=0.7,
                    top_p=0.9,
                )

            watermarked_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return watermarked_text, "Watermark applied successfully!"
        except Exception as e:
            # UI boundary: surface the failure as a status string.
            return text, f"Error applying watermark: {str(e)}"

    def analyze_text(self, text):
        """Return a short report with word count and average word length.

        This is a plain text statistic, not a watermark detector. ``None``
        is treated as empty text.
        """
        try:
            words = (text or "").split()
            total_words = len(words)
            # Guard against division by zero for empty input.
            avg_word_length = sum(map(len, words)) / total_words if total_words else 0

            return (
                "Text Analysis:\n"
                f"- Total words: {total_words}\n"
                f"- Average word length: {avg_word_length:.2f}\n"
                "\n"
                "Note: This is a basic analysis. The official SynthID detector "
                "is not yet available in the public transformers package."
            )
        except Exception as e:
            return f"Error analyzing text: {str(e)}"

# Build the Gradio UI around one shared application object
app_instance = SynthIDApp()

with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")

    # --- Layout: authentication -------------------------------------------
    with gr.Row():
        hf_token = gr.Textbox(type="password", label="Enter Hugging Face Token")
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")

    # --- Layout: watermarking tab -----------------------------------------
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            input_text = gr.Textbox(lines=5, label="Input Text")
            output_text = gr.Textbox(lines=5, label="Watermarked Text")
            status = gr.Textbox(label="Status")
        apply_btn = gr.Button("Apply Watermark")

    # --- Layout: analysis tab ---------------------------------------------
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(lines=5, label="Text to Analyze")
            analyze_result = gr.Textbox(lines=5, label="Analysis Result")
        analyze_btn = gr.Button("Analyze Text")

    # --- Layout: usage notes ----------------------------------------------
    gr.Markdown("""
    ### Instructions:
    1. Enter your Hugging Face token and click Login
    2. Wait for the model to initialize
    3. Use the tabs to apply watermarks or analyze text
    
    ### Notes:
    - The watermark is designed to be imperceptible to humans
    - This demo only implements watermark application
    - The official detector will be available in future releases
    - For production use, use your own secure watermark keys
    - Your token is never stored and is only used for model access
    """)

    # --- Event wiring: each button calls one method of the shared instance --
    login_btn.click(
        fn=app_instance.login,
        inputs=hf_token,
        outputs=login_status,
    )
    apply_btn.click(
        fn=app_instance.apply_watermark,
        inputs=input_text,
        outputs=[output_text, status],
    )
    analyze_btn.click(
        fn=app_instance.analyze_text,
        inputs=analyze_input,
        outputs=analyze_result,
    )

# Start the web server only when this file is executed as a script
if __name__ == "__main__":
    app.launch()