File size: 3,806 Bytes
5ea9e86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb0691b
 
5ea9e86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb0691b
 
 
5ea9e86
 
 
 
 
 
 
 
eb0691b
 
5ea9e86
eb0691b
 
 
5ea9e86
eb0691b
 
 
 
 
 
 
5ea9e86
eb0691b
5ea9e86
eb0691b
5ea9e86
 
 
 
eb0691b
 
5ea9e86
 
 
 
 
 
 
 
 
eb0691b
5ea9e86
eb0691b
 
 
 
5ea9e86
 
 
eb0691b
 
 
 
5ea9e86
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    SynthIDTextWatermarkingConfig,
)

# Initialize model and tokenizer.
# NOTE: both from_pretrained calls execute at import time and load the
# checkpoint identified by MODEL_NAME.
MODEL_NAME = "google/gemma-2b"  # You can change this to your preferred model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Configure watermarking.
# SynthIDTextWatermarkingConfig is configured through `keys` and `ngram_len`
# (plus optional sampling-table / context-history settings). It does not take
# a `gamma` argument, so passing one fails with TypeError before the app can
# start; the unsupported argument has been dropped.
WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]  # Example keys
watermarking_config = SynthIDTextWatermarkingConfig(
    keys=WATERMARK_KEYS,
    ngram_len=5,
)

def apply_watermark(text):
    """Generate SynthID-watermarked text from the given prompt.

    The prompt is tokenized (truncated to 512 tokens), then up to 100 new
    tokens are sampled with the module-level ``watermarking_config``. The
    full sequence returned by ``model.generate`` is decoded and returned, so
    the watermark is carried by the sampled tokens rather than by the
    caller's original wording.

    Args:
        text: Prompt text supplied by the UI.

    Returns:
        A ``(text, status)`` tuple. On success it holds the decoded output and
        a success message. When the input is blank, or generation raises, it
        holds the unchanged input (``""`` for ``None``) and a message that
        explains why nothing was generated.
    """
    # A blank prompt gives the model nothing to continue; report that instead
    # of sampling unrelated text.
    if not text or not text.strip():
        return text or "", "Please enter some text to watermark."

    try:
        # Tokenize input and keep the tensors on the same device as the model.
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        inputs = inputs.to(model.device)

        # Generate with watermark
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                watermarking_config=watermarking_config,
                do_sample=True,
                # Bound the continuation directly rather than deriving a
                # total length from the prompt size.
                max_new_tokens=100,
                pad_token_id=tokenizer.eos_token_id,
                temperature=0.7,  # Add some randomness to generation
                top_p=0.9,
            )

        # Decode output
        watermarked_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return watermarked_text, "Watermark applied successfully!"
    except Exception as e:
        # UI boundary: surface the failure in the status box rather than
        # crashing the request.
        return text, f"Error applying watermark: {str(e)}"

def analyze_text(text):
    """Report simple word statistics for the given text.

    Counts whitespace-separated words and their mean length, then formats
    them into a short report that also notes this is not real watermark
    detection. Any exception raised along the way is returned as an error
    string instead of propagating.
    """
    try:
        words = text.split()
        word_count = len(words)

        # Guard against division by zero when there are no words.
        if word_count > 0:
            mean_length = sum(map(len, words)) / word_count
        else:
            mean_length = 0

        # Assemble the report line by line.
        report_lines = (
            "Text Analysis:",
            f"- Total words: {word_count}",
            f"- Average word length: {mean_length:.2f}",
            "",
            "Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.",
            "For proper watermark detection, please refer to the official Google DeepMind implementation when it becomes available.",
        )
        return "\n".join(report_lines)
    except Exception as err:
        return f"Error analyzing text: {str(err)}"

# Create Gradio interface: a Blocks app with two tabs, one that runs
# apply_watermark and one that runs analyze_text.
with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    # Page heading and a short description shown above the tabs.
    gr.Markdown("# SynthID Text Watermarking Tool")
    gr.Markdown("""This demo shows how to apply SynthID watermarks to text. 
                Note: The official detector is not yet publicly available.""")
    
    # Tab 1: prompt in, generated text and a status message out.
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", lines=5)
            output_text = gr.Textbox(label="Watermarked Text", lines=5)
            status = gr.Textbox(label="Status")
        apply_btn = gr.Button("Apply Watermark")
        # apply_watermark returns a (text, status) pair, matching the two outputs.
        apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
    
    # Tab 2: basic word statistics produced by analyze_text.
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        # analyze_text returns a single report string for the result box.
        analyze_btn.click(analyze_text, inputs=[analyze_input], outputs=[analyze_result])
    
    # Static usage notes rendered below the tabs.
    gr.Markdown("""
    ### Notes:
    - The watermark is designed to be imperceptible to humans
    - This demo only implements watermark application
    - The official detector will be available in future releases
    - For production use, use your own secure watermark keys
    """)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    app.launch()