File size: 5,241 Bytes
5ea9e86
 
a9e6964
5ea9e86
 
 
 
 
a9e6964
5ea9e86
a9e6964
 
5ea9e86
a9e6964
 
 
 
 
 
 
 
 
 
 
5ea9e86
a9e6964
 
 
 
 
 
 
5ea9e86
a9e6964
5ea9e86
a9e6964
5ea9e86
a9e6964
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb0691b
 
 
a9e6964
 
 
 
 
5ea9e86
 
a9e6964
 
5ea9e86
 
a9e6964
 
 
 
 
 
 
5ea9e86
 
 
180ea05
 
5ea9e86
180ea05
a9e6964
5ea9e86
eb0691b
5ea9e86
180ea05
eb0691b
 
a9e6964
5ea9e86
 
a9e6964
 
 
 
 
180ea05
 
 
eb0691b
180ea05
a9e6964
5ea9e86
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import gradio as gr
import torch
import os
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    SynthIDTextWatermarkingConfig,
)
from huggingface_hub import login

def initialize_model(hf_token):
    """Load the Gemma model, its tokenizer, and the SynthID watermarking config.

    The token is handed directly to the ``from_pretrained`` calls instead of
    going through ``huggingface_hub.login``: per the huggingface_hub docs,
    ``login`` persists the token in the local cache and makes it the
    process-wide default, which is unwanted for a token typed into a web UI.

    Args:
        hf_token: Hugging Face access token used to download the model.
            Surrounding whitespace is ignored.

    Returns:
        A 4-tuple ``(model, tokenizer, watermarking_config, message)``.
        On failure the first three items are ``None`` and ``message``
        starts with ``"Error initializing model:"``.
    """
    # Reject missing/blank tokens up front: no network round-trip, clearer error.
    token = hf_token.strip() if isinstance(hf_token, str) else ""
    if not token:
        return None, None, None, "Error initializing model: a Hugging Face token is required."

    try:
        # Configure watermarking first: it is cheap, so a bad configuration
        # fails before the expensive model download.
        # NOTE: these are demo keys; the config takes only ngram_len/keys (plus
        # optional sampling-table settings) -- it has no ``gamma`` parameter.
        watermark_keys = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
        watermarking_config = SynthIDTextWatermarkingConfig(
            keys=watermark_keys,
            ngram_len=5,
        )

        # Initialize model and tokenizer with the per-request auth token
        model_name = "google/gemma-2b"
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=token,
            device_map="auto",  # This will automatically handle GPU if available
        )

        return model, tokenizer, watermarking_config, "Model initialized successfully!"
    except Exception as e:
        # UI boundary: report any failure as a status message instead of raising.
        return None, None, None, f"Error initializing model: {str(e)}"

class SynthIDApp:
    """Hold the loaded model state and provide the callbacks used by the UI.

    ``model``, ``tokenizer`` and ``watermarking_config`` are ``None`` until
    ``login`` has run; ``login`` overwrites all three with whatever
    ``initialize_model`` returns (``None`` again on failure).
    """

    def __init__(self):
        # Populated by login(); None means "not initialized yet".
        self.model = None
        self.tokenizer = None
        self.watermarking_config = None

    def login(self, hf_token):
        """Initialize the model with *hf_token* and return the status message."""
        (
            self.model,
            self.tokenizer,
            self.watermarking_config,
            message,
        ) = initialize_model(hf_token)
        return message

    def apply_watermark(self, text):
        """Generate watermarked text by sampling from the model with *text* as prompt.

        The prompt is truncated to 512 tokens and up to 100 new tokens are
        sampled with the SynthID watermarking config applied.

        Args:
            text: Prompt text from the UI.

        Returns:
            ``(output_text, status_message)``. On any error the original
            *text* is returned unchanged together with an error message.
        """
        # Explicit None checks: do not depend on the truthiness of model or
        # tokenizer objects (which may define __len__).
        components = (self.model, self.tokenizer, self.watermarking_config)
        if any(part is None for part in components):
            return text, "Error: Model not initialized. Please login first."

        # Nothing to condition on; avoid sampling from an empty prompt.
        if not text or not text.strip():
            return text, "Error: Please enter some text to watermark."

        try:
            # Tokenize input and move every tensor to the model's device
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

            # Generate with watermark
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    watermarking_config=self.watermarking_config,
                    do_sample=True,
                    # Same budget as the former max_length=len(prompt) + 100.
                    max_new_tokens=100,
                    pad_token_id=self.tokenizer.eos_token_id,
                    temperature=0.7,
                    top_p=0.9,
                )

            # Decode the first (only) returned sequence
            watermarked_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return watermarked_text, "Watermark applied successfully!"
        except Exception as e:
            # UI boundary: surface the failure as a status message.
            return text, f"Error applying watermark: {str(e)}"

    def analyze_text(self, text):
        """Return a short report with the word count and average word length.

        Words are whitespace-separated tokens. If *text* cannot be split
        (e.g. it is ``None``), an error message string is returned instead.
        """
        try:
            words = text.split()
        except Exception as e:
            return f"Error analyzing text: {str(e)}"

        total_words = len(words)
        # Guard against division by zero for empty input.
        avg_word_length = sum(map(len, words)) / total_words if words else 0

        return (
            "Text Analysis:\n"
            f"- Total words: {total_words}\n"
            f"- Average word length: {avg_word_length:.2f}\n"
            "\n"
            "Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package."
        )

# Build the Gradio UI around one shared application-state object
app_instance = SynthIDApp()

with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")

    # Authentication: token field beside a status readout, button underneath
    with gr.Row():
        hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(
        fn=app_instance.login,
        inputs=[hf_token],
        outputs=[login_status],
    )

    # Tab 1: send the input text through apply_watermark
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", lines=5)
            output_text = gr.Textbox(label="Watermarked Text", lines=5)
            status = gr.Textbox(label="Status")
        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            fn=app_instance.apply_watermark,
            inputs=[input_text],
            outputs=[output_text, status],
        )

    # Tab 2: basic word statistics via analyze_text
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(
            fn=app_instance.analyze_text,
            inputs=[analyze_input],
            outputs=[analyze_result],
        )

    # Static usage notes rendered below the tabs
    gr.Markdown("""
    ### Instructions:
    1. Enter your Hugging Face token and click Login
    2. Wait for the model to initialize
    3. Use the tabs to apply watermarks or analyze text
    
    ### Notes:
    - The watermark is designed to be imperceptible to humans
    - This demo only implements watermark application
    - The official detector will be available in future releases
    - For production use, use your own secure watermark keys
    - Your token is never stored and is only used for model access
    """)

# Start the server only when this file is executed as a script
if __name__ == "__main__":
    app.launch()