File size: 7,619 Bytes
5ea9e86
9c1cc06
a3c284e
5ea9e86
a9e6964
 
9c1cc06
 
a9e6964
f03955d
a9e6964
 
9c1cc06
de4bf2e
 
a3c284e
9c1cc06
 
 
 
 
 
de4bf2e
 
a3c284e
9c1cc06
a3c284e
9c1cc06
a3c284e
9c1cc06
de4bf2e
 
 
 
 
 
 
 
 
a9e6964
f03955d
 
 
 
 
 
 
 
 
 
 
 
9c1cc06
 
 
a9e6964
 
f03955d
 
 
9c1cc06
 
 
 
 
 
 
 
 
 
 
 
 
a3c284e
a9e6964
9c1cc06
 
 
 
de4bf2e
 
a3c284e
9c1cc06
a9e6964
9c1cc06
 
 
 
 
 
 
f03955d
a9e6964
 
 
 
 
 
 
 
f03955d
a3c284e
a9e6964
f03955d
eb0691b
f03955d
eb0691b
a9e6964
 
 
 
 
5ea9e86
 
a9e6964
 
5ea9e86
 
9c1cc06
a9e6964
 
 
a3c284e
 
 
 
 
a9e6964
 
 
5ea9e86
 
 
f03955d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180ea05
f03955d
 
 
 
 
5ea9e86
eb0691b
5ea9e86
a3c284e
 
 
 
 
eb0691b
 
a9e6964
5ea9e86
 
a9e6964
 
a3c284e
f03955d
a9e6964
180ea05
9c1cc06
a3c284e
180ea05
 
eb0691b
180ea05
a3c284e
5ea9e86
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import gradio as gr
import requests
from transformers import SynthIDTextWatermarkingConfig

class SynthIDApp:
    """Backend for the SynthID text watermarking demo.

    Holds the Hugging Face Inference API endpoint, the authorization headers
    set by ``login``, and the most recently built watermarking config. Every
    public method returns user-facing status text instead of raising, so the
    methods can be wired directly to UI callbacks.
    """

    def __init__(self):
        self.api_url = "https://api-inference.huggingface.co/models/google/gemma-2b"
        # None until login() succeeds; apply_watermark() refuses to run without it.
        self.headers = None
        # Last config built by update_watermark_config() / apply_watermark().
        self.watermarking_config = None
        # Fixed demo keys passed as ``keys`` to the watermarking config.
        self.WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]

    @staticmethod
    def _describe_login_error(error):
        """Translate an exception from the login test request into a status message."""
        # HTTP errors raised by ``raise_for_status`` carry the response; use its
        # status code when present and fall back to the message text otherwise.
        response = getattr(error, "response", None)
        status_code = getattr(response, "status_code", None)
        error_msg = str(error)
        lowered = error_msg.lower()

        if "timeout" in lowered or "timed out" in lowered:
            return "Error: API connection timed out. Please try again."
        # 401 (bad token) is treated like 403; the substring-only check used to
        # let "Unauthorized" fall through to the generic message.
        if status_code in (401, 403) or "forbidden" in lowered or "unauthorized" in lowered:
            return "Error: Invalid token or insufficient permissions."
        if status_code == 404 or "not found" in lowered:
            return "Error: Model not found or unavailable."
        return f"Error initializing API: {error_msg}"

    @staticmethod
    def _extract_generated_text(result):
        """Return ``generated_text`` from a decoded API response, or None if absent."""
        if isinstance(result, list) and result and isinstance(result[0], dict):
            return result[0].get("generated_text")
        return None

    def _build_watermark_config(self, ngram_len):
        """Build, store and return a watermarking config for *ngram_len*.

        Raises whatever the config constructor raises, so callers can tell a
        failed update from a successful one.
        """
        # Accept a whole-number float (e.g. 5.0) as the equivalent int.
        if isinstance(ngram_len, float) and ngram_len.is_integer():
            ngram_len = int(ngram_len)
        config = SynthIDTextWatermarkingConfig(
            keys=self.WATERMARK_KEYS,
            ngram_len=ngram_len,
        )
        self.watermarking_config = config
        return config

    def login(self, hf_token):
        """Check *hf_token* against the inference API and store auth headers.

        Args:
            hf_token: Hugging Face access token; surrounding whitespace is ignored.

        Returns:
            A status message. ``self.headers`` is set only after the test
            request succeeds and is reset to ``None`` when that request fails.
            A token that fails the format check leaves ``self.headers`` untouched.
        """
        token = hf_token.strip() if isinstance(hf_token, str) else ""
        if not token.startswith("hf_"):
            return "Error: Please enter a valid Hugging Face token (starts with 'hf_')"

        headers = {"Authorization": f"Bearer {token}"}
        try:
            # Test the connection with a minimal one-token generation request.
            response = requests.post(
                self.api_url,
                headers=headers,
                json={"inputs": "Test", "parameters": {"max_new_tokens": 1}},
                timeout=10,
            )
            response.raise_for_status()
        except Exception as e:
            self.headers = None
            return self._describe_login_error(e)

        self.headers = headers
        return "API connection initialized successfully!"

    def update_watermark_config(self, ngram_len):
        """Rebuild the stored watermarking config with a new *ngram_len*.

        Returns:
            A status message describing success or the construction error. On
            failure the previously stored config is left in place.
        """
        try:
            config = self._build_watermark_config(ngram_len)
        except Exception as e:
            return f"Error updating config: {str(e)}"
        return f"Watermark config updated: ngram_len = {config.ngram_len}"

    def apply_watermark(self, text, ngram_len):
        """Request watermarked generation for *text* from the inference API.

        Args:
            text: Prompt text sent as ``inputs``.
            ngram_len: N-gram length used to build the watermarking config.

        Returns:
            ``(output_text, status)``. On any error ``output_text`` is the
            unchanged input and ``status`` starts with ``"Error"``.
        """
        if not self.headers:
            return text, "Error: API not initialized. Please login first."
        if not isinstance(text, str) or not text.strip():
            return text, "Error: Please enter text to watermark."

        try:
            # Build the config here rather than via update_watermark_config():
            # a construction failure must abort the request instead of silently
            # reusing a stale config from an earlier call.
            config = self._build_watermark_config(ngram_len)

            # NOTE(review): confirm the hosted endpoint honors
            # ``watermarking_config``; unsupported parameters may be ignored.
            params = {
                "inputs": text,
                "parameters": {
                    "max_new_tokens": 100,
                    "do_sample": True,
                    "temperature": 0.7,
                    "top_p": 0.9,
                    "watermarking_config": {
                        "keys": config.keys,
                        "ngram_len": config.ngram_len,
                    },
                },
            }

            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=params,
                timeout=30,  # generation can take longer than the login probe
            )
            response.raise_for_status()

            watermarked_text = self._extract_generated_text(response.json())
            if watermarked_text is None:
                # Do not report success when the API returned no generated text.
                return text, "Error applying watermark: unexpected API response format."

            return (
                watermarked_text,
                f"Watermark applied successfully! (ngram_len: {config.ngram_len})",
            )
        except Exception as e:
            return text, f"Error applying watermark: {str(e)}"

    def analyze_text(self, text):
        """Return a short report of character count, word count and mean word length.

        ``None`` is treated as empty text. Words are whitespace-separated tokens.
        """
        try:
            content = text or ""
            words = content.split()
            total_words = len(words)
            avg_word_length = (
                sum(len(word) for word in words) / total_words if total_words else 0
            )
            char_count = len(content)

            return (
                "Text Analysis:\n"
                f"- Total characters: {char_count}\n"
                f"- Total words: {total_words}\n"
                f"- Average word length: {avg_word_length:.2f}\n"
                "\n"
                "Note: This is a basic analysis. The official SynthID detector "
                "is not yet available in the public transformers package."
            )
        except Exception as e:
            return f"Error analyzing text: {str(e)}"

# Create Gradio interface
# A single module-level SynthIDApp backs every event handler wired below, so
# the headers stored by login() are the ones apply_watermark() later uses.
# NOTE(review): this one instance is presumably shared by all browser sessions
# served by the process — confirm, and consider per-session state if so.
app_instance = SynthIDApp()

# Components are laid out in the order they are created inside the nested
# context managers, so statement order here defines the page layout.
with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")
    gr.Markdown("Using Hugging Face Inference API for faster processing")
    
    # Login section
    with gr.Row():
        hf_token = gr.Textbox(
            label="Enter Hugging Face Token", 
            type="password",
            placeholder="hf_..."
        )
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    # login() returns a single status string, shown in the status box.
    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])
    
    # Tab 1: send text to the API with the chosen n-gram length.
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            with gr.Column(scale=3):
                input_text = gr.Textbox(
                    label="Input Text", 
                    lines=5,
                    placeholder="Enter text to watermark..."
                )
                output_text = gr.Textbox(label="Watermarked Text", lines=5)
            with gr.Column(scale=1):
                ngram_len = gr.Slider(
                    label="N-gram Length",
                    minimum=2,
                    maximum=5,
                    step=1,
                    value=5,
                    info="Controls watermark detectability (2-5)"
                )
                status = gr.Textbox(label="Status")
        
        gr.Markdown("""
        ### N-gram Length Parameter:
        - Higher values (4-5): More detectable watermark, but more brittle to changes
        - Lower values (2-3): More robust to changes, but harder to detect
        - Default (5): Maximum detectability""")
        
        apply_btn = gr.Button("Apply Watermark")
        # apply_watermark() returns (text, status), mapped onto the two outputs in order.
        apply_btn.click(
            app_instance.apply_watermark, 
            inputs=[input_text, ngram_len], 
            outputs=[output_text, status]
        )
    
    # Tab 2: local character/word statistics; analyze_text() makes no API call.
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(
                label="Text to Analyze", 
                lines=5,
                placeholder="Enter text to analyze..."
            )
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])
    
    # NOTE(review): the notes below say the token is never stored, but login()
    # keeps it in app_instance.headers for as long as the instance lives —
    # confirm the wording is acceptable.
    gr.Markdown("""
    ### Instructions:
    1. Enter your Hugging Face token and click Login
    2. Once connected, you can use the tabs to apply watermarks or analyze text
    3. Adjust the N-gram Length slider to control watermark characteristics
    
    ### Notes:
    - This version uses Hugging Face's Inference API for faster processing
    - No model download required - everything runs in the cloud
    - The watermark is designed to be imperceptible to humans
    - This demo only implements watermark application
    - The official detector will be available in future releases
    - For production use, use your own secure watermark keys
    - Your token is never stored and is only used for API access
    """)

# Launch the app
# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()