cyberandy commited on
Commit
5ea9e86
·
verified ·
1 Parent(s): 94ca90c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import (
4
+ AutoModelForCausalLM,
5
+ AutoTokenizer,
6
+ SynthIDTextWatermarkingConfig,
7
+ SynthIDTextBayesianDetector
8
+ )
9
+
10
# Initialize model and tokenizer.
# Both are loaded once at import time and shared by every request handler below.
MODEL_NAME = "google/gemma-2b"  # You can change this to your preferred model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Configure watermarking.
# The same config object is passed to `model.generate` (to embed the watermark)
# and to the detector below, so both sides use identical keys and n-gram length.
WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]  # Example keys
watermarking_config = SynthIDTextWatermarkingConfig(
    keys=WATERMARK_KEYS,
    ngram_len=5,
)

# Initialize detector.
# NOTE(review): confirm that the installed transformers release actually exports
# `SynthIDTextBayesianDetector` and that it accepts a watermarking config as its
# only argument. The documented detection API appears to be
# `SynthIDTextWatermarkDetector(detector_module, logits_processor, tokenizer)`
# backed by a trained `BayesianDetectorModel` -- verify before deploying.
detector = SynthIDTextBayesianDetector(watermarking_config)
24
+
25
def apply_watermark(text, max_new_tokens=100):
    """Generate a watermarked continuation of ``text``.

    The input is used as a prompt: it is tokenized (truncated to 512 tokens)
    and handed to ``model.generate`` together with the module-level
    ``watermarking_config``. The watermark is embedded while new tokens are
    sampled, so the decoded result is whatever ``generate`` returns for the
    prompt, not a re-encoding of the original text.

    Args:
        text: Prompt text coming from the UI.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. Defaults to 100, matching the previous hard-coded
            ``prompt_length + 100`` limit.

    Returns:
        A ``(output_text, status_message)`` tuple. On success ``output_text``
        is the decoded generation; on any failure (including empty input) the
        original ``text`` is returned unchanged alongside an error message.
        This function never raises, so the UI always has something to show.
    """
    # Guard against empty / whitespace-only prompts instead of sending them
    # to the model, which would produce unrelated text or an opaque error.
    if not text or not text.strip():
        return text, "Error applying watermark: input text is empty"

    try:
        # Tokenize input
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

        # Generate with watermark. Sampling is required: the watermarking
        # config is only meaningful when tokens are sampled.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                watermarking_config=watermarking_config,
                do_sample=True,
                max_new_tokens=max_new_tokens,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode output
        watermarked_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return watermarked_text, "Watermark applied successfully!"
    except Exception as e:
        # UI boundary: report the failure in the status box rather than crash.
        return text, f"Error applying watermark: {str(e)}"
46
+
47
def detect_watermark(text):
    """Score ``text`` with the module-level detector and format a verdict.

    Args:
        text: Text to check, coming from the UI.

    Returns:
        A human-readable string. On success it contains the detection score
        (three decimals) and a verdict line; the verdict is
        ``WATERMARK DETECTED`` when the score is strictly greater than 0.5.
        On any failure (including empty input) it contains an error message
        instead. This function never raises.
    """
    # Nothing to score: report it directly rather than calling the detector.
    if not text or not text.strip():
        return "Error detecting watermark: input text is empty"

    try:
        # Get detection score.
        # NOTE(review): `detector.detect` and its return type are not visible
        # in this file; the formatting below assumes a value that supports
        # `>` and the `.3f` format spec -- confirm against the detector class.
        score = detector.detect(text)

        # Interpret results
        threshold = 0.5  # You can adjust this threshold
        verdict = "WATERMARK DETECTED" if score > threshold else "NO WATERMARK DETECTED"

        return f"Watermark Detection Score: {score:.3f}\nVerdict: {verdict}"
    except Exception as e:
        # UI boundary: surface the failure as text in the result box.
        return f"Error detecting watermark: {str(e)}"
63
+
64
# Create Gradio interface.
# Two tabs: one generates watermarked text from a prompt, the other scores
# arbitrary text with the detector. Each button is wired to the matching
# handler defined above.
# NOTE(review): the nesting below (which widgets sit inside each Row/Tab) was
# reconstructed from statement order; confirm it matches the intended layout.
with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")
    gr.Markdown("Apply and detect SynthID watermarks in text")

    with gr.Tab("Apply Watermark"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", lines=5)
            output_text = gr.Textbox(label="Watermarked Text", lines=5)
        status = gr.Textbox(label="Status")
        apply_btn = gr.Button("Apply Watermark")
        # apply_watermark returns (text, status), matching the two outputs.
        apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])

    with gr.Tab("Detect Watermark"):
        with gr.Row():
            detect_input = gr.Textbox(label="Text to Check", lines=5)
            detect_result = gr.Textbox(label="Detection Result", lines=3)
        detect_btn = gr.Button("Detect Watermark")
        detect_btn.click(detect_watermark, inputs=[detect_input], outputs=[detect_result])

    gr.Markdown("""
    ### Notes:
    - The watermark is designed to be imperceptible to humans but detectable by the classifier
    - Detection scores above 0.5 indicate likely presence of a watermark
    - The watermark is somewhat robust to minor text modifications but may not survive major changes
    """)

# Launch the app only when run as a script, so importing this module
# (e.g. from a hosting runtime that calls `app` itself) does not start a server.
if __name__ == "__main__":
    app.launch()