cyberandy committed on
Commit
eb0691b
·
verified ·
1 Parent(s): c0cedf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -27
app.py CHANGED
@@ -4,7 +4,6 @@ from transformers import (
4
  AutoModelForCausalLM,
5
  AutoTokenizer,
6
  SynthIDTextWatermarkingConfig,
7
- SynthIDTextBayesianDetector
8
  )
9
 
10
  # Initialize model and tokenizer
@@ -16,12 +15,10 @@ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
16
  WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789] # Example keys
17
  watermarking_config = SynthIDTextWatermarkingConfig(
18
  keys=WATERMARK_KEYS,
19
- ngram_len=5
 
20
  )
21
 
22
- # Initialize detector
23
- detector = SynthIDTextBayesianDetector(watermarking_config)
24
-
25
  def apply_watermark(text):
26
  """Apply SynthID watermark to input text."""
27
  try:
@@ -35,7 +32,9 @@ def apply_watermark(text):
35
  watermarking_config=watermarking_config,
36
  do_sample=True,
37
  max_length=len(inputs["input_ids"][0]) + 100, # Add some extra tokens
38
- pad_token_id=tokenizer.eos_token_id
 
 
39
  )
40
 
41
  # Decode output
@@ -44,27 +43,30 @@ def apply_watermark(text):
44
  except Exception as e:
45
  return text, f"Error applying watermark: {str(e)}"
46
 
47
- def detect_watermark(text):
48
- """Detect if text contains SynthID watermark."""
49
  try:
50
- # Get detection score
51
- score = detector.detect(text)
52
-
53
- # Interpret results
54
- threshold = 0.5 # You can adjust this threshold
55
- is_watermarked = score > threshold
56
 
57
- result = f"Watermark Detection Score: {score:.3f}\n"
58
- result += f"Verdict: {'WATERMARK DETECTED' if is_watermarked else 'NO WATERMARK DETECTED'}"
 
 
 
 
 
59
 
60
- return result
61
  except Exception as e:
62
- return f"Error detecting watermark: {str(e)}"
63
 
64
  # Create Gradio interface
65
  with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
66
  gr.Markdown("# SynthID Text Watermarking Tool")
67
- gr.Markdown("Apply and detect SynthID watermarks in text")
 
68
 
69
  with gr.Tab("Apply Watermark"):
70
  with gr.Row():
@@ -74,18 +76,19 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
74
  apply_btn = gr.Button("Apply Watermark")
75
  apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
76
 
77
- with gr.Tab("Detect Watermark"):
78
  with gr.Row():
79
- detect_input = gr.Textbox(label="Text to Check", lines=5)
80
- detect_result = gr.Textbox(label="Detection Result", lines=3)
81
- detect_btn = gr.Button("Detect Watermark")
82
- detect_btn.click(detect_watermark, inputs=[detect_input], outputs=[detect_result])
83
 
84
  gr.Markdown("""
85
  ### Notes:
86
- - The watermark is designed to be imperceptible to humans but detectable by the classifier
87
- - Detection scores above 0.5 indicate likely presence of a watermark
88
- - The watermark is somewhat robust to minor text modifications but may not survive major changes
 
89
  """)
90
 
91
  # Launch the app
 
4
  AutoModelForCausalLM,
5
  AutoTokenizer,
6
  SynthIDTextWatermarkingConfig,
 
7
  )
8
 
9
  # Initialize model and tokenizer
 
15
  WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789] # Example keys
16
  watermarking_config = SynthIDTextWatermarkingConfig(
17
  keys=WATERMARK_KEYS,
18
+ ngram_len=5,
19
+ gamma=0.5, # Additional parameter to control watermark strength
20
  )
21
 
 
 
 
22
  def apply_watermark(text):
23
  """Apply SynthID watermark to input text."""
24
  try:
 
32
  watermarking_config=watermarking_config,
33
  do_sample=True,
34
  max_length=len(inputs["input_ids"][0]) + 100, # Add some extra tokens
35
+ pad_token_id=tokenizer.eos_token_id,
36
+ temperature=0.7, # Add some randomness to generation
37
+ top_p=0.9
38
  )
39
 
40
  # Decode output
 
43
  except Exception as e:
44
  return text, f"Error applying watermark: {str(e)}"
45
 
46
+ def analyze_text(text):
47
+ """Analyze text characteristics that might indicate watermarking."""
48
  try:
49
+ # Basic text analysis (since we don't have access to the detector yet)
50
+ total_words = len(text.split())
51
+ avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
 
 
 
52
 
53
+ # Create analysis report
54
+ analysis = f"""Text Analysis:
55
+ - Total words: {total_words}
56
+ - Average word length: {avg_word_length:.2f}
57
+
58
+ Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.
59
+ For proper watermark detection, please refer to the official Google DeepMind implementation when it becomes available."""
60
 
61
+ return analysis
62
  except Exception as e:
63
+ return f"Error analyzing text: {str(e)}"
64
 
65
  # Create Gradio interface
66
  with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
67
  gr.Markdown("# SynthID Text Watermarking Tool")
68
+ gr.Markdown("""This demo shows how to apply SynthID watermarks to text.
69
+ Note: The official detector is not yet publicly available.""")
70
 
71
  with gr.Tab("Apply Watermark"):
72
  with gr.Row():
 
76
  apply_btn = gr.Button("Apply Watermark")
77
  apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
78
 
79
+ with gr.Tab("Analyze Text"):
80
  with gr.Row():
81
+ analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
82
+ analyze_result = gr.Textbox(label="Analysis Result", lines=5)
83
+ analyze_btn = gr.Button("Analyze Text")
84
+ analyze_btn.click(analyze_text, inputs=[analyze_input], outputs=[analyze_result])
85
 
86
  gr.Markdown("""
87
  ### Notes:
88
+ - The watermark is designed to be imperceptible to humans
89
+ - This demo only implements watermark application
90
+ - The official detector will be available in future releases
91
+ - For production use, use your own secure watermark keys
92
  """)
93
 
94
  # Launch the app