cyberandy committed on
Commit
a9e6964
·
verified ·
1 Parent(s): a30b4fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -51
app.py CHANGED
@@ -1,72 +1,107 @@
1
  import gradio as gr
2
  import torch
 
3
  from transformers import (
4
  AutoModelForCausalLM,
5
  AutoTokenizer,
6
  SynthIDTextWatermarkingConfig,
7
  )
 
8
 
9
- # Initialize model and tokenizer
10
- MODEL_NAME = "google/gemma-2-2b" # You can change this to your preferred model
11
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
12
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
13
-
14
- # Configure watermarking
15
- WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789] # Example keys
16
- watermarking_config = SynthIDTextWatermarkingConfig(
17
- keys=WATERMARK_KEYS,
18
- ngram_len=5,
19
- gamma=0.5, # Additional parameter to control watermark strength
20
- )
21
-
22
- def apply_watermark(text):
23
- """Apply SynthID watermark to input text."""
24
  try:
25
- # Tokenize input
26
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
 
 
 
 
 
 
 
 
 
27
 
28
- # Generate with watermark
29
- with torch.no_grad():
30
- outputs = model.generate(
31
- **inputs,
32
- watermarking_config=watermarking_config,
33
- do_sample=True,
34
- max_length=len(inputs["input_ids"][0]) + 100, # Add some extra tokens
35
- pad_token_id=tokenizer.eos_token_id,
36
- temperature=0.7, # Add some randomness to generation
37
- top_p=0.9
38
- )
39
 
40
- # Decode output
41
- watermarked_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
42
- return watermarked_text, "Watermark applied successfully!"
43
  except Exception as e:
44
- return text, f"Error applying watermark: {str(e)}"
45
 
46
- def analyze_text(text):
47
- """Analyze text characteristics that might indicate watermarking."""
48
- try:
49
- # Basic text analysis (since we don't have access to the detector yet)
50
- total_words = len(text.split())
51
- avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
52
-
53
- # Create analysis report
54
- analysis = f"""Text Analysis:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  - Total words: {total_words}
56
  - Average word length: {avg_word_length:.2f}
57
 
58
- Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.
59
- For proper watermark detection, please refer to the official Google DeepMind implementation when it becomes available."""
60
-
61
- return analysis
62
- except Exception as e:
63
- return f"Error analyzing text: {str(e)}"
64
 
65
  # Create Gradio interface
 
 
66
  with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
67
  gr.Markdown("# SynthID Text Watermarking Tool")
68
- gr.Markdown("""This demo shows how to apply SynthID watermarks to text.
69
- Note: The official detector is not yet publicly available.""")
 
 
 
 
 
70
 
71
  with gr.Tab("Apply Watermark"):
72
  with gr.Row():
@@ -74,21 +109,27 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
74
  output_text = gr.Textbox(label="Watermarked Text", lines=5)
75
  status = gr.Textbox(label="Status")
76
  apply_btn = gr.Button("Apply Watermark")
77
- apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
78
 
79
  with gr.Tab("Analyze Text"):
80
  with gr.Row():
81
  analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
82
  analyze_result = gr.Textbox(label="Analysis Result", lines=5)
83
  analyze_btn = gr.Button("Analyze Text")
84
- analyze_btn.click(analyze_text, inputs=[analyze_input], outputs=[analyze_result])
85
 
86
  gr.Markdown("""
 
 
 
 
 
87
  ### Notes:
88
  - The watermark is designed to be imperceptible to humans
89
  - This demo only implements watermark application
90
  - The official detector will be available in future releases
91
  - For production use, use your own secure watermark keys
 
92
  """)
93
 
94
  # Launch the app
 
1
  import gradio as gr
2
  import torch
3
+ import os
4
  from transformers import (
5
  AutoModelForCausalLM,
6
  AutoTokenizer,
7
  SynthIDTextWatermarkingConfig,
8
  )
9
+ from huggingface_hub import login
10
 
11
def initialize_model(hf_token, model_name="google/gemma-2b"):
    """Load the model, tokenizer and SynthID watermarking configuration.

    Args:
        hf_token: Hugging Face access token used to download the tokenizer
            and model weights. Must be a non-blank string.
        model_name: Hub repository id of the causal language model to load.

    Returns:
        A 4-tuple ``(model, tokenizer, watermarking_config, message)``.
        On failure the first three items are ``None`` and ``message`` starts
        with ``"Error initializing model:"``. The function never raises, so
        it can be wired directly to a UI callback.
    """
    # Reject a missing or blank token before touching the network, so the
    # user gets an immediate, specific message.
    if not isinstance(hf_token, str) or not hf_token.strip():
        return (
            None,
            None,
            None,
            "Error initializing model: a Hugging Face token is required.",
        )
    hf_token = hf_token.strip()

    try:
        # The token is passed straight to from_pretrained(). A global
        # huggingface_hub.login() is deliberately not used here: it persists
        # the token in the local Hugging Face cache, which this app promises
        # its users it will not do.
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=hf_token,
            device_map="auto",  # place weights on a GPU when one is available
        )

        # Example keys only; production deployments must use private keys.
        watermark_keys = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
        # NOTE(review): SynthIDTextWatermarkingConfig takes `keys` and
        # `ngram_len` (plus optional sampling-table settings). The previous
        # `gamma` keyword is not one of its parameters and made construction
        # fail, so it is not passed.
        watermarking_config = SynthIDTextWatermarkingConfig(
            keys=watermark_keys,
            ngram_len=5,
        )

        return model, tokenizer, watermarking_config, "Model initialized successfully!"
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing.
        return None, None, None, f"Error initializing model: {e}"
37
 
38
class SynthIDApp:
    """Hold the loaded model state and expose the UI callbacks.

    The three attributes stay ``None`` until ``login`` succeeds. Every
    callback returns plain strings (or tuples of strings) and reports
    failures in the returned text instead of raising.
    """

    # Upper bound on prompt tokens fed to the model.
    _MAX_PROMPT_TOKENS = 512
    # Number of tokens generated after the prompt.
    _MAX_NEW_TOKENS = 100

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.watermarking_config = None

    def login(self, hf_token):
        """Initialize the model with ``hf_token`` and return a status message.

        State is replaced only when initialization succeeds, so a failed
        attempt does not discard a model that is already loaded.
        """
        model, tokenizer, watermarking_config, message = initialize_model(hf_token)
        if model is not None and tokenizer is not None and watermarking_config is not None:
            self.model = model
            self.tokenizer = tokenizer
            self.watermarking_config = watermarking_config
        return message

    def apply_watermark(self, text):
        """Generate a watermarked continuation of ``text``.

        Args:
            text: Prompt text; it is truncated to the first
                ``_MAX_PROMPT_TOKENS`` tokens.

        Returns:
            ``(output_text, status)``. On success ``output_text`` is the
            decoded model output; on any failure it is the unchanged input
            and ``status`` describes the error.
        """
        # Explicit None checks: the truthiness of the model/tokenizer objects
        # is not something this code should depend on.
        if self.model is None or self.tokenizer is None or self.watermarking_config is None:
            return text, "Error: Model not initialized. Please login first."

        try:
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=self._MAX_PROMPT_TOKENS,
            )
            # Move every input tensor to the device the model lives on.
            inputs = {name: tensor.to(self.model.device) for name, tensor in inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    watermarking_config=self.watermarking_config,
                    do_sample=True,
                    # Same budget as the old max_length = prompt length + 100.
                    max_new_tokens=self._MAX_NEW_TOKENS,
                    pad_token_id=self.tokenizer.eos_token_id,
                    temperature=0.7,
                    top_p=0.9,
                )

            watermarked_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return watermarked_text, "Watermark applied successfully!"
        except Exception as e:
            # UI boundary: surface the failure in the status box.
            return text, f"Error applying watermark: {e}"

    def analyze_text(self, text):
        """Return a short word-count report for ``text``.

        This is not a watermark detector; it only reports the number of
        whitespace-separated words and their average length. ``None`` is
        treated as empty text.
        """
        try:
            words = (text or "").split()
            total_words = len(words)
            avg_word_length = sum(map(len, words)) / total_words if words else 0

            return (
                "Text Analysis:\n"
                f"- Total words: {total_words}\n"
                f"- Average word length: {avg_word_length:.2f}\n"
                "\n"
                "Note: This is a basic analysis. The official SynthID detector "
                "is not yet available in the public transformers package."
            )
        except Exception as e:
            return f"Error analyzing text: {e}"
 
92
 
93
  # Create Gradio interface
94
+ app_instance = SynthIDApp()
95
+
96
  with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
97
  gr.Markdown("# SynthID Text Watermarking Tool")
98
+
99
+ # Login section
100
+ with gr.Row():
101
+ hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
102
+ login_status = gr.Textbox(label="Login Status")
103
+ login_btn = gr.Button("Login")
104
+ login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])
105
 
106
  with gr.Tab("Apply Watermark"):
107
  with gr.Row():
 
109
  output_text = gr.Textbox(label="Watermarked Text", lines=5)
110
  status = gr.Textbox(label="Status")
111
  apply_btn = gr.Button("Apply Watermark")
112
+ apply_btn.click(app_instance.apply_watermark, inputs=[input_text], outputs=[output_text, status])
113
 
114
  with gr.Tab("Analyze Text"):
115
  with gr.Row():
116
  analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
117
  analyze_result = gr.Textbox(label="Analysis Result", lines=5)
118
  analyze_btn = gr.Button("Analyze Text")
119
+ analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])
120
 
121
  gr.Markdown("""
122
+ ### Instructions:
123
+ 1. Enter your Hugging Face token and click Login
124
+ 2. Wait for the model to initialize
125
+ 3. Use the tabs to apply watermarks or analyze text
126
+
127
  ### Notes:
128
  - The watermark is designed to be imperceptible to humans
129
  - This demo only implements watermark application
130
  - The official detector will be available in future releases
131
  - For production use, use your own secure watermark keys
132
+ - Your token is never stored and is only used for model access
133
  """)
134
 
135
  # Launch the app