cyberandy committed on
Commit
180ea05
·
verified ·
1 Parent(s): ebd0825

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -53
app.py CHANGED
@@ -7,37 +7,23 @@ from transformers import (
7
  )
8
 
9
  # Initialize model and tokenizer
10
- MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2" # Changed to open-access model
11
- print(f"Loading model and tokenizer from {MODEL_NAME}...")
12
-
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
14
- model = AutoModelForCausalLM.from_pretrained(
15
- MODEL_NAME,
16
- torch_dtype=torch.float16, # Use half precision to reduce memory usage
17
- device_map="auto" # Automatically handle device placement
18
- )
19
 
20
  # Configure watermarking
21
  WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789] # Example keys
22
  watermarking_config = SynthIDTextWatermarkingConfig(
23
  keys=WATERMARK_KEYS,
24
  ngram_len=5,
25
- gamma=0.5, # Controls watermark strength
26
  )
27
 
28
- def format_prompt(text):
29
- """Format the prompt for Mistral instruction model."""
30
- return f"<s>[INST] {text} [/INST]"
31
-
32
  def apply_watermark(text):
33
  """Apply SynthID watermark to input text."""
34
  try:
35
- # Format the prompt for Mistral
36
- formatted_text = format_prompt(text)
37
-
38
  # Tokenize input
39
- inputs = tokenizer(formatted_text, return_tensors="pt", truncation=True, max_length=512)
40
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
41
 
42
  # Generate with watermark
43
  with torch.no_grad():
@@ -45,16 +31,14 @@ def apply_watermark(text):
45
  **inputs,
46
  watermarking_config=watermarking_config,
47
  do_sample=True,
48
- max_length=len(inputs["input_ids"][0]) + 200, # Add more tokens for generation
49
  pad_token_id=tokenizer.eos_token_id,
50
- temperature=0.7,
51
  top_p=0.9
52
  )
53
 
54
  # Decode output
55
  watermarked_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
56
- # Remove the instruction prompt from the output
57
- watermarked_text = watermarked_text.replace(text, "").strip()
58
  return watermarked_text, "Watermark applied successfully!"
59
  except Exception as e:
60
  return text, f"Error applying watermark: {str(e)}"
@@ -62,20 +46,17 @@ def apply_watermark(text):
62
  def analyze_text(text):
63
  """Analyze text characteristics that might indicate watermarking."""
64
  try:
65
- # Basic text analysis
66
  total_words = len(text.split())
67
  avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
68
- sentences = text.split('.')
69
- avg_sentence_length = sum(len(s.split()) for s in sentences if s.strip()) / len(sentences) if sentences else 0
70
 
71
  # Create analysis report
72
  analysis = f"""Text Analysis:
73
  - Total words: {total_words}
74
  - Average word length: {avg_word_length:.2f}
75
- - Average sentence length: {avg_sentence_length:.2f} words
76
 
77
  Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.
78
- For proper watermark detection, please refer to the official implementation when it becomes available."""
79
 
80
  return analysis
81
  except Exception as e:
@@ -84,47 +65,30 @@ For proper watermark detection, please refer to the official implementation when
84
  # Create Gradio interface
85
  with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
86
  gr.Markdown("# SynthID Text Watermarking Tool")
87
- gr.Markdown("""This demo shows how to apply SynthID watermarks to text using Mistral-7B-Instruct-v0.2.
88
  Note: The official detector is not yet publicly available.""")
89
 
90
  with gr.Tab("Apply Watermark"):
91
  with gr.Row():
92
- input_text = gr.Textbox(
93
- label="Input Text (Prompt)",
94
- lines=5,
95
- placeholder="Enter text you want to watermark..."
96
- )
97
- output_text = gr.Textbox(
98
- label="Generated Text with Watermark",
99
- lines=5
100
- )
101
  status = gr.Textbox(label="Status")
102
- apply_btn = gr.Button("Generate with Watermark")
103
  apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
104
 
105
  with gr.Tab("Analyze Text"):
106
  with gr.Row():
107
- analyze_input = gr.Textbox(
108
- label="Text to Analyze",
109
- lines=5,
110
- placeholder="Enter text to analyze..."
111
- )
112
  analyze_result = gr.Textbox(label="Analysis Result", lines=5)
113
  analyze_btn = gr.Button("Analyze Text")
114
  analyze_btn.click(analyze_text, inputs=[analyze_input], outputs=[analyze_result])
115
 
116
  gr.Markdown("""
117
- ### Usage Notes:
118
- 1. Enter a prompt in the "Input Text" box
119
- 2. Click "Generate with Watermark" to create watermarked text
120
- 3. The model will generate a response with an embedded watermark
121
- 4. The watermark is designed to be imperceptible to humans
122
-
123
- ### Technical Notes:
124
- - Using Mistral-7B-Instruct-v0.2 model
125
- - Half-precision (float16) for efficient memory usage
126
- - Automatic device placement (CPU/GPU)
127
  - The official detector will be available in future releases
 
128
  """)
129
 
130
  # Launch the app
 
7
  )
8
 
9
  # Initialize model and tokenizer
10
+ MODEL_NAME = "google/gemma-2b" # You can change this to your preferred model
 
 
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
12
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
 
 
 
 
13
 
14
  # Configure watermarking
15
  WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789] # Example keys
16
  watermarking_config = SynthIDTextWatermarkingConfig(
17
  keys=WATERMARK_KEYS,
18
  ngram_len=5,
19
+ gamma=0.5, # Additional parameter to control watermark strength
20
  )
21
 
 
 
 
 
22
  def apply_watermark(text):
23
  """Apply SynthID watermark to input text."""
24
  try:
 
 
 
25
  # Tokenize input
26
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
 
27
 
28
  # Generate with watermark
29
  with torch.no_grad():
 
31
  **inputs,
32
  watermarking_config=watermarking_config,
33
  do_sample=True,
34
+ max_length=len(inputs["input_ids"][0]) + 100, # Add some extra tokens
35
  pad_token_id=tokenizer.eos_token_id,
36
+ temperature=0.7, # Add some randomness to generation
37
  top_p=0.9
38
  )
39
 
40
  # Decode output
41
  watermarked_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
42
  return watermarked_text, "Watermark applied successfully!"
43
  except Exception as e:
44
  return text, f"Error applying watermark: {str(e)}"
 
46
  def analyze_text(text):
47
  """Analyze text characteristics that might indicate watermarking."""
48
  try:
49
+ # Basic text analysis (since we don't have access to the detector yet)
50
  total_words = len(text.split())
51
  avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
 
 
52
 
53
  # Create analysis report
54
  analysis = f"""Text Analysis:
55
  - Total words: {total_words}
56
  - Average word length: {avg_word_length:.2f}
 
57
 
58
  Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.
59
+ For proper watermark detection, please refer to the official Google DeepMind implementation when it becomes available."""
60
 
61
  return analysis
62
  except Exception as e:
 
65
  # Create Gradio interface
66
  with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
67
  gr.Markdown("# SynthID Text Watermarking Tool")
68
+ gr.Markdown("""This demo shows how to apply SynthID watermarks to text.
69
  Note: The official detector is not yet publicly available.""")
70
 
71
  with gr.Tab("Apply Watermark"):
72
  with gr.Row():
73
+ input_text = gr.Textbox(label="Input Text", lines=5)
74
+ output_text = gr.Textbox(label="Watermarked Text", lines=5)
 
 
 
 
 
 
 
75
  status = gr.Textbox(label="Status")
76
+ apply_btn = gr.Button("Apply Watermark")
77
  apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
78
 
79
  with gr.Tab("Analyze Text"):
80
  with gr.Row():
81
+ analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
 
 
 
 
82
  analyze_result = gr.Textbox(label="Analysis Result", lines=5)
83
  analyze_btn = gr.Button("Analyze Text")
84
  analyze_btn.click(analyze_text, inputs=[analyze_input], outputs=[analyze_result])
85
 
86
  gr.Markdown("""
87
+ ### Notes:
88
+ - The watermark is designed to be imperceptible to humans
89
+ - This demo only implements watermark application
 
 
 
 
 
 
 
90
  - The official detector will be available in future releases
91
+ - For production use, use your own secure watermark keys
92
  """)
93
 
94
  # Launch the app