cyberandy committed on
Commit
fee0baa
·
verified ·
1 Parent(s): 5ed2c98

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -34,14 +34,16 @@ class SynthIDApp:
34
  try:
35
  # Prepare the API request parameters
36
  # Prepare the API request parameters for watermarking
37
- prompt = f"<s>[INST] Rewrite this text exactly as is: {text} [/INST]"
38
  params = {
39
  "inputs": prompt,
40
  "parameters": {
41
- "max_new_tokens": len(text.split()) * 2, # Allow some flexibility
 
42
  "do_sample": True,
43
- "temperature": 0.1, # Lower temperature for more faithful reproduction
44
- "top_p": 0.9,
 
45
  "watermarking_config": {
46
  "keys": self.WATERMARK_KEYS,
47
  "ngram_len": int(ngram_len)
@@ -75,17 +77,19 @@ class SynthIDApp:
75
  watermarked_text = result[0].get('generated_text', '').strip()
76
 
77
  # Clean up Mistral's response format
78
- watermarked_text = watermarked_text.replace("<s>[INST]", "").replace("[/INST]", "")
79
- watermarked_text = watermarked_text.replace("Rewrite this text exactly as is:", "").strip()
80
 
81
  if not watermarked_text:
82
  return text, "Error: No watermarked text generated"
83
 
84
- # Add basic length validation
85
- if len(watermarked_text) < len(text) * 0.5:
86
- return text, "Error: Generated text too short"
87
- if len(watermarked_text) > len(text) * 1.5:
88
- return text, "Error: Generated text too long"
 
 
 
89
 
90
  return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
91
  else:
@@ -138,7 +142,7 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
138
  label="Input Text",
139
  lines=5,
140
  placeholder="Enter text to watermark...",
141
- value="The quick brown fox jumps over the lazy dog."
142
  )
143
  output_text = gr.Textbox(label="Watermarked Text", lines=5)
144
  with gr.Column(scale=1):
 
34
  try:
35
  # Prepare the API request parameters
36
  # Prepare the API request parameters for watermarking
37
+ prompt = f"<s>[INST] Apply watermark to this text: {text} [/INST]"
38
  params = {
39
  "inputs": prompt,
40
  "parameters": {
41
+ "max_new_tokens": len(text.split()) + 10, # Limit to input length plus some buffer
42
+ "min_new_tokens": len(text.split()), # At least return same length
43
  "do_sample": True,
44
+ "temperature": 0.1, # Low temperature for more faithful reproduction
45
+ "top_p": 0.95,
46
+ "repetition_penalty": 1.2, # Prevent repetitions
47
  "watermarking_config": {
48
  "keys": self.WATERMARK_KEYS,
49
  "ngram_len": int(ngram_len)
 
77
  watermarked_text = result[0].get('generated_text', '').strip()
78
 
79
  # Clean up Mistral's response format
80
+ watermarked_text = watermarked_text.split("[/INST]")[-1].strip()
 
81
 
82
  if not watermarked_text:
83
  return text, "Error: No watermarked text generated"
84
 
85
+ # More precise length validation
86
+ input_words = len(text.split())
87
+ output_words = len(watermarked_text.split())
88
+
89
+ if output_words < input_words * 0.8:
90
+ return text, f"Error: Generated text too short ({output_words} vs {input_words} words)"
91
+ if output_words > input_words * 1.2:
92
+ return text, f"Error: Generated text too long ({output_words} vs {input_words} words)"
93
 
94
  return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
95
  else:
 
142
  label="Input Text",
143
  lines=5,
144
  placeholder="Enter text to watermark...",
145
+ value="This is a test sentence for the watermarking system."
146
  )
147
  output_text = gr.Textbox(label="Watermarked Text", lines=5)
148
  with gr.Column(scale=1):