cyberandy committed on
Commit
4827b54
·
verified ·
1 Parent(s): fee0baa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -13
app.py CHANGED
@@ -34,16 +34,21 @@ class SynthIDApp:
34
  try:
35
  # Prepare the API request parameters
36
  # Prepare the API request parameters for watermarking
37
- prompt = f"<s>[INST] Apply watermark to this text: {text} [/INST]"
 
 
 
 
38
  params = {
39
  "inputs": prompt,
40
  "parameters": {
41
- "max_new_tokens": len(text.split()) + 10, # Limit to input length plus some buffer
42
- "min_new_tokens": len(text.split()), # At least return same length
43
  "do_sample": True,
44
- "temperature": 0.1, # Low temperature for more faithful reproduction
45
- "top_p": 0.95,
46
- "repetition_penalty": 1.2, # Prevent repetitions
 
47
  "watermarking_config": {
48
  "keys": self.WATERMARK_KEYS,
49
  "ngram_len": int(ngram_len)
@@ -76,20 +81,29 @@ class SynthIDApp:
76
 
77
  watermarked_text = result[0].get('generated_text', '').strip()
78
 
79
- # Clean up Mistral's response format
80
- watermarked_text = watermarked_text.split("[/INST]")[-1].strip()
 
 
 
 
 
 
 
81
 
82
  if not watermarked_text:
83
  return text, "Error: No watermarked text generated"
84
 
85
- # More precise length validation
86
  input_words = len(text.split())
87
  output_words = len(watermarked_text.split())
88
 
89
- if output_words < input_words * 0.8:
90
- return text, f"Error: Generated text too short ({output_words} vs {input_words} words)"
91
- if output_words > input_words * 1.2:
92
- return text, f"Error: Generated text too long ({output_words} vs {input_words} words)"
 
 
93
 
94
  return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
95
  else:
 
34
  try:
35
  # Prepare the API request parameters
36
  # Prepare the API request parameters for watermarking
37
+ prompt = f"<s>[INST] Apply watermark to the following text WITHOUT adding any extra words or changing the meaning: {text} [/INST]"
38
+
39
+ # Calculate exact token limits
40
+ input_length = len(text.split())
41
+
42
  params = {
43
  "inputs": prompt,
44
  "parameters": {
45
+ "max_new_tokens": input_length + 2, # Very strict limit
46
+ "min_new_tokens": input_length - 2, # Allow tiny variations
47
  "do_sample": True,
48
+ "temperature": 0.01, # Almost deterministic
49
+ "top_p": 0.99,
50
+ "repetition_penalty": 1.5, # Strongly prevent repetitions
51
+ "stop": ["[/INST]", "\n", "."], # Stop at natural boundaries
52
  "watermarking_config": {
53
  "keys": self.WATERMARK_KEYS,
54
  "ngram_len": int(ngram_len)
 
81
 
82
  watermarked_text = result[0].get('generated_text', '').strip()
83
 
84
+ # Clean up Mistral's response format and extract only the relevant part
85
+ parts = watermarked_text.split("[/INST]")
86
+ if len(parts) > 1:
87
+ watermarked_text = parts[-1].strip()
88
+
89
+ # Remove any trailing periods or extra spaces
90
+ watermarked_text = watermarked_text.rstrip('.')
91
+ while ' ' in watermarked_text:
92
+ watermarked_text = watermarked_text.replace(' ', ' ')
93
 
94
  if not watermarked_text:
95
  return text, "Error: No watermarked text generated"
96
 
97
+ # Very strict length validation
98
  input_words = len(text.split())
99
  output_words = len(watermarked_text.split())
100
 
101
+ if output_words != input_words:
102
+ return text, f"Error: Length mismatch ({output_words} vs {input_words} words)"
103
+
104
+ # Add back the period if the original had one
105
+ if text.strip().endswith('.'):
106
+ watermarked_text += '.'
107
 
108
  return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
109
  else: