Spaces:

WordLift
/

synthID

Sleeping

App Files Community

cyberandy committed on Oct 25, 2024

Commit

4827b54

verified ·

1 Parent(s): fee0baa

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -13

app.py CHANGED Viewed

@@ -34,16 +34,21 @@ class SynthIDApp:
         try:
             # Prepare the API request parameters
             # Prepare the API request parameters for watermarking
-            prompt = f"<s>[INST] Apply watermark to this text: {text} [/INST]"
             params = {
                 "inputs": prompt,
                 "parameters": {
-                    "max_new_tokens": len(text.split()) + 10,  # Limit to input length plus some buffer
-                    "min_new_tokens": len(text.split()),       # At least return same length
                     "do_sample": True,
-                    "temperature": 0.1,     # Low temperature for more faithful reproduction
-                    "top_p": 0.95,
-                    "repetition_penalty": 1.2,  # Prevent repetitions
                     "watermarking_config": {
                         "keys": self.WATERMARK_KEYS,
                         "ngram_len": int(ngram_len)
@@ -76,20 +81,29 @@ class SynthIDApp:
                 watermarked_text = result[0].get('generated_text', '').strip()
-                # Clean up Mistral's response format
-                watermarked_text = watermarked_text.split("[/INST]")[-1].strip()
                 if not watermarked_text:
                     return text, "Error: No watermarked text generated"
-                # More precise length validation
                 input_words = len(text.split())
                 output_words = len(watermarked_text.split())
-                if output_words < input_words * 0.8:
-                    return text, f"Error: Generated text too short ({output_words} vs {input_words} words)"
-                if output_words > input_words * 1.2:
-                    return text, f"Error: Generated text too long ({output_words} vs {input_words} words)"
                 return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
             else:

         try:
             # Prepare the API request parameters
             # Prepare the API request parameters for watermarking
+            prompt = f"<s>[INST] Apply watermark to the following text WITHOUT adding any extra words or changing the meaning: {text} [/INST]"
+            # Calculate exact token limits
+            input_length = len(text.split())
             params = {
                 "inputs": prompt,
                 "parameters": {
+                    "max_new_tokens": input_length + 2,    # Very strict limit
+                    "min_new_tokens": input_length - 2,    # Allow tiny variations
                     "do_sample": True,
+                    "temperature": 0.01,                   # Almost deterministic
+                    "top_p": 0.99,
+                    "repetition_penalty": 1.5,            # Strongly prevent repetitions
+                    "stop": ["[/INST]", "\n", "."],      # Stop at natural boundaries
                     "watermarking_config": {
                         "keys": self.WATERMARK_KEYS,
                         "ngram_len": int(ngram_len)
                 watermarked_text = result[0].get('generated_text', '').strip()
+                # Clean up Mistral's response format and extract only the relevant part
+                parts = watermarked_text.split("[/INST]")
+                if len(parts) > 1:
+                    watermarked_text = parts[-1].strip()
+                # Remove any trailing periods or extra spaces
+                watermarked_text = watermarked_text.rstrip('.')
+                while '  ' in watermarked_text:
+                    watermarked_text = watermarked_text.replace('  ', ' ')
                 if not watermarked_text:
                     return text, "Error: No watermarked text generated"
+                # Very strict length validation
                 input_words = len(text.split())
                 output_words = len(watermarked_text.split())
+                if output_words != input_words:
+                    return text, f"Error: Length mismatch ({output_words} vs {input_words} words)"
+                # Add back the period if the original had one
+                if text.strip().endswith('.'):
+                    watermarked_text += '.'
                 return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
             else: