cyberandy committed
Commit 1ce31e1 · verified · 1 Parent(s): 97627fd

Update app.py

Files changed (1)
  1. app.py +30 -7
app.py CHANGED
@@ -33,17 +33,26 @@ class SynthIDApp:
 
         try:
             # Prepare the API request parameters
+            # Calculate input length in tokens first
+            test_response = requests.post(
+                self.api_url,
+                headers=self.headers,
+                json={"inputs": text}
+            )
+            input_length = len(test_response.json()[0]['tokens'])
+
+            # Prepare the API request parameters for watermarking
             params = {
                 "inputs": text,
                 "parameters": {
-                    "return_full_text": True,
+                    "max_length": input_length, # Limit to input length
+                    "min_length": input_length, # Force exact length
                     "do_sample": True,
                     "temperature": 0.7,
                     "top_p": 0.9,
-                    "max_length": None, # Use input length
                     "watermarking_config": {
                         "keys": self.WATERMARK_KEYS,
-                        "ngram_len": int(ngram_len) # Ensure integer
+                        "ngram_len": int(ngram_len)
                     }
                 }
             }
@@ -56,14 +65,26 @@ class SynthIDApp:
             )
             response.raise_for_status()
 
+            # Make the API call
+            response = requests.post(
+                self.api_url,
+                headers=self.headers,
+                json=params
+            )
+            response.raise_for_status()
+
             # Extract the watermarked text
             result = response.json()
             if isinstance(result, list) and len(result) > 0:
-                watermarked_text = result[0].get('generated_text', '')
+                watermarked_text = result[0].get('generated_text', '').strip()
                 if not watermarked_text:
                     return text, "Error: No watermarked text generated"
-                # Clean up any extra whitespace
-                watermarked_text = watermarked_text.strip()
+
+                # Verify we're not getting repetition
+                if len(watermarked_text.split()) > len(text.split()) * 1.5:
+                    return text, "Error: Generated text too long. Please try again."
+
+                return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
             else:
                 return text, "Error: Unexpected API response format"
 
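Read together, the two hunks above replace the single generation call with a two-step flow: a first POST whose response is only used to estimate the input length in tokens, then the watermarking request with max_length/min_length pinned to that estimate and the SynthID watermarking_config attached, followed by a repetition guard on the output. A minimal standalone sketch of that flow is below; the function wrapper and the api_url/headers/watermark_keys parameters are illustrative stand-ins for self.api_url, self.headers and self.WATERMARK_KEYS, while the body mirrors the added lines (including the commit's assumption that the probe response carries a token list):

import requests

def apply_watermark_sketch(api_url, headers, watermark_keys, text, ngram_len):
    # Probe the endpoint once to estimate the input length in tokens
    # (the commit assumes the response is a list whose first item has a 'tokens' field)
    test_response = requests.post(api_url, headers=headers, json={"inputs": text})
    input_length = len(test_response.json()[0]["tokens"])

    # Build the watermarking request, pinning generation to the input length
    params = {
        "inputs": text,
        "parameters": {
            "max_length": input_length,
            "min_length": input_length,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
            "watermarking_config": {
                "keys": watermark_keys,
                "ngram_len": int(ngram_len),
            },
        },
    }

    # Send the watermarking request and pull out the generated text
    response = requests.post(api_url, headers=headers, json=params)
    response.raise_for_status()
    result = response.json()
    if not (isinstance(result, list) and result):
        return text, "Error: Unexpected API response format"

    watermarked_text = result[0].get("generated_text", "").strip()
    if not watermarked_text:
        return text, "Error: No watermarked text generated"
    # Reject outputs that ran well past the input (the diff's repetition guard)
    if len(watermarked_text.split()) > len(text.split()) * 1.5:
        return text, "Error: Generated text too long. Please try again."
    return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"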
 
 
@@ -159,7 +180,9 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     3. Adjust the N-gram Length slider to control watermark characteristics
 
     ### Notes:
-    - This version uses Hugging Face's Inference API for faster processing
+    - The watermarking process attempts to maintain the original meaning while adding the watermark
+    - If you get unexpected results, try adjusting the n-gram length or slightly rephrasing your text
+    - This is an experimental feature using the Inference API
     - No model download required - everything runs in the cloud
     - The watermark is designed to be imperceptible to humans
     - This demo only implements watermark application
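The updated notes describe the key list and n-gram length only at the UI level. For reference, the same two knobs exist in the local transformers generation API; the sketch below is a rough local equivalent, assuming a recent transformers release that ships SynthIDTextWatermarkingConfig, a Gemma checkpoint such as google/gemma-2-2b, and placeholder key values (none of these are taken from this repository):

from transformers import AutoModelForCausalLM, AutoTokenizer, SynthIDTextWatermarkingConfig

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b")

# The same two knobs the app sends inside "watermarking_config": a key list and the n-gram length
watermarking_config = SynthIDTextWatermarkingConfig(
    keys=[654, 400, 836, 123, 340, 443, 597, 160, 57],  # placeholder keys, not the app's WATERMARK_KEYS
    ngram_len=5,                                         # corresponds to the app's N-gram Length slider
)

inputs = tokenizer("Paraphrase the following text: ...", return_tensors="pt")
outputs = model.generate(
    **inputs,
    watermarking_config=watermarking_config,
    do_sample=True,   # SynthID watermarking requires sampling
    temperature=0.7,
    top_p=0.9,
    max_new_tokens=128,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Running locally trades the app's no-download, cloud-only setup for a model download, so it is mainly a fallback for checking results when the Inference API path misbehaves.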