cyberandy committed
Commit 1ce31e1 · verified · 1 Parent(s): 97627fd

Update app.py

Files changed (1)
  1. app.py +30 -7
app.py CHANGED
@@ -33,17 +33,26 @@ class SynthIDApp:
 
         try:
             # Prepare the API request parameters
+            # Calculate input length in tokens first
+            test_response = requests.post(
+                self.api_url,
+                headers=self.headers,
+                json={"inputs": text}
+            )
+            input_length = len(test_response.json()[0]['tokens'])
+
+            # Prepare the API request parameters for watermarking
             params = {
                 "inputs": text,
                 "parameters": {
-                    "return_full_text": True,
+                    "max_length": input_length, # Limit to input length
+                    "min_length": input_length, # Force exact length
                     "do_sample": True,
                     "temperature": 0.7,
                     "top_p": 0.9,
-                    "max_length": None, # Use input length
                     "watermarking_config": {
                         "keys": self.WATERMARK_KEYS,
-                        "ngram_len": int(ngram_len) # Ensure integer
+                        "ngram_len": int(ngram_len)
                     }
                 }
             }
@@ -56,14 +65,26 @@ class SynthIDApp:
             )
             response.raise_for_status()
 
+            # Make the API call
+            response = requests.post(
+                self.api_url,
+                headers=self.headers,
+                json=params
+            )
+            response.raise_for_status()
+
             # Extract the watermarked text
             result = response.json()
             if isinstance(result, list) and len(result) > 0:
-                watermarked_text = result[0].get('generated_text', '')
+                watermarked_text = result[0].get('generated_text', '').strip()
                 if not watermarked_text:
                     return text, "Error: No watermarked text generated"
-                # Clean up any extra whitespace
-                watermarked_text = watermarked_text.strip()
+
+                # Verify we're not getting repetition
+                if len(watermarked_text.split()) > len(text.split()) * 1.5:
+                    return text, "Error: Generated text too long. Please try again."
+
+                return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
             else:
                 return text, "Error: Unexpected API response format"
 
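Read together, the two hunks above replace the single generation call with a two-step flow: a first POST whose response is only used to estimate the input length in tokens, then the watermarking request with max_length/min_length pinned to that estimate and the SynthID watermarking_config attached, followed by a repetition guard on the output. A minimal standalone sketch of that flow is below; the function wrapper and the api_url/headers/watermark_keys parameters are illustrative stand-ins for self.api_url, self.headers and self.WATERMARK_KEYS, while the body mirrors the added lines (including the commit's assumption that the probe response carries a token list):

import requests

def apply_watermark_sketch(api_url, headers, watermark_keys, text, ngram_len):
    # Probe the endpoint once to estimate the input length in tokens
    # (the commit assumes the response is a list whose first item has a 'tokens' field)
    test_response = requests.post(api_url, headers=headers, json={"inputs": text})
    input_length = len(test_response.json()[0]["tokens"])

    # Build the watermarking request, pinning generation to the input length
    params = {
        "inputs": text,
        "parameters": {
            "max_length": input_length,
            "min_length": input_length,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
            "watermarking_config": {
                "keys": watermark_keys,
                "ngram_len": int(ngram_len),
            },
        },
    }

    # Send the watermarking request and pull out the generated text
    response = requests.post(api_url, headers=headers, json=params)
    response.raise_for_status()
    result = response.json()
    if not (isinstance(result, list) and result):
        return text, "Error: Unexpected API response format"

    watermarked_text = result[0].get("generated_text", "").strip()
    if not watermarked_text:
        return text, "Error: No watermarked text generated"
    # Reject outputs that ran well past the input (the diff's repetition guard)
    if len(watermarked_text.split()) > len(text.split()) * 1.5:
        return text, "Error: Generated text too long. Please try again."
    return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"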
 
 
@@ -159,7 +180,9 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     3. Adjust the N-gram Length slider to control watermark characteristics
 
     ### Notes:
-    - This version uses Hugging Face's Inference API for faster processing
+    - The watermarking process attempts to maintain the original meaning while adding the watermark
+    - If you get unexpected results, try adjusting the n-gram length or slightly rephrasing your text
+    - This is an experimental feature using the Inference API
     - No model download required - everything runs in the cloud
     - The watermark is designed to be imperceptible to humans
     - This demo only implements watermark application
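The updated notes describe the key list and n-gram length only at the UI level. For reference, the same two knobs exist in the local transformers generation API; the sketch below is a rough local equivalent, assuming a recent transformers release that ships SynthIDTextWatermarkingConfig, a Gemma checkpoint such as google/gemma-2-2b, and placeholder key values (none of these are taken from this repository):

from transformers import AutoModelForCausalLM, AutoTokenizer, SynthIDTextWatermarkingConfig

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b")

# The same two knobs the app sends inside "watermarking_config": a key list and the n-gram length
watermarking_config = SynthIDTextWatermarkingConfig(
    keys=[654, 400, 836, 123, 340, 443, 597, 160, 57],  # placeholder keys, not the app's WATERMARK_KEYS
    ngram_len=5,                                         # corresponds to the app's N-gram Length slider
)

inputs = tokenizer("Paraphrase the following text: ...", return_tensors="pt")
outputs = model.generate(
    **inputs,
    watermarking_config=watermarking_config,
    do_sample=True,   # SynthID watermarking requires sampling
    temperature=0.7,
    top_p=0.9,
    max_new_tokens=128,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Running locally trades the app's no-download, cloud-only setup for a model download, so it is mainly a fallback for checking results when the Inference API path misbehaves.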