cyberandy committed on
Commit
4d833d7
·
verified ·
1 Parent(s): 4827b54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -27
app.py CHANGED
@@ -34,21 +34,14 @@ class SynthIDApp:
34
  try:
35
  # Prepare the API request parameters
36
  # Prepare the API request parameters for watermarking
37
- prompt = f"<s>[INST] Apply watermark to the following text WITHOUT adding any extra words or changing the meaning: {text} [/INST]"
38
-
39
- # Calculate exact token limits
40
- input_length = len(text.split())
41
 
42
  params = {
43
  "inputs": prompt,
44
  "parameters": {
45
- "max_new_tokens": input_length + 2, # Very strict limit
46
- "min_new_tokens": input_length - 2, # Allow tiny variations
47
- "do_sample": True,
48
- "temperature": 0.01, # Almost deterministic
49
- "top_p": 0.99,
50
- "repetition_penalty": 1.5, # Strongly prevent repetitions
51
- "stop": ["[/INST]", "\n", "."], # Stop at natural boundaries
52
  "watermarking_config": {
53
  "keys": self.WATERMARK_KEYS,
54
  "ngram_len": int(ngram_len)
@@ -79,28 +72,31 @@ class SynthIDApp:
79
  if 'error' in result[0]:
80
  return text, f"API Error: {result[0]['error']}"
81
 
82
- watermarked_text = result[0].get('generated_text', '').strip()
83
 
84
- # Clean up Mistral's response format and extract only the relevant part
85
- parts = watermarked_text.split("[/INST]")
86
- if len(parts) > 1:
87
- watermarked_text = parts[-1].strip()
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- # Remove any trailing periods or extra spaces
90
- watermarked_text = watermarked_text.rstrip('.')
91
- while ' ' in watermarked_text:
92
- watermarked_text = watermarked_text.replace(' ', ' ')
93
 
94
  if not watermarked_text:
95
  return text, "Error: No watermarked text generated"
96
 
97
- # Very strict length validation
98
- input_words = len(text.split())
99
- output_words = len(watermarked_text.split())
100
-
101
- if output_words != input_words:
102
- return text, f"Error: Length mismatch ({output_words} vs {input_words} words)"
103
-
104
  # Add back the period if the original had one
105
  if text.strip().endswith('.'):
106
  watermarked_text += '.'
 
34
  try:
35
  # Prepare the API request parameters
36
  # Prepare the API request parameters for watermarking
37
+ prompt = f"<s>[INST] Return the exact same text, with watermark applied: {text} [/INST]"
 
 
 
38
 
39
  params = {
40
  "inputs": prompt,
41
  "parameters": {
42
+ "return_full_text": True,
43
+ "do_sample": False, # Deterministic generation
44
+ "temperature": 0.01, # Almost deterministic
 
 
 
 
45
  "watermarking_config": {
46
  "keys": self.WATERMARK_KEYS,
47
  "ngram_len": int(ngram_len)
 
72
  if 'error' in result[0]:
73
  return text, f"API Error: {result[0]['error']}"
74
 
75
+ generated_text = result[0].get('generated_text', '').strip()
76
 
77
+ # Extract only the response part after the instruction
78
+ try:
79
+ # First try splitting on [/INST]
80
+ parts = generated_text.split("[/INST]")
81
+ if len(parts) > 1:
82
+ watermarked_text = parts[-1].strip()
83
+ else:
84
+ # If no [/INST], try finding the original text and take what follows
85
+ idx = generated_text.find(text)
86
+ if idx != -1:
87
+ watermarked_text = generated_text[idx + len(text):].strip()
88
+ else:
89
+ # If all else fails, take the whole text
90
+ watermarked_text = generated_text
91
+ except Exception as e:
92
+ return text, f"Error processing response: {str(e)}"
93
 
94
+ # Clean up the text
95
+ watermarked_text = watermarked_text.strip(' .')
 
 
96
 
97
  if not watermarked_text:
98
  return text, "Error: No watermarked text generated"
99
 
 
 
 
 
 
 
 
100
  # Add back the period if the original had one
101
  if text.strip().endswith('.'):
102
  watermarked_text += '.'