File size: 9,076 Bytes
5ea9e86 9c1cc06 bf8477e 5ea9e86 a9e6964 5ed2c98 9c1cc06 f03955d a9e6964 9c1cc06 a3c284e 9c1cc06 bf8477e a3c284e 9c1cc06 a3c284e 9c1cc06 a3c284e 9c1cc06 bf8477e f03955d 9c1cc06 a9e6964 9c1cc06 1ce31e1 4d833d7 4827b54 9c1cc06 5ed2c98 9c1cc06 4d833d7 9c1cc06 bf8477e 1ce31e1 9c1cc06 a3c284e a9e6964 9c1cc06 bf8477e a3c284e 9c1cc06 a9e6964 1ce31e1 d2f0972 1ce31e1 97627fd 9c1cc06 d2f0972 4d833d7 5ed2c98 4d833d7 4827b54 4d833d7 5ed2c98 97627fd 1ce31e1 4827b54 1ce31e1 9c1cc06 d2f0972 9c1cc06 f03955d a9e6964 f03955d a3c284e a9e6964 f03955d eb0691b f03955d eb0691b a9e6964 5ea9e86 a9e6964 5ea9e86 5ed2c98 a9e6964 a3c284e a9e6964 5ea9e86 f03955d 97627fd 120c013 f03955d 120c013 f03955d 180ea05 f03955d 5ea9e86 eb0691b 5ea9e86 a3c284e eb0691b a9e6964 5ea9e86 a9e6964 a3c284e f03955d a9e6964 180ea05 1ce31e1 a3c284e 180ea05 eb0691b 180ea05 a3c284e 5ea9e86 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
import gradio as gr
import requests
import json
class SynthIDApp:
    """Backend for the SynthID text watermarking demo.

    Talks to the Hugging Face Inference API endpoint for
    Mistral-7B-Instruct-v0.2. The public methods are used as UI callbacks,
    so they report failures through their returned status strings instead
    of raising.
    """

    # Seconds to wait for the Inference API before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Set the endpoint URL and watermark keys; performs no network I/O."""
        self.api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        # Filled in by login(); None means "not authenticated yet".
        self.headers = None
        self.WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]

    def login(self, hf_token):
        """Initialize the API headers with authentication.

        The token is checked by sending a one-token generation request.

        Args:
            hf_token: Hugging Face access token. Surrounding whitespace is
                ignored; an empty token is rejected without any request.

        Returns:
            A human-readable status message. On any failure ``self.headers``
            is reset to ``None``.
        """
        token = str(hf_token or "").strip()
        if not token:
            self.headers = None
            return "Error initializing API: no token provided"

        headers = {"Authorization": f"Bearer {token}"}
        try:
            # Test the connection with a simple query
            response = requests.post(
                self.api_url,
                headers=headers,
                json={"inputs": "Test", "parameters": {"max_new_tokens": 1}},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
        except Exception as e:
            # Callback boundary: surface every failure as a status message.
            self.headers = None
            return f"Error initializing API: {str(e)}"

        # Only keep the headers once the probe request has succeeded.
        self.headers = headers
        return "API connection initialized successfully!"

    def apply_watermark(self, text, ngram_len):
        """Apply SynthID watermark to input text using the inference API.

        Args:
            text: Text the model is asked to reproduce.
            ngram_len: N-gram length sent in ``watermarking_config``
                (converted with ``int``).

        Returns:
            A ``(output_text, status_message)`` tuple. On any failure the
            original ``text`` is returned unchanged together with an error
            status.
        """
        if not self.headers:
            return text, "Error: API not initialized. Please login first."
        if not text or not text.strip():
            # Nothing to watermark; avoid spending an API call on it.
            return text, "Error: No input text provided."

        try:
            # Prepare the API request parameters for watermarking
            prompt = f"<s>[INST] Return the exact same text, with watermark applied: {text} [/INST]"
            params = {
                "inputs": prompt,
                "parameters": {
                    "return_full_text": True,
                    # NOTE(review): presumably SynthID acts on the sampling
                    # step, so with do_sample=False this config may have no
                    # effect -- confirm against the endpoint's behavior.
                    "do_sample": False,  # Deterministic generation
                    "temperature": 0.01,  # Almost deterministic
                    "watermarking_config": {
                        "keys": self.WATERMARK_KEYS,
                        "ngram_len": int(ngram_len),
                    },
                },
            }

            # Make the API call (exactly once, and always with a timeout)
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            result = response.json()

            # A dict-shaped payload with an "error" key is an API error.
            if isinstance(result, dict) and 'error' in result:
                return text, f"API Error: {result['error']}"
            if not (isinstance(result, list) and len(result) > 0):
                return text, f"Error: Unexpected API response format: {str(result)}"

            first = result[0]
            if 'error' in first:
                return text, f"API Error: {first['error']}"

            generated_text = first.get('generated_text', '').strip()
            watermarked_text = self._extract_response(generated_text, text)
            if not watermarked_text:
                return text, "Error: No watermarked text generated"
            return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
        except Exception as e:
            # Callback boundary: surface every failure as a status message.
            return text, f"Error applying watermark: {str(e)}"

    @staticmethod
    def _extract_response(generated_text, original_text):
        """Return the model's answer from *generated_text* without the prompt.

        The request uses ``return_full_text=True``, so the prompt is expected
        to be echoed back. The answer is taken as, in order of preference:
        whatever follows the last ``[/INST]`` marker, whatever follows the
        first occurrence of the original text, or the whole generated text.
        """
        _, marker, tail = generated_text.rpartition("[/INST]")
        if marker:
            answer = tail.strip()
        else:
            idx = generated_text.find(original_text)
            if idx != -1:
                answer = generated_text[idx + len(original_text):].strip()
            else:
                answer = generated_text

        # Clean up surrounding spaces and periods.
        answer = answer.strip(' .')
        # Add back the period if the original had one.
        if answer and original_text.strip().endswith('.'):
            answer += '.'
        return answer

    def analyze_text(self, text):
        """Analyze text characteristics.

        Args:
            text: Text to summarize.

        Returns:
            A multi-line report with the character count, the count of
            whitespace-separated words and the average word length, or an
            error message if the input cannot be processed.
        """
        try:
            words = text.split()
            total_words = len(words)
            avg_word_length = sum(len(word) for word in words) / total_words if total_words > 0 else 0
            char_count = len(text)
        except Exception as e:
            return f"Error analyzing text: {str(e)}"

        return (
            "Text Analysis:\n"
            f"- Total characters: {char_count}\n"
            f"- Total words: {total_words}\n"
            f"- Average word length: {avg_word_length:.2f}\n"
            "Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package."
        )
# Create Gradio interface
# NOTE(review): this single module-level instance keeps the auth headers set
# by login(), and every callback below is bound to it. Presumably that means
# all sessions served by this process share one token -- confirm, and
# consider per-session state (e.g. gr.State) if the app is hosted publicly.
app_instance = SynthIDApp()

with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")
    gr.Markdown("Using Mistral-7B-Instruct-v0.2 with Hugging Face Inference API")

    # Login section
    with gr.Row():
        hf_token = gr.Textbox(
            label="Enter Hugging Face Token",
            type="password",
            placeholder="hf_..."
        )
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])

    # Watermarking tab: input/output text on the left, settings on the right
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            with gr.Column(scale=3):
                input_text = gr.Textbox(
                    label="Input Text",
                    lines=5,
                    placeholder="Enter text to watermark...",
                    value="Test Sentence: WordLift is a cutting-edge platform designed to enhance your digital content by leveraging the power of semantic technology. It transforms your website into a structured repository of knowledge, making your content more discoverable, engaging, and aligned with modern search engine algorithms. By utilizing AI-driven entity extraction and knowledge graph generation, WordLift helps you bridge the gap between your content and search intent, ensuring optimal visibility and performance."
                )
                output_text = gr.Textbox(label="Watermarked Text", lines=5)
            with gr.Column(scale=1):
                ngram_len = gr.Slider(
                    label="N-gram Length",
                    minimum=2,
                    maximum=5,
                    step=1,
                    value=2,
                    info="Controls watermark detectability (2-5)"
                )
                status = gr.Textbox(label="Status")
                # The "Default" line must stay in sync with the slider's
                # value above (2). Markdown content is kept at column 0 so
                # it is not rendered as an indented code block.
                gr.Markdown("""
### N-gram Length Parameter:
- Higher values (4-5): More detectable watermark, but more brittle to changes
- Lower values (2-3): More robust to changes, but harder to detect
- Default (2): Most robust to changes""")
        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            app_instance.apply_watermark,
            inputs=[input_text, ngram_len],
            outputs=[output_text, status]
        )

    # Analysis tab: basic text statistics only, no watermark detection
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(
                label="Text to Analyze",
                lines=5,
                placeholder="Enter text to analyze..."
            )
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])

    gr.Markdown("""
### Instructions:
1. Enter your Hugging Face token and click Login
2. Once connected, you can use the tabs to apply watermarks or analyze text
3. Adjust the N-gram Length slider to control watermark characteristics
### Notes:
- The watermarking process attempts to maintain the original meaning while adding the watermark
- If you get unexpected results, try adjusting the n-gram length or slightly rephrasing your text
- This is an experimental feature using the Inference API
- No model download required - everything runs in the cloud
- The watermark is designed to be imperceptible to humans
- This demo only implements watermark application
- The official detector will be available in future releases
- For production use, use your own secure watermark keys
- Your token is never stored and is only used for API access
""")

# Launch the app
if __name__ == "__main__":
    app.launch()