"""Gradio demo that sends text to the Hugging Face Inference API with a SynthID watermarking config."""

import json

import gradio as gr
import requests


class SynthIDApp:
    """Backend for the demo UI: holds the API auth headers and performs the API calls.

    Every public method is used as a Gradio callback, so failures are reported
    as human-readable status strings instead of being raised.
    """

    # Seconds to wait for the Inference API before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        # Set by login(); None means "not authenticated yet".
        self.headers = None
        # Demo keys only; the UI notes tell users to supply their own for production.
        self.WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]

    def login(self, hf_token):
        """Validate a Hugging Face token against the API and remember it.

        Args:
            hf_token: The access token typed into the UI.

        Returns:
            A status message describing success or the failure reason.
        """
        # Pasted tokens often carry stray whitespace/newlines, which would
        # produce an invalid Authorization header.
        token = (hf_token or "").strip()
        if not token:
            self.headers = None
            return "Error initializing API: no token provided"

        headers = {"Authorization": f"Bearer {token}"}
        try:
            # Test the connection with a minimal one-token generation.
            response = requests.post(
                self.api_url,
                headers=headers,
                json={"inputs": "Test", "parameters": {"max_new_tokens": 1}},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
        except Exception as e:  # UI boundary: report the failure, never raise
            self.headers = None
            return f"Error initializing API: {str(e)}"

        # Only keep the headers once the token is known to work.
        self.headers = headers
        return "API connection initialized successfully!"

    @staticmethod
    def _extract_response(generated_text, original_text):
        """Return the part of the model output that follows the instruction prompt."""
        # Preferred: everything after the last [/INST] marker.
        parts = generated_text.split("[/INST]")
        if len(parts) > 1:
            return parts[-1].strip()

        # No marker: take whatever follows the echoed original text.
        idx = generated_text.find(original_text)
        if idx != -1:
            return generated_text[idx + len(original_text):].strip()

        # If all else fails, use the whole output.
        return generated_text

    def apply_watermark(self, text, ngram_len):
        """Ask the Inference API to reproduce ``text`` with the watermarking config applied.

        Args:
            text: The text to watermark.
            ngram_len: N-gram length passed in the watermarking config
                (converted with ``int``).

        Returns:
            A ``(output_text, status_message)`` tuple. On any failure the
            original ``text`` is returned unchanged together with an error
            message.
        """
        if not self.headers:
            return text, "Error: API not initialized. Please login first."
        if not text or not text.strip():
            return text, "Error: No input text provided"

        try:
            prompt = f"[INST] Return the exact same text, with watermark applied: {text} [/INST]"
            # NOTE(review): SynthID watermarking works by biasing token sampling;
            # with do_sample=False it may have no effect, and the hosted API may
            # ignore "watermarking_config" entirely -- confirm against the
            # transformers / Inference API documentation.
            params = {
                "inputs": prompt,
                "parameters": {
                    "return_full_text": True,
                    "do_sample": False,  # Deterministic generation
                    "temperature": 0.01,  # Almost deterministic
                    "watermarking_config": {
                        "keys": self.WATERMARK_KEYS,
                        "ngram_len": int(ngram_len),
                    },
                },
            }

            # Single API call, bounded by a timeout so the UI cannot hang.
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            result = response.json()

            # The API may report errors as a bare {"error": ...} object.
            if isinstance(result, dict) and 'error' in result:
                return text, f"API Error: {result['error']}"

            if not (isinstance(result, list) and result and isinstance(result[0], dict)):
                return text, f"Error: Unexpected API response format: {str(result)}"

            first = result[0]
            if 'error' in first:
                return text, f"API Error: {first['error']}"

            generated_text = first.get('generated_text', '').strip()
            watermarked_text = self._extract_response(generated_text, text)

            # Clean up surrounding spaces and periods.
            watermarked_text = watermarked_text.strip(' .')
            if not watermarked_text:
                return text, "Error: No watermarked text generated"

            # Add back the period if the original had one.
            if text.strip().endswith('.'):
                watermarked_text += '.'

            return watermarked_text, f"Watermark applied successfully! (ngram_len: {ngram_len})"
        except Exception as e:  # UI boundary: report the failure, never raise
            return text, f"Error applying watermark: {str(e)}"

    def analyze_text(self, text):
        """Return a short report with character count, word count and average word length.

        Args:
            text: The text to analyze.

        Returns:
            The formatted report, or an error message if analysis fails.
        """
        try:
            words = text.split()
            total_words = len(words)
            # Guard against division by zero for empty/whitespace-only input.
            avg_word_length = sum(len(word) for word in words) / total_words if total_words > 0 else 0
            char_count = len(text)

            return (
                "Text Analysis:\n"
                f"- Total characters: {char_count}\n"
                f"- Total words: {total_words}\n"
                f"- Average word length: {avg_word_length:.2f}\n"
                "\n"
                "Note: This is a basic analysis. The official SynthID detector is not yet "
                "available in the public transformers package."
            )
        except Exception as e:  # UI boundary: report the failure, never raise
            return f"Error analyzing text: {str(e)}"


# Pre-filled sample shown in the "Input Text" box.
DEFAULT_INPUT_TEXT = (
    "Test Sentence: WordLift is a cutting-edge platform designed to enhance your digital "
    "content by leveraging the power of semantic technology. It transforms your website "
    "into a structured repository of knowledge, making your content more discoverable, "
    "engaging, and aligned with modern search engine algorithms. By utilizing AI-driven "
    "entity extraction and knowledge graph generation, WordLift helps you bridge the gap "
    "between your content and search intent, ensuring optimal visibility and performance."
)

# Markdown panels are kept at column 0 so leading indentation is never
# interpreted as a Markdown code block.
NGRAM_HELP_MD = """
### N-gram Length Parameter:
- Higher values (4-5): More detectable watermark, but more brittle to changes
- Lower values (2-3): More robust to changes, but harder to detect
- Default (2): Maximum robustness to changes
"""

INSTRUCTIONS_MD = """
### Instructions:
1. Enter your Hugging Face token and click Login
2. Once connected, you can use the tabs to apply watermarks or analyze text
3. Adjust the N-gram Length slider to control watermark characteristics

### Notes:
- The watermarking process attempts to maintain the original meaning while adding the watermark
- If you get unexpected results, try adjusting the n-gram length or slightly rephrasing your text
- This is an experimental feature using the Inference API
- No model download required - everything runs in the cloud
- The watermark is designed to be imperceptible to humans
- This demo only implements watermark application
- The official detector will be available in future releases
- For production use, use your own secure watermark keys
- Your token is never stored and is only used for API access
"""

# Create Gradio interface
# NOTE(review): a single SynthIDApp instance is shared by every browser
# session, so the headers set by one user's login are used for all users.
app_instance = SynthIDApp()

# NOTE(review): the widget nesting below is inferred from statement order --
# confirm the intended layout.
with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")
    gr.Markdown("Using Mistral-7B-Instruct-v0.2 with Hugging Face Inference API")

    # Login section
    with gr.Row():
        hf_token = gr.Textbox(
            label="Enter Hugging Face Token",
            type="password",
            placeholder="hf_...",
        )
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])

    with gr.Tab("Apply Watermark"):
        with gr.Row():
            with gr.Column(scale=3):
                input_text = gr.Textbox(
                    label="Input Text",
                    lines=5,
                    placeholder="Enter text to watermark...",
                    value=DEFAULT_INPUT_TEXT,
                )
                output_text = gr.Textbox(label="Watermarked Text", lines=5)
            with gr.Column(scale=1):
                ngram_len = gr.Slider(
                    label="N-gram Length",
                    minimum=2,
                    maximum=5,
                    step=1,
                    value=2,
                    info="Controls watermark detectability (2-5)",
                )
                status = gr.Textbox(label="Status")

        gr.Markdown(NGRAM_HELP_MD)

        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            app_instance.apply_watermark,
            inputs=[input_text, ngram_len],
            outputs=[output_text, status],
        )

    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(
                label="Text to Analyze",
                lines=5,
                placeholder="Enter text to analyze...",
            )
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])

    gr.Markdown(INSTRUCTIONS_MD)

# Launch the app
if __name__ == "__main__":
    app.launch()