Spaces:

WordLift
/

synthID

Sleeping

File size: 5,481 Bytes

5ea9e86
a3c284e
 
 
5ea9e86
a9e6964
 
a3c284e
a9e6964
 
 
a3c284e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9e6964
 
a3c284e
 
 
a9e6964
 
a3c284e
 
 
 
 
a9e6964
a3c284e
 
 
 
 
 
 
 
 
 
a9e6964
a3c284e
a9e6964
 
 
 
 
 
 
 
 
 
a3c284e
 
 
 
 
 
 
 
 
a9e6964
eb0691b
a3c284e
eb0691b
a9e6964
 
 
 
 
5ea9e86
 
a9e6964
 
5ea9e86
 
a3c284e
a9e6964
 
 
a3c284e
 
 
 
 
a9e6964
 
 
5ea9e86
 
 
a3c284e
 
 
 
 
180ea05
5ea9e86
180ea05
a9e6964
5ea9e86
eb0691b
5ea9e86
a3c284e
 
 
 
 
eb0691b
 
a9e6964
5ea9e86
 
a9e6964
 
a3c284e
a9e6964
180ea05
a3c284e
 
180ea05
 
eb0691b
180ea05
a3c284e
5ea9e86

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import SynthIDTextWatermarkingConfig
import json

class SynthIDApp:
    """State and callbacks behind the SynthID watermarking demo UI.

    An instance holds an inference client and a watermarking config. Both
    are ``None`` until ``login`` succeeds and are reset to ``None`` whenever
    ``login`` fails. Every public method reports problems through its return
    value (a status string) instead of raising, so the methods can be wired
    directly to UI callbacks.
    """

    # Model requested from the inference client.
    MODEL_ID = "google/gemma-2b"
    # Demo watermark keys; the UI notes say to use your own secure keys in
    # production.
    WATERMARK_KEYS = (654, 400, 836, 123, 340, 443, 597, 160, 57, 789)
    # n-gram length passed to the watermarking config.
    NGRAM_LEN = 5

    def __init__(self):
        # Populated by login(); the other methods use `self.client` to tell
        # whether a login has succeeded.
        self.client = None
        self.watermarking_config = None

    def login(self, hf_token):
        """Initialize the inference client with authentication.

        Args:
            hf_token: Hugging Face access token. Surrounding whitespace is
                stripped before use.

        Returns:
            A status message: the success text, or a string starting with
            "Error initializing client:" on any failure. On failure both
            ``client`` and ``watermarking_config`` are reset to ``None``.
        """
        token = hf_token.strip() if isinstance(hf_token, str) else ""
        if not token:
            # Reject a blank token up front instead of building a client
            # around it; also drop any state left by an earlier login.
            self.client = None
            self.watermarking_config = None
            return "Error initializing client: no Hugging Face token provided."

        try:
            # Initialize the inference client
            self.client = InferenceClient(
                model=self.MODEL_ID,
                token=token,
            )

            # Configure watermarking
            self.watermarking_config = SynthIDTextWatermarkingConfig(
                keys=list(self.WATERMARK_KEYS),
                ngram_len=self.NGRAM_LEN,
            )

            # Test the connection.
            # NOTE(review): `token_count` does not appear in the documented
            # InferenceClient API - confirm it exists in the installed
            # huggingface_hub version. If it does not, this line raises
            # AttributeError and login always reports an error.
            _ = self.client.token_count("Test")
            return "Inference client initialized successfully!"
        except Exception as e:
            # Leave the app in the logged-out state on any failure.
            self.client = None
            self.watermarking_config = None
            return f"Error initializing client: {str(e)}"

    def apply_watermark(self, text):
        """Generate watermarked text from ``text`` via the inference client.

        Args:
            text: Prompt passed to the text-generation call.

        Returns:
            A ``(output_text, status_message)`` tuple. On success
            ``output_text`` is the generation result; on any error it is the
            unchanged input ``text`` and the status starts with "Error".
        """
        if not self.client or self.watermarking_config is None:
            return text, "Error: Client not initialized. Please login first."

        if not text or not text.strip():
            # Nothing to send; avoid a remote call with an empty prompt.
            return text, "Error: Please enter some text to watermark."

        try:
            # Convert watermarking config to dict for the API call
            watermark_dict = {
                "keys": self.watermarking_config.keys,
                "ngram_len": self.watermarking_config.ngram_len,
            }

            # NOTE(review): the documented `text_generation` signature has a
            # boolean `watermark` flag; confirm the installed client accepts
            # a `watermarking_config` keyword. If it does not, this call
            # raises TypeError and the error branch below is always taken.
            response = self.client.text_generation(
                text,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                watermarking_config=watermark_dict,
                return_full_text=False,
            )
            return response, "Watermark applied successfully!"
        except Exception as e:
            return text, f"Error applying watermark: {str(e)}"

    def analyze_text(self, text):
        """Report simple word statistics for ``text``.

        Args:
            text: Text to analyze.

        Returns:
            A multi-line report with the whitespace-separated word count and
            the average word length, plus a token-count line when a client
            is set and the token-count call succeeds. If the analysis itself
            fails, a string starting with "Error analyzing text:".
        """
        try:
            words = text.split()
            total_words = len(words)
            # Guard the division so empty input reports 0.00.
            avg_word_length = (
                sum(len(word) for word in words) / total_words
                if total_words > 0
                else 0
            )

            # Token count is best-effort: any failure just omits the line.
            token_info = ""
            if self.client:
                try:
                    token_count = self.client.token_count(text)
                    token_info = f"\n- Token count: {token_count}"
                except Exception:
                    # Deliberately narrower than a bare except so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    token_info = ""

            return (
                "Text Analysis:\n"
                f"- Total words: {total_words}\n"
                f"- Average word length: {avg_word_length:.2f}{token_info}\n"
                "\n"
                "Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package."
            )
        except Exception as e:
            return f"Error analyzing text: {str(e)}"

# Build the Gradio UI. A single SynthIDApp instance backs every callback, so
# the client created by the login handler is the one the other handlers use.
app_instance = SynthIDApp()

with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    # Page heading and tagline
    gr.Markdown("# SynthID Text Watermarking Tool")
    gr.Markdown("Using Hugging Face Inference Endpoints for faster processing")

    # Authentication: masked token field next to a status read-out, with the
    # button placed below the row
    with gr.Row():
        hf_token = gr.Textbox(
            placeholder="hf_...",
            type="password",
            label="Enter Hugging Face Token",
        )
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(
        fn=app_instance.login,
        inputs=[hf_token],
        outputs=[login_status],
    )

    # Tab 1: prompt in, generated text and status message out
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            input_text = gr.Textbox(
                placeholder="Enter text to watermark...",
                lines=5,
                label="Input Text",
            )
            output_text = gr.Textbox(lines=5, label="Watermarked Text")
            status = gr.Textbox(label="Status")
        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            fn=app_instance.apply_watermark,
            inputs=[input_text],
            outputs=[output_text, status],
        )

    # Tab 2: text in, word-statistics report out
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(
                placeholder="Enter text to analyze...",
                lines=5,
                label="Text to Analyze",
            )
            analyze_result = gr.Textbox(lines=5, label="Analysis Result")
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(
            fn=app_instance.analyze_text,
            inputs=[analyze_input],
            outputs=[analyze_result],
        )

    # Static usage notes rendered under the tabs
    gr.Markdown("""
    ### Instructions:
    1. Enter your Hugging Face token and click Login
    2. Once connected, you can use the tabs to apply watermarks or analyze text
    
    ### Notes:
    - This version uses Hugging Face's Inference Endpoints for faster processing
    - No model download required - everything runs in the cloud
    - The watermark is designed to be imperceptible to humans
    - This demo only implements watermark application
    - The official detector will be available in future releases
    - For production use, use your own secure watermark keys
    - Your token is never stored and is only used for API access
    """)

# Start the server only when this file is run as a script
if __name__ == "__main__":
    app.launch()