cyberandy committed · Commit a3c284e · verified · 1 Parent(s): 69295e5

Update app.py

Files changed (1):
  1. app.py +75 -65
app.py CHANGED
@@ -1,74 +1,61 @@
 import gradio as gr
-import torch
-import os
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    SynthIDTextWatermarkingConfig,
-)
-from huggingface_hub import login
-
-def initialize_model(hf_token):
-    """Initialize the model and tokenizer with authentication."""
-    try:
-        # Login to Hugging Face
-        login(token=hf_token)
-
-        # Initialize model and tokenizer with auth token
-        MODEL_NAME = "google/gemma-2b"
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_NAME,
-            token=hf_token,
-            device_map="auto"  # This will automatically handle GPU if available
-        )
-
-        # Configure watermarking with only the supported parameters
-        WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
-        watermarking_config = SynthIDTextWatermarkingConfig(
-            keys=WATERMARK_KEYS,
-            ngram_len=5
-        )
-
-        return model, tokenizer, watermarking_config, "Model initialized successfully!"
-    except Exception as e:
-        return None, None, None, f"Error initializing model: {str(e)}"
+from huggingface_hub import InferenceClient
+from transformers import SynthIDTextWatermarkingConfig
+import json
 
 class SynthIDApp:
     def __init__(self):
-        self.model = None
-        self.tokenizer = None
+        self.client = None
         self.watermarking_config = None
 
     def login(self, hf_token):
-        """Login and initialize the model."""
-        self.model, self.tokenizer, self.watermarking_config, message = initialize_model(hf_token)
-        return message
+        """Initialize the inference client with authentication."""
+        try:
+            # Initialize the inference client
+            self.client = InferenceClient(
+                model="google/gemma-2b",
+                token=hf_token
+            )
+
+            # Configure watermarking
+            WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
+            self.watermarking_config = SynthIDTextWatermarkingConfig(
+                keys=WATERMARK_KEYS,
+                ngram_len=5
+            )
+
+            # Test the connection
+            _ = self.client.token_count("Test")
+            return "Inference client initialized successfully!"
+        except Exception as e:
+            self.client = None
+            self.watermarking_config = None
+            return f"Error initializing client: {str(e)}"
 
     def apply_watermark(self, text):
-        """Apply SynthID watermark to input text."""
-        if not all([self.model, self.tokenizer, self.watermarking_config]):
-            return text, "Error: Model not initialized. Please login first."
+        """Apply SynthID watermark to input text using the inference endpoint."""
+        if not self.client:
+            return text, "Error: Client not initialized. Please login first."
 
         try:
-            # Tokenize input
-            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
-            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+            # Convert watermarking config to dict for the API call
+            watermark_dict = {
+                "keys": self.watermarking_config.keys,
+                "ngram_len": self.watermarking_config.ngram_len
+            }
 
-            # Generate with watermark
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    **inputs,
-                    watermarking_config=self.watermarking_config,
-                    do_sample=True,
-                    max_length=len(inputs["input_ids"][0]) + 100,
-                    pad_token_id=self.tokenizer.eos_token_id,
-                    temperature=0.7,
-                    top_p=0.9
-                )
+            # Make the API call with watermarking config
+            response = self.client.text_generation(
+                text,
+                max_new_tokens=100,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+                watermarking_config=watermark_dict,
+                return_full_text=False
+            )
 
-            # Decode output
-            watermarked_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            watermarked_text = response
             return watermarked_text, "Watermark applied successfully!"
         except Exception as e:
             return text, f"Error applying watermark: {str(e)}"
@@ -79,9 +66,18 @@ class SynthIDApp:
         total_words = len(text.split())
         avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
 
+        # Get token count if client is available
+        token_info = ""
+        if self.client:
+            try:
+                token_count = self.client.token_count(text)
+                token_info = f"\n- Token count: {token_count}"
+            except:
+                pass
+
         analysis = f"""Text Analysis:
         - Total words: {total_words}
-        - Average word length: {avg_word_length:.2f}
+        - Average word length: {avg_word_length:.2f}{token_info}
 
         Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package."""
 
@@ -94,17 +90,26 @@ app_instance = SynthIDApp()
 
 with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     gr.Markdown("# SynthID Text Watermarking Tool")
+    gr.Markdown("Using Hugging Face Inference Endpoints for faster processing")
 
     # Login section
     with gr.Row():
-        hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
+        hf_token = gr.Textbox(
+            label="Enter Hugging Face Token",
+            type="password",
+            placeholder="hf_..."
+        )
         login_status = gr.Textbox(label="Login Status")
     login_btn = gr.Button("Login")
     login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])
 
     with gr.Tab("Apply Watermark"):
         with gr.Row():
-            input_text = gr.Textbox(label="Input Text", lines=5)
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=5,
+                placeholder="Enter text to watermark..."
+            )
             output_text = gr.Textbox(label="Watermarked Text", lines=5)
         status = gr.Textbox(label="Status")
         apply_btn = gr.Button("Apply Watermark")
@@ -112,7 +117,11 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
 
     with gr.Tab("Analyze Text"):
         with gr.Row():
-            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
+            analyze_input = gr.Textbox(
+                label="Text to Analyze",
+                lines=5,
+                placeholder="Enter text to analyze..."
+            )
             analyze_result = gr.Textbox(label="Analysis Result", lines=5)
         analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])
@@ -120,15 +129,16 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     gr.Markdown("""
    ### Instructions:
    1. Enter your Hugging Face token and click Login
-    2. Wait for the model to initialize
-    3. Use the tabs to apply watermarks or analyze text
+    2. Once connected, you can use the tabs to apply watermarks or analyze text
 
    ### Notes:
+    - This version uses Hugging Face's Inference Endpoints for faster processing
+    - No model download required - everything runs in the cloud
    - The watermark is designed to be imperceptible to humans
    - This demo only implements watermark application
    - The official detector will be available in future releases
    - For production use, use your own secure watermark keys
-    - Your token is never stored and is only used for model access
+    - Your token is never stored and is only used for API access
    """)
 
 # Launch the app
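
If the hosted endpoint ignores the extra watermarking_config field passed to text_generation, the local generation path this commit removes remains a working fallback. Below is a condensed, illustrative sketch of that path with the same demo keys and ngram length; it assumes transformers >= 4.46, access to the gated google/gemma-2b checkpoint, and an HF_TOKEN environment variable as a placeholder for your own token.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, SynthIDTextWatermarkingConfig

# Placeholder: any token with access to the gated google/gemma-2b checkpoint.
hf_token = os.environ["HF_TOKEN"]

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", token=hf_token)
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", token=hf_token, device_map="auto")

# Same demo keys and ngram length used in this repo; use your own secure keys in production.
watermarking_config = SynthIDTextWatermarkingConfig(
    keys=[654, 400, 836, 123, 340, 443, 597, 160, 57, 789],
    ngram_len=5,
)

inputs = tokenizer("Write a short note about rivers.", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        do_sample=True,          # SynthID watermarking requires sampling
        temperature=0.7,
        top_p=0.9,
        max_new_tokens=100,
        watermarking_config=watermarking_config,  # embeds the SynthID signal during generation
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))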