|
import gradio as gr |
|
import torch |
|
import os |
|
from transformers import ( |
|
AutoModelForCausalLM, |
|
AutoTokenizer, |
|
SynthIDTextWatermarkingConfig, |
|
) |
|
from huggingface_hub import login |
|
|
|
def initialize_model(hf_token):
    """Load the Gemma model, its tokenizer, and a SynthID watermarking config.

    Args:
        hf_token: Hugging Face access token used to download the
            ``google/gemma-2b`` checkpoint. Leading/trailing whitespace
            (common when a token is pasted) is ignored.

    Returns:
        A 4-tuple ``(model, tokenizer, watermarking_config, message)``.
        On success ``message`` is ``"Model initialized successfully!"``.
        On any failure the first three items are ``None`` and ``message``
        starts with ``"Error initializing model: "``; errors are reported
        through the message instead of being raised, because the result is
        shown directly in the UI.
    """
    try:
        token = (hf_token or "").strip()
        if not token:
            # Fail before any network access or model download is attempted.
            raise ValueError("Hugging Face token is empty.")

        # Build the watermarking config first: it is cheap, so a bad
        # configuration is reported before the multi-GB model load.
        # Demo keys only; they are hard-coded in source and therefore not secret.
        watermark_keys = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
        # Only ``keys`` and ``ngram_len`` are passed: the original ``gamma``
        # keyword is not a documented parameter of
        # SynthIDTextWatermarkingConfig and would be rejected.
        watermarking_config = SynthIDTextWatermarkingConfig(
            keys=watermark_keys,
            ngram_len=5,
        )

        # The token is handed to ``from_pretrained`` explicitly, so a global
        # ``huggingface_hub.login()`` is unnecessary; skipping it avoids
        # persisting the user's token on the machine running the app.
        model_name = "google/gemma-2b"
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=token,
            device_map="auto",
        )

        return model, tokenizer, watermarking_config, "Model initialized successfully!"
    except Exception as e:
        # UI boundary: surface the failure as a status string.
        return None, None, None, f"Error initializing model: {str(e)}"
|
|
|
class SynthIDApp:
    """Hold the loaded model state and provide the callbacks used by the UI.

    All three attributes stay ``None`` until ``login`` succeeds.
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.watermarking_config = None

    def login(self, hf_token):
        """Initialize the model with ``hf_token`` and return a status message.

        The model, tokenizer and watermarking config are replaced by whatever
        ``initialize_model`` returns, so a failed attempt resets them to
        ``None``.
        """
        self.model, self.tokenizer, self.watermarking_config, message = initialize_model(hf_token)
        return message

    def apply_watermark(self, text):
        """Generate a watermarked continuation of ``text``.

        The input is used as a prompt (truncated to 512 tokens) and up to 100
        new tokens are sampled with the SynthID watermarking config. The
        existing text is not rewritten; the returned text is the decoded
        output sequence of ``generate``.

        Args:
            text: Prompt text from the UI.

        Returns:
            ``(output_text, status)``. On any error the original ``text`` is
            returned unchanged together with an error status.
        """
        # Explicit ``is None`` checks: object truthiness is not a reliable
        # "is loaded" signal (e.g. tokenizers define ``__len__``).
        if self.model is None or self.tokenizer is None or self.watermarking_config is None:
            return text, "Error: Model not initialized. Please login first."

        # Nothing to continue from; avoid sampling from an empty prompt.
        if not text or not text.strip():
            return text, "Error: Please enter some text to watermark."

        try:
            encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            encoded = {name: tensor.to(self.model.device) for name, tensor in encoded.items()}

            with torch.no_grad():
                outputs = self.model.generate(
                    **encoded,
                    watermarking_config=self.watermarking_config,
                    do_sample=True,
                    # Same budget as "prompt length + 100" expressed via max_length.
                    max_new_tokens=100,
                    pad_token_id=self.tokenizer.eos_token_id,
                    temperature=0.7,
                    top_p=0.9,
                )

            watermarked_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return watermarked_text, "Watermark applied successfully!"
        except Exception as e:
            # UI boundary: report the failure instead of raising.
            return text, f"Error applying watermark: {str(e)}"

    def analyze_text(self, text):
        """Return a short report with the word count and average word length.

        Words are whitespace-separated tokens. Any failure (for example a
        non-string ``text``) is returned as an ``"Error analyzing text: ..."``
        string rather than raised.
        """
        try:
            words = text.split()
            total_words = len(words)
            avg_word_length = sum(map(len, words)) / total_words if total_words else 0

            analysis = f"""Text Analysis:
- Total words: {total_words}
- Average word length: {avg_word_length:.2f}

Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package."""

            return analysis
        except Exception as e:
            return f"Error analyzing text: {str(e)}"
|
|
|
|
|
# Single shared application state used by every UI callback below.
app_instance = SynthIDApp()

# Help text rendered at the bottom of the page.
_USAGE_NOTES_MD = """
### Instructions:
1. Enter your Hugging Face token and click Login
2. Wait for the model to initialize
3. Use the tabs to apply watermarks or analyze text

### Notes:
- The watermark is designed to be imperceptible to humans
- This demo only implements watermark application
- The official detector will be available in future releases
- For production use, use your own secure watermark keys
- Your token is never stored and is only used for model access
"""

with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")

    # --- Login: token entry next to the initialization status ---
    with gr.Row():
        hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(
        fn=app_instance.login,
        inputs=[hf_token],
        outputs=[login_status],
    )

    # --- Tab 1: generate watermarked text from a prompt ---
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", lines=5)
            output_text = gr.Textbox(label="Watermarked Text", lines=5)
        status = gr.Textbox(label="Status")
        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            fn=app_instance.apply_watermark,
            inputs=[input_text],
            outputs=[output_text, status],
        )

    # --- Tab 2: basic word statistics ---
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(
            fn=app_instance.analyze_text,
            inputs=[analyze_input],
            outputs=[analyze_result],
        )

    gr.Markdown(_USAGE_NOTES_MD)


# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    app.launch()