File size: 5,241 Bytes
5ea9e86 a9e6964 5ea9e86 a9e6964 5ea9e86 a9e6964 5ea9e86 a9e6964 5ea9e86 a9e6964 5ea9e86 a9e6964 5ea9e86 a9e6964 5ea9e86 a9e6964 eb0691b a9e6964 5ea9e86 a9e6964 5ea9e86 a9e6964 5ea9e86 180ea05 5ea9e86 180ea05 a9e6964 5ea9e86 eb0691b 5ea9e86 180ea05 eb0691b a9e6964 5ea9e86 a9e6964 180ea05 eb0691b 180ea05 a9e6964 5ea9e86 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import gradio as gr
import torch
import os
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
SynthIDTextWatermarkingConfig,
)
from huggingface_hub import login
def initialize_model(hf_token):
    """Authenticate with Hugging Face and load the model, tokenizer and watermark config.

    Args:
        hf_token: Hugging Face access token. Surrounding whitespace is
            stripped before the token is used.

    Returns:
        A 4-tuple ``(model, tokenizer, watermarking_config, message)``.
        On success the first three items are the loaded objects and
        ``message`` is a success notice. On any failure (including a missing
        or blank token) the first three items are ``None`` and ``message``
        starts with ``"Error initializing model: "``. This function does not
        raise; every exception is converted into that message.
    """
    # Reject a missing/blank token up front. Per the huggingface_hub docs,
    # login(token=None) does not fail but falls back to an interactive
    # prompt, which cannot be answered from a web UI.
    if not isinstance(hf_token, str) or not hf_token.strip():
        return None, None, None, "Error initializing model: Hugging Face token is empty."
    token = hf_token.strip()

    try:
        # Login to Hugging Face.
        # NOTE(review): per the huggingface_hub docs, login() persists the
        # token in the local cache. The token is also passed explicitly to
        # from_pretrained() below, so confirm whether this call is wanted.
        login(token=token)

        # Initialize model and tokenizer with auth token
        model_name = "google/gemma-2b"
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=token,
            device_map="auto",  # let the library choose device placement
        )

        # Configure watermarking. SynthIDTextWatermarkingConfig takes `keys`
        # and `ngram_len`; it has no `gamma` parameter, so passing one makes
        # construction fail with a TypeError.
        watermark_keys = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
        watermarking_config = SynthIDTextWatermarkingConfig(
            keys=watermark_keys,
            ngram_len=5,
        )
        return model, tokenizer, watermarking_config, "Model initialized successfully!"
    except Exception as e:
        # UI boundary: report the failure as a status message instead of
        # letting the exception escape into the Gradio callback.
        return None, None, None, f"Error initializing model: {e}"
class SynthIDApp:
    """State holder and UI callbacks for the SynthID watermarking demo.

    The model, tokenizer and watermarking configuration start as ``None``
    and are populated by :meth:`login`.
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.watermarking_config = None

    def login(self, hf_token):
        """Initialize the model with ``hf_token`` and return the status message.

        The three values returned by ``initialize_model`` are stored on the
        instance; they are ``None`` when initialization failed.
        """
        (
            self.model,
            self.tokenizer,
            self.watermarking_config,
            message,
        ) = initialize_model(hf_token)
        return message

    def apply_watermark(self, text):
        """Generate watermarked text from ``text`` used as a prompt.

        The prompt is tokenized (truncated to 512 tokens) and the model
        samples up to 100 new tokens with the SynthID watermarking config.
        The decoded model output is returned; the input is not rewritten in
        place.

        Args:
            text: Prompt text.

        Returns:
            A ``(text, status)`` tuple. On success ``text`` is the decoded
            model output; on any failure the original ``text`` is returned
            unchanged together with an error message.
        """
        # Compare against None explicitly: truthiness of model/tokenizer
        # objects depends on whether they define __len__/__bool__.
        if (
            self.model is None
            or self.tokenizer is None
            or self.watermarking_config is None
        ):
            return text, "Error: Model not initialized. Please login first."

        # A blank prompt gives the model nothing to continue.
        if not isinstance(text, str) or not text.strip():
            return text, "Error applying watermark: Input text is empty."

        try:
            # Tokenize input and move the tensors to the model's device
            encoded = self.tokenizer(
                text, return_tensors="pt", truncation=True, max_length=512
            )
            encoded = {
                name: tensor.to(self.model.device)
                for name, tensor in encoded.items()
            }

            # Generate with watermark
            with torch.no_grad():
                outputs = self.model.generate(
                    **encoded,
                    watermarking_config=self.watermarking_config,
                    do_sample=True,
                    # Same budget as "prompt length + 100" via max_length.
                    max_new_tokens=100,
                    pad_token_id=self.tokenizer.eos_token_id,
                    temperature=0.7,
                    top_p=0.9,
                )

            # Decode output
            watermarked_text = self.tokenizer.decode(
                outputs[0], skip_special_tokens=True
            )
            return watermarked_text, "Watermark applied successfully!"
        except Exception as e:
            # UI boundary: surface the failure as a status message.
            return text, f"Error applying watermark: {e}"

    def analyze_text(self, text):
        """Return a short report with the word count and average word length.

        Words are whitespace-separated tokens. An empty text reports zero
        words and an average length of 0.00. Any failure (for example a
        non-string ``text``) is returned as an error message rather than
        raised.
        """
        try:
            words = text.split()  # split once, reuse for both statistics
            total_words = len(words)
            avg_word_length = sum(map(len, words)) / total_words if words else 0
            return (
                "Text Analysis:\n"
                f"- Total words: {total_words}\n"
                f"- Average word length: {avg_word_length:.2f}\n"
                "Note: This is a basic analysis. The official SynthID detector"
                " is not yet available in the public transformers package."
            )
        except Exception as e:
            return f"Error analyzing text: {e}"
# Build the Gradio UI. One shared SynthIDApp instance backs every callback.
# NOTE(review): the source's indentation was lost, so the nesting below is
# the conventional layout (paired textboxes in a Row, button underneath) --
# confirm it against the deployed UI.
app_instance = SynthIDApp()

with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
    gr.Markdown("# SynthID Text Watermarking Tool")

    # Login: token entry next to the status read-out.
    with gr.Row():
        hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
        login_status = gr.Textbox(label="Login Status")
    login_btn = gr.Button("Login")
    login_btn.click(
        fn=app_instance.login,
        inputs=[hf_token],
        outputs=[login_status],
    )

    # Tab 1: run watermarked generation on the entered text.
    with gr.Tab("Apply Watermark"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", lines=5)
            output_text = gr.Textbox(label="Watermarked Text", lines=5)
        status = gr.Textbox(label="Status")
        apply_btn = gr.Button("Apply Watermark")
        apply_btn.click(
            fn=app_instance.apply_watermark,
            inputs=[input_text],
            outputs=[output_text, status],
        )

    # Tab 2: basic word statistics for the entered text.
    with gr.Tab("Analyze Text"):
        with gr.Row():
            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
            analyze_result = gr.Textbox(label="Analysis Result", lines=5)
        analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(
            fn=app_instance.analyze_text,
            inputs=[analyze_input],
            outputs=[analyze_result],
        )

    # Usage instructions shown below the tabs.
    gr.Markdown(
        "\n"
        "### Instructions:\n"
        "1. Enter your Hugging Face token and click Login\n"
        "2. Wait for the model to initialize\n"
        "3. Use the tabs to apply watermarks or analyze text\n"
        "### Notes:\n"
        "- The watermark is designed to be imperceptible to humans\n"
        "- This demo only implements watermark application\n"
        "- The official detector will be available in future releases\n"
        "- For production use, use your own secure watermark keys\n"
        "- Your token is never stored and is only used for model access\n"
    )
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    app.launch()