cyberandy committed · Commit a3c284e · verified · 1 Parent(s): 69295e5

Update app.py

Files changed (1):
  1. app.py +75 -65
app.py CHANGED
@@ -1,74 +1,61 @@
 import gradio as gr
-import torch
-import os
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    SynthIDTextWatermarkingConfig,
-)
-from huggingface_hub import login
-
-def initialize_model(hf_token):
-    """Initialize the model and tokenizer with authentication."""
-    try:
-        # Login to Hugging Face
-        login(token=hf_token)
-
-        # Initialize model and tokenizer with auth token
-        MODEL_NAME = "google/gemma-2b"
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_NAME,
-            token=hf_token,
-            device_map="auto"  # This will automatically handle GPU if available
-        )
-
-        # Configure watermarking with only the supported parameters
-        WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
-        watermarking_config = SynthIDTextWatermarkingConfig(
-            keys=WATERMARK_KEYS,
-            ngram_len=5
-        )
-
-        return model, tokenizer, watermarking_config, "Model initialized successfully!"
-    except Exception as e:
-        return None, None, None, f"Error initializing model: {str(e)}"
+from huggingface_hub import InferenceClient
+from transformers import SynthIDTextWatermarkingConfig
+import json
 
 class SynthIDApp:
     def __init__(self):
-        self.model = None
-        self.tokenizer = None
+        self.client = None
         self.watermarking_config = None
 
     def login(self, hf_token):
-        """Login and initialize the model."""
-        self.model, self.tokenizer, self.watermarking_config, message = initialize_model(hf_token)
-        return message
+        """Initialize the inference client with authentication."""
+        try:
+            # Initialize the inference client
+            self.client = InferenceClient(
+                model="google/gemma-2b",
+                token=hf_token
+            )
+
+            # Configure watermarking
+            WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]
+            self.watermarking_config = SynthIDTextWatermarkingConfig(
+                keys=WATERMARK_KEYS,
+                ngram_len=5
+            )
+
+            # Test the connection
+            _ = self.client.token_count("Test")
+            return "Inference client initialized successfully!"
+        except Exception as e:
+            self.client = None
+            self.watermarking_config = None
+            return f"Error initializing client: {str(e)}"
 
     def apply_watermark(self, text):
-        """Apply SynthID watermark to input text."""
-        if not all([self.model, self.tokenizer, self.watermarking_config]):
-            return text, "Error: Model not initialized. Please login first."
+        """Apply SynthID watermark to input text using the inference endpoint."""
+        if not self.client:
+            return text, "Error: Client not initialized. Please login first."
 
         try:
-            # Tokenize input
-            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
-            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+            # Convert watermarking config to dict for the API call
+            watermark_dict = {
+                "keys": self.watermarking_config.keys,
+                "ngram_len": self.watermarking_config.ngram_len
+            }
 
-            # Generate with watermark
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    **inputs,
-                    watermarking_config=self.watermarking_config,
-                    do_sample=True,
-                    max_length=len(inputs["input_ids"][0]) + 100,
-                    pad_token_id=self.tokenizer.eos_token_id,
-                    temperature=0.7,
-                    top_p=0.9
-                )
+            # Make the API call with watermarking config
+            response = self.client.text_generation(
+                text,
+                max_new_tokens=100,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+                watermarking_config=watermark_dict,
+                return_full_text=False
+            )
 
-            # Decode output
-            watermarked_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            watermarked_text = response
             return watermarked_text, "Watermark applied successfully!"
         except Exception as e:
             return text, f"Error applying watermark: {str(e)}"
@@ -79,9 +66,18 @@ class SynthIDApp:
         total_words = len(text.split())
         avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
 
+        # Get token count if client is available
+        token_info = ""
+        if self.client:
+            try:
+                token_count = self.client.token_count(text)
+                token_info = f"\n- Token count: {token_count}"
+            except:
+                pass
+
         analysis = f"""Text Analysis:
         - Total words: {total_words}
-        - Average word length: {avg_word_length:.2f}
+        - Average word length: {avg_word_length:.2f}{token_info}
 
         Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package."""
 
@@ -94,17 +90,26 @@ app_instance = SynthIDApp()
 
 with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     gr.Markdown("# SynthID Text Watermarking Tool")
+    gr.Markdown("Using Hugging Face Inference Endpoints for faster processing")
 
     # Login section
     with gr.Row():
-        hf_token = gr.Textbox(label="Enter Hugging Face Token", type="password")
+        hf_token = gr.Textbox(
+            label="Enter Hugging Face Token",
+            type="password",
+            placeholder="hf_..."
+        )
         login_status = gr.Textbox(label="Login Status")
     login_btn = gr.Button("Login")
     login_btn.click(app_instance.login, inputs=[hf_token], outputs=[login_status])
 
     with gr.Tab("Apply Watermark"):
         with gr.Row():
-            input_text = gr.Textbox(label="Input Text", lines=5)
+            input_text = gr.Textbox(
+                label="Input Text",
+                lines=5,
+                placeholder="Enter text to watermark..."
+            )
             output_text = gr.Textbox(label="Watermarked Text", lines=5)
         status = gr.Textbox(label="Status")
         apply_btn = gr.Button("Apply Watermark")
@@ -112,7 +117,11 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
 
     with gr.Tab("Analyze Text"):
         with gr.Row():
-            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
+            analyze_input = gr.Textbox(
+                label="Text to Analyze",
+                lines=5,
+                placeholder="Enter text to analyze..."
+            )
             analyze_result = gr.Textbox(label="Analysis Result", lines=5)
         analyze_btn = gr.Button("Analyze Text")
        analyze_btn.click(app_instance.analyze_text, inputs=[analyze_input], outputs=[analyze_result])
@@ -120,15 +129,16 @@ with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     gr.Markdown("""
    ### Instructions:
    1. Enter your Hugging Face token and click Login
-    2. Wait for the model to initialize
-    3. Use the tabs to apply watermarks or analyze text
+    2. Once connected, you can use the tabs to apply watermarks or analyze text
 
    ### Notes:
+    - This version uses Hugging Face's Inference Endpoints for faster processing
+    - No model download required - everything runs in the cloud
    - The watermark is designed to be imperceptible to humans
    - This demo only implements watermark application
    - The official detector will be available in future releases
    - For production use, use your own secure watermark keys
-    - Your token is never stored and is only used for model access
+    - Your token is never stored and is only used for API access
    """)
 
 # Launch the app
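
If the hosted endpoint ignores the extra watermarking_config field passed to text_generation, the local generation path this commit removes remains a working fallback. Below is a condensed, illustrative sketch of that path with the same demo keys and ngram length; it assumes transformers >= 4.46, access to the gated google/gemma-2b checkpoint, and an HF_TOKEN environment variable as a placeholder for your own token.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, SynthIDTextWatermarkingConfig

# Placeholder: any token with access to the gated google/gemma-2b checkpoint.
hf_token = os.environ["HF_TOKEN"]

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", token=hf_token)
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", token=hf_token, device_map="auto")

# Same demo keys and ngram length used in this repo; use your own secure keys in production.
watermarking_config = SynthIDTextWatermarkingConfig(
    keys=[654, 400, 836, 123, 340, 443, 597, 160, 57, 789],
    ngram_len=5,
)

inputs = tokenizer("Write a short note about rivers.", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        do_sample=True,          # SynthID watermarking requires sampling
        temperature=0.7,
        top_p=0.9,
        max_new_tokens=100,
        watermarking_config=watermarking_config,  # embeds the SynthID signal during generation
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))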