mduppes hadyelsahar committed on
Commit
2ebc2c3
·
verified ·
1 Parent(s): fb43736

add alias and model message_size for each model (#1)

Browse files

- add alias and model message_size for each model (3bbdd207da6ee955fbbbb31b8f2d103a59996b50)


Co-authored-by: Hady Elsahar <[email protected]>

Files changed (1) hide show
  1. backend/descriptions.py +120 -90
backend/descriptions.py CHANGED
@@ -89,96 +89,126 @@ METRIC_DESCRIPTIONS = {
89
  }
90
 
91
  MODEL_DESCRIPTIONS = {
92
- "audioseal": {
93
- "full_name": "AudioSeal",
94
- "description": "AudioSeal is the first audio watermarking technique designed specifically for localized detection of AI-generated speech.",
95
- "paper_link": "https://arxiv.org/abs/2401.17264",
96
- "github_link": "https://github.com/facebookresearch/audioseal",
97
- },
98
- "wavmark_fast": {
99
- "full_name": "WavMark",
100
- "description": "WavMark uses invertible networks to hide 32 bits in 1-second audio segments. Detection is performed by sliding along the audio in 0.05-second steps and decoding the message for each window. If the first 10 decoded bits match a synchronization pattern, the rest of the payload is saved (22 bits), and the window can directly slide 1 second (instead of 0.05 seconds).",
101
- "paper_link": "https://arxiv.org/pdf/2308.12770",
102
- "github_link": "https://github.com/wavmark/wavmark",
103
- },
104
- "timbre": {
105
- "full_name": "Timbre",
106
- "description": "Timbre embeds the watermark into the frequency domain, which is inherently robust against common data processing methods.",
107
- "paper_link": "https://arxiv.org/abs/2312.03410",
108
- "github_link": "https://github.com/TimbreWatermarking/TimbreWatermarking",
109
- },
110
- "wam": {
111
- "full_name": "Watermark Anything Model",
112
- "description": "The Watermark Anything Model (WAM) is designed for localized image watermarking.",
113
- "paper_link": "https://arxiv.org/abs/2411.07231",
114
- "github_link": "https://github.com/facebookresearch/watermark-anything",
115
- },
116
- "trustmark": {
117
- "full_name": "TrustMark - Universal Watermarking for Arbitrary Resolution Images",
118
- "description": "TrustMark - a GAN-based watermarking method with novel design in architecture and spatio-spectra losses to balance the trade-off between watermarked image quality with the watermark recovery accuracy.",
119
- "paper_link": "https://arxiv.org/abs/2311.18297",
120
- "github_link": "https://github.com/adobe/trustmark",
121
- },
122
- "ssl": {
123
- "full_name": "Self-Supervised Latent Spaces",
124
- "description": "This approach revisits watermarking techniques using pre-trained deep networks and self-supervised methods to embed marks and binary messages into latent spaces.",
125
- "paper_link": "https://arxiv.org/abs/2112.09581",
126
- "github_link": "https://github.com/facebookresearch/ssl_watermarking",
127
- },
128
- "fnns": {
129
- "full_name": "Fixed Neural Network Steganography",
130
- "description": "This approach revisits steganography through adversarial perturbation: it modifies the image such that a fixed decoder correctly outputs the desired message (similar to SSL but with a different network).",
131
- "paper_link": "https://openreview.net/pdf?id=hcMvApxGSzZ",
132
- "github_link": "https://github.com/varshakishore/FNNS",
133
- },
134
- "hidden": {
135
- "full_name": "Hiding Data With Deep Networks",
136
- "description": "First deep watermarking approach from 2018. We use the model trained and open-sourced here, which uses the same architecture and a similar training procedure. Note that this implementation uses a Just Noticeable Difference heatmap to modulate the watermark distortion for less visibility instead of using a perceptual loss during training like in the original paper.",
137
- "paper_link": "https://arxiv.org/abs/1807.09937",
138
- "github_link": "https://github.com/ando-khachatryan/HiDDeN",
139
- },
140
- "dctdwt": {
141
- "full_name": "Combined DCT-DWT",
142
- "description": "The algorithm watermarks a given image using a combination of the Discrete Wavelet Transform (DWT) and the Discrete Cosine Transform (DCT). Performance evaluation results show that combining the two transforms improved the performance of the watermarking algorithms that are based solely on the DWT transform.",
143
- "paper_link": "https://pdfs.semanticscholar.org/1c47/f281c00cffad4e30deff48a922553cb04d17.pdf",
144
- "github_link": "https://github.com/ShieldMnt/invisible-watermark",
145
- },
146
- "invismark": {
147
- "full_name": "InvisMark: Invisible and Robust Watermarking for AI-generated Image Provenance",
148
- "description": "InvisMark is a post-generation watermarking network that uses a MUNIT-style encoder-decoder to add tiny residual signals to any high-resolution image, leaving the picture visually unchanged. Through adversarially robust training that always optimizes for the hardest noise and compression transforms, the decoder recovers the message with > 97 % bit accuracy after typical edits such as JPEG, crops, blur and color shifts. The system can carry up to 256 bits (enough for a UUID plus error-correction) while preserving image quality at ≈51 dB PSNR and 0.998 SSIM, outperforming prior watermarking work on both imperceptibility and robustness. Together, these design choices make InvisMark a scalable, model-agnostic “soft binding” for proving AI-image provenance in real-world, high-resolution scenarios",
149
- "paper_link": "https://arxiv.org/pdf/2411.07795",
150
- "github_link": "https://github.com/microsoft/InvisMark",
151
- },
152
- "cine_jit": {
153
- "full_name": "CIN: Towards Blind Watermarking: Combining Invertible and Non-invertible Mechanisms",
154
- "description": "It remains a challenge to design a watermarking model with high imperceptibility and robustness against strong noise attacks. To resolve this issue, we present a framework Combining the Invertible and Non-invertible (CIN) mechanisms. The CIN is composed of the invertible part to achieve high imperceptibility and the non-invertible part to strengthen the robustness against strong noise attacks.",
155
- "paper_link": "https://arxiv.org/abs/2212.12678",
156
- "github_link": "https://github.com/rmpku/CIN",
157
- },
158
- "mbrs_jit": {
159
- "full_name": "MBRS: Mini-Batch of Real and Simulated JPEG Compression",
160
- "description": "An end-to-end auto-encoder watermarking framework that, during training, randomly applies one of three noise layers per mini-batch: a real JPEG compressor (with variable quality factors), a differentiable simulated JPEG layer, or a noise-free identity layer. To boost performance it incorporates Squeeze-and-Excitation blocks for richer feature learning, a message processor to expand the payload more effectively, and an additive diffusion block to guard against crop attacks. Under JPEG compression at Q=50, MBRS achieves a bit error rate <0.01% and PSNR >36 dB, while also demonstrating strong robustness to Gaussian filtering, cropping, crop-out, and dropout distortions.",
161
- "paper_link": "https://arxiv.org/pdf/2108.08211",
162
- "github_link": "https://github.com/jzyustc/mbrs",
163
- },
164
- "videoseal_0.0": {
165
- "full_name": "VideoSeal",
166
- "description": "A neural video watermarking system designed to embed imperceptible watermarks that are robust against common video manipulations and processing operations. Legacy model with more robust but visible watermarks.",
167
- "paper_link": "https://arxiv.org/abs/2412.09492",
168
- "github_link": "https://github.com/facebookresearch/videoseal",
169
- },
170
- "videoseal_1.0": {
171
- "full_name": "VideoSeal",
172
- "description": "A neural video watermarking system designed to embed imperceptible watermarks that are robust against common video manipulations and processing operations. Updated model with best balance of efficiency and robustness.",
173
- "paper_link": "https://arxiv.org/abs/2412.09492",
174
- "github_link": "https://github.com/facebookresearch/videoseal",
175
- },
176
- "rivagan": {
177
- "full_name": "RivaGAN",
178
- "description": "A GAN-based approach for robust invisible video watermarking that maintains high visual quality while providing resistance against common video attacks and transformations.",
179
- "paper_link": "https://arxiv.org/abs/1909.01285",
180
- "github_link": "https://github.com/DAI-Lab/RivaGAN",
181
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  }
183
 
184
  DATASET_DESCRIPTIONS = {
 
89
  }
90
 
91
  MODEL_DESCRIPTIONS = {
92
+ "audioseal": {
93
+ "full_name": "AudioSeal",
94
+ "description": "AudioSeal is the first audio watermarking technique designed specifically for localized detection of AI-generated speech.",
95
+ "paper_link": "https://arxiv.org/abs/2401.17264",
96
+ "github_link": "https://github.com/facebookresearch/audioseal",
97
+ "message_size": "16 bits",
98
+ "alias": "AudioSeal"
99
+ },
100
+ "wavmark_fast": {
101
+ "full_name": "WavMark",
102
+ "description": "WavMark uses invertible networks to hide 32 bits in 1-second audio segments. Detection is performed by sliding along the audio in 0.05-second steps and decoding the message for each window. If the first 10 decoded bits match a synchronization pattern, the rest of the payload is saved (22 bits), and the window can directly slide 1 second (instead of 0.05 seconds).",
103
+ "paper_link": "https://arxiv.org/pdf/2308.12770",
104
+ "github_link": "https://github.com/wavmark/wavmark",
105
+ "message_size": "32 bits (16-bit payload)",
106
+ "alias": "WavMark"
107
+ },
108
+ "timbre": {
109
+ "full_name": "Timbre",
110
+ "description": "Timbre embeds the watermark into the frequency domain, which is inherently robust against common data processing methods.",
111
+ "paper_link": "https://arxiv.org/abs/2312.03410",
112
+ "github_link": "https://github.com/TimbreWatermarking/TimbreWatermarking",
113
+ "message_size": "30 bits",
114
+ "alias": "Timbre"
115
+ },
116
+ "wam": {
117
+ "full_name": "Watermark Anything Model",
118
+ "description": "The Watermark Anything Model (WAM) is designed for localized image watermarking.",
119
+ "paper_link": "https://arxiv.org/abs/2411.07231",
120
+ "github_link": "https://github.com/facebookresearch/watermark-anything",
121
+ "message_size": "32 bits per Mask",
122
+ "alias": "WAM"
123
+ },
124
+ "trustmark": {
125
+ "full_name": "TrustMark - Universal Watermarking for Arbitrary Resolution Images",
126
+ "description": "TrustMark - a GAN-based watermarking method with novel design in architecture and spatio-spectra losses to balance the trade-off between watermarked image quality with the watermark recovery accuracy.",
127
+ "paper_link": "https://arxiv.org/abs/2311.18297",
128
+ "github_link": "https://github.com/adobe/trustmark",
129
+ "message_size": "32 bits",
130
+ "alias": "TrustMark"
131
+ },
132
+ "ssl": {
133
+ "full_name": "Self-Supervised Latent Spaces",
134
+ "description": "This approach revisits watermarking techniques using pre-trained deep networks and self-supervised methods to embed marks and binary messages into latent spaces.",
135
+ "paper_link": "https://arxiv.org/abs/2112.09581",
136
+ "github_link": "https://github.com/facebookresearch/ssl_watermarking",
137
+ "message_size": "48 bits",
138
+ "alias": "SSL Watermarking"
139
+ },
140
+ "fnns": {
141
+ "full_name": "Fixed Neural Network Steganography",
142
+ "description": "This approach revisits steganography through adversarial perturbation: it modifies the image such that a fixed decoder correctly outputs the desired message (similar to SSL but with a different network).",
143
+ "paper_link": "https://openreview.net/pdf?id=hcMvApxGSzZ",
144
+ "github_link": "https://github.com/varshakishore/FNNS",
145
+ "message_size": "48 bits",
146
+ "alias": "Fixed NN Steganography"
147
+ },
148
+ "hidden": {
149
+ "full_name": "Hiding Data With Deep Networks",
150
+ "description": "First deep watermarking approach from 2018. We use the model trained and open-sourced here, which uses the same architecture and a similar training procedure. Note that this implementation uses a Just Noticeable Difference heatmap to modulate the watermark distortion for less visibility instead of using a perceptual loss during training like in the original paper.",
151
+ "paper_link": "https://arxiv.org/abs/1807.09937",
152
+ "github_link": "https://github.com/ando-khachatryan/HiDDeN",
153
+ "message_size": "48 bits",
154
+ "alias": "HiDDeN"
155
+ },
156
+ "dctdwt": {
157
+ "full_name": "Combined DCT-DWT",
158
+ "description": "The algorithm watermarks a given image using a combination of the Discrete Wavelet Transform (DWT) and the Discrete Cosine Transform (DCT). Performance evaluation results show that combining the two transforms improved the performance of the watermarking algorithms that are based solely on the DWT transform.",
159
+ "paper_link": "https://pdfs.semanticscholar.org/1c47/f281c00cffad4e30deff48a922553cb04d17.pdf",
160
+ "github_link": "https://github.com/ShieldMnt/invisible-watermark",
161
+ "message_size": "48 bits",
162
+ "alias": "DCT-DWT"
163
+ },
164
+ "invismark": {
165
+ "full_name": "InvisMark: Invisible and Robust Watermarking for AI-generated Image Provenance",
166
+ "description": "InvisMark is a post-generation watermarking network that uses a MUNIT-style encoder-decoder to add tiny residual signals to any high-resolution image, leaving the picture visually unchanged. Through adversarially robust training that always optimizes for the hardest noise and compression transforms, the decoder recovers the message with > 97 % bit accuracy after typical edits such as JPEG, crops, blur and color shifts. The system can carry up to 256 bits (enough for a UUID plus error-correction) while preserving image quality at \u224851 dB PSNR and 0.998 SSIM, outperforming prior watermarking work on both imperceptibility and robustness. Together, these design choices make InvisMark a scalable, model-agnostic \u201csoft binding\u201d for proving AI-image provenance in real-world, high-resolution scenarios",
167
+ "paper_link": "https://arxiv.org/pdf/2411.07795",
168
+ "github_link": "https://github.com/microsoft/InvisMark",
169
+ "message_size": "100 bits",
170
+ "alias": "InvisMark"
171
+ },
172
+ "cine_jit": {
173
+ "full_name": "CIN: Towards Blind Watermarking: Combining Invertible and Non-invertible Mechanisms",
174
+ "description": "It remains a challenge to design a watermarking model with high imperceptibility and robustness against strong noise attacks. To resolve this issue, we present a framework Combining the Invertible and Non-invertible (CIN) mechanisms. The CIN is composed of the invertible part to achieve high imperceptibility and the non-invertible part to strengthen the robustness against strong noise attacks.",
175
+ "paper_link": "https://arxiv.org/abs/2212.12678",
176
+ "github_link": "https://github.com/rmpku/CIN",
177
+ "message_size": "30 bits",
178
+ "alias": "CIN"
179
+ },
180
+ "mbrs_jit": {
181
+ "full_name": "MBRS: Mini-Batch of Real and Simulated JPEG Compression",
182
+ "description": "An end-to-end auto-encoder watermarking framework that, during training, randomly applies one of three noise layers per mini-batch: a real JPEG compressor (with variable quality factors), a differentiable simulated JPEG layer, or a noise-free identity layer. To boost performance it incorporates Squeeze-and-Excitation blocks for richer feature learning, a message processor to expand the payload more effectively, and an additive diffusion block to guard against crop attacks. Under JPEG compression at Q=50, MBRS achieves a bit error rate <0.01% and PSNR >36 dB, while also demonstrating strong robustness to Gaussian filtering, cropping, crop-out, and dropout distortions.",
183
+ "paper_link": "https://arxiv.org/pdf/2108.08211",
184
+ "github_link": "https://github.com/jzyustc/mbrs",
185
+ "message_size": "256 bits",
186
+ "alias": "MBRS"
187
+ },
188
+ "videoseal_0.0": {
189
+ "full_name": "VideoSeal",
190
+ "description": "A neural video watermarking system designed to embed imperceptible watermarks that are robust against common video manipulations and processing operations. Legacy model with more robust but visible watermarks.",
191
+ "paper_link": "https://arxiv.org/abs/2412.09492",
192
+ "github_link": "https://github.com/facebookresearch/videoseal",
193
+ "message_size": "96 bits",
194
+ "alias": "VideoSeal v1.0"
195
+ },
196
+ "videoseal_1.0": {
197
+ "full_name": "VideoSeal",
198
+ "description": "A neural video watermarking system designed to embed imperceptible watermarks that are robust against common video manipulations and processing operations. Updated model with best balance of efficiency and robustness.",
199
+ "paper_link": "https://arxiv.org/abs/2412.09492",
200
+ "github_link": "https://github.com/facebookresearch/videoseal",
201
+ "message_size": "256 bits",
202
+ "alias": "VideoSeal v2.0"
203
+ },
204
+ "rivagan": {
205
+ "full_name": "RivaGAN",
206
+ "description": "A GAN-based approach for robust invisible video watermarking that maintains high visual quality while providing resistance against common video attacks and transformations.",
207
+ "paper_link": "https://arxiv.org/abs/1909.01285",
208
+ "github_link": "https://github.com/DAI-Lab/RivaGAN",
209
+ "message_size": "32 bits",
210
+ "alias": "RivaGAN"
211
+ },
212
  }
213
 
214
  DATASET_DESCRIPTIONS = {