mariotawfik committed on
Commit
a3956a0
·
1 Parent(s): 0c213af
Files changed (1) hide show
  1. app.py +233 -33
app.py CHANGED
@@ -85,7 +85,7 @@ def keygen():
85
  return [list(evaluation_key)[:ENCRYPTED_DATA_BROWSER_LIMIT], user_id]
86
 
87
 
88
- def encode_quantize_encrypt(test_file, eval_key):
89
  ugly = ['Machine', 'SizeOfOptionalHeader', 'Characteristics',
90
  'MajorLinkerVersion', 'MinorLinkerVersion', 'SizeOfCode',
91
  'SizeOfInitializedData', 'SizeOfUninitializedData',
@@ -119,6 +119,10 @@ def encode_quantize_encrypt(test_file, eval_key):
119
 
120
  encodings = np.array(encodings).reshape(1, -1)
121
 
 
 
 
 
122
  quantized_encodings = fhe_api.model.quantize_input(encodings).astype(numpy.uint8)
123
  encrypted_quantized_encoding = fhe_api.quantize_encrypt_serialize(encodings)
124
 
@@ -132,7 +136,6 @@ def encode_quantize_encrypt(test_file, eval_key):
132
  encrypted_quantized_encoding_shorten_hex = "".join(f"{i:02x}" for i in encrypted_quantized_encoding_shorten)
133
  return (encodings[0],quantized_encodings[0],encrypted_quantized_encoding_shorten_hex)
134
 
135
-
136
  def run_fhe(user_id):
137
  encoded_data_path = Path(f"tmp/tmp_encrypted_quantized_encoding_{user_id}.npy")
138
  encrypted_quantized_encoding = numpy.load(encoded_data_path)
@@ -182,15 +185,6 @@ def decrypt_prediction(user_id):
182
  return predictions
183
 
184
 
185
- def process_pipeline(test_file):
186
-
187
- eval_key = keygen()
188
- encodings = encode_quantize_encrypt(test_file, eval_key)
189
- encrypted_quantized_encoding = run_fhe(eval_key[1])
190
- encrypted_prediction = decrypt_prediction(eval_key[1])
191
-
192
- return eval_key, encodings, encrypted_quantized_encoding, encrypted_prediction
193
-
194
  if __name__ == "__main__":
195
  """
196
  with gr.Blocks() as demo:
@@ -216,34 +210,240 @@ if __name__ == "__main__":
216
 
217
 
218
  """
219
- with gr.Blocks() as demo:
220
- print("Starting the FHE Model")
221
 
222
-
223
- inputs = (
224
- [
225
- gr.File(label="Test File"),
226
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  )
228
- outputs = (
229
- [
230
- gr.Textbox(label="Evaluation Key"),
231
- gr.Textbox(label="Encodings"),
232
- gr.Textbox(label="Encrypted Quantized Encoding"),
233
- gr.Textbox(label="Encrypted Prediction"),
234
- ],
235
  )
236
- title = ("FHE Model",)
237
- description = ("This is a FHE Model",)
238
 
239
- #gr.Interface(fn, inputs, outputs, title=title, description=description).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
- run_bottn = gr.Button("Run")
 
 
 
 
 
 
 
 
 
242
 
243
- run_bottn.click(
244
- fn=(process_pipeline),
245
- inputs=inputs, outputs=outputs
 
 
 
 
 
 
 
 
246
  )
247
 
248
 
249
- demo.launch() #share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  return [list(evaluation_key)[:ENCRYPTED_DATA_BROWSER_LIMIT], user_id]
86
 
87
 
88
+ def encode_quantize(test_file, eval_key):
89
  ugly = ['Machine', 'SizeOfOptionalHeader', 'Characteristics',
90
  'MajorLinkerVersion', 'MinorLinkerVersion', 'SizeOfCode',
91
  'SizeOfInitializedData', 'SizeOfUninitializedData',
 
119
 
120
  encodings = np.array(encodings).reshape(1, -1)
121
 
122
+ return encodings
123
+
124
+ def encrypt_encoded_quantize(encodings):
125
+
126
  quantized_encodings = fhe_api.model.quantize_input(encodings).astype(numpy.uint8)
127
  encrypted_quantized_encoding = fhe_api.quantize_encrypt_serialize(encodings)
128
 
 
136
  encrypted_quantized_encoding_shorten_hex = "".join(f"{i:02x}" for i in encrypted_quantized_encoding_shorten)
137
  return (encodings[0],quantized_encodings[0],encrypted_quantized_encoding_shorten_hex)
138
 
 
139
  def run_fhe(user_id):
140
  encoded_data_path = Path(f"tmp/tmp_encrypted_quantized_encoding_{user_id}.npy")
141
  encrypted_quantized_encoding = numpy.load(encoded_data_path)
 
185
  return predictions
186
 
187
 
 
 
 
 
 
 
 
 
 
188
  if __name__ == "__main__":
189
  """
190
  with gr.Blocks() as demo:
 
210
 
211
 
212
  """
 
 
213
 
214
+ print("🚀 Starting the ClairVault demo...")
215
+ with gr.Blocks(css=".gradio-container { font-size: 20px; }") as demo:
216
+
217
+ gr.Markdown(
218
+ """
219
+ <p align="center">
220
+ <img width=200 src="/api/placeholder/200/200" alt="ClairVault Logo">
221
+ </p>
222
+ <h2 align="center">🔒 ClairVault: Privacy-Preserving Cloud-Based Malware Scanning</h2>
223
+ <p align="center">
224
+ <a href="#">GitHub</a>
225
+ —
226
+ <a href="#">Documentation</a>
227
+ —
228
+ <a href="#">Community</a>
229
+ —
230
+ <a href="#">@ClairVault</a>
231
+ </p>
232
+ <p align="center">
233
+ <img src="/api/placeholder/600/300" alt="ClairVault Concept" width="60%" height="60%">
234
+ </p>
235
+ """
236
  )
237
+
238
+ gr.Markdown("## 📋 Executive Summary")
239
+ gr.Markdown(
240
+ """
241
+ ### ClairVault is a **malware scanning service**, cloud-based and machine-learning enabled, that leverages fully homomorphic encryption (FHE) to securely scan files without seeing your data.
242
+ """
 
243
  )
 
 
244
 
245
+ gr.Markdown("## 🚨 The Problem")
246
+ gr.Markdown(
247
+ """
248
+ 1. Privacy: do you really trust the Russian-based Kaspersky and that there is no back-door built in McAfee by Uncle Sam?
249
+ - Require access to plaintext data, posing privacy and security risks
250
+ - Often closed-source, lacking transparency
251
+ 2. Protection:
252
+ - The malware scanner itself might be compromised
253
+ - Require frequent local updates
254
+ - Mostly rule-based → need machine learning - but mostly on cloud
255
+ 3. Resources
256
+ - Consume local resources (Windows Defender only runs ML based models on the cloud)
257
+ """
258
+ )
259
 
260
+ gr.Markdown("## 💡 Our Solution: ClairVault")
261
+ gr.Markdown(
262
+ """
263
+ Key features include:
264
+ - Local extraction of features and encryption of user data (files, logs)
265
+ - Transmission of encrypted data to secure, open-source cloud servers
266
+ - Malware classification performed on encrypted data
267
+ - Return of encrypted classification results
268
+ """
269
+ )
270
 
271
+ gr.Markdown("### 🛠️ Technical Implementation")
272
+ gr.Markdown(
273
+ """
274
+ - **Encryption Method**: Fully Homomorphic Encryption using the TFHE (Fast Fully Homomorphic Encryption over the Torus) library
275
+ - **Machine Learning Model**: Linear classifier optimized for FHE computations using ConcreteML
276
+ - **Performance Metrics**:
277
+ - Encrypted file scanning: ~30 seconds per MB
278
+ - Plaintext file scanning: ~0.5 seconds per MB
279
+ - Estimated time to scan 1GB: ~8.5 hours (encrypted) vs. ~8.5 minutes (plaintext)
280
+ *Note: These are preliminary figures based on our proof-of-concept. We aim to significantly improve performance in future iterations.*
281
+ """
282
  )
283
 
284
 
285
+
286
+ gr.Markdown("# 🗝️ Step 1: Generate the keys")
287
+
288
+ b_gen_key = gr.Button("🔑 Generate the keys and send public part to server")
289
+
290
+ evaluation_key = gr.Textbox(
291
+ label="Evaluation key (truncated):",
292
+ max_lines=4,
293
+ interactive=False,
294
+ )
295
+
296
+ gr.Markdown("# 📤 Step 2: Upload a file for scanning")
297
+ gr.Markdown("## Client side")
298
+ gr.Markdown(
299
+ "Upload a file you want to scan for malware. ClairVault will encrypt it locally before sending it to the cloud."
300
+ )
301
+ file_input = gr.File(label="Upload a file:")
302
+
303
+ gr.Markdown("# 📥 Step 3: Extract executable file features")
304
+
305
+ b_extract = gr.Button("📥 Extract features and save")
306
+
307
+ extracted_vector = gr.Textbox(
308
+ label="Extracted vector:",
309
+ max_lines=4,
310
+ interactive=False,
311
+ )
312
+
313
+ gr.Markdown("# 🔒 Step 4: Encrypt the file with the private key")
314
+ b_encrypt_file = gr.Button(
315
+ "🔐 Encrypt the file and send to server"
316
+ )
317
+
318
+ encrypted_file = gr.Textbox(
319
+ label="Encrypted file content (truncated):",
320
+ max_lines=4,
321
+ interactive=False,
322
+ )
323
+
324
+ gr.Markdown("# 🖥️ Step 5: Run the FHE-based malware scan")
325
+ gr.Markdown("## Server side")
326
+ gr.Markdown(
327
+ "The encrypted file is received by the server. Using the evaluation key and FHE, the server can perform the malware scan directly on the encrypted data. Once the scan is finished, the server returns the encrypted result to the client."
328
+ )
329
+
330
+ b_run_fhe_scan = gr.Button("🛡️ Run FHE-based malware scan")
331
+ encrypted_scan_result = gr.Textbox(
332
+ label="Encrypted scan result (truncated):",
333
+ max_lines=4,
334
+ interactive=False,
335
+ )
336
+
337
+ gr.Markdown("# 🔓 Step 6: Decrypt the scan result")
338
+ gr.Markdown("## Client side")
339
+ gr.Markdown(
340
+ "The encrypted scan result is sent back to the client, who can finally decrypt it with their private key. Only the client is aware of the original file content and the scan result."
341
+ )
342
+ b_decrypt_result = gr.Button("🔍 Decrypt scan result")
343
+
344
+ scan_result = gr.Textbox(label="Scan Result:")
345
+
346
+ eval_key = keygen()
347
+ encodings = encode_quantize(file_input, eval_key)
348
+ encrypted_quantized_encoding = encrypt_encoded_quantize(encodings)
349
+ encrypted_prediction = run_fhe(eval_key[1])
350
+ decrypted_encryption = decrypt_prediction(eval_key[1])
351
+
352
+ # Button for key generation
353
+ b_gen_key.click(eval_key, inputs=[], outputs=[evaluation_key])
354
+
355
+ # Button to extract vector
356
+ b_extract.click(
357
+ encodings,
358
+ inputs=[],
359
+ outputs=[extracted_vector],
360
+ )
361
+
362
+ # Button to encrypt file
363
+ b_encrypt_file.click(
364
+ encrypted_quantized_encoding,
365
+ inputs=[],
366
+ outputs=[encrypted_file],
367
+ )
368
+
369
+ # Button to run FHE-based malware scan
370
+ b_run_fhe_scan.click(encrypted_prediction, inputs=[], outputs=[encrypted_scan_result])
371
+
372
+ # Button to decrypt the scan result
373
+ b_decrypt_result.click(decrypted_encryption, inputs=[], outputs=[scan_result])
374
+
375
+ gr.Markdown(
376
+ "ClairVault is built using advanced Fully Homomorphic Encryption techniques to ensure your data remains private and secure throughout the entire malware scanning process."
377
+ )
378
+ gr.Markdown("## 🌍 Market Opportunity")
379
+ gr.Markdown(
380
+ """
381
+ The global cybersecurity market is valued at $60 billion in 2024 with an annual growth rate of 15% projected by Morgan Stanley. In 2023, there were 6 billion cyberattacks, a 10% increase from 2022.
382
+ **Target Industries**: Finance, Healthcare, Government, Legal Services, Individuals
383
+ **Estimated TAM (Total Addressable Market) for Privacy-Preserving Malware Scanning**:
384
+ While exact figures for malware scanning are not available, we estimate it to be approximately 10% of the total cybersecurity market, or $6 billion. (Kaspersky has 700 million of revenue, Norton 1.5 billion, McAfee 2.5 billion.)
385
+ Based on the critical need in our target industries we estimate privacy-preserving to be 5-10% of that, approximately $300-600 million.
386
+ """
387
+ )
388
+
389
+ gr.Markdown("## 🏆 Competitive Advantage")
390
+ gr.Markdown(
391
+ """
392
+ Unlike traditional solutions:
393
+ 1. Complete data privacy through FHE
394
+ 2. Open-source transparency
395
+ 3. Cloud-based scanning without local resource consumption
396
+ 4. Immunity to local malware compromise
397
+ """
398
+ )
399
+
400
+ gr.Markdown("## 🚀 Go-To-Market Strategy")
401
+ gr.Markdown("### 🎯 Target Customers")
402
+ gr.Markdown(
403
+ """
404
+ 1. Enterprise Clients in sensitive industries
405
+ 2. Cloud Service Providers
406
+ 3. Cybersecurity Firms
407
+ 4. Privacy-conscious individuals
408
+ """
409
+ )
410
+
411
+ gr.Markdown("### 💰 Revenue Model")
412
+ gr.Markdown(
413
+ """
414
+ 1. Enterprise Licensing
415
+ 2. Tiered Subscription Plans
416
+ 3. API Access Fees
417
+ """
418
+ )
419
+
420
+ gr.Markdown("### 📈 Sales and Marketing Channels")
421
+ gr.Markdown(
422
+ """
423
+ 1. Direct Enterprise Sales
424
+ 2. Partnerships with cloud providers and cybersecurity firms
425
+ 3. Industry events and conferences
426
+ 4. Content marketing (whitepapers, case studies)
427
+ """
428
+ )
429
+
430
+ gr.Markdown("## 🏅 Achievements and Roadmap")
431
+ gr.Markdown("### 🏆 Current Achievements")
432
+ gr.Markdown(
433
+ """
434
+ - Developed a proof-of-concept multiscanner using the TFHE library
435
+ - Successfully demonstrated end-to-end process from local encryption to cloud scanning
436
+ - Implemented a linear classifier for malware detection on encrypted data
437
+ """
438
+ )
439
+
440
+ gr.Markdown("### 🔮 Future Development")
441
+ gr.Markdown(
442
+ """
443
+ 1. Develop real-time scanning capabilities
444
+ 2. Add behavior analysis through encrypted log processing
445
+ 3. Expand to support a wider range of file types and encryption schemes
446
+ """
447
+ )
448
+
449
+ demo.launch(share=False)