# notes https://huggingface.co/spaces/Joeythemonster/Text-To-image-AllModels/blob/main/app.py
"""Gradio demo: generate an MNIST-style digit and hunt for its nearest training image.

A digit is produced by one of the models listed in ``modelieo``, classified by
a hosted digit classifier, and then compared (RMSE) against MNIST training
images that carry the predicted label, for a bounded amount of time.
"""
import os
import sys
import time
from math import sqrt, ceil

# Third-party imports are kept in their original relative order.
from diffusers import StableDiffusionPipeline
from diffusers import DiffusionPipeline
import torch
import matplotlib.pyplot as plt
import tensorflow as tf
import requests
from image_similarity_measures.evaluate import evaluation
from PIL import Image
from huggingface_hub import from_pretrained_keras
import numpy as np
import pandas as pd
import gradio as gr

modelieo = [
    'nathanReitinger/MNIST-diffusion',
    'nathanReitinger/MNIST-diffusion-oneImage',
    'nathanReitinger/MNIST-GAN',
    'nathanReitinger/MNIST-GAN-noDropout',
]

# Search budget (seconds) used when the caller supplies no time limit.
_DEFAULT_HUNT_SECONDS = 2
# Pause between classifier retries and the cap on how many times we try.
_RETRY_DELAY_SECONDS = 10
_MAX_CLASSIFIER_ATTEMPTS = 30
# Upper bound (seconds) on a single HTTP request to the classifier.
_REQUEST_TIMEOUT_SECONDS = 60
# Classifier scores at or below this value abort the similarity search.
_MIN_CLASSIFIER_SCORE = 0.90


def _save_digit_png(image, destination):
    """Render *image* as a 1x1-inch, axis-free grayscale figure saved at *destination*."""
    plt.figure(figsize=(1, 1))
    plt.subplot(1, 1, 1)
    plt.imshow(image, cmap='gray')
    plt.axis('off')
    plt.savefig(destination)
    plt.close()


def _load_image(path):
    """Open the image at *path*, read its pixels, and release the file handle."""
    with Image.open(path) as img:
        img.load()
    return img


def _parse_hunt_seconds(prompt):
    """Return ``abs(int(prompt))``, or ``None`` when *prompt* is not an integer."""
    try:
        return abs(int(prompt))
    except (TypeError, ValueError):
        return None


def get_sims(gen_filepath, gen_label, file_path, hunting_time_limit):
    """Find the MNIST training image closest (lowest RMSE) to the generated image.

    Only training images labelled ``gen_label`` are compared. Each candidate is
    rendered to ``file_path + 'real_deal.png'`` and scored against
    ``file_path + 'generated_image.png'``; the best candidate so far is kept at
    ``file_path + 'keeper.png'``.

    Args:
        gen_filepath: Path of the generated image. Currently unused; the
            generated image is located via ``file_path`` instead.
        gen_label: Digit label used to filter the training set.
        file_path: Directory prefix (with trailing separator) for the PNGs.
        hunting_time_limit: Seconds to keep searching; ``None`` means 2.

    Returns:
        ``[lowest_image_path, lowest_score]``. The path is ``''`` and the score
        ``10000`` if no training image with ``gen_label`` was compared.
    """
    (train_images, train_labels), (_, _) = tf.keras.datasets.mnist.load_data()
    train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32')
    train_images = (train_images - 127.5) / 127.5  # Normalize the images to [-1, 1]

    print("how long to hunt", hunting_time_limit)
    if hunting_time_limit is None:
        hunting_time_limit = _DEFAULT_HUNT_SECONDS

    real_path = file_path + 'real_deal.png'
    generated_path = file_path + 'generated_image.png'
    keeper_path = file_path + 'keeper.png'
    total = len(train_labels)

    lowest_score = 10000
    lowest_image_path = ''
    start = time.time()
    for i, label in enumerate(train_labels):
        if label != gen_label:
            continue

        # Render a real training digit so it goes through the same
        # matplotlib pipeline as the generated image before comparison.
        candidate = train_images[i]
        _save_digit_png(candidate, real_path)
        results = evaluation(
            org_img_path=real_path,
            pred_img_path=generated_path,
            metrics=["rmse", "psnr"],
        )

        if results['rmse'] < lowest_score:
            lowest_score = results['rmse']
            _save_digit_png(candidate, keeper_path)
            lowest_image_path = keeper_path
            print(lowest_score, str(round(((i / total) * 100), 2)) + '%')

        # The budget is checked only after a comparison, so at least one
        # candidate is always scored, even with a zero-second limit.
        elapsed = time.time() - start
        if elapsed > hunting_time_limit:
            print(str(elapsed) + "s")
            return [lowest_image_path, lowest_score]

    return [lowest_image_path, lowest_score]


def digit_recognition(filename, max_attempts=_MAX_CLASSIFIER_ATTEMPTS):
    """Ask the hosted MNIST classifier which digit the generated image shows.

    Args:
        filename: Directory prefix (with trailing separator); the image sent is
            ``filename + 'generated_image.png'``.
        max_attempts: How many times to query the API before giving up. A
            response containing an ``'error'`` key (or a failed request) counts
            as a failed attempt and is retried after a 10 second pause.

    Returns:
        ``{'full': <raw API response>, 'number': <int label of first entry>}``.

    Raises:
        RuntimeError: If every attempt ended in an error.
    """
    API_URL = "https://api-inference.huggingface.co/models/farleyknight/mnist-digit-classification-2022-09-04"
    # SECURITY: an API token is embedded (lightly obfuscated) in the source.
    # It is kept as a fallback so existing deployments keep working, but it
    # should be rotated and supplied through the HF_TOKEN environment variable.
    special_string = '-h-f-_-RT-U-J-E-M-Pb-GC-c-i-v-sji-bMsQmxuh-x-h-C-W-B-F-W-z-Gv-'
    token = os.environ.get("HF_TOKEN") or special_string.replace("-", '')
    headers = {"Authorization": "Bearer " + token}

    with open(filename + 'generated_image.png', "rb") as f:
        data = f.read()

    output = None
    for attempt in range(max_attempts):
        if attempt:
            time.sleep(_RETRY_DELAY_SECONDS)
        try:
            response = requests.post(
                API_URL,
                headers=headers,
                data=data,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            output = response.json()
        except (requests.RequestException, ValueError) as err:
            # Treat transport and decoding failures like an API-reported error.
            output = {'error': str(err)}
        if 'error' not in output:
            break
    else:
        raise RuntimeError(
            "digit classifier kept failing after "
            + str(max_attempts) + " attempts: " + str(output)
        )

    print(output)
    this_label_for_this_image = int(output[0]['label'])
    return {'full': output, 'number': this_label_for_this_image}


def get_other(original_image, hunting_time_limit):
    """Save the generated image, classify it, and search for similar training data.

    Args:
        original_image: Image accepted by ``plt.imshow`` (the generated digit).
        hunting_time_limit: Seconds to search, or ``None`` for the default.

    Returns:
        ``(generated_image_path, [closest_training_image_path, rmse])``.

    Raises:
        gr.Error: If the classifier's top score is 0.90 or lower, since a
            search under an unreliable label is unlikely to find a match.
    """
    RANDO = str(time.time())
    file_path = 'tester/' + 'generation' + "/" + RANDO + '/'
    os.makedirs(file_path)

    gen_filepath = file_path + 'generated_image.png'
    _save_digit_png(original_image, gen_filepath)
    print('[+] done saving generation')

    print("[-] what digit is this")
    ret = digit_recognition(file_path)
    print(ret['full'])
    print(ret['number'])
    print("[+]", ret['number'])

    print("[-] show some most similar numbers")
    if ret["full"][0]['score'] <= _MIN_CLASSIFIER_SCORE:
        print("[!] error in image digit recognition, likely to not find a similar score")
        # Surface the problem to the UI instead of exiting the interpreter
        # from inside a request handler.
        raise gr.Error(
            "Digit recognition was not confident enough (score <= 0.90) "
            "to search for similar training images. Please try again."
        )

    gen_label = ret['number']
    ret_sims = get_sims(gen_filepath, gen_label, file_path, hunting_time_limit)
    print("[+] done sims")
    return (gen_filepath, ret_sims)


def generate_and_save_images(model):
    """Sample one digit from a Keras generator and return it as a 2-D array.

    Args:
        model: Callable generator invoked as ``model(seed, training=False)``
            with a ``[1, 100]`` normal noise tensor.

    Returns:
        A ``28*size`` square float array (``size`` is 1 here) holding the
        model output scaled by ``255.0`` and then mapped through
        ``x / 127.5 - 1``.
    """
    noise_dim = 100
    num_examples_to_generate = 1
    seed = tf.random.normal([num_examples_to_generate, noise_dim])

    n_samples = 1
    # `training=False` so all layers run in inference mode (batchnorm).
    examples = model(seed, training=False)
    examples = examples * 255.0

    size = ceil(sqrt(n_samples))
    digit_images = np.zeros((28 * size, 28 * size), dtype=float)
    n = 0
    for row in range(size):
        for col in range(size):
            if n == n_samples:
                break
            digit_images[row * 28:(row + 1) * 28, col * 28:(col + 1) * 28] = examples[n, :, :, 0]
            n += 1

    digit_images = (digit_images / 127.5) - 1
    return digit_images


def TextToImage(Prompt, inference_steps, model):
    """Gradio handler: generate a digit and pair it with its closest training image.

    Args:
        Prompt: Text from the UI; when it parses as an integer its absolute
            value is the number of seconds to search, otherwise the default
            search time is used.
        inference_steps: Number of diffusion inference steps (ignored for
            models whose id contains ``'GAN'``).
        model: Model id selected in the dropdown.

    Returns:
        ``[generated_image, (training_image, "RMSE: <value>")]`` for the gallery.

    Raises:
        gr.Error: If no model was selected.
    """
    model_id = model
    if model_id is None:
        raise gr.Error("Please select a model.")

    if 'GAN' in model_id:
        print("do something else")
        model = from_pretrained_keras(model)
        image = generate_and_save_images(model)
    else:
        pipe = DiffusionPipeline.from_pretrained(model_id)
        # Seed is the last character of the current timestamp string.
        the_randomness = int(str(time.time())[-1])
        print('seed', the_randomness)
        image = pipe(
            generator=torch.manual_seed(the_randomness),
            num_inference_steps=int(inference_steps),
        ).images[0]

    prompt = Prompt
    print(prompt)
    hunting_time_limit = _parse_hunt_seconds(prompt)

    original_image, other_images = get_other(image, hunting_time_limit)
    the_file = other_images[0]
    the_rmse = other_images[1]

    ai_gen = _load_image(original_image)
    training_data = _load_image(the_file)
    another_one = (training_data, "RMSE: " + str(round(the_rmse, 5)))
    return [ai_gen, another_one]


df = pd.DataFrame({
    "Model": ['MNIST-diffusion', 'MNIST-diffusion-oneImage', 'MNIST-GAN', 'MNIST-GAN-noDropout'],
    "Class (Architecture)": ['UNet2DModel', 'UNet2DModel', 'Sequential', 'Sequential'],
    "Dataset Examples": [60000, 1, 60000, 60000],
    "Training Loops": [300, 100000, 90000, 90000],
    "Notes": [
        'Similar architecture as Stable Diffusion, different training data',
        'Toy model, purposed to store protected content',
        'GANs are not as likely to store protected content',
        'less dropout, more copying?',
    ],
})

# The table is shown unstyled (max-value highlighting is disabled).
styler = df

with gr.Blocks() as app:
    interface = gr.Interface(
        fn=TextToImage,
        inputs=[
            gr.Textbox(show_label=True, label='How many seconds to hunt for copies?',),
            gr.Slider(
                1,
                1000,
                label='Inference Steps (leave unchanged for default, best is 1000 but it is slow!)',
                value=10,
                step=1,
            ),
            gr.Dropdown(modelieo),
        ],
        outputs=gr.Gallery(
            label="Generated image",
            show_label=True,
            elem_id="gallery",
            columns=[2],
            rows=[1],
            object_fit="contain",
            height="auto",
        ),
        title='Unconditional Image Generation',
    )
    # NOTE(review): the element tags of this banner could not be recovered
    # from the reviewed source; <h1>/<p> are a minimal reconstruction around
    # the original wording -- confirm against the deployed app.
    gr.HTML(
        "<div>"
        "<h1>Do machine learning models store protected content?</h1>"
        + "<p>Enter a time to hunt for copies (seconds), select a model, and hit submit!</p>"
        + "<p>These image generation models will give you a 'bespoke' generation ❤ of an MNIST hand-drawn digit</p>"
        + "<p>then the program will search in training data (for n seconds) to find similar images: RMSE, lower is more similar</p>"
        + "<p>@nathanReitinger</p>"
        "</div>"
    )
    gr.Dataframe(styler)

app.queue().launch()