# modelProblems / app.py
# temp-9384289
# table
# 26699f4
# raw
# history blame
# 8.68 kB
# notes https://huggingface.co/spaces/Joeythemonster/Text-To-image-AllModels/blob/main/app.py
from diffusers import StableDiffusionPipeline
from diffusers import DiffusionPipeline
import torch
import time
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import sys
import requests
from image_similarity_measures.evaluate import evaluation
from PIL import Image
from huggingface_hub import from_pretrained_keras
from math import sqrt, ceil
import numpy as np
import pandas as pd
import gradio as gr
# Hugging Face Hub repository ids offered in the model dropdown.
modelieo = [
    'nathanReitinger/' + repo_name
    for repo_name in (
        'MNIST-diffusion',
        'MNIST-diffusion-oneImage',
        'MNIST-GAN',
        'MNIST-GAN-noDropout',
    )
]
def _save_digit_png(image, path):
    """Render one digit image to ``path`` as a 1x1 inch, axis-free grayscale PNG."""
    plt.figure(figsize=(1, 1))
    plt.subplot(1, 1, 1)
    plt.imshow(image, cmap='gray')
    plt.axis('off')
    plt.savefig(path)
    plt.close()


def get_sims(gen_filepath, gen_label, file_path, hunting_time_limit):
    """Hunt through the MNIST training set for the image closest to a generation.

    Every training image whose label equals ``gen_label`` is rendered to
    ``file_path + 'real_deal.png'`` and compared (RMSE) against
    ``file_path + 'generated_image.png'``.  Whenever a new best (lowest) RMSE
    is found, that training image is also written to
    ``file_path + 'keeper.png'``.  The scan ends when the time budget is used
    up or the training set is exhausted.

    Args:
        gen_filepath: Path of the generated image.  Not read by this function
            (the comparison always uses ``file_path + 'generated_image.png'``);
            kept so existing callers keep working.
        gen_label: Digit label the candidate training images must have.
        file_path: Directory prefix, including the trailing separator, in
            which the working PNG files are read and written.
        hunting_time_limit: Time budget in seconds; ``None`` means 2 seconds.

    Returns:
        ``[lowest_image_path, lowest_score]``.  If no candidate was scored
        before the budget ran out the path is ``''`` and the score is the
        initial sentinel ``10000``.
    """
    (train_images, train_labels), (_, _) = tf.keras.datasets.mnist.load_data()
    train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32')
    train_images = (train_images - 127.5) / 127.5  # Normalize the images to [-1, 1]

    print("how long to hunt", hunting_time_limit)
    if hunting_time_limit is None:
        hunting_time_limit = 2

    real_path = file_path + 'real_deal.png'
    generated_path = file_path + 'generated_image.png'
    keeper_path = file_path + 'keeper.png'

    lowest_score = 10000
    lowest_image_path = ''
    total = len(train_labels)
    # A monotonic clock keeps the budget immune to wall-clock adjustments.
    start = time.monotonic()

    for i, label in enumerate(train_labels):
        if label == gen_label:
            # Render the real training digit the same way the generated one
            # was rendered so both PNGs are directly comparable.
            candidate = train_images[i]
            _save_digit_png(candidate, real_path)

            # Check how close that real training image is to the generation.
            results = evaluation(
                org_img_path=real_path,
                pred_img_path=generated_path,
                metrics=["rmse", "psnr"],
            )
            if results['rmse'] < lowest_score:
                lowest_score = results['rmse']
                _save_digit_png(candidate, keeper_path)
                lowest_image_path = keeper_path
                print(lowest_score, str(round((i / total) * 100, 2)) + '%')

        elapsed = time.monotonic() - start
        if elapsed > hunting_time_limit:
            print(str(elapsed) + "s")
            break

    return [lowest_image_path, lowest_score]
def digit_recognition(filename, max_attempts=30, retry_delay=10):
    """Ask a hosted MNIST classifier which digit the generated image shows.

    The image at ``filename + 'generated_image.png'`` is posted to the
    Hugging Face Inference API.  Responses containing an ``'error'`` key,
    network failures and non-JSON bodies are retried after ``retry_delay``
    seconds, up to ``max_attempts`` times.

    Args:
        filename: Directory prefix (including the trailing separator) that
            contains ``generated_image.png``.
        max_attempts: Maximum number of requests before giving up.
        retry_delay: Seconds to wait between attempts.

    Returns:
        ``{'full': <raw API response>, 'number': <int label of the first entry>}``.

    Raises:
        RuntimeError: If no usable response was obtained in ``max_attempts`` tries.
    """
    API_URL = "https://api-inference.huggingface.co/models/farleyknight/mnist-digit-classification-2022-09-04"
    # SECURITY NOTE(review): an API token is embedded (lightly obfuscated) in
    # the source.  Prefer supplying it through the HF_API_TOKEN environment
    # variable and rotating/removing the embedded value.
    special_string = '-h-f-_-RT-U-J-E-M-Pb-GC-c-i-v-sji-bMsQmxuh-x-h-C-W-B-F-W-z-Gv-'
    token = os.environ.get("HF_API_TOKEN") or special_string.replace("-", '')
    headers = {"Authorization": "Bearer " + token}

    # The payload does not change between retries, so read it only once.
    with open(filename + 'generated_image.png', "rb") as f:
        data = f.read()

    output = None
    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.post(API_URL, headers=headers, data=data, timeout=60)
            output = response.json()
        except (requests.RequestException, ValueError) as err:
            # Network trouble or a non-JSON body: treat like an API error.
            output = {'error': str(err)}
        if 'error' not in output:
            break
        if attempt < max_attempts:
            time.sleep(retry_delay)
    else:
        raise RuntimeError(
            "digit recognition failed after " + str(max_attempts)
            + " attempts; last response: " + str(output)
        )

    print(output)
    this_label_for_this_image = int(output[0]['label'])
    return {'full': output, 'number': this_label_for_this_image}
def get_other(original_image, hunting_time_limit):
    """Save a generated digit, classify it, and look for similar training data.

    A fresh working directory ``tester/generation/<timestamp>/`` is created,
    the image is rendered there as ``generated_image.png``, the digit is
    classified with ``digit_recognition`` and the training set is searched
    with ``get_sims``.

    Args:
        original_image: Image data accepted by ``plt.imshow``.
        hunting_time_limit: Search budget in seconds, forwarded to ``get_sims``.

    Returns:
        ``(generated_image_path, sims)`` where ``sims`` is the value returned
        by ``get_sims``.

    Raises:
        gr.Error: If the classifier's top score is not above 0.90.
    """
    RANDO = str(time.time())
    file_path = 'tester/' + 'generation' + "/" + RANDO + '/'
    os.makedirs(file_path, exist_ok=True)
    gen_filepath = file_path + 'generated_image.png'

    plt.figure(figsize=(1, 1))
    plt.subplot(1, 1, 1)
    plt.imshow(original_image, cmap='gray')
    plt.axis('off')
    plt.savefig(gen_filepath)
    plt.close()
    print('[+] done saving generation')

    print("[-] what digit is this")
    ret = digit_recognition(file_path)
    print(ret['full'])
    print(ret['number'])
    print("[+]", ret['number'])

    print("[-] show some most similar numbers")
    if ret["full"][0]['score'] <= 0.90:
        print("[!] error in image digit recognition, likely to not find a similar score")
        # Raise instead of sys.exit(): exiting from inside a request handler
        # can bring the whole server down; gr.Error reports to the user.
        raise gr.Error(
            "The generated image could not be recognised as a digit with "
            "enough confidence; please try again."
        )

    gen_label = ret['number']
    ret_sims = get_sims(gen_filepath, gen_label, file_path, hunting_time_limit)
    print("[+] done sims")
    return (gen_filepath, ret_sims)
def generate_and_save_images(model):
    """Draw one sample from a generator model and return it as a 2-D array.

    The model is called with a ``[1, 100]`` tensor of normal noise and
    ``training=False``.  Channel 0 of each sample is pasted into a square
    canvas of 28x28 tiles (a single tile here, as one sample is requested).
    Despite the function's name, nothing is written to disk.

    Args:
        model: Callable accepting ``(noise, training=False)``; its result is
            indexed as ``[sample, :, :, 0]`` and must fit a 28x28 tile.

    Returns:
        The canvas as a float NumPy array, after multiplying the model output
        by 255.0 and then mapping the canvas through ``x / 127.5 - 1``.
    """
    latent_size = 100
    batch_size = 1
    n_samples = 1

    latent = tf.random.normal([batch_size, latent_size])
    # training=False so that all layers run in inference mode (batchnorm).
    generated = model(latent, training=False)
    generated = generated * 255.0

    tiles_per_side = ceil(sqrt(n_samples))
    canvas = np.zeros((28 * tiles_per_side, 28 * tiles_per_side), dtype=float)

    # Fill tiles in row-major order until every requested sample is placed.
    for n in range(min(n_samples, tiles_per_side * tiles_per_side)):
        row, col = divmod(n, tiles_per_side)
        canvas[row * 28:(row + 1) * 28, col * 28:(col + 1) * 28] = generated[n, :, :, 0]

    return (canvas / 127.5) - 1
def _load_image(path):
    """Read an image completely into memory so no file handle is left open."""
    with Image.open(path) as img:
        return img.copy()


def TextToImage(Prompt, inference_steps, model):
    """Generate a digit with the chosen model and pair it with training data.

    Models whose id contains ``'GAN'`` are loaded with
    ``from_pretrained_keras`` and sampled via ``generate_and_save_images``;
    all others are loaded as a ``DiffusionPipeline``.  The generation is then
    handed to ``get_other`` to find the most similar training image.

    Args:
        Prompt: Text from the UI.  If it consists only of decimal digits it is
            used as the search budget in seconds; otherwise the budget is left
            as ``None`` for ``get_other`` to handle.
        inference_steps: Number of diffusion inference steps (ignored for GANs).
        model: Hub repository id of the model to use.

    Returns:
        ``[generated_image, (training_image, "RMSE: <score>")]``, or just
        ``[generated_image]`` when the search produced no match.

    Raises:
        gr.Error: If no model was selected.
    """
    if not model:
        raise gr.Error("Please select a model before submitting.")

    model_id = model
    if 'GAN' in model_id:
        print("do something else")
        keras_model = from_pretrained_keras(model_id)
        image = generate_and_save_images(keras_model)
    else:
        pipe = DiffusionPipeline.from_pretrained(model_id)
        # NOTE(review): only the last character of the timestamp is used, so
        # there are just ten possible seeds (0-9) -- confirm this is intended.
        the_randomness = int(str(time.time())[-1])
        print('seed', the_randomness)
        image = pipe(
            generator=torch.manual_seed(the_randomness),
            num_inference_steps=int(inference_steps),
        ).images[0]

    prompt = Prompt or ''
    print(prompt)
    hunting_time_limit = None
    budget_text = prompt.strip()
    # isdecimal() (unlike isnumeric()) only accepts characters int() can parse.
    if budget_text.isdecimal():
        hunting_time_limit = int(budget_text)

    original_image, other_images = get_other(image, hunting_time_limit)
    the_file, the_rmse = other_images[0], other_images[1]

    ai_gen = _load_image(original_image)
    if not the_file:
        # The search ended before any training image was scored.
        return [ai_gen]

    training_data = _load_image(the_file)
    another_one = (training_data, "RMSE: " + str(round(the_rmse, 5)))
    return [ai_gen, another_one]
# Overview table of the selectable models: one record per model, listed in
# the same order as the column names below.
df = pd.DataFrame({
    column: list(values)
    for column, values in zip(
        ("Model", "Class (Architecture)", "Dataset Examples", "Training Loops", "Notes"),
        zip(
            ('MNIST-diffusion', 'UNet2DModel', 60000, 300,
             'Similar architecture as Stable Diffusion, different training data'),
            ('MNIST-diffusion-oneImage', 'UNet2DModel', 1, 100000,
             'Toy model, purposed to store protected content'),
            ('MNIST-GAN', 'Sequential', 60000, 90000,
             'GANs are not as likely to store protected content'),
            ('MNIST-GAN-noDropout', 'Sequential', 60000, 90000,
             'less dropout, more copying?'),
        ),
    )
})
# No styling is applied at the moment; the plain frame is displayed as-is.
styler = df
# Build the page: the generation interface, an explanatory HTML blurb and the
# model overview table, then start the server with request queuing enabled.
with gr.Blocks() as app:
    interface = gr.Interface(
        fn=TextToImage,
        inputs=[
            gr.Textbox(show_label=True, label='How many seconds to hunt for copies?',),
            gr.Slider(1, 1000, label='Inference Steps (leave unchanged for default, best is 1000 but it is slow!)', value=10, step=1),
            gr.Dropdown(modelieo),
        ],
        outputs=gr.Gallery(label="Generated image", show_label=True, elem_id="gallery", columns=[2], rows=[1], object_fit="contain", height="auto"),
        title='Unconditional Image Generation',
    )
    # Markup repaired: anchors are closed with </a>, and every <span>,
    # <strong> and <center> is closed inside the paragraph that opens it.
    gr.HTML(
        "<hr>"
        "<h1><center>Do machine learning models store protected content?</center></h1>" +
        "<p><center><span style='color: red;'>Enter a time to hunt for copies (seconds), select a model, and hit submit!</span></center></p>" +
        "<p><center><strong>These image generation models will give you a 'bespoke' generation ❤ of an <a href='https://paperswithcode.com/dataset/mnist'>MNIST hand-drawn digit</a></strong></center></p> " +
        "<p><center>then the program will search in training data (for <i>n</i> seconds) to find similar images: <a href='https://medium.com/@mygreatlearning/rmse-what-does-it-mean-2d446c0b1d0e'>RMSE</a>, lower is more similar</center></p>" +
        "<p><center><a href='https://nathanreitinger.umiacs.io'>@nathanReitinger</a></center></p>"
    )
    gr.Dataframe(styler)
app.queue().launch()