haor committed on
Commit 167e9d9
1 Parent(s): 31ab6ff

Create app.py

Files changed (1)
app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
+ import gradio as gr
+ import torch
+ import clip
+ import pandas as pd
+ import hashlib
+ import numpy as np
+ import cv2
+ from PIL import Image
+ import onnxruntime as ort
+ import requests
+
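+ # Convert a boolean bit array to a compact hexadecimal string.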
+ def _binary_array_to_hex(arr):
+     bit_string = ''.join(str(b) for b in 1 * arr.flatten())
+     width = int(np.ceil(len(bit_string) / 4))
+     return '{:0>{width}x}'.format(int(bit_string, 2), width=width)
+
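+ # Perceptual hash (pHash): DCT of a grayscale thumbnail, with bits set where the
+ # low-frequency coefficients exceed their median.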
+ def phashstr(image, hash_size=8, highfreq_factor=4):
+     if hash_size < 2:
+         raise ValueError('Hash size must be greater than or equal to 2')
+     import scipy.fftpack
+     img_size = hash_size * highfreq_factor
+     image = image.convert('L').resize((img_size, img_size), Image.Resampling.LANCZOS)
+     pixels = np.asarray(image)
+     dct = scipy.fftpack.dct(scipy.fftpack.dct(pixels, axis=0), axis=1)
+     dctlowfreq = dct[:hash_size, :hash_size]
+     med = np.median(dctlowfreq)
+     diff = dctlowfreq > med
+     return _binary_array_to_hex(diff.flatten())
+
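+ # L2-normalize an array along the given axis, guarding against division by zero.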
+ def normalized(a, axis=-1, order=2):
+     l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
+     l2[l2 == 0] = 1
+     return a / np.expand_dims(l2, axis)
+
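+ # Recursively convert NumPy scalars inside dicts and lists to native Python types
+ # so the result can be serialized as JSON.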
+ def convert_numpy_types(data):
+     if isinstance(data, dict):
+         return {key: convert_numpy_types(value) for key, value in data.items()}
+     elif isinstance(data, list):
+         return [convert_numpy_types(item) for item in data]
+     elif isinstance(data, np.float64):
+         return float(data)
+     elif isinstance(data, np.int64):
+         return int(data)
+     else:
+         return data
+
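+ # Download a file (here, the ONNX aesthetic-score head) to a local path.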
+ def download_onnx_model(url, filename):
+     response = requests.get(url)
+     with open(filename, 'wb') as f:
+         f.write(response.content)
+
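+ # Gradio handler: score one uploaded image (an RGB NumPy array) and return the metrics as a dict.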
+ def predict(image):
+     onnx_url = "https://huggingface.co/haor/aesthetics/resolve/main/aesthetic_score_mlp.onnx"
+     onnx_path = "aesthetic_score_mlp.onnx"
+     # Note: the scoring head is re-downloaded and CLIP is re-loaded on every call.
+     download_onnx_model(onnx_url, onnx_path)
+
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     ort_session = ort.InferenceSession(onnx_path)
+
+     model2, preprocess = clip.load("ViT-L/14", device=device)
+     image = Image.fromarray(image)
+     # Sharpness proxy (variance of the Laplacian), perceptual hash, and checksums.
+     image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
+     laplacian_variance = cv2.Laplacian(image_np, cv2.CV_64F).var()
+     phash = phashstr(image)
+     md5 = hashlib.md5(image.tobytes()).hexdigest()
+     sha1 = hashlib.sha1(image.tobytes()).hexdigest()
+
+     # Encode the image with CLIP and L2-normalize the embedding.
+     inputs = preprocess(image).unsqueeze(0).to(device)
+     with torch.no_grad():
+         img_emb = model2.encode_image(inputs)
+     img_emb = normalized(img_emb.cpu().numpy())
+
+     # Score the normalized embedding with the ONNX MLP.
+     ort_inputs = {ort_session.get_inputs()[0].name: img_emb.astype(np.float32)}
+     ort_outs = ort_session.run(None, ort_inputs)
+     prediction = ort_outs[0].item()
+
+     result = {
+         "clip_aesthetic": prediction,
+         "phash": phash,
+         "md5": md5,
+         "sha1": sha1,
+         "laplacian_variance": laplacian_variance
+     }
+     return convert_numpy_types(result)
+
+ title = "CLIP Aesthetic Score"
+ description = "Upload an image to predict its aesthetic score using the CLIP model and calculate other image metrics."
+
+ gr.Interface(
+     fn=predict,
+     inputs=gr.Image(type="numpy"),
+     outputs=gr.JSON(label="Result"),
+     title=title,
+     description=description,
+     examples=[["example1.jpg"], ["example2.jpg"]]
+ ).launch()
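
For reference, a minimal sketch of exercising the handler outside the Gradio UI. It assumes the functions above are defined in the current session (before .launch() is reached) and that example1.jpg is available locally; gr.Image(type="numpy") passes the upload to predict as an RGB NumPy array, which is what this snippet mimics:

import numpy as np
from PIL import Image

# Load a test image the way the numpy image input would pass it: as an RGB array.
img = np.array(Image.open("example1.jpg").convert("RGB"))

scores = predict(img)
print(scores)  # keys: clip_aesthetic, phash, md5, sha1, laplacian_variance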