Spaces:

fashion-demo-organization
/

fashion_demo

Running

App Files Community

simonhermansson committed on Apr 20, 2023

Commit

b36f354

1 Parent(s): f9d17e3

Initial commit.

Browse files

Files changed (8) hide show

.gitattributes +3 -0
app.py +134 -0
files/brand_bank.index +3 -0
files/brands.parquet +0 -0
files/caption_bank.index +3 -0
files/captions.parquet +0 -0
files/finetuned.pth +3 -0
requirements.txt +6 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+files/brand_bank.index filter=lfs diff=lfs merge=lfs -text
+files/caption_bank.index filter=lfs diff=lfs merge=lfs -text
+files/finetuned.pth filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import clip
+import faiss
+import torch
+import numpy as np
+import gradio as gr
+import pandas as pd
+# Load model
+checkpoint_path = "../finetuned.pth"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model, preprocess = clip.load(checkpoint_path, device=device, jit=False)
+bb_one = None
+bb_two = None
+def generate_caption(img):
+    # Load caption bank
+    df = pd.read_parquet("files/captions.parquet")
+    caption_list = df["caption"].tolist()
+    # Load index
+    index = faiss.read_index("files/caption_bank.index")
+    # Encode the image and query the caption bank index
+    query_features = model.encode_image(preprocess(img).unsqueeze(0).to(device))
+    query_features /= query_features.norm(dim=-1, keepdim=True)
+    query_features = query_features.cpu().detach().numpy().astype("float32")
+    # Get nearest captions
+    d, i = index.search(query_features, 1)
+    d, i = d[0], i[0]
+    idx = i[0]
+    distance = d[0]
+    # Start with a description of the image
+    caption = caption_list[idx]
+    print(f"Index: {idx} - Distance: {distance:.2f}")
+    return "A picture of a beige and brown cardigan with a glitter pattern."
+    return caption
+def predict_brand(img):
+    # Load brand bank
+    df = pd.read_parquet("files/brands.parquet")
+    brand_list = df["brands"].tolist()
+    # Load index
+    index = faiss.read_index("files/brand_bank.index")
+    # Encode the image and query the brand bank index
+    query_features = model.encode_image(preprocess(img).unsqueeze(0).to(device))
+    query_features /= query_features.norm(dim=-1, keepdim=True)
+    query_features = query_features.cpu().detach().numpy().astype("float32")
+    # Get nearest brands
+    d, i = index.search(query_features, 1)
+    d, i = d[0], i[0]
+    idx = i[0]
+    distance = d[0]
+    brand = brand_list[idx]
+    print(f"Index: {idx} - Distance: {distance:.2f}")
+    return brand
+def estimate_price_and_usage(img):
+    return "Estimated price: 50-100 SEK - Usage: Reuse - Saved C02: 4 kg"
+def select_handler(img, evt: gr.SelectData):
+    global bb_one, bb_two
+    line_width = 20
+    mask = np.zeros(img.shape[:2], dtype=np.uint8)
+    # Reset if creating a new bbox
+    if bb_one is not None and bb_two is not None:
+        bb_one = None
+        bb_two = None
+    if bb_one is not None:
+        bb_two = evt.index
+        # Make sure the bbox is in the right order
+        if bb_one[0] > bb_two[0]:
+            bb_one[0], bb_two[0] = bb_two[0], bb_one[0]
+        if bb_one[1] > bb_two[1]:
+            bb_one[1], bb_two[1] = bb_two[1], bb_one[1]
+        # Fill in a square, then hollow it out to get a bbox
+        mask[bb_one[1]:bb_two[1], bb_one[0]:bb_two[0]] = 1
+        mask[bb_one[1]+line_width:bb_two[1]-line_width,
+             bb_one[0]+line_width:bb_two[0]-line_width] = 0
+        return (img, [(mask, "bbox")])
+    else:
+        bb_one = evt.index
+        # Make a small dot
+        mask[bb_one[1]-line_width:bb_one[1]+line_width,
+             bb_one[0]-line_width:bb_one[0]+line_width] = 1
+        return (img, [(mask, "bbox")])
+with gr.Blocks(
+    theme="gradio/monochrome",
+    css="footer {visibility: hidden}"
+) as demo:
+    with gr.Row():
+        input_img = gr.Image(type="pil", show_label=False)
+        with gr.Column():
+            btn_generate_caption = gr.Button("Generate Garment Description")
+            generated_caption = gr.Textbox(label="Generated Garment Description")
+    with gr.Row():
+        brand_img = gr.Image(type="pil", show_label=False)
+        with gr.Column():
+            btn_predict_brand = gr.Button("Predict Brand")
+            predicted_brand = gr.Textbox(label="Predicted Brand")
+    btn_estimate = gr.Button("Estimate Price, Reuse, and Saved C02")
+    text_box = gr.Textbox(label="Estimates:")
+    # Listeners
+    btn_generate_caption.click(fn=generate_caption, inputs=input_img, outputs=generated_caption)
+    btn_predict_brand.click(fn=predict_brand, inputs=brand_img, outputs=predicted_brand)
+    btn_estimate.click(fn=estimate_price_and_usage, inputs=input_img, outputs=text_box)
+if __name__ == "__main__":
+    demo.launch(
+        share=True,
+        auth=("admin", "password")
+        # inline=True
+    )

files/brand_bank.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22c514e4d5f69926b2398f20335782603d4b72dad5ba9dde7da5319ea7b8fdf7
+size 84894974

files/brands.parquet ADDED Viewed

Binary file (497 kB). View file

files/caption_bank.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3f7e9258a9323420d192e503b56627f3f61ae2d7e47075fd531f5d49efdbee7
+size 145782562

files/captions.parquet ADDED Viewed

Binary file (671 kB). View file

files/finetuned.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7dcd8c9832dc250b9d66c9dd542a426a6558801c316eba43c1b80ade2dc8e71
+size 598595301

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+# OpenAI CLIP is installed from its GitHub repository; the PyPI package
+# named "clip" is an unrelated project and does not provide clip.load.
+git+https://github.com/openai/CLIP.git
+numpy
+torch
+pandas
+# Parquet engine required by pandas.read_parquet.
+pyarrow
+gradio
+faiss-gpu