zamborg committed on
Commit 7925ce3 · 1 Parent(s): 53c2d82
.gitattributes CHANGED
@@ -15,7 +15,6 @@
  *.parquet filter=lfs diff=lfs merge=lfs -text
  *.pb filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
  *.rar filter=lfs diff=lfs merge=lfs -text
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tar.* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ *.pth
+
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
- title: Redcaps Dev
- emoji: 📚
+ title: Redcaps
+ emoji: 📊
  colorFrom: red
- colorTo: red
+ colorTo: pink
  sdk: streamlit
  app_file: app.py
  pinned: false
__pycache__/model.cpython-37.pyc ADDED
Binary file (4.7 kB)
 
app.py ADDED
@@ -0,0 +1,139 @@
+ import streamlit as st
+ import io
+ import sys
+ import time
+ import json
+ sys.path.append("./virtex/")
+ from model import *
+
+ # TODO:
+ # - Reformat the model introduction
+ # - Center the images using the 3 column method
+ # - Make the iterative text generation
+
+ def gen_show_caption(sub_prompt=None, cap_prompt=""):
+     with st.spinner("Generating Caption"):
+         if sub_prompt is None and cap_prompt != "":
+             st.write("Without a specified subreddit we default to /r/pics")
+         subreddit, caption = virtexModel.predict(image_dict, sub_prompt=sub_prompt, prompt=cap_prompt)
+         st.markdown(
+             f"""
+             <style>
+                 red{{
+                     color:#c62828
+                 }}
+                 mono{{
+                     font-family: "Inconsolata";
+                 }}
+             </style>
+
+             ### <red> r/{subreddit} </red> {caption}
+             """,
+             unsafe_allow_html=True)
+
+
+ st.title("Image Captioning Demo from RedCaps")
+ st.sidebar.markdown(
+     """
+     ### Image Captioning Model from VirTex trained on RedCaps
+
+     Use this page to caption your own images or try out some of our samples.
+     You can also generate captions as if they are from specific subreddits,
+     as if they start with a particular prompt, or even both.
+
+     Share your results on Twitter with #redcaps or with a friend.
+     """
+ )
+
+ with st.spinner("Loading Model"):
+     virtexModel, imageLoader, sample_images, valid_subs = create_objects()
+
+ # staggered = st.sidebar.checkbox("Iteratively Generate Captions")
+ # if staggered:
+ #     pass
+ # else:
+
+ select_idx = None
+
+ st.sidebar.title("Select a sample image")
+
+ if st.sidebar.button("Random Sample Image"):
+     select_idx = get_rand_idx(sample_images)
+
+ sample_image = sample_images[0 if select_idx is None else select_idx]
+
+ uploaded_image = None
+ # with st.sidebar.form("file-uploader-form", clear_on_submit=True):
+ uploaded_file = st.sidebar.file_uploader("Choose a file")
+ # submitted = st.form_submit_button("Submit")
+ if uploaded_file is not None:  # and submitted:
+     uploaded_image = Image.open(io.BytesIO(uploaded_file.getvalue()))
+     select_idx = None  # set this to help rewrite the cache
+
+ # class OnChange():
+ #     def __init__(self, idx):
+ #         self.idx = idx
+ #
+ #     def __call__(self):
+ #         st.write(f"the idx is: {self.idx}")
+ #         st.write(f"the sample_image is {sample_image}")
+
+ # sample_image = st.sidebar.selectbox(
+ #     "",
+ #     sample_images,
+ #     index=0 if select_idx is None else select_idx,
+ #     on_change=OnChange(0 if select_idx is None else select_idx)
+ # )
+
+ st.sidebar.title("Select a Subreddit")
+ sub = st.sidebar.selectbox(
+     "Type below to condition on a subreddit. Select None for a predicted subreddit",
+     valid_subs
+ )
+
+ st.sidebar.title("Write a Custom Prompt")
+ cap_prompt = st.sidebar.text_input(
+     "Write the start of your caption below",
+     value=""
+ )
+
+ _ = st.sidebar.button("Regenerate Caption")
+
+ advanced = st.sidebar.checkbox("Advanced Options")
+ num_captions = 1
+ if advanced:
+     num_captions = st.sidebar.select_slider("Number of Captions to Predict", options=[1, 2, 3, 4, 5], value=1)
+     nuc_size = st.sidebar.slider("Nucleus Size:", min_value=0.0, max_value=1.0, value=0.8, step=0.05)
+     virtexModel.model.decoder.nucleus_size = nuc_size
+
+ if False:  # uploaded_image is None: # and submitted:
+     st.write("Please select a file to upload")
+ else:
+     image_file = sample_image
+
+     # LOAD AND CACHE THE IMAGE
+     if uploaded_image is not None:
+         image = uploaded_image
+     elif select_idx is None and 'image' in st.session_state:
+         image = st.session_state['image']
+     else:
+         image = Image.open(image_file)
+
+     image = image.convert("RGB")
+
+     st.session_state['image'] = image
+
+     image_dict = imageLoader.transform(image)
+
+     show_image = imageLoader.show_resize(image)
+
+     show = st.image(show_image)
+     show.image(show_image, "Your Image")
+
+     for i in range(num_captions):
+         gen_show_caption(sub, imageLoader.text_transform(cap_prompt))
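
Since app.py is a Streamlit script, it is launched with `streamlit run app.py` rather than `python app.py`. For readers who want the captioning path without the UI, here is a minimal headless sketch; it only rewires the helpers defined in this commit's model.py and assumes the checkpoint files download successfully:

```python
# Hedged sketch: the same wiring app.py does, minus the Streamlit widgets.
# All names (download_files, create_objects, Image) come from model.py below.
import sys
sys.path.append("./virtex/")
from model import *  # mirrors app.py's import style

download_files()  # fetch config.yaml, checkpoint_last5.pth, subreddit_list.json
virtexModel, imageLoader, sample_images, valid_subs = create_objects()

image = Image.open(sample_images[0]).convert("RGB")
image_dict = imageLoader.transform(image)

# sub_prompt=None lets the model predict the subreddit itself
subreddit, caption = virtexModel.predict(image_dict)
print(f"r/{subreddit} {caption}")
```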
config.yaml ADDED
@@ -0,0 +1,63 @@
+ AMP: true
+ CUDNN_BENCHMARK: true
+ CUDNN_DETERMINISTIC: false
+ DATA:
+   EOS_INDEX: 2
+   IMAGE_CROP_SIZE: 224
+   IMAGE_TRANSFORM_TRAIN:
+   - random_resized_crop
+   - horizontal_flip
+   - color_jitter
+   - normalize
+   IMAGE_TRANSFORM_VAL:
+   - smallest_resize
+   - center_crop
+   - normalize
+   MASKED_LM:
+     MASK_PROBABILITY: 0.85
+     MASK_PROPORTION: 0.15
+     REPLACE_PROBABILITY: 0.1
+   MASK_INDEX: 3
+   MAX_CAPTION_LENGTH: 50
+   ROOT: datasets/redcaps/tarfiles/*.tar
+   SOS_INDEX: 1
+   TOKENIZER_MODEL: datasets/common_30k.model
+   UNK_INDEX: 0
+   USE_PERCENTAGE: 100.0
+   USE_SINGLE_CAPTION: false
+   VOCAB_SIZE: 30000
+ MODEL:
+   DECODER:
+     BEAM_SIZE: 5
+     MAX_DECODING_STEPS: 30
+     NAME: nucleus_sampling
+     NUCLEUS_SIZE: 0.9
+   LABEL_SMOOTHING: 0.1
+   NAME: virtex_web
+   TEXTUAL:
+     DROPOUT: 0.1
+     NAME: transdec_prenorm::L6_H512_A8_F2048
+   VISUAL:
+     FEATURE_SIZE: 2048
+     FROZEN: false
+     NAME: torchvision::resnet50
+     PRETRAINED: false
+ OPTIM:
+   BATCH_SIZE: 256
+   CLIP_GRAD_NORM: 10.0
+   CNN_LR: 0.0005
+   LOOKAHEAD:
+     ALPHA: 0.5
+     STEPS: 5
+     USE: false
+   LR: 0.0005
+   LR_DECAY_NAME: cosine
+   LR_GAMMA: 0.1
+   LR_STEPS: []
+   NO_DECAY: .*textual.(embedding|transformer).*(norm.*|bias)
+   NUM_ITERATIONS: 1500000
+   OPTIMIZER_NAME: adamw
+   SGD_MOMENTUM: 0.9
+   WARMUP_STEPS: 10000
+   WEIGHT_DECAY: 0.01
+ RANDOM_SEED: 0
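
This file is the serialized VirTex pretraining config; model.py below consumes it through virtex's Config and factory classes. A minimal sketch of that load path, using only the classes model.py already imports:

```python
# Hedged sketch of how config.yaml is consumed (see model.py below).
from virtex.config import Config
from virtex.factories import TokenizerFactory, PretrainingModelFactory

config = Config("config.yaml")                        # wraps the YAML above
tokenizer = TokenizerFactory.from_config(config)      # datasets/common_30k.model
model = PretrainingModelFactory.from_config(config)   # ResNet-50 + transformer decoder
model = model.to("cpu").eval()                        # weights loaded separately via CheckpointManager
```

Note that MODEL.DECODER.NAME is nucleus_sampling with NUCLEUS_SIZE 0.9; this is the same decoder field the app's "Advanced Options" slider mutates at run time.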
datasets/common_30k.model ADDED
Binary file (748 kB)
 
datasets/tmp ADDED
File without changes
experiment.ipynb ADDED
File without changes
model.py ADDED
@@ -0,0 +1,149 @@
+ import streamlit as st
+ from huggingface_hub import hf_hub_url, cached_download
+ from PIL import Image
+ import os
+ import json
+ import glob
+ import random
+ from typing import Any, Dict, List
+ import torch
+ import torchvision
+
+ import wordsegment as ws
+
+ from virtex.config import Config
+ from virtex.factories import TokenizerFactory, PretrainingModelFactory, ImageTransformsFactory
+ from virtex.utils.checkpointing import CheckpointManager
+
+ CONFIG_PATH = "config.yaml"
+ MODEL_PATH = "checkpoint_last5.pth"
+ VALID_SUBREDDITS_PATH = "subreddit_list.json"
+ SAMPLES_PATH = "./samples/*.jpg"
+
+ class ImageLoader:
+     def __init__(self):
+         self.image_transform = torchvision.transforms.Compose([
+             torchvision.transforms.ToTensor(),
+             torchvision.transforms.Resize(256),
+             torchvision.transforms.CenterCrop(224),
+             torchvision.transforms.Normalize((.485, .456, .406), (.229, .224, .225))])
+         self.show_size = 500
+
+     def load(self, im_path):
+         im = torch.FloatTensor(self.image_transform(Image.open(im_path))).unsqueeze(0)
+         return {"image": im}
+
+     def raw_load(self, im_path):
+         im = torch.FloatTensor(Image.open(im_path))
+         return {"image": im}
+
+     def transform(self, image):
+         im = torch.FloatTensor(self.image_transform(image)).unsqueeze(0)
+         return {"image": im}
+
+     def text_transform(self, text):
+         # at present just lowercasing:
+         return text.lower()
+
+     def show_resize(self, image):
+         # we resize manually because torchvision==0.8 (not 1.9) lacks a max-size resize
+         image = torchvision.transforms.functional.to_tensor(image)
+         x, y = image.shape[-2:]
+         ratio = float(self.show_size / max((x, y)))
+         image = torchvision.transforms.functional.resize(image, [int(x * ratio), int(y * ratio)])
+         return torchvision.transforms.functional.to_pil_image(image)
+
+
+ class VirTexModel:
+     def __init__(self):
+         self.config = Config(CONFIG_PATH)
+         ws.load()
+         self.device = "cpu"
+         self.tokenizer = TokenizerFactory.from_config(self.config)
+         self.model = PretrainingModelFactory.from_config(self.config).to(self.device)
+         CheckpointManager(model=self.model).load(MODEL_PATH)
+         self.model.eval()
+         self.valid_subs = json.load(open(VALID_SUBREDDITS_PATH))
+
+     def predict(self, image_dict, sub_prompt=None, prompt=""):
+         if sub_prompt is None:
+             subreddit_tokens = torch.tensor([self.model.sos_index], device=self.device).long()
+         else:
+             subreddit_tokens = " ".join(ws.segment(ws.clean(sub_prompt)))
+             subreddit_tokens = (
+                 [self.model.sos_index] +
+                 self.tokenizer.encode(subreddit_tokens) +
+                 [self.tokenizer.token_to_id("[SEP]")]
+             )
+             subreddit_tokens = torch.tensor(subreddit_tokens, device=self.device).long()
+
+         if prompt != "":
+             # at present prompts without subreddits will break without this change
+             # TODO FIX
+             cap_tokens = self.tokenizer.encode(prompt)
+             cap_tokens = torch.tensor(cap_tokens, device=self.device).long()
+             subreddit_tokens = subreddit_tokens if sub_prompt is not None else torch.tensor(
+                 (
+                     [self.model.sos_index] +
+                     self.tokenizer.encode("pics") +
+                     [self.tokenizer.token_to_id("[SEP]")]
+                 ), device=self.device).long()
+
+             subreddit_tokens = torch.cat(
+                 [
+                     subreddit_tokens,
+                     torch.tensor([self.tokenizer.token_to_id("[SEP]")], device=self.device).long(),
+                     cap_tokens
+                 ])
+
+         predictions: List[Dict[str, Any]] = []
+
+         is_valid_subreddit = False
+         subreddit, rest_of_caption = "", ""
+         image_dict["decode_prompt"] = subreddit_tokens
+         while not is_valid_subreddit:
+
+             with torch.no_grad():
+                 caption = self.model(image_dict)["predictions"][0].tolist()
+
+             # swap the first [SEP] for "://" so subreddit and caption split cleanly
+             if self.tokenizer.token_to_id("[SEP]") in caption:
+                 sep_index = caption.index(self.tokenizer.token_to_id("[SEP]"))
+                 caption[sep_index] = self.tokenizer.token_to_id("://")
+
+             caption = self.tokenizer.decode(caption)
+
+             if "://" in caption:
+                 subreddit, rest_of_caption = caption.split("://", 1)
+                 subreddit = "".join(subreddit.split())
+                 rest_of_caption = rest_of_caption.strip()
+             else:
+                 subreddit, rest_of_caption = "", caption
+
+             # resample until the model emits a subreddit we recognize
+             is_valid_subreddit = subreddit in self.valid_subs
+
+         return subreddit, rest_of_caption
+
+ def download_files():
+     # download model files from the zamborg/redcaps Hub repo
+     for f in [CONFIG_PATH, MODEL_PATH, VALID_SUBREDDITS_PATH]:
+         fp = cached_download(hf_hub_url("zamborg/redcaps", filename=f))
+         os.system(f"cp {fp} ./{f}")
+
+ def get_samples():
+     return glob.glob(SAMPLES_PATH)
+
+ def get_rand_idx(samples):
+     return random.randint(0, len(samples) - 1)
+
+ @st.cache(allow_output_mutation=True)  # allow mutation to update nucleus size
+ def create_objects():
+     sample_images = get_samples()
+     virtexModel = VirTexModel()
+     imageLoader = ImageLoader()
+     valid_subs = json.load(open(VALID_SUBREDDITS_PATH))
+     valid_subs.insert(0, None)
+     return virtexModel, imageLoader, sample_images, valid_subs
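
Two implementation details of predict() worth noting: it rewrites the first [SEP] token to "://" so the decoded string splits cleanly into a subreddit prefix and the caption body, and it resamples in a loop until the predicted subreddit appears in subreddit_list.json. A hedged usage sketch with both conditioning knobs; the subreddit and prompt strings are illustrative values, not anything shipped with this commit:

```python
# Hedged usage sketch of the classes above.
from PIL import Image
from model import VirTexModel, ImageLoader, download_files

download_files()  # pull config, checkpoint, and subreddit list from the Hub
loader = ImageLoader()
virtex = VirTexModel()

image_dict = loader.transform(Image.open("samples/0.jpg").convert("RGB"))
sub, caption = virtex.predict(
    image_dict,
    sub_prompt="itookapicture",  # condition decoding on r/itookapicture
    prompt="the view from",      # generated caption must start with this text
)
print(f"r/{sub}: {caption}")
```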
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ albumentations>=0.5.0
+ Cython>=0.25
+ ftfy==5.8
+ future==0.18.0
+ huggingface-hub==0.1.2
+ lmdb==0.97
+ loguru==0.3.2
+ mypy_extensions==0.4.1
+ lvis==0.5.3
+ numpy>=1.17
+ opencv-python==4.1.2.30
+ sentencepiece>=0.1.90
+ torch==1.7.0
+ torchvision==0.8
+ tqdm>=4.50.0
+ wordsegment==1.3.1
+ whatimage==0.0.3
+ git+git://github.com/facebookresearch/fvcore.git#egg=fvcore
samples/.ipynb_checkpoints/test-checkpoint.jpg ADDED
samples/0.jpg ADDED
samples/1.jpg ADDED
samples/10.jpg ADDED
samples/100.jpg ADDED
samples/11.jpg ADDED
samples/12.jpg ADDED
samples/13.jpg ADDED
samples/14.jpg ADDED
samples/15.jpg ADDED
samples/16.jpg ADDED
samples/17.jpg ADDED
samples/18.jpg ADDED
samples/19.jpg ADDED
samples/2.jpg ADDED
samples/20.jpg ADDED
samples/21.jpg ADDED
samples/22.jpg ADDED
samples/23.jpg ADDED
samples/24.jpg ADDED
samples/25.jpg ADDED
samples/26.jpg ADDED
samples/27.jpg ADDED
samples/28.jpg ADDED
samples/29.jpg ADDED
samples/3.jpg ADDED
samples/30.jpg ADDED
samples/31.jpg ADDED
samples/32.jpg ADDED
samples/33.jpg ADDED
samples/34.jpg ADDED
samples/35.jpg ADDED
samples/36.jpg ADDED
samples/37.jpg ADDED
samples/38.jpg ADDED
samples/39.jpg ADDED
samples/4.jpg ADDED
samples/40.jpg ADDED
samples/41.jpg ADDED