In [1]:
import pandas as pd
import json
from PIL import Image
import numpy as np
import gradio as gr 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
import sys
from pathlib import Path

import torch
import torch.nn.functional as F

from src.data.embs import ImageDataset
from src.model.blip_embs import blip_embs

In [3]:
from src.data.transforms import transform_test
#
transform = transform_test(384)

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [29]:
import json 
import numpy as np 
from PIL import Image
import torch.nn.functional as F
import torch
from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer



class StoppingCriteriaSub(StoppingCriteria):

    def __init__(self, stops=[], encounters=1):
        super().__init__()
        self.stops = stops

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
        for stop in self.stops:
            if torch.all(input_ids[:, -len(stop):] == stop).item():
                return True

        return False



class Chat:

    def __init__(self, model, transform, dataframe, tar_img_feats, device='cuda:0', stopping_criteria=None):
        self.device = device
        self.model = model
        self.transform = transform
        self.df = dataframe
        self.tar_img_feats = tar_img_feats
        self.img_feats = None
        self.target_recipe = None
        self.messages = []

        if stopping_criteria is not None:
            self.stopping_criteria = stopping_criteria
        else:
            stop_words_ids = [torch.tensor([2]).to(self.device)]
            self.stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])

    def encode_image(self, image_path):
        img = Image.fromarray(image_path).convert("RGB")
        img = self.transform(img).unsqueeze(0)
        img = img.to(self.device)
        img_embs = self.model.visual_encoder(img)
        img_feats = F.normalize(self.model.vision_proj(img_embs[:, 0, :]), dim=-1).cpu()

        self.img_feats = img_feats 

        self.get_target(self.img_feats, self.tar_img_feats)

    def get_target(self, img_feats, tar_img_feats) : 
        score = (img_feats @ tar_img_feats.t()).squeeze(0).cpu().detach().numpy()
        index = np.argsort(score)[::-1][0]
        print(index)
        self.target_recipe = self.df.iloc[index]

    def ask(self, msg):
        if "nutrition" in msg or "nutrients" in msg : 
            return json.dumps(self.target_recipe["recipe_nutrients"], indent=4)
        elif "instruction" in msg :
            return json.dumps(self.target_recipe["recipe_instructions"], indent=4)
        elif "ingredients" in msg :
            return json.dumps(self.target_recipe["recipe_ingredients"], indent=4)
        elif "tag" in msg or "class" in msg :
            return json.dumps(self.target_recipe["tags"], indent=4)
        else:
            return "Conversational capabilities will be included later."


In [5]:
def get_blip_config(model="base"):
    config = dict()
    if model == "base":
        config[
            "pretrained"
        ] = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_capfilt_large.pth "
        config["vit"] = "base"
        config["batch_size_train"] = 32
        config["batch_size_test"] = 16
        config["vit_grad_ckpt"] = True
        config["vit_ckpt_layer"] = 4
        config["init_lr"] = 1e-5
    elif model == "large":
        config[
            "pretrained"
        ] = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_retrieval_coco.pth"
        config["vit"] = "large"
        config["batch_size_train"] = 16
        config["batch_size_test"] = 32
        config["vit_grad_ckpt"] = True
        config["vit_ckpt_layer"] = 12
        config["init_lr"] = 5e-6

    config["image_size"] = 384
    config["queue_size"] = 57600
    config["alpha"] = 0.4
    config["k_test"] = 256
    config["negative_all_rank"] = True

    return config

In [6]:
print("Creating model")
config = get_blip_config("large")

model = blip_embs(
        pretrained=config["pretrained"],
        image_size=config["image_size"],
        vit=config["vit"],
        vit_grad_ckpt=config["vit_grad_ckpt"],
        vit_ckpt_layer=config["vit_ckpt_layer"],
        queue_size=config["queue_size"],
        negative_all_rank=config["negative_all_rank"],
    )

model = model.to(device)
model.eval()

Creating model
load checkpoint from https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_retrieval_coco.pth
missing keys:
[]


BLIPEmbs(
  (visual_encoder): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=1024, out_features=3072, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=1024, out_features=1024, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
  

In [7]:
df = pd.read_json("datasets/sidechef/my_recipes.json")

In [28]:
df.head(10)

Unnamed: 0,recipe_name,recipe_time,recipe_yields,recipe_ingredients,recipe_instructions,recipe_image,blogger,recipe_nutrients,tags,id_
0,Asian Potato Salad with Seven Minute Egg,0,4 servings,"[2 1/2 cup Multi-Colored Fingerling Potato, 3/...",Fill a large stock pot with water.\nAdd the Mu...,https://www.sidechef.com/recipe/eeeeeceb-493e-...,sidechef.com,"{'calories': '80 calories', 'proteinContent': ...","[Salad, Lunch, Brunch, Appetizers, Side Dish, ...",1
1,Everything Breakfast Bombs,0,8 servings,"[5 tablespoon Butter, 12 ounce Turkey Breakfas...","First, preheat the oven to 375 degrees F (190 ...",https://www.sidechef.com/recipe/525f6843-4337-...,sidechef.com,"{'calories': '56 calories', 'proteinContent': ...","[Breakfast, Brunch, Low-Carb, Eggs, American, ...",2
2,Bacon Swiss Deviled Eggs,0,6 servings,"[6 Egg, 1/4 cup Mayonnaise, 1/4 cup Avocado, 1...",Cut each hard boiled Egg (6) in half lengthwis...,https://www.sidechef.com/recipe/2075e8cf-4fa9-...,sidechef.com,"{'calories': '38 calories', 'proteinContent': ...","[Breakfast, Brunch, Low-Carb, Eggs, American, ...",3
3,Farmers Market Breakfast Pizza,0,2 servings,"[1/2 Pizza Dough, 1/2 cup Kale, 1/2 cup Onion,...","For homemade pizza sauce, finely chop the Swee...",https://www.sidechef.com/recipe/1cd15944-9411-...,sidechef.com,"{'calories': '315 calories', 'proteinContent':...","[Breakfast, Brunch, Main Dish, Budget-Friendly...",4
4,Scrambled Eggs,0,2 servings,"[3 Egg, 2 tablespoon Heavy Cream, 2 tablespoon...",Crack Egg (3) into a bowl.\nPour in Heavy Crea...,https://www.sidechef.com/recipe/08d39a01-c030-...,sidechef.com,"{'calories': '127 calories', 'proteinContent':...","[Breakfast, Brunch, Vegetarian, Low-Carb, Pesc...",5
5,Fettuccini Carbonara,0,2 servings,"[2 Shallot, 1 clove Garlic, 2 Egg, 6 slice Bac...",Put a generously salted pot of water on to boi...,https://www.sidechef.com/recipe/9e5df75f-bf1a-...,sidechef.com,"{'calories': '495 calories', 'proteinContent':...","[Pasta, Dinner, Side Dish, Main Dish, Pork, Eg...",6
6,Sausage Egg Muffins,0,6 servings,"[1 pound Ground Pork, 1 1/2 teaspoon Fresh Par...",Preheat your oven to 350 degrees F (175 degree...,https://www.sidechef.com/recipe/49d5e5a3-4d16-...,sidechef.com,"{'calories': '44 calories', 'proteinContent': ...","[Keto, Breakfast, Brunch, Budget-Friendly, Low...",7
7,Shakshuka,0,4 servings,"[1 tablespoon Oil, 3 Tomato, 1 Green Chili Pep...",Preheat oven to 180 degrees C (350 degrees F) ...,https://www.sidechef.com/recipe/de00577b-38d4-...,sidechef.com,"{'calories': '99 calories', 'fatContent': '2.5...","[Breakfast, Brunch, Main Dish, Vegetarian, Pes...",8
8,Huevos Rancheros,0,1 serving,"[2 Yellow Corn Tortilla, 2 tablespoon Pinto Be...","In a small frying pan, spray a little Nonstick...",https://www.sidechef.com/recipe/5284bc88-1305-...,sidechef.com,"{'calories': '290 calories', 'proteinContent':...","[Breakfast, Brunch, Eggs, Quick, Mexican, Shel...",9
9,Homemade Pasta,0,4 servings,"[1 cup All-Purpose Flour, 1 teaspoon Salt, 1 Egg]",Mix All-Purpose Flour (1 cup) and Salt (1 teas...,https://www.sidechef.com/recipe/8528a7af-b6d8-...,sidechef.com,"{'calories': '33 calories', 'proteinContent': ...","[Pasta, Budget-Friendly, Vegetarian, Pescatari...",10


In [8]:
print("Loading Target Embedding")
tar_img_feats = []
for _id in df["id_"].tolist():     
    tar_img_feats.append(torch.load("datasets/sidechef/blip-embs-large/{:07d}.pth".format(_id)).unsqueeze(0))

tar_img_feats = torch.cat(tar_img_feats, dim=0)

Loading Target Embedding


In [23]:
tar_img_feats.shape

torch.Size([8333, 256])

In [9]:
def respond_to_user(image, message):
    # Process the image and message here
    # For demonstration, I'll just return a simple text response
    chat = Chat(model,transform,df,tar_img_feats)
    chat.encode_image(image)
    response = chat.ask(message)
    return response

In [20]:
from PIL import Image
import numpy as np

# Load the image
image_path = '/home/fahadkhan/omkar/CoVR_old/Nutrigenics-flask-chatbot/datasets/sidechef/images/0000006.png'  # Replace with your image path
img = Image.open(image_path)

# Convert image to NumPy array
img_array = np.array(img)


In [30]:
res = respond_to_user(image=img_array, message="ingredients")

import pprint

pprint.pprint(res)

5
('[\n'
 '    "2 Shallot",\n'
 '    "1 clove Garlic",\n'
 '    "2 Egg",\n'
 '    "6 slice Bacon",\n'
 '    "1/2 cup Heavy Cream",\n'
 '    "1/4 cup Grated Parmesan Cheese",\n'
 '    "8 ounce Fettuccine",\n'
 '    "1 tablespoon Olive Oil",\n'
 '    "to taste Salt",\n'
 '    "to taste Ground Black Pepper",\n'
 '    "to taste Fresh Parsley"\n'
 ']')


In [46]:


# Define the custom CSS to add a footer
custom_css = """
/* Footer style */
.gradio-footer {
    display: flex;
    justify-content: center;
    align-items: center;
    padding: 10px;
    background-color: #f8f9fa;
    color: #333;
    font-size: 0.9em;
}

.custom-header {
    text-align: center;
    padding: 12px;
    background-color: #333; 
    color: white;
    position: bottom;
    bottom: 0;
    width: 100%;
    font-size: 0.8em;
}

.footer {
    width: 100%;
    background-color: #f2f2f2;
    color: #555;
    text-align: center;
    padding: 10px 0;
    position: absolute;
    bottom: 0;
    left: 0;
}

/* Make sure the interface leaves space for the footer */
.body {
    margin-bottom: 50px;
}
"""

# Add a custom footer by injecting HTML into the description
custom_footer_html = """
<footer> <p> Reach out to us at {omkar.thawakar, muzammal.naseer}@mbzuai.ac.ae </p> </footer>
"""

custom_header_html = """
<div class='custom-header'>Nutrition-GPT Demo</div>
"""

def respond_to_user(image, message):
    # Process the image and message here
    # For demonstration, I'll just return a simple text response
    chat = Chat(model,transform,df,tar_img_feats)
    chat.encode_image(image)
    response = chat.ask(message)
    return response

iface = gr.Interface(
    fn=respond_to_user,
    inputs=[gr.Image(height="70%"), gr.Textbox(label="Ask Query"),],
    outputs=[gr.Textbox(label="Nutrition-GPT")],
    title=custom_header_html,  
    # description="Upload an food image and ask queries!",
    css=custom_css,
    # description=custom_footer_html   
)

iface.launch(show_error=True, height="650px")

Running on local URL:  http://127.0.0.1:7866

To create a public link, set `share=True` in `launch()`.




In [None]:
# example_texts = gr.Dataset(components=[gr.Textbox(visible=False)],
            # label="Prompt Examples",
            # samples=[
            #     ["Provide nutritional information for given food image."],
            #     ["What are the nutrients available in given food image."],
            #     ["Could you provide a detailed nutritional data of the given food image?"],
            #     ["Describe the instructions to prepare given food."],
            #     ["What are the key ingredients in this food image?"],
            #     ["Could you highlight the dietary tags for this food image?"],
            # ],)

# example_images = gr.Dataset(components=[image], label="Food Examples",
#                     samples=[
#                         [os.path.join(os.path.dirname("./"), "./datasets/sidechef/sample_images/0000018.png")],
#                         [os.path.join(os.path.dirname("./"), "./datasets/sidechef/sample_images/0000021.png")],
#                         [os.path.join(os.path.dirname("./"), "./datasets/sidechef/sample_images/0000035.png")],
#                         [os.path.join(os.path.dirname("./"), "./datasets/sidechef/sample_images/0000038.png")],
#                         [os.path.join(os.path.dirname("./"), "./datasets/sidechef/sample_images/0000090.png")],
#                         [os.path.join(os.path.dirname("./"), "./datasets/sidechef/sample_images/0000122.png")],
#                     ])

