# Spaces: chuanenlin/foodnet - Running - File size: 14,922 Bytes

import requests
from io import BytesIO
import numpy as np
from gensim.models.fasttext import FastText
from scipy import spatial
import itertools
import gdown
import warnings
import nltk
warnings.filterwarnings('ignore')

import pickle
import pdb
from concurrent.futures import ProcessPoolExecutor

import matplotlib.pyplot as plt
import streamlit as st
import argparse
import logging
from pyunsplash import PyUnsplash
import blacklists
import os

# Unsplash access key used for the ingredient photo search.
# NOTE(review): the literal below is a credential committed to source control.
# It is kept only as a backward-compatible fallback; prefer setting the
# UNSPLASH_API_KEY environment variable, then rotate and remove the literal.
api_key = os.environ.get('UNSPLASH_API_KEY', 'hzcKZ0e4we95wSd8_ip2zTB3m2DrOMWehAxrYjqjwg0')

# instantiate PyUnsplash object
py_un = PyUnsplash(api_key=api_key)

# pyunsplash logger defaults to level logging.ERROR
# If you need to change that, use getLogger/setLevel
# on the module logger, like this:
logging.getLogger("pyunsplash").setLevel(logging.DEBUG)

# TODO: 
# Image search: Option 1 -> google image search api || Option 2 -> open ai clip search
from PIL import Image


# NLTK Datasets
# Fetched at import time, in this order; presumably they back the
# lemmatizer, tokenizer and POS tagger used by the helpers below.
for _nltk_resource in ('wordnet', 'punkt', 'averaged_perceptron_tagger'):
    nltk.download(_nltk_resource)

# Average embedding → Compare
def recommend_ingredients(yum, leftovers, n=10):
  '''
  Uses a mean aggregation method: averages the normalised embeddings of the
  leftovers and returns the vocabulary entries closest to that average.

  :params
  yum -> FastText Word2Vec Obj (must provide get_vector and similar_by_vector)
  leftovers -> list of str (vocabulary keys, words joined by underscores)
  n -> int top_n to return

  :returns
  output -> list of (name, score) tuples, at most n long; names use spaces
            instead of underscores. Empty list when leftovers is empty.
  '''
  leftovers = list(leftovers)
  if not leftovers:
    # Nothing to average; avoids a 0/0 NaN query vector.
    return []

  # Stack the vectors and average them so the embedding width comes from the
  # model itself instead of a hard-coded constant.
  vectors = [yum.get_vector(ingredient, norm=True) for ingredient in leftovers]
  leftovers_embedding = np.mean(vectors, axis=0, dtype=np.float64) # Embedding for leftovers

  # Over-fetch (100) because some matches are filtered out below.
  top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
  top_matches = [(x[0].replace('_', ' '), x[1]) for x in top_matches]
  ignored = [x.replace('_', ' ') for x in leftovers]
  # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
  output = [x for x in top_matches if not any(ignore in x[0] for ignore in ignored)]
  return output[:n]

# Compare → Find intersection
def recommend_ingredients_intersect(yum, leftovers, n=10):
  '''
  Finds candidates that appear in the nearest-neighbour list of every leftover.

  The returned order and scores are those of the first leftover's neighbour
  list; later leftovers only narrow that list down.

  :params
  yum -> FastText Word2Vec Obj (must provide get_vector and similar_by_vector)
  leftovers -> list of str (vocabulary keys, words joined by underscores)
  n -> int top_n to return

  :returns
  output -> list of (name, score) tuples, at most n long; names use spaces
            instead of underscores. Empty list when leftovers is empty.
  '''
  leftovers = list(leftovers)
  # Compare in the same "spaces" form the match names are converted to, so
  # multi-word leftovers such as "olive_oil" are filtered as well.
  ignored = [x.replace('_', ' ') for x in leftovers]

  output = None
  for ingredient in leftovers:
    ingredient_embedding = yum.get_vector(ingredient, norm=True)
    ingredient_matches = yum.similar_by_vector(ingredient_embedding, topn=10000)
    ingredient_matches = [(x[0].replace('_', ' '), x[1]) for x in ingredient_matches]
    # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
    ingredient_output = [x for x in ingredient_matches if not any(ignore in x[0] for ignore in ignored)]
    if output is None:
      output = ingredient_output
    else:
      # Set lookup keeps the intersection linear instead of 10000 x 10000.
      shared_names = {x[0] for x in ingredient_output}
      output = [x for x in output if x[0] in shared_names]

  if output is None:
    return []
  return output[:n]

def recommend_ingredients_subsets(model, yum,leftovers, subset_size):
  '''
  Returns recommendations for every subset of the leftovers of a given size,
  using the mean embedding of each subset as the query.

  :params
  model -> FastText Obj (used for the search only if it exposes
           similar_by_vector; otherwise yum is used)
  yum -> FastText Word2Vec Obj (must provide get_vector; also
         similar_by_vector when model does not)
  leftovers -> list of str (vocabulary keys, words joined by underscores)
  subset_size -> int number of leftovers per subset

  :returns
  all_outputs -> dict mapping each subset (tuple of str) to its list of up to
                 10 (name, score) recommendations
  '''
  # gensim 4 model objects no longer carry the similarity helpers (they live
  # on the keyed vectors), so fall back to yum when model lacks the method.
  searcher = model if hasattr(model, 'similar_by_vector') else yum

  all_outputs = {}
  for leftovers_subset in itertools.combinations(leftovers, subset_size):
    if not leftovers_subset:
      # subset_size == 0 yields one empty subset; there is nothing to average.
      continue
    # Averaging a stacked list starts from real data (the previous np.empty
    # accumulator was uninitialised) and takes the width from the model.
    vectors = [yum.get_vector(ingredient, norm=True) for ingredient in leftovers_subset]
    leftovers_embedding = np.mean(vectors, axis=0, dtype=np.float64) # Embedding for leftovers
    top_matches = searcher.similar_by_vector(leftovers_embedding, topn=100)
    top_matches = [(x[0].replace('_', ' '), x[1]) for x in top_matches]
    ignored = [x.replace('_', ' ') for x in leftovers_subset]
    # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
    output = [x for x in top_matches if not any(ignore in x[0] for ignore in ignored)]
    all_outputs[leftovers_subset] = output[:10]
  return all_outputs



def filter_adjectives(data):
    '''
    Drop single-word entries that are not tagged as nouns.

    An entry is kept when it tokenizes to more than one word, or when its
    only word is tagged 'NN' or 'NNS' by the NLTK part-of-speech tagger.

    :params
    data -> list of str

    :returns
    data -> list of str, the kept entries in their original order
    '''
    kept = []
    for entry in data:
        tagged = nltk.pos_tag(nltk.word_tokenize(entry))
        tags = [pair[1] for pair in tagged]
        is_multi_word = len(tags) > 1
        if is_multi_word or 'NN' in tags or 'NNS' in tags:
            kept.append(entry)
    return kept

def plural_to_singular(lemma, recipe): 
  '''
  Lemmatize every ingredient of one recipe.

  :params
  lemma -> nltk lemma Obj (anything with a lemmatize(word) method)
  recipe -> list of str

  :returns
  recipe -> list of str, each item passed through lemma.lemmatize
  '''
  converted = []
  for ingredient in recipe:
    converted.append(lemma.lemmatize(ingredient))
  return converted

def filter_lemma(data):
    '''
    Lemmatize every recipe in parallel (plural -> root form).

    :params 
    data -> list of lists (one list of ingredient strings per recipe)

    :returns
    data -> list of lists, each recipe run through plural_to_singular
    '''
    # One lemmatizer instance is shared by repeating it for every recipe.
    lemmatizer = nltk.wordnet.WordNetLemmatizer()

    # NOTE: ProcessPoolExecutor() is created with its default worker count,
    # so this can occupy all the computational resources of your computer.
    with ProcessPoolExecutor() as pool:
        lemmatized = pool.map(plural_to_singular, itertools.repeat(lemmatizer), data)
        return list(lemmatized)


def train_model(data):
    '''
    Train a FastText model on the recipe corpus.
    NOTE: gensim==4.1.2

    :params
    data -> list of lists of all recipes

    :returns 
    model -> trained FastText model obj
    '''
    # Hyper-parameters are fixed here; the 32-dim vector size is what the
    # rest of the app is built around.
    return FastText(
        data,
        vector_size=32,
        window=99,
        min_count=5,
        workers=40,
        sg=1,
    )

@st.cache_resource
def load_model(filename):
  '''
  Load a saved FastText model and expose its word vectors.
  Wrapped in st.cache_resource, so Streamlit caches the result.

  :params:
  filename -> path to the model 

  :returns
  model -> this is the full FastText obj
  yum -> this is the FastText Word2Vec obj (model.wv)
  '''
  fasttext_model = FastText.load(filename)
  return fasttext_model, fasttext_model.wv

@st.cache_resource
def load_data(filename='data/all_recipes_ingredients_lemma.pkl'):
  '''
  Load the pickled recipe dataset.
  Wrapped in st.cache_resource, so Streamlit caches the result.

  :params:
  filename -> path to dataset

  :return
  data -> list of all recipes (whatever object the pickle contains)
  '''
  # NOTE(review): pickle can execute arbitrary code while loading; only point
  # this at dataset files you trust.
  # The context manager closes the file handle, which the bare open() leaked.
  with open(filename, 'rb') as dataset_file:
    return pickle.load(dataset_file)

def plot_results(names, probs, n=5):
  '''
  Plots a bar chart of the names of the items vs. probability of similarity 
  :params:
  names -> list of str 
  probs -> list of float values
  n -> int of how many bars to show NOTE: Max = 100

  :return
  fig -> return figure for plotting 
  '''
  # Honour n as documented: show at most the first n items.
  names = list(names)[:n]
  probs = list(probs)[:n]

  # Reuse pyplot's current figure but wipe it first; otherwise every call
  # stacks another set of bars on top of the previous ones.
  fig = plt.gcf()
  fig.clf()
  ax = fig.add_subplot(111)

  positions = range(len(names))
  ax.bar(positions, probs, align='center')

  # Pin one tick per bar and label it with the ingredient name.
  ax.xaxis.set_major_locator(plt.FixedLocator(positions))
  ax.xaxis.set_major_formatter(plt.FixedFormatter(names))
  ax.set_ylabel('Probability',fontsize='large', fontweight='bold')
  ax.set_xlabel('Ingredients', fontsize='large', fontweight='bold')
  ax.xaxis.labelpad = 10
  # The title reads the current selection from Streamlit session state.
  ax.set_title(f'FoodNet Top {n} Predictions = {st.session_state.leftovers}')
  plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

  return fig

def load_image(image_file):
    '''
    Open an image with PIL.

    :params
    image_file -> whatever Image.open accepts (passed through unchanged)

    :returns
    img -> the object returned by Image.open
    '''
    return Image.open(image_file)

# Page-level settings for the Streamlit app.
st.set_page_config(
    page_title="FoodNet",
    page_icon="🍔",
    layout="centered",
    initial_sidebar_state="auto",
)

##### UI/UX #####
## Sidebar ##
# The selected page name drives the if/elif page blocks below.
add_selectbox = st.sidebar.selectbox(
    "Pages",
    ("FoodNet Recommender", "Food Donation Resources", "Contact Team"),
)

# Load the trained model (full FastText object and its word vectors).
model, yum = load_model('fastfood.pth')

if add_selectbox == "FoodNet Recommender":
    st.title("FoodNet 🍔")
    st.write("Search for similar food ingredients. Select two or more ingredients to find complementary ingredients.")
    # Vocabulary keys use underscores; show them to the user with spaces.
    ingredients = [x.replace('_', ' ') for x in yum.key_to_index]
    st.multiselect("Type or select food ingredients", ingredients, default=['bread', 'lettuce'], key="leftovers")

    ## Slider ##
    st.slider("Select number of recommendations to show", min_value=1, max_value=10, value=3, step=1, key='top_n')

    ## Dietary restriction ##
    st.selectbox(label="Select dietary restriction", options=('None', 'Kosher', 'Vegetarian'), key="diet")

    ## Get food recommendation ##
    selected_leftovers = st.session_state.get('leftovers') or []
    ingredients_no_space = [x.replace(' ', '_') for x in selected_leftovers]
    if ingredients_no_space:
        # Ask for the full candidate pool and truncate only after the dietary
        # filter, so a restriction does not shrink the list below top_n.
        out = recommend_ingredients(yum, ingredients_no_space, n=100)
    else:
        # An empty selection has no embedding to average; skip the model call.
        st.info("Select at least one ingredient to get recommendations.")
        out = []

    if st.session_state.diet == 'Vegetarian':
        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.vegitarian)]
    elif st.session_state.diet == 'Kosher':
        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.kosher)]

    out = out[:st.session_state.top_n]
    names = [o[0] for o in out]
    probs = [o[1] for o in out]
    # TODO: optional score table / bar chart, e.g. plot_results(names, probs, st.session_state.top_n)

    ## Show Images ##
    # One Unsplash photo (the first search hit) per recommendation, laid out
    # round-robin across three columns.
    image_columns = st.columns(3)
    for i, name in enumerate(names):
        search = py_un.search(type_='photos', query=name)
        photo = next(iter(search.entries), None)
        if photo is not None:
            image_columns[i % 3].image(photo.link_download, caption=name, use_column_width=True)

elif add_selectbox == "Food Donation Resources":
    st.title('Food Donation Resources')
    st.subheader('Pittsburgh Food Bank:')
    st.write("In 2000, the Food Bank opened the doors on its facility in Duquesne. "
              "This facility was the first LEED-certified building in Pittsburgh and the first LEED-certified "
              "food bank in the nation. Learn more about that facility here. "
              "Today, we work with a network of more than 850 partners across the 11 counties we serve. "
              "In addition to sourcing, warehousing and distributing food, the Food Bank is actively engaged in "
              "stabilizing lives and confronting issues of chronic hunger, poor nutrition and health. "
              "And, through our advocacy efforts, we have become a primary driver in comprehensive anti-hunger "
              "endeavors regionally, statewide and at the national level."
              )
    st.write("Check out this [link](https://pittsburghfoodbank.org/)👈")
    st.subheader('412 Food Rescue:')
    st.write("412 Food Rescue is a nonprofit organization dedicated to ending hunger by organizing "
              "volunteers to deliver surplus food to insecure communities instead of landfills. "
              "Since its creation in 2015, the organization has redistributed over three million pounds of food through "
              "the use of its mobile application, Food Rescue Hero. They are currently rolling out the app nationwide."
              )
    st.write("Check out this [link](https://412foodrescue.org/)👈")
    # TODO: optional food image upload (st.file_uploader + load_image) is not wired up yet.
if add_selectbox == "Contact Team":
    st.title('Contact Team')

    # One entry per team member: heading, the lines written in the left
    # column (in order), and the photo shown in the right column.
    team_members = (
        {
            'name': 'David Chuan-En Lin',
            'details': (
                'Pronouns: he/him/his',
                'Research/career interests: Human-AI Co-Design by (1) building ML-infused creativity support tools and '
                '(2) investigating how such tools augment design processes',
                'Favorite Food: Ice cream sandwich',
                'A painfully boring fact: Second-year PhD at HCII SCS',
                'Hobbies: Making travel videos, graphic design, music',
                'Email: [email protected]',
            ),
            'photo': 'https://chuanenlin.com/images/me.jpg',
        },
        {
            'name': 'Mitchell Fogelson',
            'details': (
                'Pronouns: he/him/his',
                'Research/career interests: Robotics, AI',
                'Favorite Food: Deep Dish Pizza',
                'A painfully boring fact: Am a middle child',
                'Hobbies: Golf, Traveling, Games',
                'Email: [email protected]',
            ),
            'photo': 'https://images.squarespace-cdn.com/content/v1/562661f3e4b0ae7c10f0a2cc/1590528961389-2142HA48O7LRZ9FWGP0F/about_image.jpg?format=2500w',
        },
        {
            'name': 'Sunny Yang',
            'details': (
                'Pronouns: She/Her/Hers',
                'Research/career interests: Product Manager',
                'Favorite Food: Sushi',
                'A painfully boring fact: I do not like rainy:(',
                'Hobbies: Viola, Basketball',
                'Email: [email protected]',
            ),
            'photo': 'https://media-exp1.licdn.com/dms/image/C4D03AQF37KjK_GYwzA/profile-displayphoto-shrink_400_400/0/1638326708803?e=1643846400&v=beta&t=q10CTNCG6h5guez1YT0j4j_oLlrGJB_8NugaBOUSAGg',
        },
        {
            'name': 'Shihao Xu',
            'details': (
                'Pronouns: he/him/his',
                'Research/career interests: Autonomous Vehicle',
                'Favorite Food: Dumplings',
                'A painfully boring fact:  Covid is still not gone',
                'Hobbies: photography',
                'Email: [email protected]',
            ),
            'photo': 'https://scontent-ort2-1.xx.fbcdn.net/v/t39.30808-6/261420667_131245119324840_3342182275866550937_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=730e14&_nc_ohc=IP7khn2w6cwAX_wC85x&_nc_ht=scontent-ort2-1.xx&oh=063c2b6b0ed5e9fc10adb2c391c471cf&oe=61AA72C1',
        },
    )

    for member in team_members:
        st.subheader(member['name'])
        # Wide text column, narrow spacer, photo column.
        col1, mid, col2 = st.columns([20, 2, 10])
        with col1:
            for detail in member['details']:
                st.write(detail)
        with col2:
            st.image(member['photo'], width=300)