import os
import subprocess
import sys

# Upgrade transformers with the pip that belongs to the interpreter running
# this script (`sys.executable -m pip`), rather than whichever `pip` happens
# to be first on PATH. Passing an argument list avoids going through a shell.
# check=False keeps this best-effort: a failed upgrade does not abort startup.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '--upgrade', 'transformers'],
    check=False)
import nltk
from transformers import VisionEncoderDecoderModel, AutoTokenizer, ViTImageProcessor, pipeline
import torch
from PIL import Image
import streamlit as st
from nltk.corpus import stopwords
from io import BytesIO


# Make sure the NLTK stop-word corpus is available locally.
nltk.download('stopwords')

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Image -> ingredient-caption model, placed on the chosen device and
# switched to evaluation mode.
model = VisionEncoderDecoderModel.from_pretrained(
    "SumanthKarnati/Image2Ingredients").to(device)
model.eval()

# The image preprocessor and the tokenizer are both loaded from the same
# captioning checkpoint.
_CAPTIONING_CHECKPOINT = 'nlpconnect/vit-gpt2-image-captioning'
feature_extractor = ViTImageProcessor.from_pretrained(_CAPTIONING_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CAPTIONING_CHECKPOINT)

# Text-generation pipeline used to write the nutritional suggestions.
generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')

# Generation settings forwarded to model.generate() by predict_step().
max_length = 16
num_beams = 4
gen_kwargs = dict(max_length=max_length, num_beams=num_beams)

# Function to predict ingredients from images


def predict_step(image_files, model, feature_extractor, tokenizer, device, gen_kwargs):
    """Generate one decoded text prediction per supplied image.

    Args:
        image_files: Iterable of uploaded-file-like objects exposing
            ``getvalue()`` (raw image bytes). ``None`` entries are skipped.
        model: Object whose ``generate(pixel_values, **gen_kwargs)`` returns
            token ids.
        feature_extractor: Callable turning a list of PIL images into a
            batch that contains ``"pixel_values"``.
        tokenizer: Object providing ``batch_decode``.
        device: Device the preprocessed batch is moved to.
        gen_kwargs: Keyword arguments forwarded to ``model.generate``.

    Returns:
        A list of whitespace-stripped strings, one per image, or ``None``
        when no usable image was supplied.
    """
    rgb_images = []
    for upload in image_files:
        if upload is None:
            continue
        # Decode from an in-memory copy of the uploaded bytes.
        picture = Image.open(BytesIO(upload.getvalue()))
        rgb_images.append(
            picture if picture.mode == "RGB" else picture.convert(mode="RGB"))

    if len(rgb_images) == 0:
        return None

    batch = feature_extractor(images=rgb_images, return_tensors="pt")
    batch.to(device)
    generated_ids = model.generate(batch["pixel_values"], **gen_kwargs)

    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return [text.strip() for text in decoded]


# English stop words from NLTK, kept in a set for fast membership tests
stop_words = set()
stop_words.update(stopwords.words('english'))

# Function to remove stop words from a list of words


def remove_stop_words(word_list):
    """Return the entries of ``word_list`` that are not in ``stop_words``.

    Matching is exact string membership in the module-level ``stop_words``
    set. Order and duplicates of the remaining entries are preserved.
    """
    kept = []
    for candidate in word_list:
        if candidate in stop_words:
            continue
        kept.append(candidate)
    return kept

# Streamlit app code


def _split_ingredients(caption):
    """Split a '-'-separated caption into unique, cleaned ingredient names.

    Pieces are whitespace-stripped; empty pieces and pieces containing a
    digit are dropped; the first occurrence of each name is kept, in order.
    """
    pieces = (piece.strip() for piece in caption.split('-'))
    cleaned = [
        piece for piece in pieces
        if piece and not any(ch.isdigit() for ch in piece)
    ]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(cleaned))


def main():
    """Streamlit page: upload a food image, list ingredients, suggest advice.

    Displays the uploaded image, runs ``predict_step`` on it, cleans the
    predicted caption into an ingredient list, and asks the text-generation
    pipeline for a nutritional analysis of those ingredients. Shows a
    warning and stops early when nothing usable was recognized.
    """
    st.title("Image2Nutrients: Food Ingredient Recognition")
    st.write("Upload an image of your food to recognize the ingredients!")

    # File upload
    uploaded_file = st.file_uploader(
        "Choose an image", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    # Display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Perform ingredient recognition
    captions = predict_step([uploaded_file], model,
                            feature_extractor, tokenizer, device, gen_kwargs)
    # predict_step returns None when it has no image to process; guard
    # against that (and an empty list) before indexing.
    if not captions:
        st.warning("No ingredients could be recognized in this image.")
        return

    preds = remove_stop_words(_split_ingredients(captions[0]))
    if not preds:
        # Nothing left after cleaning: skip the costly generation step
        # instead of prompting with an empty ingredient list.
        st.warning("No ingredients could be recognized in this image.")
        return

    # Display the recognized ingredients
    st.subheader("Recognized Ingredients:")
    for ingredient in preds:
        st.write(ingredient)

    preds_str = ', '.join(preds)

    # Prepare the prompt
    prompt = f"You are a knowledgeable assistant that provides nutritional advice based on a list of ingredients. The identified ingredients are: {preds_str}. Note that some ingredients may not make sense, so use the ones that do. Can you provide a nutritional analysis and suggestions for improvement?"

    # Generate a sequence of text. max_new_tokens bounds the continuation
    # relative to the prompt, so the overall length limit always exceeds both
    # the prompt length and min_length; without it the default length cap
    # can be shorter than this prompt.
    suggestions = generator(
        prompt, do_sample=True, min_length=200, max_new_tokens=256)

    # Extract the generated text (drop the echoed prompt)
    suggestions = suggestions[0]['generated_text'][len(prompt):]

    st.subheader("Nutritional Analysis and Suggestions:")
    st.write(suggestions)


# Run the app only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()