wisdom196473 committed
Commit 44f740c · 1 Parent(s): 43acb67

Update README, model.py, and requirements.txt
.ipynb_checkpoints/README-checkpoint.md CHANGED
@@ -41,6 +41,8 @@ streamlit run amazon_app.py
 - `model.py`: Core AI model implementations
 - `requirements.txt`: Project dependencies
 
-## License
+## Future Directions
 
-MIT License
+- [ ] Fine-tune the FashionCLIP embedding model on domain-specific data
+- [ ] Fine-tune the large language model to improve its generalization capabilities
+- [ ] Develop feedback loops for continuous improvement
.ipynb_checkpoints/amazon_app-checkpoint.py ADDED
@@ -0,0 +1,269 @@
+import streamlit as st
+
+# Configure page
+st.set_page_config(
+    page_title="E-commerce Visual Assistant",
+    page_icon="🛍️",
+    layout="wide"
+)
+
+from streamlit_chat import message
+import torch
+from PIL import Image
+import requests
+from io import BytesIO
+from model import initialize_models, load_data, chatbot, cleanup_resources
+
+# Helper functions
+def load_image_from_url(url):
+    try:
+        response = requests.get(url)
+        img = Image.open(BytesIO(response.content))
+        return img
+    except Exception as e:
+        st.error(f"Error loading image from URL: {str(e)}")
+        return None
+
+def initialize_assistant():
+    if not st.session_state.models_loaded:
+        with st.spinner("Loading models and data..."):
+            initialize_models()
+            load_data()
+            st.session_state.models_loaded = True
+            st.success("Assistant is ready!")
+
+def display_chat_history():
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+            if "image" in message:
+                st.image(message["image"], caption="Uploaded Image", width=200)
+            if "display_images" in message:
+                # Since we only have one image, we don't need multiple columns
+                img_data = message["display_images"][0]  # Get the first (and only) image
+                st.image(
+                    img_data['image'],
+                    caption=f"{img_data['product_name']}\nPrice: ${img_data['price']:.2f}",
+                    width=350  # Adjusted width for single image display
+                )
+
+def handle_user_input(prompt, uploaded_image):
+    # Add user message
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Generate response
+    with st.spinner("Processing your request..."):
+        try:
+            response = chatbot(prompt, image_input=uploaded_image)
+
+            if isinstance(response, dict):
+                assistant_message = {
+                    "role": "assistant",
+                    "content": response['text']
+                }
+                if 'images' in response and response['images']:
+                    assistant_message["display_images"] = response['images']
+                st.session_state.messages.append(assistant_message)
+            else:
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": response
+                })
+
+        except Exception as e:
+            st.error(f"Error: {str(e)}")
+            st.session_state.messages.append({
+                "role": "assistant",
+                "content": f"I encountered an error: {str(e)}"
+            })
+
+    st.rerun()
+
+# Custom CSS for enhanced styling
+st.markdown("""
+    <style>
+    /* Main container styling */
+    .main {
+        background: linear-gradient(135deg, #f5f7fa 0%, #e8edf2 100%);
+        padding: 20px;
+        border-radius: 15px;
+    }
+
+    /* Header styling */
+    .stTitle {
+        color: #1e3d59;
+        font-size: 2.5rem !important;
+        text-align: center;
+        padding: 20px;
+        text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
+    }
+
+    /* Sidebar styling */
+    .css-1d391kg {
+        background: linear-gradient(180deg, #1e3d59 0%, #2b5876 100%);
+    }
+
+    /* Chat container styling */
+    .stChatMessage {
+        background-color: white;
+        border-radius: 15px;
+        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+        margin: 10px 0;
+        padding: 15px;
+    }
+
+    /* Input box styling */
+    .stTextInput > div > div > input {
+        border-radius: 20px;
+        border: 2px solid #1e3d59;
+        padding: 10px 20px;
+    }
+
+    /* Radio button styling */
+    .stRadio > label {
+        background-color: white;
+        padding: 10px 20px;
+        border-radius: 10px;
+        margin: 5px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    }
+
+    /* Button styling */
+    .stButton > button {
+        background: linear-gradient(90deg, #1e3d59 0%, #2b5876 100%);
+        color: white;
+        border-radius: 20px;
+        padding: 10px 25px;
+        border: none;
+        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+        transition: all 0.3s ease;
+    }
+
+    .stButton > button:hover {
+        transform: translateY(-2px);
+        box-shadow: 0 6px 8px rgba(0,0,0,0.2);
+    }
+
+    /* Footer styling */
+    footer {
+        background-color: white;
+        border-radius: 10px;
+        padding: 20px;
+        margin-top: 30px;
+        text-align: center;
+        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+    }
+    </style>
+""", unsafe_allow_html=True)
+
+# Initialize session state
+if 'messages' not in st.session_state:
+    st.session_state.messages = []
+if 'models_loaded' not in st.session_state:
+    st.session_state.models_loaded = False
+
+# Main title with enhanced styling
+st.markdown("<h1 class='stTitle'>🛍️ Amazon E-commerce Visual Assistant</h1>", unsafe_allow_html=True)
+
+# Sidebar configuration with enhanced styling
+with st.sidebar:
+    st.title("Assistant Features")
+
+    st.markdown("### 🤖 How It Works")
+    st.markdown("""
+    This AI-powered shopping assistant combines:
+
+    **🧠 Advanced Technologies**
+    - FashionCLIP Visual AI
+    - Mistral-7B Language Model
+    - Multimodal Understanding
+
+    **💫 Capabilities**
+    - Product Search & Recognition
+    - Visual Analysis
+    - Detailed Comparisons
+    - Price Analysis
+    """)
+
+    st.markdown("---")
+
+    st.markdown("### 👥 Development Team")
+    # A list (not a set) keeps the members in a stable display order
+    team_members = [
+        "Yu-Chih (Wisdom) Chen",
+        "Feier Xu",
+        "Yanchen Dong",
+        "Kitae Kim"
+    ]
+
+    for name in team_members:
+        st.markdown(f"**{name}**")
+
+    st.markdown("---")
+
+    if st.button("🔄 Reset Chat"):
+        st.session_state.messages = []
+        st.rerun()
+
+# Main chat interface
+def main():
+    # Initialize assistant
+    initialize_assistant()
+
+    # Chat container
+    chat_container = st.container()
+
+    # User input section at the bottom
+    input_container = st.container()
+
+    with input_container:
+        # Chat input
+        prompt = st.chat_input("What would you like to know?")
+
+        # Input options below chat input
+        col1, col2, col3 = st.columns([1, 1, 1])
+        with col1:
+            input_option = st.radio(
+                "Input Method:",
+                ("Text Only", "Upload Image", "Image URL"),
+                key="input_method"
+            )
+
+        # Handle different input methods
+        uploaded_image = None
+        if input_option == "Upload Image":
+            with col2:
+                uploaded_file = st.file_uploader("Choose image", type=["jpg", "jpeg", "png"])
+                if uploaded_file:
+                    uploaded_image = Image.open(uploaded_file)
+                    st.image(uploaded_image, caption="Uploaded Image", width=200)
+
+        elif input_option == "Image URL":
+            with col2:
+                image_url = st.text_input("Enter image URL")
+                if image_url:
+                    uploaded_image = load_image_from_url(image_url)
+                    if uploaded_image:
+                        st.image(uploaded_image, caption="Image from URL", width=200)
+
+    # Display chat history
+    with chat_container:
+        display_chat_history()
+
+    # Handle user input and generate response
+    if prompt:
+        handle_user_input(prompt, uploaded_image)
+
+    # Footer
+    st.markdown("""
+    <footer>
+        <h3>💡 Tips for Best Results</h3>
+        <p>Be specific in your questions for more accurate responses!</p>
+        <p>Try asking about product features, comparisons, or prices.</p>
+    </footer>
+    """, unsafe_allow_html=True)
+
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        cleanup_resources()
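
For reference, `handle_user_input()` above accepts either a plain string or a dict from `model.chatbot()`. A minimal sketch of the dict contract, inferred only from the keys the app reads (`text`, `images`, `image`, `product_name`, `price`); the concrete values are hypothetical:

```python
from PIL import Image

# Hypothetical example of the dict reply that handle_user_input() handles;
# inferred from the keys amazon_app.py reads, not from model.py itself.
placeholder = Image.new("RGB", (64, 64))  # stand-in for a retrieved product image

response = {
    "text": "Here are similar jackets I found.",    # rendered via st.markdown
    "images": [
        {
            "image": placeholder,                   # shown with st.image
            "product_name": "Example Rain Jacket",  # hypothetical product
            "price": 49.99,                         # formatted as ${price:.2f}
        }
    ],
}
```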
.ipynb_checkpoints/model-checkpoint.py CHANGED
@@ -47,12 +47,6 @@ text_faiss: Optional[object] = None
 image_faiss: Optional[object] = None
 
 def initialize_models() -> bool:
-    """
-    Initialize CLIP and LLM models with proper error handling and GPU optimization.
-
-    Returns:
-        bool: True if initialization successful, raises RuntimeError otherwise
-    """
     global clip_model, clip_preprocess, clip_tokenizer, llm_tokenizer, llm_model, device
 
     try:
@@ -80,10 +74,14 @@ def initialize_models() -> bool:
             bnb_4bit_quant_type="nf4"
         )
 
+        # Get token from Streamlit secrets
+        hf_token = st.secrets["HUGGINGFACE_TOKEN"]
+
         llm_tokenizer = AutoTokenizer.from_pretrained(
             model_name,
             padding_side="left",
-            truncation_side="left"
+            truncation_side="left",
+            token=hf_token  # Add token here
         )
         llm_tokenizer.pad_token = llm_tokenizer.eos_token
 
@@ -91,7 +89,8 @@ def initialize_models() -> bool:
             model_name,
             quantization_config=quantization_config,
             device_map="auto",
-            torch_dtype=torch.float16
+            torch_dtype=torch.float16,
+            token=hf_token  # Add token here
         )
         llm_model.eval()
         print("LLM initialized successfully")
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1,14 @@
+streamlit==1.28.2
+streamlit-chat==0.1.1
+torch>=2.0.0
+transformers==4.35.2
+open_clip_torch==2.23.0
+pillow==10.1.0
+pandas==2.1.3
+numpy==1.26.2
+faiss-cpu>=1.7.4
+huggingface_hub==0.19.4
+langchain==0.0.339
+requests==2.31.0
+bitsandbytes>=0.41.1
+matplotlib==3.7.1
README.md CHANGED
@@ -1,4 +1,3 @@
-<<<<<<< HEAD
 # Amazon E-commerce Visual Assistant
 
 A multimodal AI assistant that helps users search and explore Amazon products through natural language and image-based interactions.
@@ -42,7 +41,8 @@ streamlit run amazon_app.py
 - `model.py`: Core AI model implementations
 - `requirements.txt`: Project dependencies
 
-## License
+## Future Directions
 
-MIT License
-=======
+- [ ] Fine-tune the FashionCLIP embedding model on domain-specific data
+- [ ] Fine-tune the large language model to improve its generalization capabilities
+- [ ] Develop feedback loops for continuous improvement
model.py CHANGED
@@ -47,12 +47,6 @@ text_faiss: Optional[object] = None
 image_faiss: Optional[object] = None
 
 def initialize_models() -> bool:
-    """
-    Initialize CLIP and LLM models with proper error handling and GPU optimization.
-
-    Returns:
-        bool: True if initialization successful, raises RuntimeError otherwise
-    """
     global clip_model, clip_preprocess, clip_tokenizer, llm_tokenizer, llm_model, device
 
    try:
@@ -80,10 +74,14 @@ def initialize_models() -> bool:
             bnb_4bit_quant_type="nf4"
         )
 
+        # Get token from Streamlit secrets
+        hf_token = st.secrets["HUGGINGFACE_TOKEN"]
+
         llm_tokenizer = AutoTokenizer.from_pretrained(
             model_name,
             padding_side="left",
-            truncation_side="left"
+            truncation_side="left",
+            token=hf_token  # Add token here
         )
         llm_tokenizer.pad_token = llm_tokenizer.eos_token
 
@@ -91,7 +89,8 @@ def initialize_models() -> bool:
             model_name,
             quantization_config=quantization_config,
             device_map="auto",
-            torch_dtype=torch.float16
+            torch_dtype=torch.float16,
+            token=hf_token  # Add token here
         )
         llm_model.eval()
         print("LLM initialized successfully")
requirements.txt CHANGED
@@ -1,14 +1,14 @@
 streamlit==1.28.2
 streamlit-chat==0.1.1
-torch==2.1.1
+torch>=2.0.0
 transformers==4.35.2
 open_clip_torch==2.23.0
 pillow==10.1.0
 pandas==2.1.3
 numpy==1.26.2
-faiss-cpu==1.7.4
+faiss-cpu>=1.7.4
 huggingface_hub==0.19.4
 langchain==0.0.339
 requests==2.31.0
-pyngrok==7.0.3
-bitsandbytes==0.41.1
+bitsandbytes>=0.41.1
+matplotlib==3.7.1
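
Since `torch`, `faiss-cpu`, and `bitsandbytes` are now floor-pinned with `>=` rather than locked to exact versions, the resolved versions can drift between environments. A small stdlib-only check to run after `pip install -r requirements.txt`, using the package names from the file above:

```python
# Print the versions the loosened pins actually resolved to, so two
# environments can be compared line by line.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("streamlit", "torch", "transformers", "faiss-cpu", "bitsandbytes"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```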