Spaces:

GMARTINEZMILLA
/

Final_Project

Sleeping

App Files Files Community

GMARTINEZMILLA commited on Oct 15, 2024

Commit

ad618df

1 Parent(s): 9d618d8

feat: updated app.py

Browse files

Files changed (1) hide show

app.py +332 -589

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import streamlit as st
 import pandas as pd
 import plotly.express as px
-import plotly.graph_objects as go
 import matplotlib.pyplot as plt
 import numpy as np
 import lightgbm as lgb
@@ -12,76 +11,84 @@ from sklearn.metrics import mean_absolute_error, mean_squared_error
 # Page configuration
 st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
-# Load CSV files
 df = pd.read_csv("df_clean.csv")
 nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
 euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
 ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
-customer_clusters = pd.read_csv('predicts/customer_clusters.csv')
-df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
 # Ensure customer codes are strings
 df['CLIENTE'] = df['CLIENTE'].astype(str)
 nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
 euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
-customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str)
 fieles_df = pd.read_csv("clientes_relevantes.csv")
 cestas = pd.read_csv("cestas.csv")
 productos = pd.read_csv("productos.csv")
 df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
-# Convert columns in euros_proveedor to numeric
 for col in euros_proveedor.columns:
     if col != 'CLIENTE':
         euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')
-# Check for NaN values in euros_proveedor
 if euros_proveedor.isna().any().any():
     st.warning("Some values in euros_proveedor couldn't be converted to numbers. Please review the input data.")
-# Ignore the last two columns in df
 df = df.iloc[:, :-2]
 # Function to get supplier name
 def get_supplier_name(code):
-    code = str(code)
     name = nombres_proveedores[nombres_proveedores['codigo'] == code]['nombre'].values
     return name[0] if len(name) > 0 else code
-# Function to create radar chart using Plotly
 def radar_chart(categories, values, amounts, title):
-    # Ensure the first and last data points are the same to close the radar chart
-    values.append(values[0])
-    amounts.append(amounts[0])
-    categories.append(categories[0])
-    fig = px.line_polar(
-        r=values,
-        theta=categories,
-        line_close=True,
-        labels={'r': '% Units Sold', 'theta': 'Manufacturers'},
-        title=title
-    )
-    # Adding a second trace for the spend amounts
-    fig.add_trace(
-        go.Scatterpolar(
-            r=amounts,
-            theta=categories,
-            mode='lines+markers',
-            name="Spend (€)",
-            line=dict(color='blue', width=2)
-        )
-    )
-    fig.update_traces(fill='toself')  # Fill the radar chart area
-    fig.update_layout(
-        polar=dict(
-            radialaxis=dict(visible=True, range=[0, 1])
-        ),
-        showlegend=True
-    )
     return fig
 # Main page design
@@ -114,632 +121,368 @@ elif page == "Customer Analysis":
     if st.button("Calcular"):
         if customer_code:
             customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
             if not customer_match.empty:
                 cluster = customer_match['cluster_id'].values[0]
                 st.write(f"Customer {customer_code} belongs to cluster {cluster}")
-                # Load the corresponding model
                 model_path = f'models/modelo_cluster_{cluster}.txt'
                 gbm = lgb.Booster(model_file=model_path)
                 st.write(f"Loaded model for cluster {cluster}")
                 # Load predict data for that cluster
                 predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
                 predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
                 # Filter for the specific customer
-                customer_data = predict_data[predict_data['cliente_id'] == customer_code]
                 if not customer_data.empty:
                     lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
                     features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
                     X_predict = customer_data[features]
                     # Convert categorical features to 'category' dtype
                     categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
                     for feature in categorical_features:
                         X_predict[feature] = X_predict[feature].astype('category')
                     # Make Prediction for the selected customer
                     y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
-                    # Results DataFrame
                     results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
                     results['ventas_predichas'] = y_pred
-                    # Load actual sales data
-                    actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code]
                     if not actual_sales.empty:
-                        results = results.merge(
-                            actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
-                            on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
-                            how='left'
-                        )
                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
                         results['ventas_reales'].fillna(0, inplace=True)
-                        # Calculate error metrics
                         valid_results = results.dropna(subset=['ventas_reales'])
                         if not valid_results.empty:
                             mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
                             mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
                             rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
                             st.write(f"MAE: {mae:.2f}")
                             st.write(f"MAPE: {mape:.2f}%")
                             st.write(f"RMSE: {rmse:.2f}")
-                        # Plot radar chart
-                        top_units = df[df["CLIENTE"] == str(customer_code)].iloc[:, 1:].T
-                        top_sales = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)].iloc[:, 1:].T
-                        combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
-                        combined_data = pd.DataFrame({
-                            'units': top_units.loc[combined_top, top_units.columns[0]],
-                            'sales': top_sales.loc[combined_top, top_sales.columns[0]]
-                        }).fillna(0)
-                        manufacturers = [get_supplier_name(m) for m in combined_data.index]
-                        values = combined_data['units'].tolist()
-                        amounts = combined_data['sales'].tolist()
-                        fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Customer {customer_code}')
-                        st.plotly_chart(fig)
-# Articles Recommendations Page
-elif page == "Articles Recommendations":
-    st.title("Articles Recommendations")
-    st.markdown("""
-        Get tailored recommendations for your customers based on their basket.
-    """)
-    partial_code = st.text_input("Enter part of Customer Code for Recommendations (or leave empty to see all)")
-    if partial_code:
-        filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
-    else:
-        filtered_customers = df
-    customer_list = filtered_customers['CLIENTE'].unique()
-    customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
-    if customer_code:
-        option = st.selectbox("Select Recommendation Type", ["Select an option", "By Purchase History", "By Current Basket"])
-        if option == "By Current Basket":
-            st.write("Select the items and assign quantities for the basket:")
-            available_articles = productos['ARTICULO'].unique()
-            selected_articles = st.multiselect("Select Articles", available_articles)
-            quantities = {article: st.number_input(f"Quantity for {article}", min_value=0, step=1) for article in selected_articles}
-            if st.button("Calcular"):
-                new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
-                if new_basket:
-                    def recomienda(new_basket):
-                        tfidf = TfidfVectorizer()
-                        tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])
-                        new_basket_tfidf = tfidf.transform([' '.join(new_basket)])
-                        similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
-                        similar_indices = similarities.argsort()[0][-3:]
-                        recommendations_count = {}
-                        total_similarity = 0
-                        for idx in similar_indices:
-                            sim_score = similarities[0][idx]
-                            total_similarity += sim_score
-                            products = cestas.iloc[idx]['Cestas'].split()
-                            for product in products:
-                                if product not in new_basket:
-                                    recommendations_count[product] = recommendations_count.get(product, 0) + sim_score
-                        recommendations_with_prob = [(prod, score / total_similarity) for prod, score in recommendations_count.items()]
-                        recommendations_with_prob.sort(key=lambda x: x[1], reverse=True)
-                        recommendations_df = pd.DataFrame({
-                            'ARTICULO': [r[0] for r in recommendations_with_prob],
-                            'PROBABILIDAD': [r[1] for r in recommendations_with_prob]
-                        })
-                        return recommendations_df
-                    recommendations_df = recomienda(new_basket)
-                    st.dataframe(recommendations_df)
-                else:
-                    st.warning("Please select at least one article and set its quantity.")
-# import streamlit as st
-# import pandas as pd
-# import plotly.express as px
-# import matplotlib.pyplot as plt
-# import numpy as np
-# import lightgbm as lgb
-# from sklearn.feature_extraction.text import TfidfVectorizer
-# from sklearn.metrics.pairwise import cosine_similarity
-# from sklearn.metrics import mean_absolute_error, mean_squared_error
-# # Page configuration
-# st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
-# # Load CSV files at the top
-# df = pd.read_csv("df_clean.csv")
-# nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
-# euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
-# ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
-# customer_clusters = pd.read_csv('predicts/customer_clusters.csv')  # Load the customer clusters here
-# df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
-# # Ensure customer codes are strings
-# df['CLIENTE'] = df['CLIENTE'].astype(str)
-# nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
-# euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
-# customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str)  # Ensure customer IDs are strings
-# fieles_df = pd.read_csv("clientes_relevantes.csv")
-# cestas = pd.read_csv("cestas.csv")
-# productos = pd.read_csv("productos.csv")
-# df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
-# # Convert all columns except 'CLIENTE' to float in euros_proveedor
-# for col in euros_proveedor.columns:
-#     if col != 'CLIENTE':
-#         euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')
-# # Check for NaN values after conversion
-# if euros_proveedor.isna().any().any():
-#     st.warning("Some values in euros_proveedor couldn't be converted to numbers. Please review the input data.")
-# # Ignore the last two columns of df
-# df = df.iloc[:, :-2]
-# # Function to get supplier name
-# def get_supplier_name(code):
-#     code = str(code)  # Ensure code is a string
-#     name = nombres_proveedores[nombres_proveedores['codigo'] == code]['nombre'].values
-#     return name[0] if len(name) > 0 else code
-# # Function to create radar chart with square root transformation
-# def radar_chart(categories, values, amounts, title):
-#     N = len(categories)
-#     angles = [n / float(N) * 2 * np.pi for n in range(N)]
-#     angles += angles[:1]
-#     fig, ax = plt.subplots(figsize=(12, 12), subplot_kw=dict(projection='polar'))
-#     # Apply square root transformation
-#     sqrt_values = np.sqrt(values)
-#     sqrt_amounts = np.sqrt(amounts)
-#     max_sqrt_value = max(sqrt_values)
-#     normalized_values = [v / max_sqrt_value for v in sqrt_values]
-#     # Adjust scaling for spend values
-#     max_sqrt_amount = max(sqrt_amounts)
-#     scaling_factor = 0.7  # Adjust this value to control how much the spend values are scaled up
-#     normalized_amounts = [min((a / max_sqrt_amount) * scaling_factor, 1.0) for a in sqrt_amounts]
-#     normalized_values += normalized_values[:1]
-#     ax.plot(angles, normalized_values, 'o-', linewidth=2, color='#FF69B4', label='% Units (sqrt)')
-#     ax.fill(angles, normalized_values, alpha=0.25, color='#FF69B4')
-#     normalized_amounts += normalized_amounts[:1]
-#     ax.plot(angles, normalized_amounts, 'o-', linewidth=2, color='#4B0082', label='% Spend (sqrt)')
-#     ax.fill(angles, normalized_amounts, alpha=0.25, color='#4B0082')
-#     ax.set_xticks(angles[:-1])
-#     ax.set_xticklabels(categories, size=8, wrap=True)
-#     ax.set_ylim(0, 1)
-#     circles = np.linspace(0, 1, 5)
-#     for circle in circles:
-#         ax.plot(angles, [circle]*len(angles), '--', color='gray', alpha=0.3, linewidth=0.5)
-#     ax.set_yticklabels([])
-#     ax.spines['polar'].set_visible(False)
-#     plt.title(title, size=16, y=1.1)
-#     plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
-#     return fig
-# # Main page design
-# st.title("Welcome to Customer Insights App")
-# st.markdown("""
-#     This app helps businesses analyze customer behaviors and provide personalized recommendations based on purchase history.
-#     Use the tools below to dive deeper into your customer data.
-# """)
-# # Navigation menu
-# page = st.selectbox("Select the tool you want to use", ["", "Customer Analysis", "Articles Recommendations"])
-# # Home Page
-# if page == "":
-#     st.markdown("## Welcome to the Customer Insights App")
-#     st.write("Use the dropdown menu to navigate between the different sections.")
-# # Customer Analysis Page
-# elif page == "Customer Analysis":
-#     st.title("Customer Analysis")
-#     st.markdown("Use the tools below to explore your customer data.")
-#     partial_code = st.text_input("Enter part of Customer Code (or leave empty to see all)")
-#     if partial_code:
-#         filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
-#     else:
-#         filtered_customers = df
-#     customer_list = filtered_customers['CLIENTE'].unique()
-#     customer_code = st.selectbox("Select Customer Code", customer_list)
-#     if st.button("Calcular"):
-#         if customer_code:
-#             # Find Customer's Cluster
-#             customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
-#             if not customer_match.empty:
-#                 cluster = customer_match['cluster_id'].values[0]
-#                 st.write(f"Customer {customer_code} belongs to cluster {cluster}")
-#                 # Load the Corresponding Model
-#                 model_path = f'models/modelo_cluster_{cluster}.txt'
-#                 gbm = lgb.Booster(model_file=model_path)
-#                 st.write(f"Loaded model for cluster {cluster}")
-#                 # Inspect the model
-#                 st.write("### Model Information:")
-#                 st.write(f"Number of trees: {gbm.num_trees()}")
-#                 st.write(f"Number of features: {gbm.num_feature()}")
-#                 st.write("Feature names:")
-#                 st.write(gbm.feature_name())
-#                 # Load predict data for that cluster
-#                 predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
-#                 # Convert cliente_id to string
-#                 predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
-#                 st.write("### Predict Data DataFrame:")
-#                 st.write(predict_data.head())
-#                 st.write(f"Shape: {predict_data.shape}")
-#                 # Filter for the specific customer
-#                 customer_code_str = str(customer_code)
-#                 customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]
-#                 # Add debug statements
-#                 st.write(f"Unique customer IDs in predict data: {predict_data['cliente_id'].unique()}")
-#                 st.write(f"Customer code we're looking for: {customer_code_str}")
-#                 st.write("### Customer Data:")
-#                 st.write(customer_data.head())
-#                 st.write(f"Shape: {customer_data.shape}")
-#                 if not customer_data.empty:
-#                     # Define features consistently with the training process
-#                     lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
-#                     features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
-#                     # Prepare data for prediction
-#                     X_predict = customer_data[features]
-#                     # Convert categorical features to 'category' dtype
-#                     categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
-#                     for feature in categorical_features:
-#                         X_predict[feature] = X_predict[feature].astype('category')
-#                     st.write("### Features for Prediction:")
-#                     st.write(X_predict.head())
-#                     st.write(f"Shape: {X_predict.shape}")
-#                     st.write("Data types:")
-#                     st.write(X_predict.dtypes)
-#                     # Make Prediction for the selected customer
-#                     y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
-#                     st.write("### Prediction Results:")
-#                     st.write(f"Type of y_pred: {type(y_pred)}")
-#                     st.write(f"Shape of y_pred: {y_pred.shape}")
-#                     st.write("First few predictions:")
-#                     st.write(y_pred[:5])
-#                     # Reassemble the results
-#                     results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
-#                     results['ventas_predichas'] = y_pred
-#                     st.write("### Results DataFrame:")
-#                     st.write(results.head())
-#                     st.write(f"Shape: {results.shape}")
-#                     st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
-#                     # Load actual data
-#                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
-#                     st.write("### Actual Sales DataFrame:")
-#                     st.write(actual_sales.head())
-#                     st.write(f"Shape: {actual_sales.shape}")
-#                     if not actual_sales.empty:
-#                         results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
-#                                                 on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
-#                                                 how='left')
-#                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
-#                         results['ventas_reales'].fillna(0, inplace=True)
-#                         st.write("### Final Results DataFrame:")
-#                         st.write(results.head())
-#                         st.write(f"Shape: {results.shape}")
-#                         # Calculate metrics only for non-null actual sales
-#                         valid_results = results.dropna(subset=['ventas_reales'])
-#                         if not valid_results.empty:
-#                             mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
-#                             mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
-#                             rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
-#                             st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
-#                             st.write(f"MAE: {mae:.2f}")
-#                             st.write(f"MAPE: {mape:.2f}%")
-#                             st.write(f"RMSE: {rmse:.2f}")
-#                         # Analysis of results
-#                         threshold_good = 100  # You may want to adjust this threshold
-#                         if mae < threshold_good:
-#                             st.success(f"Customer {customer_code} is performing well based on the predictions.")
-#                         else:
-#                             st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
-#                     else:
-#                         st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
-#                     st.write("### Debug Information for Radar Chart:")
-#                     st.write(f"Shape of customer_data: {customer_data.shape}")
-#                     st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")
-#                     # Get percentage of units sold for each manufacturer
-#                     customer_df = df[df["CLIENTE"] == str(customer_code)]  # Get the customer data
-#                     all_manufacturers = customer_df.iloc[:, 1:].T  # Exclude CLIENTE column (manufacturers are in columns)
-#                     all_manufacturers.index = all_manufacturers.index.astype(str)
-#                     # Get total sales for each manufacturer from euros_proveedor
-#                     customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
-#                     sales_data = customer_euros.iloc[:, 1:].T  # Exclude CLIENTE column
-#                     sales_data.index = sales_data.index.astype(str)
-#                     # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
-#                     sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
-#                     # Ensure all values are numeric
-#                     sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
-#                     all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
-#                     # Sort manufacturers by percentage of units and get top 10
-#                     top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
-#                     # Sort manufacturers by total sales and get top 10
-#                     top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
-#                     # Combine top manufacturers from both lists and get up to 20 unique manufacturers
-#                     combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
-#                     # Filter out manufacturers that are not present in both datasets
-#                     combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
-#                     st.write(f"Number of combined top manufacturers: {len(combined_top)}")
-#                     if combined_top:
-#                         # Create a DataFrame with combined data for these top manufacturers
-#                         combined_data = pd.DataFrame({
-#                             'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
-#                             'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
-#                         }).fillna(0)
-#                         # Sort by units, then by sales
-#                         combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
-#                         # Filter out manufacturers with 0 units
-#                         non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
-#                         # If we have less than 3 non-zero manufacturers, add some zero-value ones
-#                         if len(non_zero_manufacturers) < 3:
-#                             zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
-#                             manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
-#                         else:
-#                             manufacturers_to_show = non_zero_manufacturers
-#                         values = manufacturers_to_show['units'].tolist()
-#                         amounts = manufacturers_to_show['sales'].tolist()
-#                         manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
-#                         st.write(f"### Results for top {len(manufacturers)} manufacturers:")
-#                         for manufacturer, value, amount in zip(manufacturers, values, amounts):
-#                             st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
-#                         if manufacturers:  # Only create the chart if we have data
-#                             fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
-#                             st.pyplot(fig)
-#                         else:
-#                             st.warning("No data available to create the radar chart.")
-#                     else:
-#                         st.warning("No combined top manufacturers found.")
-#                     # Ensure codigo_cliente in ventas_clientes is a string
-#                     ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
-#                     # Ensure customer_code is a string and strip any spaces
-#                     customer_code = str(customer_code).strip()
-#                     if customer_code in ventas_clientes['codigo_cliente'].unique():
-#                         st.write(f"Customer {customer_code} found in ventas_clientes")
-#                     else:
-#                         st.write(f"Customer {customer_code} not found in ventas_clientes")
-#                     # Customer sales 2021-2024 (if data exists)
-#                     sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
-#                     if all(col in ventas_clientes.columns for col in sales_columns):
-#                         customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
-#                         if not customer_sales_data.empty:
-#                             customer_sales = customer_sales_data[sales_columns].values[0]
-#                             years = ['2021', '2022', '2023']
-#                             fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
-#                             fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
-#                             st.plotly_chart(fig_sales)
-#                         else:
-#                             st.warning(f"No historical sales data found for customer {customer_code}")
-#                     else:
-#                         st.warning("Sales data for 2021-2023 not available in the dataset.")
-#                 else:
-#                     st.warning(f"No data found for customer {customer_code}. Please check the code.")
-#         else:
-#             st.warning("Please select a customer.")
-# # Customer Recommendations Page
-# elif page == "Articles Recommendations":
-#     st.title("Articles Recommendations")
-#     st.markdown("""
-#         Get tailored recommendations for your customers based on their basket.
-#     """)
-#     # Campo input para cliente
-#     partial_code = st.text_input("Enter part of Customer Code for Recommendations (or leave empty to see all)")
-#     if partial_code:
-#         filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
-#     else:
-#         filtered_customers = df
-#     customer_list = filtered_customers['CLIENTE'].unique()
-#     customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
-#     # Definición de la función recomienda
-#     def recomienda(new_basket):
-#         # Calcular la matriz TF-IDF
-#         tfidf = TfidfVectorizer()
-#         tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])
-#         # Convertir la nueva cesta en formato TF-IDF
-#         new_basket_str = ' '.join(new_basket)
-#         new_basket_tfidf = tfidf.transform([new_basket_str])
-#         # Comparar la nueva cesta con las anteriores
-#         similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
-#         # Obtener los índices de las cestas más similares
-#         similar_indices = similarities.argsort()[0][-3:]  # Las 3 más similares
-#         # Crear un diccionario para contar las recomendaciones
-#         recommendations_count = {}
-#         total_similarity = 0
-#         # Recomendar productos de cestas similares
-#         for idx in similar_indices:
-#             sim_score = similarities[0][idx]
-#             total_similarity += sim_score
-#             products = cestas.iloc[idx]['Cestas'].split()
-#             for product in products:
-#                 if product.strip() not in new_basket:  # Evitar recomendar lo que ya está en la cesta
-#                     if product.strip() in recommendations_count:
-#                         recommendations_count[product.strip()] += sim_score
-#                     else:
-#                         recommendations_count[product.strip()] = sim_score
-#         # Calcular la probabilidad relativa de cada producto recomendado
-#         recommendations_with_prob = []
-#         if total_similarity > 0:  # Verificar que total_similarity no sea cero
-#             recommendations_with_prob = [(product, score / total_similarity) for product, score in recommendations_count.items()]
-#         else:
-#             print("No se encontraron similitudes suficientes para calcular probabilidades.")
-#         recommendations_with_prob.sort(key=lambda x: x[1], reverse=True)  # Ordenar por puntuación
-#         # Crear un nuevo DataFrame para almacenar las recomendaciones con descripciones y probabilidades
-#         recommendations_df = pd.DataFrame(columns=['ARTICULO', 'DESCRIPCION', 'PROBABILIDAD'])
-#         # Agregar las recomendaciones al DataFrame usando pd.concat
-#         for product, prob in recommendations_with_prob:
-#             # Buscar la descripción en el DataFrame de productos
-#             description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
-#             if not description.empty:
-#                 # Crear un nuevo DataFrame temporal para la recomendación
-#                 temp_df = pd.DataFrame({
-#                     'ARTICULO': [product],
-#                     'DESCRIPCION': [description.values[0]],  # Obtener el primer valor encontrado
-#                     'PROBABILIDAD': [prob]
-#                 })
-#                 # Concatenar el DataFrame temporal al DataFrame de recomendaciones
-#                 recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
-#         return recommendations_df
-#     # Comprobar si el cliente está en el CSV de fieles
-#     is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
-#     if customer_code:
-#         if is_fiel:
-#             st.write(f"### Customer {customer_code} is a loyal customer.")
-#             option = st.selectbox("Select Recommendation Type", ["Select an option", "By Purchase History", "By Current Basket"])
-#             if option == "By Purchase History":
-#                 st.warning("Option not available... aún")
-#             elif option == "By Current Basket":
-#                 st.write("Select the items and assign quantities for the basket:")
-#                 # Mostrar lista de artículos disponibles
-#                 available_articles = productos['ARTICULO'].unique()
-#                 selected_articles = st.multiselect("Select Articles", available_articles)
-#                 # Crear inputs para ingresar las cantidades de cada artículo seleccionado
-#                 quantities = {}
-#                 for article in selected_articles:
-#                     quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
-#                 if st.button("Calcular"):  # Añadimos el botón "Calcular"
-#                     # Crear una lista de artículos basada en la selección
-#                     new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
-#                     if new_basket:
-#                         # Procesar la lista para recomendar
-#                         recommendations_df = recomienda(new_basket)
-#                         if not recommendations_df.empty:
-#                             st.write("### Recommendations based on the current basket:")
-#                             st.dataframe(recommendations_df)
-#                         else:
-#                             st.warning("No recommendations found for the provided basket.")
-#                     else:
-#                         st.warning("Please select at least one article and set its quantity.")
-#         else:
-#             st.write(f"### Customer {customer_code} is not a loyal customer.")
-#             st.write("Select items and assign quantities for the basket:")
-#             # Mostrar lista de artículos disponibles
-#             available_articles = productos['ARTICULO'].unique()
-#             selected_articles = st.multiselect("Select Articles", available_articles)
-#             # Crear inputs para ingresar las cantidades de cada artículo seleccionado
-#             quantities = {}
-#             for article in selected_articles:
-#                 quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
-#             if st.button("Calcular"):  # Añadimos el botón "Calcular"
-#                 # Crear una lista de artículos basada en la selección
-#                 new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
-#                 if new_basket:
-#                     # Procesar la lista para recomendar
-#                     recommendations_df = recomienda(new_basket)
-#                     if not recommendations_df.empty:
-#                         st.write("### Recommendations based on the current basket:")
-#                         st.dataframe(recommendations_df)
-#                     else:
-#                         st.warning("No recommendations found for the provided basket.")
-#                 else:
-#                     st.warning("Please select at least one article and set its quantity.")
 # Customer Analysis Page

 import streamlit as st
 import pandas as pd
 import plotly.express as px
 import matplotlib.pyplot as plt
 import numpy as np
 import lightgbm as lgb
 # Page configuration
 st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
+# Load CSV files at the top
 df = pd.read_csv("df_clean.csv")
 nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
 euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
 ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
+customer_clusters = pd.read_csv('predicts/customer_clusters.csv')  # Load the customer clusters here
+df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
 # Ensure customer codes are strings
 df['CLIENTE'] = df['CLIENTE'].astype(str)
 nombres_proveedores['codigo'] = nombres_proveedores['codigo'].astype(str)
 euros_proveedor['CLIENTE'] = euros_proveedor['CLIENTE'].astype(str)
+customer_clusters['cliente_id'] = customer_clusters['cliente_id'].astype(str)  # Ensure customer IDs are strings
 fieles_df = pd.read_csv("clientes_relevantes.csv")
 cestas = pd.read_csv("cestas.csv")
 productos = pd.read_csv("productos.csv")
 df_agg_2024['cliente_id'] = df_agg_2024['cliente_id'].astype(str)
+# Convert all columns except 'CLIENTE' to float in euros_proveedor
 for col in euros_proveedor.columns:
     if col != 'CLIENTE':
         euros_proveedor[col] = pd.to_numeric(euros_proveedor[col], errors='coerce')
+# Check for NaN values after conversion
 if euros_proveedor.isna().any().any():
     st.warning("Some values in euros_proveedor couldn't be converted to numbers. Please review the input data.")
+# Ignore the last two columns of df
 df = df.iloc[:, :-2]
 # Function to get supplier name
 def get_supplier_name(code):
+    code = str(code)  # Ensure code is a string
     name = nombres_proveedores[nombres_proveedores['codigo'] == code]['nombre'].values
     return name[0] if len(name) > 0 else code
+# Function to create radar chart with square root transformation
 def radar_chart(categories, values, amounts, title):
+    N = len(categories)
+    angles = [n / float(N) * 2 * np.pi for n in range(N)]
+    angles += angles[:1]
+    fig, ax = plt.subplots(figsize=(12, 12), subplot_kw=dict(projection='polar'))
+    # Apply square root transformation
+    sqrt_values = np.sqrt(values)
+    sqrt_amounts = np.sqrt(amounts)
+    max_sqrt_value = max(sqrt_values)
+    normalized_values = [v / max_sqrt_value for v in sqrt_values]
+    # Adjust scaling for spend values
+    max_sqrt_amount = max(sqrt_amounts)
+    scaling_factor = 0.7  # Adjust this value to control how much the spend values are scaled up
+    normalized_amounts = [min((a / max_sqrt_amount) * scaling_factor, 1.0) for a in sqrt_amounts]
+    normalized_values += normalized_values[:1]
+    ax.plot(angles, normalized_values, 'o-', linewidth=2, color='#FF69B4', label='% Units (sqrt)')
+    ax.fill(angles, normalized_values, alpha=0.25, color='#FF69B4')
+    normalized_amounts += normalized_amounts[:1]
+    ax.plot(angles, normalized_amounts, 'o-', linewidth=2, color='#4B0082', label='% Spend (sqrt)')
+    ax.fill(angles, normalized_amounts, alpha=0.25, color='#4B0082')
+    ax.set_xticks(angles[:-1])
+    ax.set_xticklabels(categories, size=8, wrap=True)
+    ax.set_ylim(0, 1)
+    circles = np.linspace(0, 1, 5)
+    for circle in circles:
+        ax.plot(angles, [circle]*len(angles), '--', color='gray', alpha=0.3, linewidth=0.5)
+    ax.set_yticklabels([])
+    ax.spines['polar'].set_visible(False)
+    plt.title(title, size=16, y=1.1)
+    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
     return fig
 # Main page design
     if st.button("Calcular"):
         if customer_code:
+            # Find Customer's Cluster
             customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
             if not customer_match.empty:
                 cluster = customer_match['cluster_id'].values[0]
                 st.write(f"Customer {customer_code} belongs to cluster {cluster}")
+                # Load the Corresponding Model
                 model_path = f'models/modelo_cluster_{cluster}.txt'
                 gbm = lgb.Booster(model_file=model_path)
                 st.write(f"Loaded model for cluster {cluster}")
+                # Inspect the model
+                st.write("### Model Information:")
+                st.write(f"Number of trees: {gbm.num_trees()}")
+                st.write(f"Number of features: {gbm.num_feature()}")
+                st.write("Feature names:")
+                st.write(gbm.feature_name())
                 # Load predict data for that cluster
                 predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
+                # Convert cliente_id to string
                 predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
+                st.write("### Predict Data DataFrame:")
+                st.write(predict_data.head())
+                st.write(f"Shape: {predict_data.shape}")
                 # Filter for the specific customer
+                customer_code_str = str(customer_code)
+                customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]
+                # Add debug statements
+                st.write(f"Unique customer IDs in predict data: {predict_data['cliente_id'].unique()}")
+                st.write(f"Customer code we're looking for: {customer_code_str}")
+                st.write("### Customer Data:")
+                st.write(customer_data.head())
+                st.write(f"Shape: {customer_data.shape}")
                 if not customer_data.empty:
+                    # Define features consistently with the training process
                     lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
                     features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
+                    # Prepare data for prediction
                     X_predict = customer_data[features]
                     # Convert categorical features to 'category' dtype
                     categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
                     for feature in categorical_features:
                         X_predict[feature] = X_predict[feature].astype('category')
+                    st.write("### Features for Prediction:")
+                    st.write(X_predict.head())
+                    st.write(f"Shape: {X_predict.shape}")
+                    st.write("Data types:")
+                    st.write(X_predict.dtypes)
                     # Make Prediction for the selected customer
                     y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
+                    st.write("### Prediction Results:")
+                    st.write(f"Type of y_pred: {type(y_pred)}")
+                    st.write(f"Shape of y_pred: {y_pred.shape}")
+                    st.write("First few predictions:")
+                    st.write(y_pred[:5])
+                    # Reassemble the results
                     results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
                     results['ventas_predichas'] = y_pred
+                    st.write("### Results DataFrame:")
+                    st.write(results.head())
+                    st.write(f"Shape: {results.shape}")
+                    st.write(f"Predicted total sales for Customer {customer_code}: {results['ventas_predichas'].sum():.2f}")
+                    # Load actual data
+                    actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
+                    st.write("### Actual Sales DataFrame:")
+                    st.write(actual_sales.head())
+                    st.write(f"Shape: {actual_sales.shape}")
                     if not actual_sales.empty:
+                        results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
+                                                on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
+                                                how='left')
                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
                         results['ventas_reales'].fillna(0, inplace=True)
+                        st.write("### Final Results DataFrame:")
+                        st.write(results.head())
+                        st.write(f"Shape: {results.shape}")
+                        # Calculate metrics only for non-null actual sales
                         valid_results = results.dropna(subset=['ventas_reales'])
                         if not valid_results.empty:
                             mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
                             mape = np.mean(np.abs((valid_results['ventas_reales'] - valid_results['ventas_predichas']) / valid_results['ventas_reales'])) * 100
                             rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
+                            st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
                             st.write(f"MAE: {mae:.2f}")
                             st.write(f"MAPE: {mape:.2f}%")
                             st.write(f"RMSE: {rmse:.2f}")
+                        # Analysis of results
+                        threshold_good = 100  # You may want to adjust this threshold
+                        if mae < threshold_good:
+                            st.success(f"Customer {customer_code} is performing well based on the predictions.")
+                        else:
+                            st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
+                    else:
+                        st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
+                    st.write("### Debug Information for Radar Chart:")
+                    st.write(f"Shape of customer_data: {customer_data.shape}")
+                    st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")
+                    # Get percentage of units sold for each manufacturer
+                    customer_df = df[df["CLIENTE"] == str(customer_code)]  # Get the customer data
+                    all_manufacturers = customer_df.iloc[:, 1:].T  # Exclude CLIENTE column (manufacturers are in columns)
+                    all_manufacturers.index = all_manufacturers.index.astype(str)
+                    # Get total sales for each manufacturer from euros_proveedor
+                    customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
+                    sales_data = customer_euros.iloc[:, 1:].T  # Exclude CLIENTE column
+                    sales_data.index = sales_data.index.astype(str)
+                    # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
+                    sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
+                    # Ensure all values are numeric
+                    sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
+                    all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
+                    # Sort manufacturers by percentage of units and get top 10
+                    top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
+                    # Sort manufacturers by total sales and get top 10
+                    top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
+                    # Combine top manufacturers from both lists and get up to 20 unique manufacturers
+                    combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
+                    # Filter out manufacturers that are not present in both datasets
+                    combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
+                    st.write(f"Number of combined top manufacturers: {len(combined_top)}")
+                    if combined_top:
+                        # Create a DataFrame with combined data for these top manufacturers
+                        combined_data = pd.DataFrame({
+                            'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
+                            'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
+                        }).fillna(0)
+                        # Sort by units, then by sales
+                        combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
+                        # Filter out manufacturers with 0 units
+                        non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
+                        # If we have less than 3 non-zero manufacturers, add some zero-value ones
+                        if len(non_zero_manufacturers) < 3:
+                            zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
+                            manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
+                        else:
+                            manufacturers_to_show = non_zero_manufacturers
+                        values = manufacturers_to_show['units'].tolist()
+                        amounts = manufacturers_to_show['sales'].tolist()
+                        manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
+                        st.write(f"### Results for top {len(manufacturers)} manufacturers:")
+                        for manufacturer, value, amount in zip(manufacturers, values, amounts):
+                            st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
+                        if manufacturers:  # Only create the chart if we have data
+                            fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
+                            st.pyplot(fig)
+                        else:
+                            st.warning("No data available to create the radar chart.")
+                    else:
+                        st.warning("No combined top manufacturers found.")
+                    # Ensure codigo_cliente in ventas_clientes is a string
+                    ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
+                    # Ensure customer_code is a string and strip any spaces
+                    customer_code = str(customer_code).strip()
+                    if customer_code in ventas_clientes['codigo_cliente'].unique():
+                        st.write(f"Customer {customer_code} found in ventas_clientes")
+                    else:
+                        st.write(f"Customer {customer_code} not found in ventas_clientes")
+                    # Customer sales 2021-2024 (if data exists)
+                    sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
+                    if all(col in ventas_clientes.columns for col in sales_columns):
+                        customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
+                        if not customer_sales_data.empty:
+                            customer_sales = customer_sales_data[sales_columns].values[0]
+                            years = ['2021', '2022', '2023']
+                            fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
+                            fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
+                            st.plotly_chart(fig_sales)
+                        else:
+                            st.warning(f"No historical sales data found for customer {customer_code}")
+                    else:
+                        st.warning("Sales data for 2021-2023 not available in the dataset.")
+                else:
+                    st.warning(f"No data found for customer {customer_code}. Please check the code.")
+        else:
+            st.warning("Please select a customer.")
+# Customer Recommendations Page
+elif page == "Articles Recommendations":
+    st.title("Articles Recommendations")
+    st.markdown("""
+        Get tailored recommendations for your customers based on their basket.
+    """)
+    # Campo input para cliente
+    partial_code = st.text_input("Enter part of Customer Code for Recommendations (or leave empty to see all)")
+    if partial_code:
+        filtered_customers = df[df['CLIENTE'].str.contains(partial_code)]
+    else:
+        filtered_customers = df
+    customer_list = filtered_customers['CLIENTE'].unique()
+    customer_code = st.selectbox("Select Customer Code for Recommendations", [""] + list(customer_list))
+    # Definición de la función recomienda
+    def recomienda(new_basket):
+        # Calcular la matriz TF-IDF
+        tfidf = TfidfVectorizer()
+        tfidf_matrix = tfidf.fit_transform(cestas['Cestas'])
+        # Convertir la nueva cesta en formato TF-IDF
+        new_basket_str = ' '.join(new_basket)
+        new_basket_tfidf = tfidf.transform([new_basket_str])
+        # Comparar la nueva cesta con las anteriores
+        similarities = cosine_similarity(new_basket_tfidf, tfidf_matrix)
+        # Obtener los índices de las cestas más similares
+        similar_indices = similarities.argsort()[0][-3:]  # Las 3 más similares
+        # Crear un diccionario para contar las recomendaciones
+        recommendations_count = {}
+        total_similarity = 0
+        # Recomendar productos de cestas similares
+        for idx in similar_indices:
+            sim_score = similarities[0][idx]
+            total_similarity += sim_score
+            products = cestas.iloc[idx]['Cestas'].split()
+            for product in products:
+                if product.strip() not in new_basket:  # Evitar recomendar lo que ya está en la cesta
+                    if product.strip() in recommendations_count:
+                        recommendations_count[product.strip()] += sim_score
+                    else:
+                        recommendations_count[product.strip()] = sim_score
+        # Calcular la probabilidad relativa de cada producto recomendado
+        recommendations_with_prob = []
+        if total_similarity > 0:  # Verificar que total_similarity no sea cero
+            recommendations_with_prob = [(product, score / total_similarity) for product, score in recommendations_count.items()]
+        else:
+            print("No se encontraron similitudes suficientes para calcular probabilidades.")
+        recommendations_with_prob.sort(key=lambda x: x[1], reverse=True)  # Ordenar por puntuación
+        # Crear un nuevo DataFrame para almacenar las recomendaciones con descripciones y probabilidades
+        recommendations_df = pd.DataFrame(columns=['ARTICULO', 'DESCRIPCION', 'PROBABILIDAD'])
+        # Agregar las recomendaciones al DataFrame usando pd.concat
+        for product, prob in recommendations_with_prob:
+            # Buscar la descripción en el DataFrame de productos
+            description = productos.loc[productos['ARTICULO'] == product, 'DESCRIPCION']
+            if not description.empty:
+                # Crear un nuevo DataFrame temporal para la recomendación
+                temp_df = pd.DataFrame({
+                    'ARTICULO': [product],
+                    'DESCRIPCION': [description.values[0]],  # Obtener el primer valor encontrado
+                    'PROBABILIDAD': [prob]
+                })
+                # Concatenar el DataFrame temporal al DataFrame de recomendaciones
+                recommendations_df = pd.concat([recommendations_df, temp_df], ignore_index=True)
+        return recommendations_df
+    # Comprobar si el cliente está en el CSV de fieles
+    is_fiel = customer_code in fieles_df['Cliente'].astype(str).values
+    if customer_code:
+        if is_fiel:
+            st.write(f"### Customer {customer_code} is a loyal customer.")
+            option = st.selectbox("Select Recommendation Type", ["Select an option", "By Purchase History", "By Current Basket"])
+            if option == "By Purchase History":
+                st.warning("Option not available... aún")
+            elif option == "By Current Basket":
+                st.write("Select the items and assign quantities for the basket:")
+                # Mostrar lista de artículos disponibles
+                available_articles = productos['ARTICULO'].unique()
+                selected_articles = st.multiselect("Select Articles", available_articles)
+                # Crear inputs para ingresar las cantidades de cada artículo seleccionado
+                quantities = {}
+                for article in selected_articles:
+                    quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
+                if st.button("Calcular"):  # Añadimos el botón "Calcular"
+                    # Crear una lista de artículos basada en la selección
+                    new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
+                    if new_basket:
+                        # Procesar la lista para recomendar
+                        recommendations_df = recomienda(new_basket)
+                        if not recommendations_df.empty:
+                            st.write("### Recommendations based on the current basket:")
+                            st.dataframe(recommendations_df)
+                        else:
+                            st.warning("No recommendations found for the provided basket.")
+                    else:
+                        st.warning("Please select at least one article and set its quantity.")
+        else:
+            st.write(f"### Customer {customer_code} is not a loyal customer.")
+            st.write("Select items and assign quantities for the basket:")
+            # Mostrar lista de artículos disponibles
+            available_articles = productos['ARTICULO'].unique()
+            selected_articles = st.multiselect("Select Articles", available_articles)
+            # Crear inputs para ingresar las cantidades de cada artículo seleccionado
+            quantities = {}
+            for article in selected_articles:
+                quantities[article] = st.number_input(f"Quantity for {article}", min_value=0, step=1)
+            if st.button("Calcular"):  # Añadimos el botón "Calcular"
+                # Crear una lista de artículos basada en la selección
+                new_basket = [f"{article} x{quantities[article]}" for article in selected_articles if quantities[article] > 0]
+                if new_basket:
+                    # Procesar la lista para recomendar
+                    recommendations_df = recomienda(new_basket)
+                    if not recommendations_df.empty:
+                        st.write("### Recommendations based on the current basket:")
+                        st.dataframe(recommendations_df)
+                    else:
+                        st.warning("No recommendations found for the provided basket.")
+                else:
+                    st.warning("Please select at least one article and set its quantity.")
 # Customer Analysis Page