Spaces:
Runtime error
Runtime error
import pandas as pd | |
def load_data(catalog=None) -> pd.DataFrame:
    """Read the product catalog spreadsheet into a DataFrame.

    Args:
        catalog: Ignored. The original implementation overwrote this
            argument immediately without reading it; it is kept (and is
            now optional) only so that existing callers which pass a
            positional value keep working.

    Returns:
        The DataFrame produced by ``pd.read_excel('catalog_1k.xlsx')``.
    """
    # The path is relative, so it is resolved against the process's
    # current working directory.
    return pd.read_excel('catalog_1k.xlsx')
def preprocess_data(catalog: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the raw catalog.

    The input frame is left untouched. Previously the first few steps wrote
    straight into the caller's DataFrame, so calling this function a second
    time on the same object re-mapped the already-mapped ``Gender`` strings
    to NaN and dropped every row.

    Args:
        catalog: Raw catalog. The code reads the columns ``Description``,
            ``Id``, ``Gender``, ``L1``, ``L2``, ``L3``, ``Image``,
            ``MaterialName``, ``BrandName`` and ``Name``.

    Returns:
        A new DataFrame without the ``L3`` column, without rows whose gender
        code is not 1, 2 or 3, and with an extra ``SimpleMetadata`` column.

    Raises:
        KeyError: If one of the columns listed above is missing.
    """
    # Work on a private copy so the caller's frame is never mutated.
    catalog = catalog.copy()

    # Clean description: strip newlines. regex=False makes the literal match
    # explicit instead of relying on the pandas-version-dependent default.
    catalog['Description'] = catalog['Description'].str.replace('\n', '', regex=False)

    # Id column to nullable integer; unparseable values become <NA>.
    catalog['Id'] = pd.to_numeric(catalog['Id'], errors='coerce').astype('Int64')

    # Map gender codes to labels; any code outside the mapping becomes NaN.
    catalog['Gender'] = catalog['Gender'].map({1: 'Women', 2: 'Men', 3: 'Unisex'})

    # Drop sub-sub-categories.
    catalog = catalog.drop(['L3'], axis=1)

    # Drop items without a (recognised) gender.
    catalog = catalog.dropna(subset=['Gender'])

    # Keep the last comma-separated image link (the original comment calls
    # this the "best" link).
    catalog['Image'] = catalog['Image'].str.split(',').str[-1]

    # Convert the columns to strings before joining them, so missing or
    # non-string values do not break ', '.join.
    metadata_columns = ["L1", "L2", "Gender", "MaterialName", "BrandName", "Name"]
    catalog["SimpleMetadata"] = catalog[metadata_columns].astype(str).agg(', '.join, axis=1)

    return catalog