Spaces:

andrewsunanda
/

fast_food_classification

Sleeping

App Files Community

andrewsunanda committed on Apr 8, 2023

Commit

cb4591a

1 Parent(s): ae47e4d

Update eda.py

Browse files

Files changed (1) hide show

eda.py +1 -132

eda.py CHANGED Viewed

@@ -34,138 +34,7 @@ def run():
     dataset_path = "andrewsunanda/fast_food_image_classification"
     st.write(os.listdir(dataset_path))
-    import os
-    import torch
-    import torchvision.transforms as transforms
-    from torch.utils.data import DataLoader
-    from datasets import load_dataset
-    # Define the path to the dataset
-    dataset_path = 'andrewsunanda/fast_food_image_classification'
-    # Load the dataset from Hugging Face
-    dataset = load_dataset(dataset_path)
-    # Define the batch size and image size
-    batch_size = 256
-    img_size = (64, 64)
-    # Define the paths to the train, validation, and test folders
-    train_path = os.path.join(dataset_path, 'Train')
-    valid_path = os.path.join(dataset_path, 'Valid')
-    test_path = os.path.join(dataset_path, 'Test')
-    # Define the transforms for the dataset
-    transform = transforms.Compose([
-        transforms.Resize(img_size),
-        transforms.ToTensor(),
-    ])
-    # Load the training dataset
-    train_dataset = dataset['train']
-    train_dataset = train_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
-    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    # Load the validation dataset
-    valid_dataset = dataset['validation']
-    valid_dataset = valid_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
-    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
-    # Load the testing dataset
-    test_dataset = dataset['test']
-    test_dataset = test_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
-    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
-    # Create data generators for training, validation, and testing
-    train_datagen = ImageDataGenerator(
-        rescale=1./255,
-        horizontal_flip=True
-    )
-    valid_datagen = ImageDataGenerator(
-        rescale=1./255
-    )
-    test_datagen = ImageDataGenerator(
-        rescale=1./255
-    )
-    train_generator = train_datagen.flow_from_directory(
-    train_path,
-    target_size=img_size,
-    batch_size=batch_size,
-    class_mode='categorical'
-    )
-    valid_generator = valid_datagen.flow_from_directory(
-        valid_path,
-        target_size=img_size,
-        batch_size=batch_size,
-        class_mode='categorical'
-    )
-    test_generator = test_datagen.flow_from_directory(
-        test_path,
-        target_size=img_size,
-        batch_size=batch_size,
-        class_mode='categorical'
-    )
-    st.write('## Showing Random Samples')
-    class_names = list(train_generator.class_indices.keys())
-    train_classes = pd.Series(train_generator.classes)
-    test_classes = pd.Series(test_generator.classes)
-    valid_classes = pd.Series(valid_generator.classes)
-    # Plot some samples from each class
-    fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(10, 6), subplot_kw={'xticks': [], 'yticks': []})
-    for i, axi in enumerate(ax.flat):
-        img = plt.imread(f'{train_path}/{class_names[i]}/{os.listdir(train_path+"/"+class_names[i])[0]}')
-        axi.imshow(img)
-        axi.set_title(class_names[i])
-    plt.tight_layout()
-    st.pyplot(fig)
-    st.markdown('---')
-    st.write('## Balance Classification')
-   # Create a pandas dataframe to show the distribution of classes in train, test, and validation data
-    df = pd.concat([train_classes.value_counts(), test_classes.value_counts(), valid_classes.value_counts()], axis=1)
-    df.columns = ['Training Data', 'Test Data', 'Validation Data']
-    df.index = class_names
-    fig, ax = plt.subplots(figsize=(12, 6))
-    df.plot(kind='bar', stacked=False, ax=ax, width=0.8)
-    plt.xlabel('Class')
-    plt.ylabel('Data Distribution')
-    plt.title('Data Distribution for each class')
-    plt.xticks(rotation=45, ha='right')
-    st.pyplot(fig)
-    st.markdown('---')
-    st.write('## Mean Pixel Value')
-    # Plot the mean of pixel mean of each channel for each class (unstacked bar chart)
-    means = []
-    for i in range(len(class_names)):
-        class_name = class_names[i]
-        img_path = os.path.join(train_path, class_name, os.listdir(os.path.join(train_path, class_name))[0])
-        img = image.load_img(img_path, target_size=img_size)
-        img_array = image.img_to_array(img)
-        means.append(np.mean(img_array, axis=(0, 1)))
-    means_df = pd.DataFrame(means, columns=['Red', 'Green', 'Blue'])
-    means_df.index = class_names
-    fig, ax = plt.subplots(figsize=(12, 6))
-    means_df.plot(kind='bar', stacked=False, ax=ax, width=0.8)
-    plt.xlabel('Class')
-    plt.ylabel('Mean pixel value')
-    plt.title('Mean pixel value of each channel for each class')
-    plt.xticks(rotation=45, ha='right')
-    st.pyplot(fig)
-    st.markdown('---')

     dataset_path = "andrewsunanda/fast_food_image_classification"
     st.write(os.listdir(dataset_path))
+    #