Spaces:

louiecerv
/

cats_dog_dataset_explore

Sleeping

App Files Files Community

louiecerv committed on Feb 3

Commit

eefd65c

1 Parent(s): 5c52553

sync with remote

Browse files

Files changed (2) hide show

app.py +90 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import streamlit as st
+from datasets import load_dataset
+import numpy as np
+import matplotlib.pyplot as plt
+# Load dataset from Hugging Face
+repo_id = "louiecerv/cats_dogs_dataset"
+dataset = load_dataset(repo_id)
+# Select the split (train set by default)
+split = "train"
+data = dataset[split]
+def main():
+    # Streamlit app
+    st.title("Image Dataset Explorer")
+    st.subheader(f"Displaying the first 25 images from the {split} set")
+    # Extract images and labels
+    images = np.array(data["image"][:25])  # First 25 images
+    labels = np.array(data["label"][:25])
+    label_names = dataset[split].features["label"].names  # Get class names
+    # Display 5x5 grid of images
+    fig, axes = plt.subplots(5, 5, figsize=(10, 10))
+    fig.subplots_adjust(hspace=0.5)
+    for i, ax in enumerate(axes.flat):
+        ax.imshow(images[i])
+        ax.set_title(label_names[labels[i]])
+        ax.axis("off")
+    st.pyplot(fig)
+    # Additional Exploration Options
+    st.sidebar.title("Explore the Dataset")
+    # Random Image Viewer
+    if st.sidebar.button("Show Random Images"):
+        rand_indices = np.random.choice(len(data), 25, replace=False)
+        rand_images = np.array(data["image"])[rand_indices]
+        rand_labels = np.array(data["label"])[rand_indices]
+        fig, axes = plt.subplots(5, 5, figsize=(10, 10))
+        fig.subplots_adjust(hspace=0.5)
+        for i, ax in enumerate(axes.flat):
+            ax.imshow(rand_images[i])
+            ax.set_title(label_names[rand_labels[i]])
+            ax.axis("off")
+        st.pyplot(fig)
+    # Class Distribution
+    if st.sidebar.button("Show Class Distribution"):
+        import pandas as pd
+        import seaborn as sns
+        label_counts = pd.Series(labels).value_counts().sort_index()
+        label_names_map = {i: name for i, name in enumerate(label_names)}
+        fig, ax = plt.subplots(figsize=(8, 4))
+        sns.barplot(x=[label_names_map[i] for i in label_counts.index], y=label_counts.values, ax=ax)
+        ax.set_title("Class Distribution")
+        ax.set_ylabel("Count")
+        ax.set_xlabel("Class")
+        st.pyplot(fig)
+    # Filter by class label
+    selected_label = st.sidebar.selectbox("Filter by Label", label_names)
+    if st.sidebar.button("Show Filtered Images"):
+        filtered_indices = [i for i, label in enumerate(labels) if label_names[label] == selected_label]
+        filtered_images = np.array(data["image"])[filtered_indices[:25]]
+        fig, axes = plt.subplots(5, 5, figsize=(10, 10))
+        fig.subplots_adjust(hspace=0.5)
+        for i, ax in enumerate(axes.flat):
+            if i < len(filtered_images):
+                ax.imshow(filtered_images[i])
+                ax.set_title(selected_label)
+                ax.axis("off")
+            else:
+                ax.axis("off")
+        st.pyplot(fig)
+# Run the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+datasets
+numpy
+matplotlib
+seaborn
+pandas