louiecerv committed on
Commit
eefd65c
·
1 Parent(s): 5c52553

sync with remote

Browse files
Files changed (2) hide show
  1. app.py +90 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from datasets import load_dataset
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+
6
# Load dataset from Hugging Face
repo_id = "louiecerv/cats_dogs_dataset"


@st.cache_resource
def _load_cached_dataset(repo):
    """Load the dataset for *repo* once and reuse it across script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded dataset object avoids calling
    ``load_dataset`` again each time a sidebar button is pressed.
    """
    return load_dataset(repo)


dataset = _load_cached_dataset(repo_id)

# Select the split (train set by default)
split = "train"
data = dataset[split]
13
+
14
def main():
    """Render the Streamlit dataset explorer page.

    Shows the first 25 images of the selected split as a 5x5 grid and adds
    sidebar controls for a random sample, the class distribution, and a
    per-label filter.  Reads the module-level ``data`` and ``split``.
    """
    # Streamlit app
    st.title("Image Dataset Explorer")
    st.subheader(f"Displaying the first 25 images from the {split} set")

    label_names = data.features["label"].names  # Get class names
    # Labels for the WHOLE split, so the class distribution and the label
    # filter below describe the full dataset rather than only the first 25
    # rows that happen to be shown in the initial grid.
    labels = np.array(list(data["label"]))

    # Display 5x5 grid of the first images (fewer if the split is small)
    first_indices = range(min(25, len(data)))
    _show_image_grid(
        first_indices,
        [label_names[int(labels[i])] for i in first_indices],
    )

    # Additional Exploration Options
    st.sidebar.title("Explore the Dataset")

    # Random Image Viewer
    if st.sidebar.button("Show Random Images"):
        # A truncated permutation samples without replacement and, unlike
        # np.random.choice(n, 25, replace=False), does not raise when the
        # split holds fewer than 25 rows.
        rand_indices = np.random.permutation(len(data))[:25]
        _show_image_grid(
            rand_indices,
            [label_names[int(labels[i])] for i in rand_indices],
        )

    # Class Distribution
    if st.sidebar.button("Show Class Distribution"):
        # Imported lazily: only needed when this view is requested.
        import pandas as pd
        import seaborn as sns

        label_counts = pd.Series(labels).value_counts().sort_index()

        fig, ax = plt.subplots(figsize=(8, 4))
        sns.barplot(
            x=[label_names[int(i)] for i in label_counts.index],
            y=label_counts.values,
            ax=ax,
        )
        ax.set_title("Class Distribution")
        ax.set_ylabel("Count")
        ax.set_xlabel("Class")

        st.pyplot(fig)
        plt.close(fig)  # release the figure; the script reruns often

    # Filter by class label
    selected_label = st.sidebar.selectbox("Filter by Label", label_names)
    if st.sidebar.button("Show Filtered Images"):
        filtered_indices = [
            i for i, label in enumerate(labels)
            if label_names[int(label)] == selected_label
        ][:25]
        _show_image_grid(
            filtered_indices,
            [selected_label] * len(filtered_indices),
        )


def _show_image_grid(indices, titles):
    """Draw up to 25 dataset images in a 5x5 grid and hand it to Streamlit.

    Args:
        indices: Row indices into the module-level ``data``; only the first
            25 are drawn.
        titles: Title for each drawn image, parallel to ``indices``.

    Grid cells without a matching index are left blank.
    """
    fig, axes = plt.subplots(5, 5, figsize=(10, 10))
    fig.subplots_adjust(hspace=0.5)

    for slot, ax in enumerate(axes.flat):
        if slot < len(indices):
            # Fetch the row first, then the column, so only the images that
            # are displayed get read; each image is passed to imshow on its
            # own, so images of different sizes are not stacked into one
            # array.
            ax.imshow(data[int(indices[slot])]["image"])
            ax.set_title(titles[slot])
        ax.axis("off")

    st.pyplot(fig)
    # Close explicitly: pyplot keeps every figure alive until closed, and
    # this function runs again on each interaction.
    plt.close(fig)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ datasets
3
+ numpy
4
+ matplotlib
5
+ seaborn
6
+ pandas