3v324v23 committed
Commit 1b4d479 · 1 Parent(s): 466431f

Added Homepage

Files changed (2)
  1. app.py → HomePage.py +0 -0
  2. pages/Dataset.py +111 -41
app.py → HomePage.py RENAMED
File without changes
pages/Dataset.py CHANGED
@@ -2,61 +2,131 @@ import streamlit as st
  import pandas as pd
  import os
  from PIL import Image

  st.set_page_config(layout="wide")
- st.title("📂 Dataset Information")

- # Introduction
- st.markdown("""
- ### 🧾 Dataset Overview

- ## Dataset Descrption

- ### DDR dataset contains 13,673 fundus images from 147 hospitals, covering 23 provinces in China. The images are classified into 5 classes according to DR severity: none, mild, moderate, severe, and proliferative DR. There is a sixth category which indicates the images with poor quality. The dataset presented here does not include the images with poor quality (sixth category) and all images have been preprocessed to delete the black background. https://www.kaggle.com/datasets/mariaherrerot/ddrdataset

- - **No_DR**
- - **Mild**
- - **Moderate**
- - **Severe**
- - **Proliferative_DR**
- """)

- # Dataset preparation explanation
- st.markdown("""
- ### 🧪 Data Preparation & Splitting

- The original dataset was preprocessed and resized to **224x224 pixels**. It was then split into three sets:

- - **Training Set**: Used to train the model.
- - **Validation Set** *(optional)*: Used to fine-tune hyperparameters.
- - **Testing Set**: Used for final model evaluation.

- We used an 80-20 stratified split:
- - **80%** of the data was used for training.
- - **20%** was reserved for testing, ensuring each class was proportionally represented.

- A CSV file (`test_labels.csv`) was created for the test set, containing the filenames and their corresponding class labels.
- """)
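The removed page text above describes an 80/20 class-stratified split that produced `test_labels.csv`. The splitting script itself is not part of this commit, so the following is only a minimal sketch of how such a split might be generated, assuming pandas and scikit-learn and a hypothetical `labels.csv` with `filename` and `label` columns:

```python
# Illustrative sketch only (not part of this commit): an 80/20 class-stratified
# split that writes test_labels.csv. File and column names are hypothetical.
import pandas as pd
from sklearn.model_selection import train_test_split

labels = pd.read_csv("labels.csv")  # one row per image: "filename", "label"

train_df, test_df = train_test_split(
    labels,
    test_size=0.20,            # 20% held out for testing
    stratify=labels["label"],  # keep each class proportionally represented
    random_state=42,
)

train_df.to_csv("train_labels.csv", index=False)
test_df.to_csv("test_labels.csv", index=False)  # filenames + class labels for the test split
```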
- # Visualizing the test dataset
- st.markdown("### 📸 Sample Images from Test Dataset")

- csv_path = "D:/DR_Classification/splits/test_labels.csv"
- img_dir = "D:/DR_Classification/splits/test"

- try:
-     df = pd.read_csv(csv_path)
-     class_names = df.iloc[:, 1].unique()

-     for class_name in class_names:
-         st.subheader(f"🔍 Class: {class_name}")
-         class_samples = df[df.iloc[:, 1] == class_name].head(3)
-         cols = st.columns(len(class_samples))

-         for i, row in enumerate(class_samples.itertuples()):
-             img_path = os.path.join(img_dir, row[1])
              if os.path.exists(img_path):
-                 image = Image.open(img_path).convert('RGB')
-                 cols[i].image(image, caption=row[1], use_column_width=True)
- except Exception as e:
-     st.error(f"Error loading dataset: {e}")

  import pandas as pd
  import os
  from PIL import Image
+ import matplotlib.pyplot as plt
+ import seaborn as sns

  st.set_page_config(layout="wide")
+ st.title("🩺 Diabetic Retinopathy Project")

+ # Tabs
+ tab1, tab2, tab3 = st.tabs(["📂 Dataset Info", "📊 Training Visualization", "🤖 Algorithm Used"])

+ # =============================
+ # Tab 1: Dataset Information
+ # =============================
+ with tab1:
+     st.markdown("""
+     ### 🧾 Dataset Overview

+     **Dataset Description:**

+     The DDR dataset contains **13,673 fundus images** from **147 hospitals** across **23 provinces in China**. The images are labeled into 5 classes based on DR severity:

+     - **No_DR**
+     - **Mild**
+     - **Moderate**
+     - **Severe**
+     - **Proliferative_DR**

+     Poor-quality images were removed, and black backgrounds were deleted.
+     [📎 Dataset source](https://www.kaggle.com/datasets/mariaherrerot/ddrdataset)

+     ### 🧪 Data Preparation & Splitting

+     - All images resized to **224x224**
+     - **80% Training**, **20% Testing** (stratified by class)
+     """)

+ # =============================
+ # Tab 2: Training Visualization
+ # =============================
+ with tab2:
+     st.markdown("### 📊 Training Data Class Distribution")
+
+     # CSV path and image folder path (adjust as needed)
+     CSV_PATH = r"D:\DR_Classification\dataset\DR_grading.csv"
+     IMG_FOLDER = r"D:\DR_Classification\dataset\images"  # Folder where all images are stored

+     # Load CSV
+     df = pd.read_csv(CSV_PATH)

+     # Map the numeric 'diagnosis' column (0 to 4) to a readable 'label' column
+     label_map = {
+         0: "No_DR",
+         1: "Mild",
+         2: "Moderate",
+         3: "Severe",
+         4: "Proliferative_DR"
+     }
+     df['label'] = df['diagnosis'].map(label_map)

+     # --- Metric 1: Class Distribution ---
+     st.subheader("1️⃣ Class Distribution")
+     class_counts = df['label'].value_counts().reset_index()
+     class_counts.columns = ['Class', 'Count']

+     fig1, ax1 = plt.subplots()
+     sns.barplot(data=class_counts, x='Class', y='Count', palette='rocket', ax=ax1)
+     ax1.set_title("Class Distribution")
+     st.pyplot(fig1)

+     # --- Metric 2: Sample Images Per Class ---
+     st.subheader("2️⃣ Sample Images Per Class")
+
+     cols = st.columns(len(class_counts))
+     for i, label in enumerate(class_counts['Class']):
+         sample_row = df[df['label'] == label].iloc[0]  # First image of this class
+         img_path = os.path.join(IMG_FOLDER, sample_row['id_code'])  # Assumes 'id_code' already includes the file extension
+         if os.path.exists(img_path):
+             image = Image.open(img_path)
+             cols[i].image(image, caption=label, use_container_width=True)
+         else:
+             cols[i].write(f"Image not found: {sample_row['id_code']}")
+
+     # --- Metric 3: Image Size Distribution ---
+     st.subheader("3️⃣ Image Size Distribution")
+
+     image_sizes = []
+
+     # Check only a few images per class for speed
+     for label in class_counts['Class']:
+         sample_paths = df[df['label'] == label]['id_code'][:5]  # 5 images per class
+         for img_code in sample_paths:
+             img_path = os.path.join(IMG_FOLDER, str(img_code))
              if os.path.exists(img_path):
+                 try:
+                     with Image.open(img_path) as img:
+                         image_sizes.append(img.size)
+                 except Exception as e:
+                     st.warning(f"Error loading image {img_code}: {e}")
+
+     if image_sizes:
+         widths, heights = zip(*image_sizes)
+         fig2, ax2 = plt.subplots()
+         sns.histplot(widths, kde=True, label="Width", color="blue", ax=ax2)
+         sns.histplot(heights, kde=True, label="Height", color="green", ax=ax2)
+         ax2.legend()
+         ax2.set_title("Image Size Distribution")
+         st.pyplot(fig2)
+     else:
+         st.info("No image size data available. Check your paths.")
+
+ # =============================
+ # Tab 3: Algorithm Used
+ # =============================
+ with tab3:
+     st.markdown("""
+     ### 🤖 Model and Algorithm
+
+     We used **Transfer Learning** with **ResNet50** for DR classification.
+
+     #### 🏗️ Model Details:
+     - Input Image Size: **224x224**
+     - Pretrained on **ImageNet**
+     - Optimizer: **Adam**
+     - Loss Function: **Categorical Crossentropy**
+     - Evaluation Metrics: **Accuracy**, **Precision**, **Recall**
+
+     This architecture suits medical image analysis: its residual connections let a deep network train reliably, and the ImageNet-pretrained weights reduce overfitting on a relatively small labeled dataset.
+     """)