"""Print the class distribution of the cleaned dataset.

Reads ``cleaned_qbias_dataset.csv`` from the current working directory and
prints how many rows carry each value of the ``label`` column.

A disabled one-off balancing script is retained further down as an inert
string literal; it is never executed by this module.
"""

import pandas as pd

# Disabled balancing script, kept for reference only. When it was active it
# wrote ``cleaned_qbias_balanced.csv``.
# NOTE(review): only the ``label == 1`` subset is resampled to the size of the
# ``label == 0`` subset; the ``label == 2`` rows are concatenated unchanged, so
# the output is balanced only if labels 0 and 2 already have equal counts --
# confirm before re-enabling.
'''
import pandas as pd
from sklearn.utils import resample

# Load the cleaned dataset
df = pd.read_csv("cleaned_qbias_dataset.csv")

# Separate majority and minority classes
df_left = df[df.label == 0]
df_center = df[df.label == 1]
df_right = df[df.label == 2]

# Determine target size (matching the majority class: 'left')
target_size = len(df_left)

# Upsample center class to match 'left'
df_center_upsampled = resample(
    df_center,
    replace=True,
    n_samples=target_size,
    random_state=42
)

# Combine all classes into one balanced DataFrame
df_balanced = pd.concat([df_left, df_center_upsampled, df_right])

# Shuffle the final dataset
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

# Save to new CSV
df_balanced.to_csv("cleaned_qbias_balanced.csv", index=False)

print("Balanced dataset saved as cleaned_qbias_balanced.csv")
print(df_balanced['label'].value_counts())
'''

# Load the cleaned dataset and report how many rows each label value has.
df = pd.read_csv("cleaned_qbias_dataset.csv")
print(df['label'].value_counts())