bias-detector / Qbias /datasetcleanup.py
mjwagerman's picture
refreshed repository, added models with lfs
4dfb4e3
# Disabled code: everything between the triple quotes below is a plain string
# literal, so none of it runs. It preserves an earlier balancing script that:
#   * reads "cleaned_qbias_dataset.csv" and splits the rows by `label`
#     (0 -> df_left, 1 -> df_center, 2 -> df_right);
#   * upsamples only the label == 1 rows, sampling with replacement
#     (random_state=42), up to the number of label == 0 rows;
#   * concatenates the three groups (label == 0 and label == 2 rows unchanged),
#     shuffles them with random_state=42, and writes
#     "cleaned_qbias_balanced.csv" without the index column.
# Reviving it requires scikit-learn for `sklearn.utils.resample`.
# NOTE(review): the label == 2 group is never resampled, so the output is only
# balanced if that group already matches the label == 0 count -- confirm
# against the data before reusing this.
# NOTE(review): if this string is the first statement of the module, Python
# stores it as the module docstring (`__doc__`) -- presumably unintended.
'''
import pandas as pd
from sklearn.utils import resample
# Load the cleaned dataset
df = pd.read_csv("cleaned_qbias_dataset.csv")
# Separate majority and minority classes
df_left = df[df.label == 0]
df_center = df[df.label == 1]
df_right = df[df.label == 2]
# Determine target size (matching the majority class: 'left')
target_size = len(df_left)
# Upsample center class to match 'left'
df_center_upsampled = resample(
df_center,
replace=True,
n_samples=target_size,
random_state=42
)
# Combine all classes into one balanced DataFrame
df_balanced = pd.concat([df_left, df_center_upsampled, df_right])
# Shuffle the final dataset
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
# Save to new CSV
df_balanced.to_csv("cleaned_qbias_balanced.csv", index=False)
print("Balanced dataset saved as cleaned_qbias_balanced.csv")
print(df_balanced['label'].value_counts())
'''
import pandas as pd
# Load the cleaned dataset and report how many rows carry each `label` value.
df = pd.read_csv("cleaned_qbias_dataset.csv")
label_counts = df["label"].value_counts()
print(label_counts)