# Spaces: Runtime error  (page-status text captured with this file; not program code)
# -*- coding: utf-8 -*-
"""Flux Classification App.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1ckzOtXUiFW_NqlIandwoH07lnsLGKTLB
"""
import gradio as gr | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import ( | |
accuracy_score, | |
f1_score, | |
confusion_matrix, | |
ConfusionMatrixDisplay, | |
) | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.decomposition import PCA | |
import umap | |
import pywt | |
import os | |
from PIL import Image | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from xgboost import XGBClassifier | |
from sklearn.model_selection import cross_val_score, KFold | |
from sklearn.dummy import DummyClassifier | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.svm import SVC | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import classification_report | |
import plotly.express as px | |
import pandas as pd | |
import joblib | |
from tqdm import tqdm | |
import lzma | |
class FluxClassifier:
    """Binary image classifier: wavelet detail coefficients -> UMAP -> classifier.

    Images are resized to 512x512, converted to grayscale, and described by the
    flattened detail coefficients of one ``pywt.dwt`` call per configured
    wavelet. Those feature vectors are reduced with UMAP and the resulting
    embeddings are fed to ``self.classifier`` (a 7-nearest-neighbour classifier
    by default; the attribute may be replaced before calling ``fit``).

    Labels are derived from the folder path: 1 when the path string contains
    the (case-sensitive) substring ``"AI"``, otherwise 0.
    """

    # Lower-case file-name suffixes accepted by load_images_from_folder.
    _IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png", ".webp")
    # Every image is resized to this (width, height) before feature extraction.
    _IMAGE_SIZE = (512, 512)

    def __init__(
        self,
        wavelets=("db4", "db10"),
        umap_n_neighbors=16,
        umap_n_components=32,
        random_state=42,
    ):
        """Configure the feature extractor, the UMAP reducer and the classifier.

        Args:
            wavelets: Iterable of PyWavelets wavelet names; one DWT is computed
                per name and their detail coefficients are concatenated.
            umap_n_neighbors: ``n_neighbors`` passed to ``umap.UMAP``.
            umap_n_components: ``n_components`` passed to ``umap.UMAP``.
            random_state: Seed passed to ``umap.UMAP``.
        """
        # Copy into a fresh list so instances never share (or mutate) the
        # default argument or a list owned by the caller.
        self.wavelets = list(wavelets)
        self.umap_n_neighbors = umap_n_neighbors
        self.umap_n_components = umap_n_components
        self.random_state = random_state
        self.reducer = umap.UMAP(
            n_neighbors=self.umap_n_neighbors,
            n_components=self.umap_n_components,
            random_state=self.random_state,
        )
        self.classifier = KNeighborsClassifier(n_neighbors=7)  # Default classifier

    def load_images_from_folder(self, folder):
        """Load every supported image directly inside ``folder``.

        Files are visited in sorted name order so that callers which truncate
        the result (see ``_load_balanced_features``) get a reproducible subset.
        Suffix matching is case-insensitive. Each image is resized to 512x512
        and its file handle is closed before moving on.

        Args:
            folder: Directory path. If the path contains ``"AI"`` every image
                from it is labelled 1, otherwise 0.

        Returns:
            ``(images, labels)``: a list of resized PIL images and a list of
            integer labels of the same length.
        """
        images = []
        print(f"Loading images from {folder}")
        for filename in tqdm(sorted(os.listdir(folder))):
            if not filename.lower().endswith(self._IMAGE_SUFFIXES):
                continue
            # resize() returns a new, fully loaded image, so the source file
            # can be closed as soon as the `with` block ends.
            with Image.open(os.path.join(folder, filename)) as img:
                images.append(img.resize(self._IMAGE_SIZE))
        # Assuming folder names contain "AI" or not
        label = 1 if "AI" in folder else 0
        labels = [label] * len(images)
        return images, labels

    def extract_wavelet_features(self, images):
        """Turn PIL images into wavelet detail-coefficient feature vectors.

        For each image: convert to 8-bit grayscale, run ``pywt.dwt`` once per
        wavelet in ``self.wavelets`` (no ``axis`` argument is given, so the
        PyWavelets default axis is used), keep only the detail coefficients,
        flatten them and concatenate across wavelets.

        Args:
            images: Iterable of PIL images. They should all have the same size,
                otherwise the rows cannot be stacked into a 2-D array.

        Returns:
            ``np.ndarray`` with one row per image; an empty 1-D array when
            ``images`` is empty.
        """
        rows = []
        for img in images:
            gray = np.array(img.convert("L"))
            details = [pywt.dwt(gray, wavelet)[1].ravel() for wavelet in self.wavelets]
            # Concatenating arrays avoids pushing every coefficient through a
            # Python list one element at a time.
            rows.append(np.concatenate(details) if details else np.empty(0))
        return np.array(rows)

    def _load_balanced_features(self, folder1, folder2):
        """Load both folders, truncate to equal class sizes, extract features."""
        images1, labels1 = self.load_images_from_folder(folder1)
        images2, labels2 = self.load_images_from_folder(folder2)
        per_class = min(len(images1), len(images2))
        if per_class == 0:
            raise ValueError(
                f"No usable images found in {folder1!r} and/or {folder2!r}"
            )
        images = images1[:per_class] + images2[:per_class]
        labels = labels1[:per_class] + labels2[:per_class]
        return self.extract_wavelet_features(images), labels

    def _embed(self, images):
        """Map PIL images to embeddings with the already-fitted reducer."""
        return self.reducer.transform(self.extract_wavelet_features(images))

    def fit(self, train_folder1, train_folder2):
        """Fit the reducer and the classifier, then print hold-out metrics.

        Both folders are loaded and truncated to the same number of images.
        The classifier is trained on 80% of the embeddings; accuracy, F1 and a
        classification report for the remaining 20% are printed.

        Args:
            train_folder1: Directory with images of one class.
            train_folder2: Directory with images of the other class.

        Raises:
            ValueError: If either folder yields no images.
        """
        features, labels = self._load_balanced_features(train_folder1, train_folder2)
        # Apply UMAP dimensionality reduction.
        # NOTE(review): the reducer is fitted on ALL samples before the split,
        # so the printed hold-out metrics are computed on embeddings that have
        # already seen the test samples.
        embeddings = self.reducer.fit_transform(features)
        X_train, X_test, y_train, y_test = train_test_split(
            embeddings, labels, test_size=0.2, random_state=42
        )
        # Train the classifier
        self.classifier.fit(X_train, y_train)
        # Predict once and derive every metric from the same predictions.
        y_pred = self.classifier.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Classifier accuracy = {acc}")
        f1 = f1_score(y_test, y_pred)
        print(f"Classifier F1 = {f1}")
        print(classification_report(y_test, y_pred))

    def predict(self, images):
        """Return the predicted class label for each PIL image in ``images``."""
        return self.classifier.predict(self._embed(images))

    def predict_proba(self, images):
        """Return per-class probabilities for each PIL image in ``images``."""
        return self.classifier.predict_proba(self._embed(images))

    def score(self, test_folder):
        """Return ``self.classifier.score`` on the images found in ``test_folder``.

        Labels come from the folder path, as in ``load_images_from_folder``.
        """
        images, labels = self.load_images_from_folder(test_folder)
        return self.classifier.score(self._embed(images), labels)

    def cross_val_score(self, folder1, folder2, n_splits=5):
        """Run shuffled k-fold cross-validation and print the accuracy scores.

        This calls ``self.reducer.fit_transform`` on the loaded data, so the
        reducer's fitted state afterwards reflects these two folders.

        Args:
            folder1: Directory with images of one class.
            folder2: Directory with images of the other class.
            n_splits: Number of folds.

        Returns:
            The array of per-fold accuracy scores (also printed).

        Raises:
            ValueError: If either folder yields no images.
        """
        features, labels = self._load_balanced_features(folder1, folder2)
        # Apply UMAP dimensionality reduction
        embeddings = self.reducer.fit_transform(features)
        # Perform n_splits-fold cross-validation
        kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        # Inside a method body this name resolves to the module-level
        # scikit-learn function, not to this method.
        scores = cross_val_score(
            self.classifier, embeddings, labels, cv=kfold, scoring="accuracy"
        )
        # Print the cross-validation scores
        print("Cross-validation scores:", scores)
        print("Average cross-validation score:", scores.mean())
        return scores

    def save_model(self, filename):
        """Serialize this whole object to ``filename`` with joblib (zlib level 9)."""
        joblib.dump(self, filename, compress=("zlib", 9))

    @staticmethod
    def load_model(filename):
        """Load and return an object previously written by ``save_model``.

        joblib files are pickles: only load files from a trusted source.
        """
        return joblib.load(filename)
# Load the trained model from disk (presumably a FluxClassifier written by
# FluxClassifier.save_model -- confirm against the training notebook).
# joblib files are pickles, so only load a model file you trust.
filename = "flux_classifier.pkl"
classifier = joblib.load(filename)
def classify_image(image):
    """Classify one uploaded image and return a label -> probability mapping.

    Args:
        image: Image data accepted by ``PIL.Image.fromarray`` (presumably the
            numpy array Gradio passes for an "image" input -- confirm).

    Returns:
        Dict mapping ``"Photo"`` and ``"FLUX"`` to probabilities as built-in
        floats, taken in order from the first row of
        ``classifier.predict_proba``.
    """
    # The model is fed 512x512 PIL images, so resize before predicting.
    pil_image = Image.fromarray(image).resize((512, 512))
    probabilities = classifier.predict_proba([pil_image])[0]
    # Assumes probability column 0 is the photo class and column 1 the FLUX
    # class -- TODO confirm against the fitted classifier's class order.
    labels = ["Photo", "FLUX"]
    # Cast numpy scalars to plain floats so the result is a plain-Python dict.
    return {label: float(prob) for label, prob in zip(labels, probabilities)}
# Gradio UI: a single image input wired to classify_image, with a label
# output configured to show the top two classes.
interface = gr.Interface(
    fn=classify_image,
    inputs=["image"],
    outputs=gr.Label(num_top_classes=2),
)
# Start the app; share=True additionally requests a public share link.
interface.launch(share=True)