Spaces:
Runtime error
Runtime error
File size: 6,904 Bytes
92055e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
# -*- coding: utf-8 -*-
"""Flux Classification App.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1ckzOtXUiFW_NqlIandwoH07lnsLGKTLB
"""
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
accuracy_score,
f1_score,
confusion_matrix,
ConfusionMatrixDisplay,
)
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import umap
import pywt
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score, KFold
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import plotly.express as px
import pandas as pd
import joblib
from tqdm import tqdm
import lzma
class FluxClassifier:
    """Binary image classifier: wavelet detail features -> UMAP -> classifier.

    Images are converted to grayscale, decomposed with one single-level
    discrete wavelet transform per configured wavelet, and the detail
    coefficients are concatenated into one feature vector per image. The
    vectors are reduced with UMAP and classified with a k-nearest-neighbours
    model (``n_neighbors=7``) unless ``self.classifier`` is replaced.

    Args:
        wavelets: Iterable of PyWavelets wavelet names. ``None`` selects
            ``["db4", "db10"]``.
        umap_n_neighbors: ``n_neighbors`` passed to ``umap.UMAP``.
        umap_n_components: ``n_components`` passed to ``umap.UMAP``.
        random_state: Seed used for UMAP, the train/test split and the
            cross-validation shuffling.
    """

    # Suffixes accepted by ``load_images_from_folder``. "jpeg" and "webp"
    # intentionally have no leading dot, mirroring the historical filter.
    _IMAGE_SUFFIXES = (".jpg", ".png", "jpeg", "webp")

    def __init__(
        self,
        wavelets=None,
        umap_n_neighbors=16,
        umap_n_components=32,
        random_state=42,
    ):
        # A list literal as default would be shared between all instances;
        # use None as the sentinel and copy whatever the caller passes in.
        self.wavelets = ["db4", "db10"] if wavelets is None else list(wavelets)
        self.umap_n_neighbors = umap_n_neighbors
        self.umap_n_components = umap_n_components
        self.random_state = random_state
        self.reducer = umap.UMAP(
            n_neighbors=self.umap_n_neighbors,
            n_components=self.umap_n_components,
            random_state=self.random_state,
        )
        self.classifier = KNeighborsClassifier(n_neighbors=7)  # Default classifier

    @classmethod
    def _is_image_file(cls, filename):
        """Return True when *filename* ends with a supported image suffix."""
        # Lower-case first so that e.g. "IMG_001.JPG" is not silently skipped.
        return filename.lower().endswith(cls._IMAGE_SUFFIXES)

    def load_images_from_folder(self, folder):
        """Load every supported image in *folder*, resized to 512x512.

        Args:
            folder: Directory path. Every image gets label 1 when the path
                string contains "AI", otherwise label 0.

        Returns:
            A ``(images, labels)`` tuple of equally long lists.
        """
        # The label depends only on the folder path, so compute it once.
        label = 1 if "AI" in folder else 0
        images = []
        print(f"Loading images from {folder}")
        # os.listdir order is unspecified; sorting makes the later
        # truncation to the smaller class size reproducible.
        for filename in tqdm(sorted(os.listdir(folder))):
            if not self._is_image_file(filename):
                continue
            # resize() returns an independent image, so the source file can
            # be closed as soon as the block exits.
            with Image.open(os.path.join(folder, filename)) as img:
                images.append(img.resize((512, 512)))
        labels = [label] * len(images)
        return images, labels

    def extract_wavelet_features(self, images):
        """Build one wavelet detail-coefficient vector per image.

        Args:
            images: Iterable of PIL images.

        Returns:
            ``np.ndarray`` with one row per image; an empty 1-D array when
            *images* is empty.
        """
        all_features = []
        for img in images:
            pixels = np.array(img.convert("L"))
            # pywt.dwt returns (approximation, detail); only the detail
            # coefficients are kept. Concatenating arrays avoids building a
            # Python list with one element per coefficient.
            details = [
                pywt.dwt(pixels, wavelet)[1].ravel() for wavelet in self.wavelets
            ]
            all_features.append(np.concatenate(details) if details else np.empty(0))
        return np.array(all_features)

    def _load_balanced_dataset(self, folder1, folder2):
        """Load both folders and truncate them to the smaller image count."""
        images1, labels1 = self.load_images_from_folder(folder1)
        images2, labels2 = self.load_images_from_folder(folder2)
        min_length = min(len(images1), len(images2))
        if min_length == 0:
            raise ValueError(
                f"No supported images found in {folder1!r} and/or {folder2!r}"
            )
        images = images1[:min_length] + images2[:min_length]
        labels = labels1[:min_length] + labels2[:min_length]
        return images, labels

    def _embed(self, images):
        """Project *images* into the space of the already fitted reducer."""
        features = self.extract_wavelet_features(images)
        return self.reducer.transform(features)

    def fit(self, train_folder1, train_folder2):
        """Fit the reducer and the classifier, then print hold-out metrics.

        Args:
            train_folder1: Folder with images of the first class.
            train_folder2: Folder with images of the second class.

        Raises:
            ValueError: If either folder contains no supported image.
        """
        images, labels = self._load_balanced_dataset(train_folder1, train_folder2)
        features = self.extract_wavelet_features(images)
        # NOTE: the reducer is fitted on every sample, including the 20 %
        # held out below, so the printed metrics may be optimistic.
        embeddings = self.reducer.fit_transform(features)
        X_train, X_test, y_train, y_test = train_test_split(
            embeddings, labels, test_size=0.2, random_state=self.random_state
        )
        # Train the classifier
        self.classifier.fit(X_train, y_train)
        acc = self.classifier.score(X_test, y_test)
        y_pred = self.classifier.predict(X_test)
        print(f"Classifier accuracy = {acc}")
        f1 = f1_score(y_test, y_pred)
        print(f"Classifier F1 = {f1}")
        print(classification_report(y_test, y_pred))

    def predict(self, images):
        """Return the predicted class label for each PIL image in *images*."""
        return self.classifier.predict(self._embed(images))

    def predict_proba(self, images):
        """Return the class probabilities for each PIL image in *images*."""
        return self.classifier.predict_proba(self._embed(images))

    def score(self, test_folder):
        """Return the classifier's score on the images in *test_folder*."""
        images, labels = self.load_images_from_folder(test_folder)
        return self.classifier.score(self._embed(images), labels)

    def cross_val_score(self, folder1, folder2, n_splits=5):
        """Run shuffled k-fold cross-validation and print the accuracies.

        Args:
            folder1: Folder with images of the first class.
            folder2: Folder with images of the second class.
            n_splits: Number of folds.

        Returns:
            Array with one accuracy value per fold.

        Raises:
            ValueError: If either folder contains no supported image.
        """
        from sklearn.base import clone

        images, labels = self._load_balanced_dataset(folder1, folder2)
        features = self.extract_wavelet_features(images)
        # Fit a clone so that an already trained reducer/classifier pair is
        # not invalidated by re-fitting the reducer on this data.
        embeddings = clone(self.reducer).fit_transform(features)
        kfold = KFold(
            n_splits=n_splits, shuffle=True, random_state=self.random_state
        )
        scores = cross_val_score(
            self.classifier, embeddings, labels, cv=kfold, scoring="accuracy"
        )
        # Print the cross-validation scores
        print("Cross-validation scores:", scores)
        print("Average cross-validation score:", scores.mean())
        return scores

    def save_model(self, filename):
        """Serialize this instance to *filename* with maximum zlib compression."""
        joblib.dump(self, filename, compress=("zlib", 9))

    @staticmethod
    def load_model(filename):
        """Load an object previously written with ``save_model``.

        SECURITY: joblib files are pickle-based and can execute arbitrary
        code while loading; only open files from a trusted source.
        """
        return joblib.load(filename)
# Restore the serialized classifier object from disk. The file must sit in
# the working directory; loading goes through FluxClassifier.load_model,
# which wraps joblib.load.
filename = "flux_classifier.pkl"
classifier = FluxClassifier.load_model(filename)
def classify_image(image):
    """Classify one image and return a label -> probability mapping.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``
            (presumably the numpy array Gradio passes for an "image" input).

    Returns:
        Dict mapping "Photo" and "FLUX" to the probabilities reported by the
        module-level ``classifier`` for this image.
    """
    # Same 512x512 size that the classifier's folder loader resizes to.
    resized = Image.fromarray(image).resize((512, 512))
    # predict_proba works on batches; wrap the single image and take row 0.
    first_row = list(classifier.predict_proba([resized]))[0]
    # Probability column i is reported under labels[i]
    # (assumes class 0 is "Photo" and class 1 is "FLUX" - TODO confirm).
    labels = ["Photo", "FLUX"]
    scores = {}
    for index, prob in enumerate(first_row):
        scores[f"{labels[index]}"] = prob
    return scores
# Web UI: one image input, a label widget showing the two class scores.
interface = gr.Interface(
    fn=classify_image,
    inputs=["image"],
    outputs=gr.Label(num_top_classes=2),
)
# Start serving; share=True additionally requests a public Gradio link.
interface.launch(share=True)
|