Update app.py
app.py
CHANGED
@@ -14,9 +14,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
 import torch.nn as nn
 import torch.optim as optim
-from torch.utils.data import DataLoader, TensorDataset
+from torch.utils.data import DataLoader, TensorDataset, IterableDataset
 import multiprocessing as mp
 from joblib import Parallel, delayed
+import gc
 
 warnings.filterwarnings('ignore', category=FutureWarning, module='huggingface_hub.file_download')
 
@@ -37,61 +38,75 @@ data = {
 }
 df = pd.DataFrame(data)
 
-# Encoding the contexts using One-Hot Encoding
-encoder = OneHotEncoder(handle_unknown='ignore', sparse=
+# Encoding the contexts using One-Hot Encoding (memory-efficient)
+encoder = OneHotEncoder(handle_unknown='ignore', sparse=True)
 contexts_encoded = encoder.fit_transform(df[['context']])
 
 # Encoding emotions
-emotions_target = df['emotion']
-emotion_classes = df['emotion']
+emotions_target = pd.Categorical(df['emotion']).codes
+emotion_classes = pd.Categorical(df['emotion']).categories
 
-#
-class
+# Memory-efficient Neural Network with PyTorch
+class MemoryEfficientNN(nn.Module):
     def __init__(self, input_size, hidden_size, num_classes):
-        super(
-        self.
+        super(MemoryEfficientNN, self).__init__()
+        self.layers = nn.Sequential(
+            nn.Linear(input_size, hidden_size),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(hidden_size, hidden_size),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(hidden_size, num_classes)
+        )
 
     def forward(self, x):
+        return self.layers(x)
+
+# Memory-efficient dataset
+class MemoryEfficientDataset(IterableDataset):
+    def __init__(self, X, y, batch_size):
+        self.X = X
+        self.y = y
+        self.batch_size = batch_size
+
+    def __iter__(self):
+        for i in range(0, len(self.y), self.batch_size):
+            X_batch = self.X[i:i+self.batch_size].toarray()
+            y_batch = self.y[i:i+self.batch_size]
+            yield torch.FloatTensor(X_batch), torch.LongTensor(y_batch)
+
+# Train Memory-Efficient Neural Network
 X_train, X_test, y_train, y_test = train_test_split(contexts_encoded, emotions_target, test_size=0.2, random_state=42)
 input_size = X_train.shape[1]
 hidden_size = 64
 num_classes = len(emotion_classes)
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model =
+model = MemoryEfficientNN(input_size, hidden_size, num_classes).to(device)
 criterion = nn.CrossEntropyLoss()
 optimizer = optim.Adam(model.parameters(), lr=0.001)
 
-train_dataset =
-train_loader = DataLoader(train_dataset, batch_size=
+train_dataset = MemoryEfficientDataset(X_train, y_train, batch_size=32)
+train_loader = DataLoader(train_dataset, batch_size=None)
 
 num_epochs = 100
 for epoch in range(num_epochs):
     for batch_X, batch_y in train_loader:
+        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
         outputs = model(batch_X)
         loss = criterion(outputs, batch_y)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
+    gc.collect()  # Garbage collection after each epoch
 
-# Ensemble with Random Forest
-rf_model = RandomForestClassifier(n_estimators=
+# Ensemble with Random Forest (memory-efficient)
+rf_model = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
 rf_model.fit(X_train, y_train)
 
-# Isolation Forest Anomaly Detection Model
-isolation_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1)
+# Isolation Forest Anomaly Detection Model (memory-efficient)
+isolation_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1, max_samples=100)
 isolation_forest.fit(X_train)
 
 # Enhanced Emotional States
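Note: the switch to a sparse encoder is what makes the rest of this hunk work — the encoder now returns a SciPy CSR matrix, and MemoryEfficientDataset densifies one batch-sized slice at a time instead of materializing the full matrix. A minimal, self-contained sketch of that pattern on toy data follows; one caveat is that scikit-learn renamed OneHotEncoder's `sparse` keyword to `sparse_output` in 1.2 and removed the old name in 1.4, so the committed `sparse=True` only runs on older releases.

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import DataLoader, IterableDataset

df = pd.DataFrame({'context': ['joy', 'loss', 'joy', 'fear'],
                   'emotion': ['happy', 'sad', 'happy', 'anxious']})

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=True)
X = encoder.fit_transform(df[['context']])      # SciPy CSR matrix, stays sparse
y = pd.Categorical(df['emotion']).codes         # integer class labels

class StreamingDataset(IterableDataset):
    """Densifies one batch-sized slice of the sparse matrix at a time."""
    def __init__(self, X, y, batch_size):
        self.X, self.y, self.batch_size = X, y, batch_size

    def __iter__(self):
        for i in range(0, len(self.y), self.batch_size):
            yield (torch.as_tensor(self.X[i:i + self.batch_size].toarray(),
                                   dtype=torch.float32),
                   torch.as_tensor(np.asarray(self.y[i:i + self.batch_size]),
                                   dtype=torch.long))

# batch_size=None because the dataset already yields whole batches
loader = DataLoader(StreamingDataset(X, y, batch_size=2), batch_size=None)
for batch_X, batch_y in loader:
    print(batch_X.shape, batch_y.shape)   # torch.Size([2, 3]) torch.Size([2])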
@@ -149,16 +164,16 @@ def update_emotion(emotion, percentage, intensity):
 def normalize_context(context):
     return context.lower().strip()
 
-#
-def parallel_evaluate(individual):
-    ideal_state = individual[-1]
-    other_emotions = individual[:-1]
-    intensities = individual[-21:-1]
-    return (abs(ideal_state - 100),
-            sum(other_emotions),
-            max(intensities) - min(intensities))
-
+# Memory-efficient genetic algorithm for emotion evolution
 def evolve_emotions():
+    def evaluate(individual):
+        ideal_state = individual[-1]
+        other_emotions = individual[:-1]
+        intensities = individual[-21:-1]
+        return (abs(ideal_state - 100),
+                sum(other_emotions),
+                max(intensities) - min(intensities))
+
     creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0, -1.0))
     creator.create("Individual", list, fitness=creator.FitnessMulti)
 
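Note: the evaluation function moves inside evolve_emotions() and the multiprocessing pool goes away (next hunk). Its three return values line up with the FitnessMulti weights (-1.0, -1.0, -1.0), so NSGA-II minimizes all three objectives. A toy check of the slicing follows, assuming the gene layout implied by individual[-21:-1] — hypothetically 20 emotion genes, 20 intensity genes, then one ideal-state gene; the diff itself doesn't show how individuals are built.

individual = [10.0] * 20 + [5.0] * 20 + [95.0]

ideal_state = individual[-1]        # 95.0
other_emotions = individual[:-1]    # all 40 genes before the ideal state
intensities = individual[-21:-1]    # the 20 genes just before it

print((abs(ideal_state - 100),                # distance from the target state: 5.0
       sum(other_emotions),                   # residual emotion mass: 300.0
       max(intensities) - min(intensities)))  # intensity spread: 0.0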
@@ -172,20 +187,20 @@ def evolve_emotions():
                      n=1)
     toolbox.register("population", tools.initRepeat, list, toolbox.individual)
 
-    toolbox.register("map", pool.map)
-    toolbox.register("evaluate", parallel_evaluate)
+    toolbox.register("evaluate", evaluate)
     toolbox.register("mate", tools.cxSimulatedBinaryBounded, low=0, up=120, eta=20.0)
     toolbox.register("mutate", tools.mutPolynomialBounded, low=0, up=120, eta=20.0, indpb=0.1)
     toolbox.register("select", tools.selNSGA2)
 
-    population = toolbox.population(n=
-    algorithms.eaMuPlusLambda(population, toolbox, mu=100, lambda_=100,
-                              cxpb=0.7, mutpb=0.3, ngen=50, verbose=False)
-    pool.close()
+    population = toolbox.population(n=50)
+
+    for gen in range(25):
+        offspring = algorithms.varAnd(population, toolbox, cxpb=0.7, mutpb=0.3)
+        fits = toolbox.map(toolbox.evaluate, offspring)
+        for fit, ind in zip(fits, offspring):
+            ind.fitness.values = fit
+        population = toolbox.select(offspring + population, k=len(population))
 
     best_individual = tools.selBest(population, k=1)[0]
 
     for idx, emotion in enumerate(emotions.keys()):
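Note: this hunk trades algorithms.eaMuPlusLambda plus a worker pool for an explicit (mu + lambda)-style loop: varAnd builds offspring, their fitnesses are assigned, and selNSGA2 keeps the best of parents and offspring. One thing to watch: as committed, only offspring are ever evaluated, and selNSGA2 requires every individual it sorts to carry a valid fitness, so the initial population needs one evaluation pass before the first generation. A self-contained sketch with that pass added (the gene count of 21 and the [0, 120] bounds are illustrative):

import random
from deap import algorithms, base, creator, tools

creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0, -1.0))
creator.create("Individual", list, fitness=creator.FitnessMulti)

def evaluate(ind):
    return (abs(ind[-1] - 100), sum(ind[:-1]), max(ind[:-1]) - min(ind[:-1]))

toolbox = base.Toolbox()
toolbox.register("attr", random.uniform, 0, 120)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr, n=21)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxSimulatedBinaryBounded, low=0, up=120, eta=20.0)
toolbox.register("mutate", tools.mutPolynomialBounded, low=0, up=120, eta=20.0, indpb=0.1)
toolbox.register("select", tools.selNSGA2)

population = toolbox.population(n=50)
for ind, fit in zip(population, map(toolbox.evaluate, population)):
    ind.fitness.values = fit            # seed fitnesses before the first select

for gen in range(25):
    offspring = algorithms.varAnd(population, toolbox, cxpb=0.7, mutpb=0.3)
    for ind, fit in zip(offspring, map(toolbox.evaluate, offspring)):
        ind.fitness.values = fit
    population = toolbox.select(offspring + population, k=len(population))

print(tools.selBest(population, k=1)[0].fitness.values)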
@@ -198,10 +213,10 @@ def evolve_emotions():
 # Initialize the pre-trained language model (BLOOM-1b7)
 model_name = 'bigscience/bloom-1b7'
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-lm_model = AutoModelForCausalLM.from_pretrained(model_name)
+lm_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", low_cpu_mem_usage=True)
 
-def generate_text(prompt, max_length=150):
-    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
+def generate_text(prompt, max_length=100):
+    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(lm_model.device)
     with torch.no_grad():
         output = lm_model.generate(
             input_ids,
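Note: device_map="auto" and low_cpu_mem_usage=True both depend on the accelerate package; without it, from_pretrained raises. Moving inputs to lm_model.device (rather than the global device) is the right companion change, since accelerate may place the model somewhere other than the default device. A minimal usage sketch (the prompt is illustrative):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = 'bigscience/bloom-1b7'
tokenizer = AutoTokenizer.from_pretrained(model_name)
lm_model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", low_cpu_mem_usage=True)

input_ids = tokenizer.encode("Today I feel", return_tensors='pt').to(lm_model.device)
with torch.no_grad():
    output = lm_model.generate(input_ids, max_length=100)  # greedy by default
print(tokenizer.decode(output[0], skip_special_tokens=True))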
@@ -216,7 +231,7 @@ def generate_text(prompt, max_length=150):
     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
     return generated_text
 
-sentiment_pipeline = pipeline("sentiment-analysis", model=model_name, tokenizer=tokenizer,
+sentiment_pipeline = pipeline("sentiment-analysis", model=model_name, tokenizer=tokenizer, device_map="auto")
 
 def get_sentiment(text):
     result = sentiment_pipeline(text)[0]
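Note: one caveat on reusing BLOOM-1b7 here — "sentiment-analysis" wraps a sequence-classification model, and this checkpoint ships no trained classification head, so the pipeline will initialize one randomly and emit essentially arbitrary labels. A sketch with a purpose-trained checkpoint instead; the model name below is the task's usual default, swapped in for illustration rather than taken from this commit:

from transformers import pipeline

sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english")

print(sentiment_pipeline("I feel wonderful today")[0])
# e.g. {'label': 'POSITIVE', 'score': 0.99...}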
@@ -228,7 +243,7 @@ def get_emotional_response(context):
 
     # Neural network prediction
     with torch.no_grad():
-        nn_output = model(torch.FloatTensor(context_encoded).to(device))
+        nn_output = model(torch.FloatTensor(context_encoded.toarray()).to(device))
     nn_prediction = nn_output.argmax(1).item()
 
     # Random Forest prediction
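Note: the .toarray() calls in this hunk and the next are the flip side of the sparse encoder — transforming a single context yields a 1-row sparse matrix that must be densified before it becomes a tensor. A call-site sketch, reusing encoder, model, and device as defined earlier in app.py (the context string is a placeholder; with handle_unknown='ignore', an unseen context encodes to an all-zeros row):

context_encoded = encoder.transform([['some context']])   # 1 x n_features, sparse
with torch.no_grad():
    logits = model(torch.FloatTensor(context_encoded.toarray()).to(device))
nn_prediction = logits.argmax(1).item()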
@@ -239,7 +254,7 @@ def get_emotional_response(context):
     predicted_emotion = emotion_classes[int(round(ensemble_prediction))]
 
     # Anomaly detection
-    anomaly_score = isolation_forest.decision_function(context_encoded)
+    anomaly_score = isolation_forest.decision_function(context_encoded.toarray())
     is_anomaly = anomaly_score < 0
 
     # Calculate emotion intensity based on model confidence
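Note: the is_anomaly = anomaly_score < 0 test relies on IsolationForest's sign convention — decision_function returns positive scores for inliers and negative for outliers, with the zero point set by contamination. A tiny self-contained check:

import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.RandomState(42)
iso = IsolationForest(contamination=0.1, random_state=42).fit(rng.normal(size=(100, 2)))
print(iso.decision_function([[0.0, 0.0]]))   # near the training mass: positive (inlier)
print(iso.decision_function([[8.0, 8.0]]))   # far away: negative (anomaly)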