# Hugging Face Spaces page header captured with this source (Space status: Sleeping)
import warnings | |
import numpy as np | |
import pandas as pd | |
import os | |
import json | |
import random | |
import gradio as gr | |
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
from torch.utils.data import DataLoader, IterableDataset | |
from sklearn.ensemble import IsolationForest, RandomForestClassifier | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import OneHotEncoder | |
from sklearn.neural_network import MLPClassifier | |
from deap import base, creator, tools, algorithms | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, AutoModelForSequenceClassification | |
import gc | |
import multiprocessing as mp | |
from joblib import Parallel, delayed | |
warnings.filterwarnings('ignore', category=FutureWarning, module='huggingface_hub.file_download')

# Initialize Example Emotions Dataset
data = {
    'context': [
        'I am happy', 'I am sad', 'I am angry', 'I am excited', 'I am calm',
        'I am feeling joyful', 'I am grieving', 'I am feeling peaceful', 'I am frustrated',
        'I am determined', 'I feel resentment', 'I am feeling glorious', 'I am motivated',
        'I am surprised', 'I am fearful', 'I am trusting', 'I feel disgust', 'I am optimistic',
        'I am pessimistic', 'I feel bored', 'I am envious'
    ],
    'emotion': [
        'joy', 'sadness', 'anger', 'joy', 'calmness', 'joy', 'grief', 'calmness', 'anger',
        'determination', 'resentment', 'glory', 'motivation', 'surprise', 'fear', 'trust',
        'disgust', 'optimism', 'pessimism', 'boredom', 'envy'
    ]
}
df = pd.DataFrame(data)

# Encoding the contexts using One-Hot Encoding (memory-efficient).
# The `sparse` keyword is deliberately not passed: scikit-learn renamed it to
# `sparse_output` (1.2) and later removed the old spelling, while sparse output
# is the default under either name.
encoder = OneHotEncoder(handle_unknown='ignore')
# Fit on lower-cased, stripped contexts. process_input() normalizes user text
# the same way before calling encoder.transform(); fitting on the raw
# capitalized strings would make every runtime input an unknown category
# (an all-zero row, because handle_unknown='ignore').
contexts_encoded = encoder.fit_transform(
    df[['context']].apply(lambda column: column.str.lower().str.strip())
)

# Encoding emotions: integer codes are the training targets, categories map
# a code back to its emotion name.
_emotion_categorical = pd.Categorical(df['emotion'])
emotions_target = _emotion_categorical.codes
emotion_classes = _emotion_categorical.categories
# Memory-efficient Neural Network with PyTorch
class MemoryEfficientNN(nn.Module):
    """Small feed-forward classifier over one-hot encoded inputs.

    Args:
        input_size: Width of each input row (number of one-hot columns).
        hidden_size: Width of the two hidden layers.
        num_classes: Number of output logits per row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.layers = nn.Sequential(
            # A Linear layer consumes the one-hot row directly. An Embedding
            # here would treat each 0/1 entry as a token index and produce
            # (batch, input_size, hidden) activations, i.e. 3-D logits that
            # CrossEntropyLoss cannot match against (batch,) targets.
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for a (batch, input_size) input.

        The input is cast to float, so integer (e.g. long) one-hot tensors are
        accepted as well.
        """
        return self.layers(x.float())
# Memory-efficient dataset
class MemoryEfficientDataset(IterableDataset):
    """Stream (features, labels) mini-batches from a sparse feature matrix.

    Only the rows of the current batch are densified, via ``.toarray()``.

    Args:
        X: Row-sliceable matrix whose slices expose ``.toarray()``.
        y: Integer labels, one per row of ``X``.
        batch_size: Number of rows per yielded batch (the last may be shorter).
    """

    def __init__(self, X, y, batch_size):
        super().__init__()
        self.X = X
        self.y = torch.as_tensor(y, dtype=torch.long)  # Convert labels to long tensors
        self.batch_size = batch_size

    def __iter__(self):
        # Every DataLoader worker receives its own copy of an IterableDataset,
        # so without sharding each worker would replay all batches. Give
        # worker k every num_workers-th batch starting at batch k.
        worker = torch.utils.data.get_worker_info()
        if worker is None:
            first, step = 0, 1
        else:
            first, step = worker.id, worker.num_workers

        batch_starts = range(0, len(self.y), self.batch_size)
        for start in batch_starts[first::step]:
            stop = start + self.batch_size
            dense_rows = self.X[start:stop].toarray()
            yield torch.as_tensor(dense_rows, dtype=torch.float32), self.y[start:stop]
# Train Memory-Efficient Neural Network
X_train, X_test, y_train, y_test = train_test_split(
    contexts_encoded, emotions_target, test_size=0.2, random_state=42
)
input_size = X_train.shape[1]
hidden_size = 64
num_classes = len(emotion_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MemoryEfficientNN(input_size, hidden_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_dataset = MemoryEfficientDataset(X_train, y_train, batch_size=32)
train_loader = DataLoader(
    train_dataset,
    batch_size=None,  # the dataset already yields whole batches
    # Load in the main process: this loop runs at import time without a
    # __main__ guard, and worker processes would each receive their own copy
    # of the iterable dataset.
    num_workers=0,
    # Pinned host memory only helps host-to-GPU copies.
    pin_memory=torch.cuda.is_available(),
)
num_epochs = 100
model.train()
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device, non_blocking=True)
        batch_y = batch_y.to(device, non_blocking=True)
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    gc.collect()  # Garbage collection after each epoch
# Leave the network in inference mode so dropout is disabled for later calls.
model.eval()
# Random Forest classifier fitted on the training split (all cores via n_jobs=-1).
rf_model = RandomForestClassifier(
    n_estimators=50,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

# Isolation Forest fitted on the same training features; it must be fitted
# before its decision function is queried.
isolation_forest = IsolationForest(
    contamination=0.1,
    random_state=42,
    n_jobs=-1,
    max_samples='auto',
).fit(X_train)
# Enhanced Emotional States
# Each entry maps an emotion name to its current percentage share, a
# motivation label, and an intensity that starts at 0. Insertion order matters:
# it is the order in which the rest of the module iterates the emotions.
emotions = {
    name: {'percentage': share, 'motivation': drive, 'intensity': 0}
    for name, share, drive in (
        ('joy', 10, 'positive'),
        ('pleasure', 10, 'selfish'),
        ('sadness', 10, 'negative'),
        ('grief', 10, 'negative'),
        ('anger', 10, 'traumatic or strong'),
        ('calmness', 10, 'neutral'),
        ('determination', 10, 'positive'),
        ('resentment', 10, 'negative'),
        ('glory', 10, 'positive'),
        ('motivation', 10, 'positive'),
        ('ideal_state', 100, 'balanced'),
        ('fear', 10, 'defensive'),
        ('surprise', 10, 'unexpected'),
        ('anticipation', 10, 'predictive'),
        ('trust', 10, 'reliable'),
        ('disgust', 10, 'repulsive'),
        ('optimism', 10, 'hopeful'),
        ('pessimism', 10, 'doubtful'),
        ('boredom', 10, 'indifferent'),
        ('envy', 10, 'jealous'),
    )
}

# Target sum of all percentages that update_emotion() rebalances towards.
total_percentage = 200
# JSON file used to persist the interaction history.
emotion_history_file = 'emotion_history.json'
def load_historical_data(file_path=emotion_history_file):
    """Return the history stored as JSON in ``file_path``.

    Args:
        file_path: Path of the JSON history file.

    Returns:
        The decoded JSON content, or an empty list when the file does not
        exist or does not contain valid JSON (for example an empty file left
        behind by an interrupted write). The latter case is reported on
        stdout, because the next save will overwrite the unreadable file.
    """
    # Open directly instead of checking existence first: the file can vanish
    # between the check and the open.
    try:
        with open(file_path, 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        return []
    except json.JSONDecodeError as exc:
        print(f"Ignoring unreadable history file {file_path!r}: {exc}")
        return []
def save_historical_data(historical_data, file_path=emotion_history_file):
    """Write ``historical_data`` as JSON to ``file_path``, replacing its previous content."""
    with open(file_path, 'w') as sink:
        json.dump(historical_data, sink)
# History persisted by earlier runs, read once at import time from the
# default history file.
emotion_history = load_historical_data(file_path=emotion_history_file)
def update_emotion(emotion, percentage, intensity):
    """Shift ``percentage`` points from 'ideal_state' to ``emotion`` and set its intensity.

    After the shift, the 'ideal_state' percentage is adjusted so that all
    percentages in ``emotions`` sum to ``total_percentage``.

    Args:
        emotion: Key of the emotion in the module-level ``emotions`` table.
        percentage: Amount to add to the emotion (and take from 'ideal_state').
        intensity: New intensity value stored on the emotion.

    Raises:
        KeyError: If ``emotion`` is not a known emotion. Nothing is modified
            in that case.
    """
    # Resolve both entries first so an unknown name fails before any state
    # has been changed.
    target = emotions[emotion]
    ideal = emotions['ideal_state']

    ideal['percentage'] -= percentage
    target['percentage'] += percentage
    target['intensity'] = intensity

    # Let 'ideal_state' absorb whatever is needed to hit the configured total.
    total_current = sum(entry['percentage'] for entry in emotions.values())
    ideal['percentage'] += total_percentage - total_current
def normalize_context(context):
    """Return ``context`` lower-cased, with leading and trailing whitespace removed."""
    lowered = context.lower()
    return lowered.strip()
# Memory-efficient genetic algorithm for emotion evolution
def evolve_emotions():
    """Run a DEAP genetic algorithm and write the best individual into ``emotions``.

    Gene layout of an individual, in order:
      * one percentage per emotion other than 'ideal_state',
      * one intensity per emotion (including 'ideal_state'),
      * the 'ideal_state' percentage (initialized to 100).

    All three objectives are minimized: distance of the ideal-state gene from
    100, the sum of every other gene, and the spread of the intensities.

    Side effects:
        Overwrites the 'percentage' and 'intensity' fields of the module-level
        ``emotions`` table.
    """
    emotion_names = list(emotions)
    # 'ideal_state' has its own trailing gene, so it gets no slot among the
    # leading percentage genes.
    percentage_names = [name for name in emotion_names if name != 'ideal_state']
    n_percentages = len(percentage_names)
    n_intensities = len(emotion_names)

    def evaluate(individual):
        ideal_state = individual[-1]
        other_emotions = individual[:-1]
        intensities = individual[n_percentages:-1]
        return (abs(ideal_state - 100),
                sum(other_emotions),
                max(intensities) - min(intensities))

    # creator.create defines classes on the shared `creator` module; only do
    # it once so repeated calls do not redefine them.
    if not hasattr(creator, "FitnessMulti"):
        creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0, -1.0))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMulti)

    toolbox = base.Toolbox()
    toolbox.register("attr_float", random.uniform, 0, 20)
    toolbox.register("attr_intensity", random.uniform, 0, 10)
    toolbox.register("individual", tools.initCycle, creator.Individual,
                     (toolbox.attr_float,) * n_percentages +
                     (toolbox.attr_intensity,) * n_intensities +
                     (lambda: 100,), n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
    toolbox.register("select", tools.selNSGA2)
    toolbox.register("evaluate", evaluate)

    population = toolbox.population(n=100)
    algorithms.eaMuPlusLambda(population, toolbox, mu=50, lambda_=100, cxpb=0.7, mutpb=0.2, ngen=100,
                              stats=None, halloffame=None, verbose=False)

    best_individual = tools.selBest(population, k=1)[0]
    emotion_values = best_individual[:n_percentages]
    intensities = best_individual[n_percentages:-1]
    ideal_state = best_individual[-1]

    # Pair genes with names explicitly: there is one percentage gene fewer
    # than there are emotions, so indexing both by the same enumerate()
    # counter would run past the end of the percentage genes.
    for name, value in zip(percentage_names, emotion_values):
        emotions[name]['percentage'] = value
    for name, value in zip(emotion_names, intensities):
        emotions[name]['intensity'] = value
    emotions['ideal_state']['percentage'] = ideal_state
# Lazy loading for the language models
_distilgpt3_tokenizer = None
_distilgpt3_lm_model = None


def get_distilgpt3_model():
    """Return the cached ``(tokenizer, model)`` pair, loading both on first use.

    Both objects are (re)loaded whenever either cached value is still None.
    """
    global _distilgpt3_tokenizer, _distilgpt3_lm_model
    already_loaded = _distilgpt3_tokenizer is not None and _distilgpt3_lm_model is not None
    if already_loaded:
        return _distilgpt3_tokenizer, _distilgpt3_lm_model

    distilgpt3_model_name = 'distilgpt2'  # Replace with the fine-tuned DistilGPT-3 model name
    _distilgpt3_tokenizer = AutoTokenizer.from_pretrained(distilgpt3_model_name)
    _distilgpt3_lm_model = AutoModelForCausalLM.from_pretrained(
        distilgpt3_model_name,
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    return _distilgpt3_tokenizer, _distilgpt3_lm_model
_bloom_tokenizer = None
_bloom_lm_model = None


def get_bloom_model():
    """Return the cached BLOOM ``(tokenizer, model)`` pair, loading both on first use.

    Both objects are (re)loaded whenever either cached value is still None.
    """
    global _bloom_tokenizer, _bloom_lm_model
    already_loaded = _bloom_tokenizer is not None and _bloom_lm_model is not None
    if already_loaded:
        return _bloom_tokenizer, _bloom_lm_model

    bloom_model_name = 'bigscience/bloom-1b7'
    _bloom_tokenizer = AutoTokenizer.from_pretrained(bloom_model_name)
    _bloom_lm_model = AutoModelForCausalLM.from_pretrained(
        bloom_model_name,
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    return _bloom_tokenizer, _bloom_lm_model
def generate_text(prompt, max_length=100, model_type='distilgpt3'):
    """Sample one continuation of ``prompt`` from the selected language model.

    Args:
        prompt: Text to continue.
        max_length: Passed to ``generate`` as ``max_length``.
        model_type: 'distilgpt3' or 'bloom'; selects which lazily loaded
            tokenizer/model pair is used.

    Returns:
        The decoded first generated sequence, with special tokens skipped.

    Raises:
        ValueError: If ``model_type`` is neither 'distilgpt3' nor 'bloom'.
    """
    # Pick the loader first so an invalid model type fails before any model
    # is loaded.
    if model_type == 'distilgpt3':
        load_pair = get_distilgpt3_model
    elif model_type == 'bloom':
        load_pair = get_bloom_model
    else:
        raise ValueError("Invalid model type. Choose 'distilgpt3' or 'bloom'.")

    text_tokenizer, lm_model = load_pair()
    input_ids = text_tokenizer.encode(prompt, return_tensors='pt').to(lm_model.device)
    sampling_options = dict(
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
    )
    with torch.no_grad():
        output = lm_model.generate(input_ids, **sampling_options)
    return text_tokenizer.decode(output[0], skip_special_tokens=True)
# Sentiment-analysis pipeline built from a DistilBERT SST-2 checkpoint.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Keep the classifier under its own name: rebinding the module-level `model`
# here would clobber the emotion network trained earlier in this file.
sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_name)
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model, tokenizer=tokenizer)
def get_sentiment(text):
    """Return the first pipeline prediction for ``text`` as 'Sentiment: <label>, Score: <score>'.

    The score is formatted with four decimal places.
    """
    predictions = sentiment_pipeline(text)
    result = predictions[0]
    return f"Sentiment: {result['label']}, Score: {result['score']:.4f}"
def process_input(text):
    """Handle one UI request: classify the text, score it, and generate continuations.

    Args:
        text: Raw user input.

    Returns:
        A 4-tuple ``(predicted_emotion, sentiment_score, distilgpt3_text,
        bloom_text)``. ``predicted_emotion`` is the Random Forest prediction
        mapped to its emotion name; ``sentiment_score`` is the Isolation
        Forest decision-function value for the encoded input. Blank input
        yields four empty strings. On any failure the same error message is
        returned in all four positions so the UI can display it.

    Side effects:
        Appends the result to the JSON history file.
    """
    try:
        # The interface can call this with empty text; skip the expensive
        # model calls and do not record a history entry for it.
        if not text or not text.strip():
            return "", "", "", ""

        normalized_text = normalize_context(text)
        encoded_text = encoder.transform([[normalized_text]])

        rf_prediction = rf_model.predict(encoded_text)[0]
        # Plain Python types keep the history entry JSON-serializable.
        predicted_emotion = str(emotion_classes[rf_prediction])
        sentiment_score = float(isolation_forest.decision_function(encoded_text)[0])
        # NOTE(review): no neural-network forward pass is run here; its
        # prediction was never part of the returned result or the history.

        distilgpt3_generated_text = generate_text(normalized_text, model_type='distilgpt3')
        bloom_generated_text = generate_text(normalized_text, model_type='bloom')

        historical_data = load_historical_data()
        historical_data.append({
            'context': text,
            'predicted_emotion': predicted_emotion,
            'sentiment_score': sentiment_score,
            'distilgpt3_generated_text': distilgpt3_generated_text,
            'bloom_generated_text': bloom_generated_text
        })
        save_historical_data(historical_data)

        return predicted_emotion, sentiment_score, distilgpt3_generated_text, bloom_generated_text
    except Exception as e:
        # UI boundary: report the failure in every output box instead of
        # letting the request crash.
        error_message = f"An error occurred: {str(e)}"
        print(error_message)  # Logging the error
        return error_message, error_message, error_message, error_message
# Gradio UI: one text input, four text outputs in the order returned by
# process_input(); live=True re-runs the function as the input changes.
iface = gr.Interface(
    fn=process_input,
    inputs="text",
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Emotional Response",
            "Sentiment Response",
            "DistilGPT-3 Generated Text",
            "BLOOM Generated Text",
        )
    ],
    live=True,
)
iface.launch(share=True)