Spaces:

Sephfox
/

Cain

Running

App Files Files Community

Cain / app.py

Sephfox

Update app.py

faa570b verified 12 months ago

raw

history blame

13.1 kB

	import warnings
	import numpy as np
	import pandas as pd
	import os
	import json
	import random
	import gradio as gr
	import torch
	import torch.nn as nn
	import torch.optim as optim
	from torch.utils.data import DataLoader, IterableDataset
	from sklearn.ensemble import IsolationForest, RandomForestClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import OneHotEncoder
	from sklearn.neural_network import MLPClassifier
	from deap import base, creator, tools, algorithms
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, AutoModelForSequenceClassification
	import gc
	import multiprocessing as mp
	from joblib import Parallel, delayed

	# Silence FutureWarning messages whose originating module matches
	# 'huggingface_hub.file_download'; all other warnings are left untouched.
	warnings.filterwarnings('ignore', category=FutureWarning, module='huggingface_hub.file_download')

	# Example emotions dataset, written as (sentence, label) pairs so each
	# sentence sits next to its label.
	_labelled_examples = [
	    ('I am happy', 'joy'),
	    ('I am sad', 'sadness'),
	    ('I am angry', 'anger'),
	    ('I am excited', 'joy'),
	    ('I am calm', 'calmness'),
	    ('I am feeling joyful', 'joy'),
	    ('I am grieving', 'grief'),
	    ('I am feeling peaceful', 'calmness'),
	    ('I am frustrated', 'anger'),
	    ('I am determined', 'determination'),
	    ('I feel resentment', 'resentment'),
	    ('I am feeling glorious', 'glory'),
	    ('I am motivated', 'motivation'),
	    ('I am surprised', 'surprise'),
	    ('I am fearful', 'fear'),
	    ('I am trusting', 'trust'),
	    ('I feel disgust', 'disgust'),
	    ('I am optimistic', 'optimism'),
	    ('I am pessimistic', 'pessimism'),
	    ('I feel bored', 'boredom'),
	    ('I am envious', 'envy'),
	]

	# Column-oriented view of the pairs: two parallel lists keyed by column name.
	data = {
	    'context': [sentence for sentence, _ in _labelled_examples],
	    'emotion': [label for _, label in _labelled_examples],
	}
	df = pd.DataFrame(data)

	# Encoding the contexts using One-Hot Encoding (memory-efficient).
	# `sparse=True` is intentionally not passed: that keyword was renamed to
	# `sparse_output` in newer scikit-learn releases and later removed, while the
	# default already yields a sparse matrix in both old and new versions.
	encoder = OneHotEncoder(handle_unknown='ignore')

	# process_input() lower-cases and strips user text before calling
	# encoder.transform(), so the encoder has to be fitted on text normalized the
	# same way; otherwise no input ever matches a known category and, with
	# handle_unknown='ignore', every request encodes to an all-zero row.
	# A plain 2-D array (one column) is used so that fitting and the later
	# list-based transform() calls see the same kind of input.
	normalized_contexts = df['context'].str.lower().str.strip().to_numpy().reshape(-1, 1)
	contexts_encoded = encoder.fit_transform(normalized_contexts)

	# Encoding emotions: integer codes as targets, category labels for decoding.
	emotion_categorical = pd.Categorical(df['emotion'])
	emotions_target = emotion_categorical.codes
	emotion_classes = emotion_categorical.categories

	# Memory-efficient Neural Network with PyTorch
	class MemoryEfficientNN(nn.Module):
	    """Small feed-forward classifier over dense feature vectors.

	    The network maps a ``(batch, input_size)`` tensor to ``(batch, num_classes)``
	    logits through two hidden ReLU layers with dropout (p=0.2).

	    The first layer is ``nn.Linear`` rather than ``nn.Embedding``: an embedding
	    looks up one vector *per element* of its input, so feeding it a one-hot row
	    produced ``(batch, input_size, num_classes)`` logits, which neither
	    ``CrossEntropyLoss`` with ``(batch,)`` targets nor ``argmax(dim=1).item()``
	    can consume.

	    Args:
	        input_size: Number of input features per sample.
	        hidden_size: Width of both hidden layers.
	        num_classes: Number of output logits.
	    """

	    def __init__(self, input_size, hidden_size, num_classes):
	        super().__init__()
	        self.layers = nn.Sequential(
	            nn.Linear(input_size, hidden_size),
	            nn.ReLU(),
	            nn.Dropout(0.2),
	            nn.Linear(hidden_size, hidden_size),
	            nn.ReLU(),
	            nn.Dropout(0.2),
	            nn.Linear(hidden_size, num_classes),
	        )

	    def forward(self, x):
	        """Return class logits for ``x``.

	        ``x`` may be of any numeric dtype (callers in this file pass both float
	        and long tensors); it is cast to float before the linear layers.
	        """
	        return self.layers(x.float())

	# Memory-efficient dataset
	class MemoryEfficientDataset(IterableDataset):
	    """Iterable dataset that yields ready-made ``(features, labels)`` batches.

	    Only one batch of features is densified at a time, so a sparse feature
	    matrix never has to be fully expanded in memory.

	    Args:
	        X: Row-sliceable feature container. Either a sparse matrix exposing
	            ``toarray()`` (as produced by ``OneHotEncoder``) or a dense
	            array-like such as a NumPy array.
	        y: Integer class labels, one per row of ``X``.
	        batch_size: Number of rows per yielded batch (the final batch may be
	            shorter).

	    Raises:
	        ValueError: If ``batch_size`` is smaller than 1.
	    """

	    def __init__(self, X, y, batch_size):
	        if batch_size < 1:
	            raise ValueError("batch_size must be a positive integer")
	        self.X = X
	        # Explicit dtype: labels may arrive as small integer types, while
	        # CrossEntropyLoss requires int64 targets.
	        self.y = torch.as_tensor(y, dtype=torch.long)
	        self.batch_size = batch_size

	    def __iter__(self):
	        for start in range(0, len(self.y), self.batch_size):
	            stop = start + self.batch_size
	            X_batch = self.X[start:stop]
	            # Densify sparse slices only; dense inputs pass through unchanged.
	            if hasattr(X_batch, "toarray"):
	                X_batch = X_batch.toarray()
	            yield torch.as_tensor(X_batch, dtype=torch.float32), self.y[start:stop]

	# Train Memory-Efficient Neural Network
	# Hold out 20% of the encoded samples; random_state makes the split repeatable.
	X_train, X_test, y_train, y_test = train_test_split(
	    contexts_encoded,
	    emotions_target,
	    test_size=0.2,
	    random_state=42,
	)

	# Network dimensions: input width comes from the encoded feature matrix,
	# output width from the number of distinct emotion labels.
	hidden_size = 64
	num_classes = len(emotion_classes)
	input_size = X_train.shape[1]

	# Use the GPU when one is available, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	model = MemoryEfficientNN(input_size, hidden_size, num_classes)
	model = model.to(device)
	criterion = nn.CrossEntropyLoss()
	optimizer = optim.Adam(model.parameters(), lr=0.001)

	# The dataset already yields whole batches, so the DataLoader's own batching
	# is switched off with batch_size=None.
	train_dataset = MemoryEfficientDataset(X_train, y_train, batch_size=32)
	train_loader = DataLoader(train_dataset, batch_size=None)

	num_epochs = 100

	# Make sure training-only layers (Dropout) are active while fitting.
	model.train()
	for epoch in range(num_epochs):
	    for batch_X, batch_y in train_loader:
	        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

	        # Clear gradients left over from the previous step, then run the
	        # usual forward / backward / update cycle.
	        optimizer.zero_grad()
	        outputs = model(batch_X)
	        loss = criterion(outputs, batch_y)
	        loss.backward()
	        optimizer.step()
	    gc.collect()  # Garbage collection after each epoch

	# Switch to inference mode once training is done. Without this the Dropout
	# layers keep randomly zeroing activations, so predictions made later with
	# this network would be noisy and non-deterministic.
	model.eval()

	# Ensemble with Random Forest (memory-efficient)
	rf_model = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
	rf_model.fit(X_train, y_train)

	# Isolation Forest Anomaly Detection Model (memory-efficient)
	isolation_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1, max_samples='auto')
	isolation_forest.fit(X_train) # Fit the model before using it

	# Enhanced Emotional States
	# Each entry holds the emotion's current share ('percentage'), a descriptive
	# 'motivation' tag, and its current 'intensity'.
	emotions = {
	    'joy': {'percentage': 10, 'motivation': 'positive', 'intensity': 0},
	    'pleasure': {'percentage': 10, 'motivation': 'selfish', 'intensity': 0},
	    'sadness': {'percentage': 10, 'motivation': 'negative', 'intensity': 0},
	    'grief': {'percentage': 10, 'motivation': 'negative', 'intensity': 0},
	    'anger': {'percentage': 10, 'motivation': 'traumatic or strong', 'intensity': 0},
	    'calmness': {'percentage': 10, 'motivation': 'neutral', 'intensity': 0},
	    'determination': {'percentage': 10, 'motivation': 'positive', 'intensity': 0},
	    'resentment': {'percentage': 10, 'motivation': 'negative', 'intensity': 0},
	    'glory': {'percentage': 10, 'motivation': 'positive', 'intensity': 0},
	    'motivation': {'percentage': 10, 'motivation': 'positive', 'intensity': 0},
	    'ideal_state': {'percentage': 100, 'motivation': 'balanced', 'intensity': 0},
	    'fear': {'percentage': 10, 'motivation': 'defensive', 'intensity': 0},
	    'surprise': {'percentage': 10, 'motivation': 'unexpected', 'intensity': 0},
	    'anticipation': {'percentage': 10, 'motivation': 'predictive', 'intensity': 0},
	    'trust': {'percentage': 10, 'motivation': 'reliable', 'intensity': 0},
	    'disgust': {'percentage': 10, 'motivation': 'repulsive', 'intensity': 0},
	    'optimism': {'percentage': 10, 'motivation': 'hopeful', 'intensity': 0},
	    'pessimism': {'percentage': 10, 'motivation': 'doubtful', 'intensity': 0},
	    'boredom': {'percentage': 10, 'motivation': 'indifferent', 'intensity': 0},
	    'envy': {'percentage': 10, 'motivation': 'jealous', 'intensity': 0},
	}  # closing brace was missing, which made the whole module a syntax error

	# Total percentage budget that update_emotion() rebalances 'ideal_state'
	# against. The name was read there but never assigned anywhere in this file.
	# NOTE(review): the intended constant is not visible in the source; the sum of
	# the initial table is used so the starting state is already balanced.
	# Confirm against the original design.
	total_percentage = sum(state['percentage'] for state in emotions.values())

	emotion_history_file = 'emotion_history.json'

	def load_historical_data(file_path=emotion_history_file):
	    """Load the saved interaction history from a JSON file.

	    Args:
	        file_path: Path of the JSON history file.

	    Returns:
	        The list stored in the file, or an empty list when the file does not
	        exist, cannot be parsed as JSON, or does not contain a list.
	    """
	    # Open directly instead of checking os.path.exists() first: the file can
	    # disappear between the check and the open.
	    try:
	        with open(file_path, 'r') as file:
	            historical_data = json.load(file)
	    except FileNotFoundError:
	        return []
	    except json.JSONDecodeError as exc:
	        # A truncated or corrupt file should not make every later request fail.
	        print(f"Ignoring unreadable history file {file_path!r}: {exc}")
	        return []

	    # Callers append to the result, so anything other than a list is unusable.
	    if not isinstance(historical_data, list):
	        print(f"Ignoring history file {file_path!r}: expected a JSON list")
	        return []
	    return historical_data

	def save_historical_data(historical_data, file_path=emotion_history_file):
	    """Write the interaction history to a JSON file, replacing its contents.

	    Args:
	        historical_data: JSON-serializable history (a list of dicts in this
	            file's usage).
	        file_path: Path of the JSON history file.

	    Raises:
	        TypeError: If ``historical_data`` contains a value ``json`` cannot
	            serialize. The existing file is left untouched in that case.
	    """
	    # Serialize before opening: open(..., 'w') truncates immediately, so
	    # dumping straight into the file would wipe the stored history whenever
	    # serialization fails part-way through.
	    serialized = json.dumps(historical_data)
	    with open(file_path, 'w') as file:
	        file.write(serialized)

	# Read the stored history once when the module is executed, using the default
	# history file path of load_historical_data().
	emotion_history = load_historical_data()

	def update_emotion(emotion, percentage, intensity):
	    """Move ``percentage`` points from 'ideal_state' to ``emotion``.

	    Afterwards 'ideal_state' is adjusted so that all percentages in the
	    module-level ``emotions`` table add up to the module-level
	    ``total_percentage``, which must be defined before this is called.

	    Args:
	        emotion: Key of the entry in ``emotions`` to update.
	        percentage: Amount transferred from 'ideal_state' to ``emotion``.
	        intensity: New intensity stored for ``emotion``.

	    Raises:
	        KeyError: If ``emotion`` is not a known emotion. Nothing is modified
	            in that case.
	    """
	    # Resolve everything that can fail before touching shared state, so a bad
	    # call cannot leave 'ideal_state' decremented without a matching increment.
	    budget = total_percentage
	    if emotion not in emotions:
	        raise KeyError(emotion)

	    emotions['ideal_state']['percentage'] -= percentage
	    target = emotions[emotion]
	    target['percentage'] += percentage
	    target['intensity'] = intensity

	    # Absorb any drift from the budget into 'ideal_state'.
	    total_current = sum(e['percentage'] for e in emotions.values())
	    emotions['ideal_state']['percentage'] += budget - total_current

	def normalize_context(context):
	    """Return ``context`` lower-cased with surrounding whitespace removed."""
	    lowered = context.lower()
	    return lowered.strip()

	# Memory-efficient genetic algorithm for emotion evolution
	def evolve_emotions():
	    """Run a DEAP genetic search and write the best individual into ``emotions``.

	    Genome layout, in order:
	      * one percentage gene for every emotion except 'ideal_state',
	      * one intensity gene for every emotion ('ideal_state' included),
	      * a final gene for the 'ideal_state' percentage, initialised to 100.

	    Three objectives are minimised: the distance of the 'ideal_state' gene
	    from 100, the sum of the other emotions' percentages, and the spread
	    (max - min) of the intensities.

	    Returns nothing; the module-level ``emotions`` table is updated in place.
	    """
	    emotion_names = list(emotions)
	    other_names = [name for name in emotion_names if name != 'ideal_state']
	    n_percentages = len(other_names)
	    # Intensity genes sit directly after the percentage genes.
	    intensity_genes = slice(n_percentages, n_percentages + len(emotion_names))

	    def evaluate(individual):
	        ideal_state = individual[-1]
	        # Only the percentage genes: the previous slice ([:-1]) also summed
	        # the intensity genes into this objective.
	        other_emotions = individual[:n_percentages]
	        intensities = individual[intensity_genes]
	        return (abs(ideal_state - 100),
	                sum(other_emotions),
	                max(intensities) - min(intensities))

	    # creator.create() registers classes on the shared `deap.creator` module,
	    # so only create them the first time this function runs.
	    if not hasattr(creator, "FitnessMulti"):
	        creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0, -1.0))
	    if not hasattr(creator, "Individual"):
	        creator.create("Individual", list, fitness=creator.FitnessMulti)

	    toolbox = base.Toolbox()
	    toolbox.register("attr_float", random.uniform, 0, 20)
	    toolbox.register("attr_intensity", random.uniform, 0, 10)
	    toolbox.register("individual", tools.initCycle, creator.Individual,
	                     (toolbox.attr_float,) * n_percentages +
	                     (toolbox.attr_intensity,) * len(emotion_names) +
	                     (lambda: 100,), n=1)
	    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
	    toolbox.register("mate", tools.cxTwoPoint)
	    # NOTE(review): Gaussian mutation is unbounded, so genes (including
	    # percentages) can drift below zero; confirm whether clamping is wanted.
	    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
	    toolbox.register("select", tools.selNSGA2)
	    toolbox.register("evaluate", evaluate)

	    population = toolbox.population(n=100)
	    population, _ = algorithms.eaMuPlusLambda(
	        population, toolbox, mu=50, lambda_=100, cxpb=0.7, mutpb=0.2, ngen=100,
	        stats=None, halloffame=None, verbose=False)

	    best_individual = tools.selBest(population, k=1)[0]
	    emotion_values = best_individual[:n_percentages]
	    intensities = best_individual[intensity_genes]
	    ideal_state = best_individual[-1]

	    # Write genes back by name. The previous positional loop walked every
	    # emotion (including 'ideal_state') over a percentage list that is one
	    # element shorter, which misaligned the values and ended in an IndexError.
	    for name, value in zip(other_names, emotion_values):
	        emotions[name]['percentage'] = value
	    for name, value in zip(emotion_names, intensities):
	        emotions[name]['intensity'] = value

	    emotions['ideal_state']['percentage'] = ideal_state

	# Lazy loading for the language models
	# Module-level cache for the BLOOM tokenizer/model pair; filled on first use.
	_bloom_tokenizer = None
	_bloom_lm_model = None


	def get_bloom_model():
	    """Return the cached ``(tokenizer, model)`` pair for 'bigscience/bloom-1b7'.

	    Both objects are loaded whenever either cached value is still ``None``;
	    later calls return the cached pair.
	    """
	    global _bloom_tokenizer, _bloom_lm_model
	    bloom_ready = _bloom_tokenizer is not None and _bloom_lm_model is not None
	    if not bloom_ready:
	        checkpoint = 'bigscience/bloom-1b7'
	        _bloom_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	        _bloom_lm_model = AutoModelForCausalLM.from_pretrained(
	            checkpoint,
	            device_map="auto",
	            low_cpu_mem_usage=True,
	        )
	    return _bloom_tokenizer, _bloom_lm_model

	# Module-level cache for the GPT-2 tokenizer/model pair; filled on first use.
	_gpt_tokenizer = None
	_gpt_lm_model = None


	def get_gpt_model():
	    """Return the cached ``(tokenizer, model)`` pair for 'gpt2-medium'.

	    Both objects are loaded whenever either cached value is still ``None``;
	    later calls return the cached pair.
	    """
	    global _gpt_tokenizer, _gpt_lm_model
	    if _gpt_tokenizer is not None and _gpt_lm_model is not None:
	        return _gpt_tokenizer, _gpt_lm_model

	    checkpoint = 'gpt2-medium'
	    _gpt_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    _gpt_lm_model = AutoModelForCausalLM.from_pretrained(
	        checkpoint,
	        device_map="auto",
	        low_cpu_mem_usage=True,
	    )
	    return _gpt_tokenizer, _gpt_lm_model

	def generate_text(prompt, max_length=100, model_type='bloom'):
	    """Generate a sampled continuation of ``prompt`` with the chosen model.

	    Args:
	        prompt: Text to encode and pass to the model's ``generate`` call.
	        max_length: Value forwarded as ``max_length`` to ``generate``.
	        model_type: ``'bloom'`` or ``'gpt'``; selects which lazily loaded
	            tokenizer/model pair is used.

	    Returns:
	        The first generated sequence, decoded with special tokens skipped.

	    Raises:
	        ValueError: If ``model_type`` is neither ``'bloom'`` nor ``'gpt'``.
	    """
	    # Pick the tokenizer/model pair; both branches share the code below.
	    if model_type == 'bloom':
	        text_tokenizer, language_model = get_bloom_model()
	    elif model_type == 'gpt':
	        text_tokenizer, language_model = get_gpt_model()
	    else:
	        raise ValueError("Invalid model type. Choose 'bloom' or 'gpt'.")

	    sampling_options = {
	        'max_length': max_length,
	        'num_return_sequences': 1,
	        'no_repeat_ngram_size': 2,
	        'do_sample': True,
	        'top_k': 50,
	        'top_p': 0.95,
	        'temperature': 0.7,
	    }

	    prompt_ids = text_tokenizer.encode(prompt, return_tensors='pt')
	    prompt_ids = prompt_ids.to(language_model.device)
	    with torch.no_grad():
	        sequences = language_model.generate(prompt_ids, **sampling_options)

	    return text_tokenizer.decode(sequences[0], skip_special_tokens=True)

	# Sentiment-analysis pipeline built on a DistilBERT SST-2 checkpoint.
	model_name = "distilbert-base-uncased-finetuned-sst-2-english"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	# The classifier gets its own name. Assigning it to `model` replaced the
	# PyTorch emotion network trained earlier in this file, so process_input()
	# ended up calling the sentiment classifier on one-hot vectors.
	sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_name)
	sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model, tokenizer=tokenizer)
	def get_sentiment(text):
	    """Return a one-line summary of the sentiment pipeline's first result.

	    The summary contains the result's ``'label'`` and its ``'score'``
	    formatted to four decimal places.
	    """
	    top_result = sentiment_pipeline(text)[0]
	    label = top_result['label']
	    score = top_result['score']
	    return f"Sentiment: {label}, Score: {score:.4f}"

	def process_input(text):
	    """Handle one request from the Gradio interface.

	    The text is normalized and one-hot encoded, classified by the random
	    forest, scored by the isolation forest, and used as the prompt for both
	    language models. The outcome is appended to the JSON history file.

	    Args:
	        text: Raw user input.

	    Returns:
	        A 4-tuple ``(predicted_emotion, sentiment_score, bloom_text, gpt_text)``.
	        For empty or missing input all four values are empty strings. If
	        anything fails, all four values are the same error message.

	    NOTE(review): ``sentiment_score`` is the isolation forest's
	    ``decision_function`` value, not the output of ``get_sentiment``, although
	    the interface labels it "Sentiment Response". Confirm which is intended.
	    """
	    # Nothing to analyse: skip the expensive text generation and do not
	    # record an empty entry in the history.
	    if text is None or (isinstance(text, str) and not text.strip()):
	        return "", "", "", ""

	    try:
	        normalized_text = normalize_context(text)
	        encoded_text = encoder.transform([[normalized_text]])

	        rf_prediction = rf_model.predict(encoded_text)[0]
	        isolation_score = isolation_forest.decision_function(encoded_text)[0]
	        # The original also ran the PyTorch network here but discarded the
	        # result; that dead call is omitted so it cannot fail the request.

	        # Convert library scalars to plain Python types for JSON and the UI.
	        predicted_emotion = str(emotion_classes[rf_prediction])
	        sentiment_score = float(isolation_score)
	        bloom_generated_text = generate_text(normalized_text, model_type='bloom')
	        gpt_generated_text = generate_text(normalized_text, model_type='gpt')

	        historical_data = load_historical_data()
	        historical_data.append({
	            'context': text,
	            'predicted_emotion': predicted_emotion,
	            'sentiment_score': sentiment_score,
	            'bloom_generated_text': bloom_generated_text,
	            'gpt_generated_text': gpt_generated_text
	        })
	        save_historical_data(historical_data)

	        return predicted_emotion, sentiment_score, bloom_generated_text, gpt_generated_text

	    except Exception as e:
	        # Top-level request boundary: report the failure in the UI instead of
	        # crashing the handler.
	        error_message = f"An error occurred: {str(e)}"
	        print(error_message)  # Logging the error
	        return error_message, error_message, error_message, error_message

	# Gradio front end: one text input, four text outputs fed by process_input().
	# live=True is deliberately not set. In live mode the handler re-runs on every
	# change to the input box, and each run performs two language-model
	# generations and a history-file write, so partially typed text would queue up
	# expensive requests and fill the history with fragments. Without it the
	# handler runs once per explicit submit.
	iface = gr.Interface(
	    fn=process_input,
	    inputs="text",
	    outputs=[
	        gr.Textbox(label="Emotional Response"),
	        gr.Textbox(label="Sentiment Response"),
	        gr.Textbox(label="BLOOM Generated Text"),
	        gr.Textbox(label="GPT Generated Text"),
	    ],
	)

	iface.launch(share=True)