# Cain / app.py
# Last commit by Sephfox: "Update app.py" (c4d75ea, verified) -- 14.2 kB
import warnings
import numpy as np
import pandas as pd
import os
import json
import random
import gradio as gr
import torch
from sklearn.preprocessing import OneHotEncoder
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForCausalLM, pipeline
from deap import base, creator, tools, algorithms
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns
import ssl
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
from gensim import corpora
from gensim.models import LdaModel
from gensim.utils import simple_preprocess
from neuralcoref import NeuralCoref
# --- NLTK resources ---------------------------------------------------------
# When this Python build exposes ssl._create_unverified_context, install it as
# the default HTTPS context factory before downloading the NLTK data below.
# NOTE(review): this presumably turns off certificate verification for every
# default-context HTTPS request in the process -- confirm that is acceptable.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Corpora and models needed by the NLTK-based helpers, fetched quietly.
for _resource in ('words', 'vader_lexicon', 'punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker'):
    nltk.download(_resource, quiet=True)

# Additional directory in which NLTK should look for its data.
nltk.data.path.append('/home/user/nltk_data')

# Silence FutureWarning noise coming from huggingface_hub's download module.
warnings.filterwarnings('ignore', category=FutureWarning, module='huggingface_hub.file_download')

# --- spaCy pipeline ---------------------------------------------------------
# English spaCy model with the NeuralCoref component registered under the
# name 'neuralcoref'.
nlp = en_core_web_sm.load()
coref = NeuralCoref(nlp.vocab)
nlp.add_pipe(coref, name='neuralcoref')
# Example dataset for emotion prediction: data['context'][i] is a sentence and
# data['emotion'][i] is the label attached to it.
data = {
    'context': [
        'I am overjoyed',
        'I am deeply saddened',
        'I am seething with rage',
        'I am exhilarated',
        'I am tranquil',
        'I am brimming with joy',
        'I am grieving profoundly',
        'I am at peace',
        'I am frustrated beyond measure',
        'I am determined to succeed',
        'I feel resentment burning within me',
        'I am feeling glorious and triumphant',
        'I am motivated and inspired',
        'I am utterly surprised',
        'I am gripped by fear',
        'I am trusting and open',
        'I feel a sense of disgust',
        'I am optimistic and hopeful',
        'I am pessimistic and gloomy',
        'I feel bored and listless',
        'I am envious and jealous',
    ],
    'emotion': [
        'joy',
        'sadness',
        'anger',
        'joy',
        'calmness',
        'joy',
        'grief',
        'calmness',
        'anger',
        'determination',
        'resentment',
        'glory',
        'motivation',
        'surprise',
        'fear',
        'trust',
        'disgust',
        'optimism',
        'pessimism',
        'boredom',
        'envy',
    ],
}
df = pd.DataFrame(data)

# One-hot encode the context sentences into a sparse matrix.  The encoder is
# first built with the `sparse_output` keyword; if that keyword is rejected
# with a TypeError, the `sparse` keyword is used instead.
try:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=True)
except TypeError:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse=True)
contexts_encoded = encoder.fit_transform(df[['context']])

# Integer code per row for the emotion label, plus the label for each code.
_emotion_categorical = pd.Categorical(df['emotion'])
emotions_target = _emotion_categorical.codes
emotion_classes = _emotion_categorical.categories
# Module-level slots for the pre-trained models.  They start out empty and are
# populated by load_models() the first time a model is needed.
# Emotion classifier and its tokenizer:
emotion_prediction_model = emotion_prediction_tokenizer = None
# Causal language model used for response generation, and its tokenizer:
response_model = response_tokenizer = None
def load_models():
    """Populate the module-level model/tokenizer globals on first use.

    Does nothing when both ``emotion_prediction_model`` and ``response_model``
    are already set.  Otherwise it (re)loads the emotion classifier and the
    response language model together with their tokenizers.
    """
    global emotion_prediction_model, emotion_prediction_tokenizer, response_model, response_tokenizer

    already_loaded = emotion_prediction_model is not None and response_model is not None
    if already_loaded:
        return

    classifier_checkpoint = "bhadresh-savani/distilbert-base-uncased-emotion"
    emotion_prediction_model = AutoModelForSequenceClassification.from_pretrained(classifier_checkpoint)
    emotion_prediction_tokenizer = AutoTokenizer.from_pretrained(classifier_checkpoint)

    response_model_name = "gpt2-xl"
    response_tokenizer = AutoTokenizer.from_pretrained(response_model_name)
    response_model = AutoModelForCausalLM.from_pretrained(response_model_name)
    # The response tokenizer is later called with padding=True, so give it a
    # pad token by reusing its end-of-sequence token.
    response_tokenizer.pad_token = response_tokenizer.eos_token
# Emotional state of the assistant: for every emotion a share in percent, a
# short description of the motivation behind it, and an intensity value.
emotions = {
    name: {'percentage': share, 'motivation': motivation, 'intensity': strength}
    for name, share, motivation, strength in (
        ('joy', 20, 'positive and uplifting', 8),
        ('sadness', 15, 'reflective and introspective', 6),
        ('anger', 15, 'passionate and driven', 7),
        ('fear', 10, 'cautious and protective', 5),
        ('love', 15, 'affectionate and caring', 7),
        ('surprise', 10, 'curious and intrigued', 6),
        ('neutral', 15, 'balanced and composed', 4),
    )
}

# The shares above add up to this total.
total_percentage = 100

# JSON file used to persist the emotion history between runs.
emotion_history_file = 'emotion_history.json'

# Running log of {'user': ..., 'response': ...} exchanges, capped at
# max_history_length entries by interactive_interface().
conversation_history = []
max_history_length = 1000
def load_historical_data(file_path=emotion_history_file):
    """Return the JSON content stored at *file_path*.

    Args:
        file_path: Path of the history file; defaults to the module-level
            ``emotion_history_file``.

    Returns:
        The decoded JSON document, or an empty list when the path does not
        exist.
    """
    if not os.path.exists(file_path):
        return []
    with open(file_path, 'r') as handle:
        return json.load(handle)
def save_historical_data(historical_data, file_path=emotion_history_file):
    """Write *historical_data* to *file_path* as JSON, replacing its content.

    Args:
        historical_data: JSON-serialisable object to persist.
        file_path: Destination file; defaults to the module-level
            ``emotion_history_file``.
    """
    with open(file_path, 'w') as handle:
        json.dump(historical_data, handle)
# Emotion history read from the default history file once, at import time
# (an empty list when that file does not exist).
emotion_history = load_historical_data()
def update_emotion(emotion, percentage, intensity):
    """Adjust one emotion in ``emotions`` and renormalise all shares.

    Args:
        emotion: Key of the entry in ``emotions`` to modify.
        percentage: Amount added to that entry's current percentage.
        intensity: New intensity value stored for that entry.

    After the update every percentage is rescaled so that the shares sum to
    100 again.
    """
    target = emotions[emotion]
    target['percentage'] += percentage
    target['intensity'] = intensity

    # Rescale every share relative to the new overall total.
    total = sum(state['percentage'] for state in emotions.values())
    for state in emotions.values():
        state['percentage'] = (state['percentage'] / total) * 100
def normalize_context(context):
    """Return *context* lower-cased, with surrounding whitespace removed."""
    lowered = context.lower()
    return lowered.strip()
# DEAP types for the emotion-evolution search.
# FitnessMulti has three objectives matching the 3-tuple returned by
# evaluate(); all weights are negative (DEAP treats negative weights as
# "minimise"), with decreasing magnitude from the first to the third.
creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -0.5, -0.2))
# An Individual is a plain list of floats carrying a FitnessMulti fitness.
creator.create("Individual", list, fitness=creator.FitnessMulti)
def evaluate(individual):
    """Score a candidate genome for the emotion-evolution search.

    The first ``len(emotions)`` genes are read as emotion percentages and the
    remaining genes as intensities.

    Returns:
        A 3-tuple ``(total_diff, intensity_range, emotion_balance)``:
        how far the percentages are from summing to 100, the spread between
        the largest and smallest intensity, and the spread between the
        largest and smallest percentage.
    """
    split = len(emotions)
    percentages, strengths = individual[:split], individual[split:]
    return (
        abs(100 - sum(percentages)),
        max(strengths) - min(strengths),
        max(percentages) - min(percentages),
    )
def evolve_emotions():
    """Search for a balanced emotional state with a genetic algorithm.

    A genome holds ``len(emotions)`` percentage genes (drawn from 0-100)
    followed by ``len(emotions)`` intensity genes (drawn from 0-10).  A
    population of 100 genomes is evolved for 50 generations with DEAP's
    (mu + lambda) strategy (mu=50, lambda=100, two-point crossover with
    probability 0.7, Gaussian mutation with probability 0.2, NSGA-II
    selection), scored by ``evaluate``.

    Returns:
        A tuple ``(emotion_values, intensities)`` taken from the best genome
        of the final population.  The module-level ``emotions`` table is not
        modified by this function.
    """
    n_emotions = len(emotions)

    toolbox = base.Toolbox()
    toolbox.register("attr_float", random.uniform, 0, 100)
    toolbox.register("attr_intensity", random.uniform, 0, 10)
    # One pass (n=1) over the generator tuple yields one gene per emotion
    # percentage followed by one gene per emotion intensity.
    toolbox.register("individual", tools.initCycle, creator.Individual,
                     (toolbox.attr_float,) * n_emotions +
                     (toolbox.attr_intensity,) * n_emotions, n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
    toolbox.register("select", tools.selNSGA2)
    toolbox.register("evaluate", evaluate)

    population = toolbox.population(n=100)
    algorithms.eaMuPlusLambda(population, toolbox, mu=50, lambda_=100, cxpb=0.7, mutpb=0.2, ngen=50,
                              stats=None, halloffame=None, verbose=False)

    best_individual = tools.selBest(population, k=1)[0]
    emotion_values = best_individual[:n_emotions]
    intensities = best_individual[n_emotions:]
    return emotion_values, intensities
def predict_emotion(context):
    """Classify *context* into one of six emotion labels.

    Args:
        context: Text to classify; it is truncated to 512 tokens.

    Returns:
        The label ("sadness", "joy", "love", "anger", "fear" or "surprise")
        whose class index has the highest probability.
    """
    load_models()
    # Label for each class index produced by the classifier.
    emotion_labels = ["sadness", "joy", "love", "anger", "fear", "surprise"]

    encoded = emotion_prediction_tokenizer(context, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = emotion_prediction_model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        best_index = torch.argmax(probabilities, dim=-1).item()
    return emotion_labels[best_index]
def sentiment_analysis(text):
    """Return the NLTK SentimentIntensityAnalyzer polarity scores for *text*."""
    analyzer = SentimentIntensityAnalyzer()
    return analyzer.polarity_scores(text)
def extract_entities(text):
    """Run the spaCy pipeline on *text* and collect linguistic annotations.

    Returns:
        A dict with the keys
        ``named_entities`` (``(text, label)`` pairs),
        ``noun_phrases`` (noun-chunk texts),
        ``key_phrases`` (top 5 TextRank key terms, lemma-normalised),
        ``dependencies`` (``(token, relation, head)`` triples) and
        ``pos_tags`` (``(token, part-of-speech)`` pairs).
    """
    parsed = nlp(text)

    # textacy is only needed for key-term extraction, so it is imported here.
    from textacy.extract import keyterms as kt

    return {
        "named_entities": [(entity.text, entity.label_) for entity in parsed.ents],
        "noun_phrases": [phrase.text for phrase in parsed.noun_chunks],
        "key_phrases": kt.textrank(parsed, normalize="lemma", topn=5),
        "dependencies": [(tok.text, tok.dep_, tok.head.text) for tok in parsed],
        "pos_tags": [(tok.text, tok.pos_) for tok in parsed],
    }
def analyze_context(text):
    """Resolve coreferences in *text* and extract LDA topics from the result.

    Args:
        text: Raw user input.

    Returns:
        A dict with ``resolved_text`` (the text after coreference resolution)
        and ``topics`` (the output of ``LdaModel.print_topics()`` for a
        3-topic model fitted on the resolved text, or an empty list when the
        text contains no usable tokens).
    """
    doc = nlp(text)
    # Coreference resolution
    resolved_text = doc._.coref_resolved

    # Topic modeling
    processed_text = simple_preprocess(resolved_text)
    if processed_text:
        dictionary = corpora.Dictionary([processed_text])
        corpus = [dictionary.doc2bow(processed_text)]
        lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=3, random_state=42)
        topics = lda_model.print_topics()
    else:
        # simple_preprocess() can return no tokens (empty or very short
        # input); LdaModel refuses to train on an empty vocabulary, so
        # report "no topics" instead of raising.
        topics = []

    return {
        "resolved_text": resolved_text,
        "topics": topics
    }
def analyze_text_complexity(text):
    """Compute simple size and sentiment statistics for *text* with TextBlob.

    Returns:
        A dict with ``word_count``, ``sentence_count``,
        ``average_sentence_length`` (words per sentence, 0 when there are no
        sentences), ``polarity`` and ``subjectivity``.
    """
    blob = TextBlob(text)
    word_total = len(blob.words)
    sentence_total = len(blob.sentences)

    if sentence_total > 0:
        words_per_sentence = word_total / sentence_total
    else:
        words_per_sentence = 0

    return {
        'word_count': word_total,
        'sentence_count': sentence_total,
        'average_sentence_length': words_per_sentence,
        'polarity': blob.sentiment.polarity,
        'subjectivity': blob.sentiment.subjectivity
    }
def get_ai_emotion(input_text):
    """Return the assistant's emotion for *input_text* with its current stats.

    The emotion is the label predicted for the input; its percentage and
    intensity are looked up in the module-level ``emotions`` table.

    Returns:
        A tuple ``(ai_emotion, ai_emotion_percentage, ai_emotion_intensity)``.
    """
    ai_emotion = predict_emotion(input_text)
    state = emotions[ai_emotion]
    return ai_emotion, state['percentage'], state['intensity']
def generate_emotion_visualization(ai_emotion, ai_emotion_percentage, ai_emotion_intensity):
    """Render the current ``emotions`` shares as a bar chart image.

    Args:
        ai_emotion: Name of the active emotion, shown in the chart title.
        ai_emotion_percentage: Share of the active emotion, shown in the title.
        ai_emotion_intensity: Accepted for interface compatibility; not used
            in the chart.

    Returns:
        The path of the written PNG (``'emotional_state.png'``), or ``None``
        when the chart could not be produced.
    """
    emotion_visualization_path = 'emotional_state.png'
    figure = None
    try:
        figure = plt.figure(figsize=(8, 6))
        emotions_df = pd.DataFrame([(e, d['percentage'], d['intensity']) for e, d in emotions.items()],
                                   columns=['emotion', 'percentage', 'intensity'])
        sns.barplot(x='emotion', y='percentage', data=emotions_df)
        plt.title(f'Current Emotional State: {ai_emotion.capitalize()} ({ai_emotion_percentage:.2f}%)')
        plt.xlabel('Emotion')
        plt.ylabel('Percentage')
        plt.xticks(rotation=90)
        plt.savefig(emotion_visualization_path)
    except Exception as e:
        # Best effort: a failed chart must not break the whole response.
        print(f"Error generating emotion visualization: {e}")
        emotion_visualization_path = None
    finally:
        # Always release the figure, including on failure, so repeated
        # errors do not accumulate open figures.
        if figure is not None:
            plt.close(figure)
    return emotion_visualization_path
def generate_response(ai_emotion, input_text, entities, context_analysis):
    """Generate a reply to *input_text* with the response language model.

    Args:
        ai_emotion: Emotion label the reply should reflect; it also selects
            the sampling temperature (0.9 for 'anger', 0.5 for 'joy', 0.7
            otherwise).
        input_text: The user's message.
        entities: Mapping with a ``'named_entities'`` entry, quoted in the
            prompt.
        context_analysis: Mapping with a ``'topics'`` entry, quoted in the
            prompt.

    Returns:
        The generated continuation, stripped of surrounding whitespace.  The
        prompt itself is not part of the returned text.
    """
    load_models()

    prompt = (
        f"As an AI assistant, I am currently feeling {ai_emotion}. My response will reflect this emotional state. "
        f"The input text contains the following entities: {entities['named_entities']}. "
        f"The main topics are: {context_analysis['topics']}. "
        f"Considering this context, here's my response to '{input_text}': "
    )

    # Budget for newly generated tokens.  The prompt is truncated so that
    # prompt + reply always fit into the model's context window; a fixed total
    # length would leave no room to generate once the prompt grows long.
    max_new_tokens = 400
    context_window = getattr(response_model.config, "max_position_embeddings", 1024)
    prompt_budget = max(1, context_window - max_new_tokens)

    inputs = response_tokenizer(prompt, return_tensors="pt", padding=True, truncation=True,
                                max_length=prompt_budget)

    temperature = {'anger': 0.9, 'joy': 0.5}.get(ai_emotion, 0.7)

    with torch.no_grad():
        response_ids = response_model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=temperature,
            pad_token_id=response_tokenizer.eos_token_id
        )

    # The generated sequence starts with the prompt tokens; keep only what the
    # model produced after them.
    prompt_length = inputs.input_ids.shape[-1]
    response = response_tokenizer.decode(response_ids[0][prompt_length:], skip_special_tokens=True)
    return response.strip()
def interactive_interface(input_text):
    """Run the full analysis pipeline on *input_text* and produce a reply.

    The exchange is appended to ``conversation_history``, which is kept at
    no more than ``max_history_length`` entries by dropping the oldest one.

    Returns:
        A dict with the keys ``emotion``, ``sentiment``, ``entities``,
        ``context_analysis``, ``text_complexity``, ``ai_emotion``,
        ``ai_emotion_percentage``, ``ai_emotion_intensity``,
        ``emotion_visualization`` (image path or ``None``) and ``response``.
        ``emotion`` and ``ai_emotion`` carry the same predicted label.
    """
    # get_ai_emotion() already classifies the input, and the label it returns
    # is the predicted emotion, so the classifier runs only once per request.
    ai_emotion, ai_emotion_percentage, ai_emotion_intensity = get_ai_emotion(input_text)
    predicted_emotion = ai_emotion

    sentiment_scores = sentiment_analysis(input_text)
    text_complexity = analyze_text_complexity(input_text)
    emotion_visualization = generate_emotion_visualization(ai_emotion, ai_emotion_percentage, ai_emotion_intensity)
    entities = extract_entities(input_text)
    context_analysis = analyze_context(input_text)
    response = generate_response(ai_emotion, input_text, entities, context_analysis)

    conversation_history.append({'user': input_text, 'response': response})
    if len(conversation_history) > max_history_length:
        conversation_history.pop(0)

    return {
        "emotion": predicted_emotion,
        "sentiment": sentiment_scores,
        "entities": entities,
        "context_analysis": context_analysis,
        "text_complexity": text_complexity,
        "ai_emotion": ai_emotion,
        "ai_emotion_percentage": ai_emotion_percentage,
        "ai_emotion_intensity": ai_emotion_intensity,
        "emotion_visualization": emotion_visualization,
        "response": response
    }
# Gradio interface
def gradio_interface(input_text):
    """Adapt ``interactive_interface`` to the two outputs of the Gradio UI.

    Returns:
        A tuple ``(report, image_path)``: a multi-line text summary of the
        analysis and the AI response, and the emotion chart path (or ``None``).
    """
    result = interactive_interface(input_text)
    report_lines = [
        f"Predicted Emotion: {result['emotion']}",
        f"Sentiment: {result['sentiment']}",
        f"AI Emotion: {result['ai_emotion']} ({result['ai_emotion_percentage']:.2f}%, Intensity: {result['ai_emotion_intensity']:.2f})",
        f"Entities: {result['entities']}",
        f"Context Analysis: {result['context_analysis']}",
        f"Text Complexity: {result['text_complexity']}",
        f"AI Response: {result['response']}",
    ]
    return "\n".join(report_lines), result['emotion_visualization']
# Web UI: one text box in; the text report and the emotion chart (passed as a
# file path) out.
iface = gr.Interface(
    gradio_interface,
    "text",
    ["text", gr.Image(type="filepath")],
    title="Enhanced AI Assistant",
    description="Enter your text to interact with the AI assistant.",
)

# Start the UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()