# Captured from a Hugging Face Spaces page (header text read "Spaces: Sleeping"); not part of the program.
import warnings | |
import numpy as np | |
import pandas as pd | |
import os | |
import json | |
import random | |
import gradio as gr | |
import torch | |
from sklearn.preprocessing import OneHotEncoder | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForCausalLM, pipeline | |
from deap import base, creator, tools, algorithms | |
import nltk | |
from nltk.sentiment import SentimentIntensityAnalyzer | |
from nltk.tokenize import word_tokenize | |
from nltk.tag import pos_tag | |
from nltk.chunk import ne_chunk | |
from textblob import TextBlob | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import ssl | |
import spacy | |
from spacy import displacy | |
from collections import Counter | |
import en_core_web_sm | |
from gensim import corpora | |
from gensim.models import LdaModel | |
from gensim.utils import simple_preprocess | |
from neuralcoref import NeuralCoref | |
# ---------------------------------------------------------------------------
# NLTK resources
# ---------------------------------------------------------------------------
# Swap the default HTTPS context factory for the unverified one when this
# Python build exposes it (presumably so the nltk.download() calls below
# succeed on hosts where certificate verification fails).
# NOTE(review): this replaces ssl._create_default_https_context for the whole
# process, not only for NLTK -- confirm that is acceptable.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # No unverified-context hook available; keep the default behaviour.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Fetch the corpora / models used by the NLTK imports above.
nltk.download('words', quiet=True)
nltk.download('vader_lexicon', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('maxent_ne_chunker', quiet=True)

# Additional directory NLTK searches for its data.
nltk.data.path.append('/home/user/nltk_data')

# Silence FutureWarning noise emitted from huggingface_hub.file_download.
warnings.filterwarnings(
    'ignore',
    category=FutureWarning,
    module='huggingface_hub.file_download',
)

# ---------------------------------------------------------------------------
# spaCy pipeline
# ---------------------------------------------------------------------------
# Small English model, extended with a NeuralCoref component registered under
# the name 'neuralcoref'.
nlp = en_core_web_sm.load()
coref = NeuralCoref(nlp.vocab)
nlp.add_pipe(coref, name='neuralcoref')
# Example dataset for emotion prediction: 'context' and 'emotion' are parallel
# lists (entry i of one belongs to entry i of the other).
data = {
    'context': [
        'I am overjoyed',
        'I am deeply saddened',
        'I am seething with rage',
        'I am exhilarated',
        'I am tranquil',
        'I am brimming with joy',
        'I am grieving profoundly',
        'I am at peace',
        'I am frustrated beyond measure',
        'I am determined to succeed',
        'I feel resentment burning within me',
        'I am feeling glorious and triumphant',
        'I am motivated and inspired',
        'I am utterly surprised',
        'I am gripped by fear',
        'I am trusting and open',
        'I feel a sense of disgust',
        'I am optimistic and hopeful',
        'I am pessimistic and gloomy',
        'I feel bored and listless',
        'I am envious and jealous',
    ],
    'emotion': [
        'joy',
        'sadness',
        'anger',
        'joy',
        'calmness',
        'joy',
        'grief',
        'calmness',
        'anger',
        'determination',
        'resentment',
        'glory',
        'motivation',
        'surprise',
        'fear',
        'trust',
        'disgust',
        'optimism',
        'pessimism',
        'boredom',
        'envy',
    ],
}

# Tabular view of the examples: one row per (context, emotion) pair.
df = pd.DataFrame(data)
# One-hot encode the context strings, keeping the result sparse.
# The `sparse_output` keyword is tried first; if the installed OneHotEncoder
# rejects it with TypeError, fall back to the `sparse` keyword.
try:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=True)
except TypeError:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse=True)

contexts_encoded = encoder.fit_transform(df[['context']])

# Integer-encode the emotion labels: `emotions_target` holds one code per row,
# `emotion_classes` holds the label each code stands for.
emotions_target = pd.Categorical(df['emotion']).codes
emotion_classes = pd.Categorical(df['emotion']).categories
# Placeholders for the lazily loaded models; load_models() replaces the None
# values on first use.
# Emotion classifier (sequence classification) and its tokenizer.
emotion_prediction_model = emotion_prediction_tokenizer = None
# Causal language model used for response generation, and its tokenizer.
response_model = response_tokenizer = None
def load_models():
    """Populate the module-level model and tokenizer globals on first use.

    Does nothing when both ``emotion_prediction_model`` and ``response_model``
    are already set. Otherwise (either one still ``None``) all four globals
    are (re)loaded: the emotion classifier with its tokenizer, then the
    response tokenizer and causal language model. The response tokenizer's
    pad token is set to its EOS token.
    """
    global emotion_prediction_model, emotion_prediction_tokenizer
    global response_model, response_tokenizer

    if emotion_prediction_model is not None and response_model is not None:
        return

    emotion_checkpoint = "bhadresh-savani/distilbert-base-uncased-emotion"
    emotion_prediction_model = AutoModelForSequenceClassification.from_pretrained(emotion_checkpoint)
    emotion_prediction_tokenizer = AutoTokenizer.from_pretrained(emotion_checkpoint)

    response_model_name = "gpt2-xl"
    response_tokenizer = AutoTokenizer.from_pretrained(response_model_name)
    response_model = AutoModelForCausalLM.from_pretrained(response_model_name)
    # Reuse EOS as the padding token so the tokenizer can be called with padding=True.
    response_tokenizer.pad_token = response_tokenizer.eos_token
# Emotional state table. For each emotion:
#   percentage -- its share of the overall state (the initial shares sum to 100)
#   motivation -- short textual description attached to the emotion
#   intensity  -- numeric intensity value
emotions = {
    'joy': dict(percentage=20, motivation='positive and uplifting', intensity=8),
    'sadness': dict(percentage=15, motivation='reflective and introspective', intensity=6),
    'anger': dict(percentage=15, motivation='passionate and driven', intensity=7),
    'fear': dict(percentage=10, motivation='cautious and protective', intensity=5),
    'love': dict(percentage=15, motivation='affectionate and caring', intensity=7),
    'surprise': dict(percentage=10, motivation='curious and intrigued', intensity=6),
    'neutral': dict(percentage=15, motivation='balanced and composed', intensity=4),
}
total_percentage = 100

# JSON file used to persist the emotion history.
emotion_history_file = 'emotion_history.json'

# In-memory conversation log and the maximum number of entries it may hold.
conversation_history = []
max_history_length = 1000
def load_historical_data(file_path=emotion_history_file):
    """Load the persisted emotion history from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or an empty list when the file does not
        exist or does not contain valid JSON.
    """
    try:
        # Open directly instead of checking os.path.exists() first: one
        # filesystem access, and no window in which the file can disappear
        # between the check and the open.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing has been saved yet.
        return []
    except json.JSONDecodeError as exc:
        # An empty or truncated file (e.g. an interrupted save) must not abort
        # the module-level load at import time; start with a fresh history and
        # tell the operator why.
        warnings.warn(
            f"Ignoring unreadable emotion history in {file_path!r}: {exc}",
            RuntimeWarning,
        )
        return []
def save_historical_data(historical_data, file_path=emotion_history_file):
    """Write ``historical_data`` to ``file_path`` as JSON, replacing the file.

    Args:
        historical_data: JSON-serialisable object to persist.
        file_path: Destination path; opened in write mode, so any existing
            content is overwritten.
    """
    with open(file_path, 'w') as handle:
        json.dump(historical_data, handle)
# Emotion history read from the default history file when the module is loaded.
emotion_history = load_historical_data()
def update_emotion(emotion, percentage, intensity):
    """Adjust one emotion in the global ``emotions`` table and renormalise.

    Args:
        emotion: Key of the entry in ``emotions`` to update.
        percentage: Amount added to that entry's current percentage.
        intensity: New intensity value; replaces the previous one.

    After the update, every percentage is rescaled so that all of them
    together sum to 100.
    """
    target = emotions[emotion]
    target['percentage'] += percentage
    target['intensity'] = intensity

    # Rescale all shares against the new grand total.
    total = sum(state['percentage'] for state in emotions.values())
    for state in emotions.values():
        state['percentage'] = (state['percentage'] / total) * 100
def normalize_context(context):
    """Return ``context`` lower-cased, with surrounding whitespace removed."""
    lowered = context.lower()
    return lowered.strip()
# DEAP types for the evolutionary search.
# Three objectives, all with negative weights (DEAP minimises objectives whose
# weight is negative); the magnitudes 1.0 / 0.5 / 0.2 rank their importance.
creator.create(
    "FitnessMulti",
    base.Fitness,
    weights=(-1.0, -0.5, -0.2),
)
# An individual is a plain list that carries a FitnessMulti fitness.
creator.create("Individual", list, fitness=creator.FitnessMulti)
def evaluate(individual):
    """Score one individual for the evolutionary search.

    The first ``len(emotions)`` genes are read as emotion percentages and the
    remaining genes as intensities.

    Returns:
        A 3-tuple ``(total_diff, intensity_range, emotion_balance)``:
        the distance of the percentage sum from 100, the spread between the
        largest and smallest intensity, and the spread between the largest
        and smallest percentage.
    """
    split_at = len(emotions)
    shares, strengths = individual[:split_at], individual[split_at:]
    return (
        abs(100 - sum(shares)),
        max(strengths) - min(strengths),
        max(shares) - min(shares),
    )
def evolve_emotions():
    """Search for a set of emotion percentages and intensities with DEAP.

    Each individual holds ``len(emotions)`` percentage genes (drawn uniformly
    from 0-100) followed by ``len(emotions)`` intensity genes (drawn uniformly
    from 0-10). A population of 100 is evolved for 50 generations with a
    (mu + lambda) strategy, NSGA-II selection, two-point crossover and
    Gaussian mutation, scored by ``evaluate``.

    Returns:
        tuple: ``(emotion_values, intensities)`` -- the two halves of the best
        individual found. The global ``emotions`` table is not modified;
        callers decide whether to apply the result.
    """
    gene_count = len(emotions)

    toolbox = base.Toolbox()
    toolbox.register("attr_float", random.uniform, 0, 100)
    toolbox.register("attr_intensity", random.uniform, 0, 10)
    # One pass (n=1) over the generator tuple yields one full individual:
    # gene_count percentages followed by gene_count intensities.
    toolbox.register("individual", tools.initCycle, creator.Individual,
                     (toolbox.attr_float,) * gene_count +
                     (toolbox.attr_intensity,) * gene_count, n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
    toolbox.register("select", tools.selNSGA2)
    toolbox.register("evaluate", evaluate)

    population = toolbox.population(n=100)
    # Use the population handed back by the algorithm rather than relying on
    # the input list having been updated in place.
    population, _logbook = algorithms.eaMuPlusLambda(
        population, toolbox, mu=50, lambda_=100, cxpb=0.7, mutpb=0.2, ngen=50,
        stats=None, halloffame=None, verbose=False)

    best_individual = tools.selBest(population, k=1)[0]
    emotion_values = best_individual[:gene_count]
    intensities = best_individual[gene_count:]
    return emotion_values, intensities
def predict_emotion(context):
    """Classify ``context`` with the emotion model and return its label.

    Args:
        context: Input text; tokenised with truncation at 512 tokens.

    Returns:
        One of ``"sadness"``, ``"joy"``, ``"love"``, ``"anger"``, ``"fear"``
        or ``"surprise"`` -- the label at the index of the highest-probability
        class.
    """
    load_models()
    encoded = emotion_prediction_tokenizer(context, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = emotion_prediction_model(**encoded).logits
    class_probabilities = torch.nn.functional.softmax(logits, dim=-1)
    winner = torch.argmax(class_probabilities, dim=-1).item()
    # NOTE(review): label order is hard-coded here; presumably it mirrors the
    # class index order of the loaded checkpoint -- confirm against its config.
    label_by_index = ("sadness", "joy", "love", "anger", "fear", "surprise")
    return label_by_index[winner]
def sentiment_analysis(text):
    """Return the polarity scores of ``text`` from a fresh SentimentIntensityAnalyzer."""
    analyzer = SentimentIntensityAnalyzer()
    return analyzer.polarity_scores(text)
def extract_entities(text):
    """Run the spaCy pipeline on ``text`` and collect several linguistic views.

    Returns:
        dict with the keys
        ``named_entities`` -- ``(text, label)`` for each entity in the doc,
        ``noun_phrases``   -- text of each noun chunk,
        ``key_phrases``    -- result of textacy's TextRank key-term extraction
                              (lemma-normalised, top 5),
        ``dependencies``   -- ``(token, dependency label, head token)`` per token,
        ``pos_tags``       -- ``(token, part-of-speech)`` per token.
    """
    parsed = nlp(text)

    entity_pairs = [(span.text, span.label_) for span in parsed.ents]
    phrase_texts = [phrase.text for phrase in parsed.noun_chunks]

    # textacy is imported at call time, inside the function.
    from textacy.extract import keyterms as kt
    ranked_terms = kt.textrank(parsed, normalize="lemma", topn=5)

    # Single pass over the tokens for both per-token views.
    dependency_triples = []
    tagged_tokens = []
    for tok in parsed:
        dependency_triples.append((tok.text, tok.dep_, tok.head.text))
        tagged_tokens.append((tok.text, tok.pos_))

    return {
        "named_entities": entity_pairs,
        "noun_phrases": phrase_texts,
        "key_phrases": ranked_terms,
        "dependencies": dependency_triples,
        "pos_tags": tagged_tokens
    }
def analyze_context(text):
    """Resolve coreferences in ``text`` and extract LDA topics from the result.

    Args:
        text: Raw input text.

    Returns:
        dict with
        ``resolved_text`` -- the coreference-resolved text from the spaCy doc,
        ``topics``        -- output of ``LdaModel.print_topics()`` for a
                             3-topic model trained on that single text, or an
                             empty list when the text yields no usable tokens.
    """
    doc = nlp(text)
    # Coreference resolution
    resolved_text = doc._.coref_resolved

    # Topic modeling
    processed_text = simple_preprocess(resolved_text)
    if not processed_text:
        # Empty input, or input made only of tokens simple_preprocess drops,
        # gives an empty dictionary; LdaModel refuses to train on that, so
        # report "no topics" instead of failing the whole request.
        return {
            "resolved_text": resolved_text,
            "topics": []
        }

    dictionary = corpora.Dictionary([processed_text])
    corpus = [dictionary.doc2bow(processed_text)]
    lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=3, random_state=42)
    topics = lda_model.print_topics()
    return {
        "resolved_text": resolved_text,
        "topics": topics
    }
def analyze_text_complexity(text):
    """Summarise size and sentiment statistics of ``text`` using TextBlob.

    Returns:
        dict with ``word_count``, ``sentence_count``,
        ``average_sentence_length`` (words per sentence, 0 when there are no
        sentences), ``polarity`` and ``subjectivity``.
    """
    blob = TextBlob(text)
    n_words = len(blob.words)
    n_sentences = len(blob.sentences)

    if n_sentences > 0:
        words_per_sentence = n_words / n_sentences
    else:
        words_per_sentence = 0

    return {
        'word_count': n_words,
        'sentence_count': n_sentences,
        'average_sentence_length': words_per_sentence,
        'polarity': blob.sentiment.polarity,
        'subjectivity': blob.sentiment.subjectivity
    }
def get_ai_emotion(input_text):
    """Predict the emotion for ``input_text`` and look up its current state.

    Returns:
        tuple ``(emotion, percentage, intensity)`` where ``emotion`` is the
        label from ``predict_emotion`` and the other two values come from that
        label's entry in the global ``emotions`` table.
    """
    ai_emotion = predict_emotion(input_text)
    state = emotions[ai_emotion]
    return ai_emotion, state['percentage'], state['intensity']
def generate_emotion_visualization(ai_emotion, ai_emotion_percentage, ai_emotion_intensity):
    """Render the global ``emotions`` percentages as a bar chart image.

    Args:
        ai_emotion: Emotion name shown (capitalised) in the chart title.
        ai_emotion_percentage: Percentage shown in the chart title.
        ai_emotion_intensity: Accepted for interface compatibility; not used
            in the chart.

    Returns:
        ``'emotional_state.png'`` when the chart was written, or ``None`` when
        anything went wrong while drawing or saving (the error is printed).
    """
    emotion_visualization_path = 'emotional_state.png'
    fig = None
    try:
        fig = plt.figure(figsize=(8, 6))
        emotions_df = pd.DataFrame([(e, d['percentage'], d['intensity']) for e, d in emotions.items()],
                                   columns=['emotion', 'percentage', 'intensity'])
        sns.barplot(x='emotion', y='percentage', data=emotions_df)
        plt.title(f'Current Emotional State: {ai_emotion.capitalize()} ({ai_emotion_percentage:.2f}%)')
        plt.xlabel('Emotion')
        plt.ylabel('Percentage')
        plt.xticks(rotation=90)
        plt.savefig(emotion_visualization_path)
    except Exception as e:
        # Best effort: the chart is optional, so report and carry on without it.
        print(f"Error generating emotion visualization: {e}")
        emotion_visualization_path = None
    finally:
        # Always release the figure, including when drawing or saving failed;
        # otherwise each failed request would leave an open figure behind.
        if fig is not None:
            plt.close(fig)
    return emotion_visualization_path
def generate_response(ai_emotion, input_text, entities, context_analysis):
    """Generate a text reply with the response language model.

    A prompt is assembled from the AI's current emotion, the named entities,
    the topic analysis and the user's text, then continued by sampling from
    the model.

    Args:
        ai_emotion: Emotion label; also selects the sampling temperature
            (0.9 for 'anger', 0.5 for 'joy', 0.7 otherwise).
        input_text: The user's message.
        entities: Mapping that provides a ``'named_entities'`` entry.
        context_analysis: Mapping that provides a ``'topics'`` entry.

    Returns:
        The decoded output sequence with special tokens removed and
        surrounding whitespace stripped. The decoded sequence starts with the
        prompt itself, followed by the sampled continuation.
    """
    load_models()
    prompt = f"As an AI assistant, I am currently feeling {ai_emotion}. My response will reflect this emotional state. "
    prompt += f"The input text contains the following entities: {entities['named_entities']}. "
    prompt += f"The main topics are: {context_analysis['topics']}. "
    prompt += f"Considering this context, here's my response to '{input_text}': "

    target_total_tokens = 400  # overall budget (prompt + reply) when the prompt is short
    min_reply_tokens = 64      # room always reserved for the reply itself

    # The prompt must fit inside the model's context window with space left to
    # generate; truncating at a larger value would let an over-long prompt
    # reach the model unshortened.
    context_window = getattr(response_model.config, "max_position_embeddings", 1024)
    max_prompt_tokens = max(1, context_window - min_reply_tokens)
    inputs = response_tokenizer(prompt, return_tensors="pt", padding=True, truncation=True,
                                max_length=max_prompt_tokens)

    # `max_length` in generate() counts the prompt tokens too. Keep the 400
    # token total for short prompts, but extend it when the prompt alone would
    # otherwise use up the whole budget and leave nothing to generate.
    prompt_tokens = inputs.input_ids.shape[1]
    max_length = min(context_window, max(target_total_tokens, prompt_tokens + min_reply_tokens))

    temperature = {'anger': 0.9, 'joy': 0.5}.get(ai_emotion, 0.7)

    with torch.no_grad():
        response_ids = response_model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=temperature,
            pad_token_id=response_tokenizer.eos_token_id
        )
    response = response_tokenizer.decode(response_ids[0], skip_special_tokens=True)
    return response.strip()
def interactive_interface(input_text):
    """Run the full analysis pipeline for one user message.

    Predicts the emotion, scores sentiment and text complexity, looks up the
    AI's emotional state, renders the state chart, extracts entities, analyses
    context and generates a reply. The exchange is appended to
    ``conversation_history``; when the log grows beyond
    ``max_history_length`` its oldest entry is removed.

    Returns:
        dict with the keys ``emotion``, ``sentiment``, ``entities``,
        ``context_analysis``, ``text_complexity``, ``ai_emotion``,
        ``ai_emotion_percentage``, ``ai_emotion_intensity``,
        ``emotion_visualization`` and ``response``.
    """
    user_emotion = predict_emotion(input_text)
    sentiment = sentiment_analysis(input_text)
    complexity = analyze_text_complexity(input_text)

    ai_emotion, ai_share, ai_strength = get_ai_emotion(input_text)
    chart_path = generate_emotion_visualization(ai_emotion, ai_share, ai_strength)

    found_entities = extract_entities(input_text)
    context_info = analyze_context(input_text)
    reply = generate_response(ai_emotion, input_text, found_entities, context_info)

    # Record the exchange and drop the oldest one once the cap is exceeded.
    conversation_history.append({'user': input_text, 'response': reply})
    if len(conversation_history) > max_history_length:
        del conversation_history[0]

    return dict(
        emotion=user_emotion,
        sentiment=sentiment,
        entities=found_entities,
        context_analysis=context_info,
        text_complexity=complexity,
        ai_emotion=ai_emotion,
        ai_emotion_percentage=ai_share,
        ai_emotion_intensity=ai_strength,
        emotion_visualization=chart_path,
        response=reply,
    )
# Gradio interface | |
def gradio_interface(input_text):
    """Gradio callback: format the pipeline result as a text report.

    Returns:
        tuple ``(report, image_path)`` -- the multi-line text summary and the
        ``emotion_visualization`` value from ``interactive_interface``.
    """
    result = interactive_interface(input_text)
    report_parts = [
        f"Predicted Emotion: {result['emotion']}\n",
        f"Sentiment: {result['sentiment']}\n",
        f"AI Emotion: {result['ai_emotion']} ({result['ai_emotion_percentage']:.2f}%, Intensity: {result['ai_emotion_intensity']:.2f})\n",
        f"Entities: {result['entities']}\n",
        f"Context Analysis: {result['context_analysis']}\n",
        f"Text Complexity: {result['text_complexity']}\n",
        f"AI Response: {result['response']}",
    ]
    return "".join(report_parts), result['emotion_visualization']
# Gradio UI: one text input; outputs are the text report and the chart image
# (passed to Gradio as a file path).
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs=["text", gr.Image(type="filepath")],
    title="Enhanced AI Assistant",
    description="Enter your text to interact with the AI assistant.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()