Spaces:
Sleeping

Update app.py
app.py CHANGED
@@ -38,75 +38,23 @@ data = {
 }
 df = pd.DataFrame(data)
 
-# Encoding the contexts using One-Hot Encoding (memory-efficient)
-encoder = OneHotEncoder(handle_unknown='ignore', sparse=True)
-contexts_encoded = encoder.fit_transform(df[['context']])
-
 # Encoding emotions
 emotions_target = pd.Categorical(df['emotion']).codes
 emotion_classes = pd.Categorical(df['emotion']).categories
 
-# Memory-efficient neural network
-class MemoryEfficientNN(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super(MemoryEfficientNN, self).__init__()
-        self.layers = nn.Sequential(
-            nn.Embedding(input_size, hidden_size),
-            nn.ReLU(),
-            nn.Dropout(0.2),
-            nn.Linear(hidden_size, hidden_size),
-            nn.ReLU(),
-            nn.Dropout(0.2),
-            nn.Linear(hidden_size, num_classes)
-        )
-
-    def forward(self, x):
-        return self.layers(x.long())
-
-# Memory-efficient dataset
-class MemoryEfficientDataset(IterableDataset):
-    def __init__(self, X, y, batch_size):
-        self.X = X
-        self.y = torch.LongTensor(y.unsqueeze(1))  # Convert labels to long tensors and add a new dimension
-        self.batch_size = batch_size
-
-    def __iter__(self):
-        for i in range(0, len(self.y), self.batch_size):
-            X_batch = self.X[i:i+self.batch_size].toarray()
-            y_batch = self.y[i:i+self.batch_size]
-            yield torch.FloatTensor(X_batch), y_batch
-# Train Memory-Efficient Neural Network
-X_train, X_test, y_train, y_test = train_test_split(contexts_encoded, emotions_target, test_size=0.2, random_state=42)
-input_size = X_train.shape[1]
-hidden_size = 64
-num_classes = len(emotion_classes)
-
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = MemoryEfficientNN(input_size, hidden_size, num_classes).to(device)
-criterion = nn.CrossEntropyLoss()
-optimizer = optim.Adam(model.parameters(), lr=0.001)
+# Load pre-trained BERT model for emotion prediction
+emotion_prediction_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-go-emotions-emotion")
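+emotion_prediction_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-go-emotions-emotion")  # assumed addition: the text-classification pipeline below needs the matching tokenizer when given a model instance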
 
-
-
-
-
-
-
-
-
-
-
-        loss.backward()
-        optimizer.step()
-    gc.collect()  # Garbage collection after each epoch
-
-# Ensemble with Random Forest (memory-efficient)
-rf_model = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
-rf_model.fit(X_train, y_train)
-
-# Isolation Forest Anomaly Detection Model (memory-efficient)
-isolation_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1, max_samples='auto')
-isolation_forest.fit(X_train)  # Fit the model before using it
+# Lazy loading for the fine-tuned language model
+_finetuned_lm_tokenizer = None
+_finetuned_lm_model = None
+def get_finetuned_lm_model():
+    global _finetuned_lm_tokenizer, _finetuned_lm_model
+    if _finetuned_lm_tokenizer is None or _finetuned_lm_model is None:
+        finetuned_lm_model_name = "microsoft/DialoGPT-large"  # Replace with your fine-tuned language model name
+        _finetuned_lm_tokenizer = AutoTokenizer.from_pretrained(finetuned_lm_model_name)
+        _finetuned_lm_model = AutoModelForCausalLM.from_pretrained(finetuned_lm_model_name, device_map="auto", low_cpu_mem_usage=True)
+    return _finetuned_lm_tokenizer, _finetuned_lm_model
 
 # Enhanced Emotional States
 emotions = {
@@ -199,112 +147,54 @@ def evolve_emotions():
 
 emotions['ideal_state']['percentage'] = ideal_state
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|
217 |
-
if _bloom_tokenizer is None or _bloom_lm_model is None:
|
218 |
-
bloom_model_name = 'bigscience/bloom-1b7'
|
219 |
-
_bloom_tokenizer = AutoTokenizer.from_pretrained(bloom_model_name)
|
220 |
-
_bloom_lm_model = AutoModelForCausalLM.from_pretrained(bloom_model_name, device_map="auto", low_cpu_mem_usage=True)
|
221 |
-
return _bloom_tokenizer, _bloom_lm_model
|
222 |
-
|
223 |
-
def generate_text(prompt, max_length=100, model_type='distilgpt3'):
|
224 |
-
if model_type == 'distilgpt3':
|
225 |
-
distilgpt3_tokenizer, distilgpt3_lm_model = get_distilgpt3_model()
|
226 |
-
input_ids = distilgpt3_tokenizer.encode(prompt, return_tensors='pt').to(distilgpt3_lm_model.device)
|
227 |
-
with torch.no_grad():
|
228 |
-
output = distilgpt3_lm_model.generate(
|
229 |
-
input_ids,
|
230 |
-
max_length=max_length,
|
231 |
-
num_return_sequences=1,
|
232 |
-
no_repeat_ngram_size=2,
|
233 |
-
do_sample=True,
|
234 |
-
top_k=50,
|
235 |
-
top_p=0.95,
|
236 |
-
temperature=0.7
|
237 |
-
)
|
238 |
-
generated_text = distilgpt3_tokenizer.decode(output[0], skip_special_tokens=True)
|
239 |
-
elif model_type == 'bloom':
|
240 |
-
bloom_tokenizer, bloom_lm_model = get_bloom_model()
|
241 |
-
input_ids = bloom_tokenizer.encode(prompt, return_tensors='pt').to(bloom_lm_model.device)
|
242 |
-
with torch.no_grad():
|
243 |
-
output = bloom_lm_model.generate(
|
244 |
-
input_ids,
|
245 |
-
max_length=max_length,
|
246 |
-
num_return_sequences=1,
|
247 |
-
no_repeat_ngram_size=2,
|
248 |
-
do_sample=True,
|
249 |
-
top_k=50,
|
250 |
-
top_p=0.95,
|
251 |
-
temperature=0.7
|
252 |
-
)
|
253 |
-
generated_text = bloom_tokenizer.decode(output[0], skip_special_tokens=True)
|
     else:
-
-
+        top_p = 0.95
+        temperature = 0.7
+
+    with torch.no_grad():
+        output = finetuned_lm_model.generate(
+            input_ids,
+            max_length=max_length,
+            num_return_sequences=1,
+            no_repeat_ngram_size=2,
+            do_sample=True,
+            top_k=50,
+            top_p=top_p,
+            temperature=temperature
+        )
+    generated_text = finetuned_lm_tokenizer.decode(output[0], skip_special_tokens=True)
     return generated_text
 
-
-
-
-
-def get_sentiment(text):
-    result = sentiment_pipeline(text)[0]
-    return f"Sentiment: {result['label']}, Score: {result['score']:.4f}"
-
-def process_input(text):
-    try:
-        normalized_text = normalize_context(text)
-        encoded_text = encoder.transform([[normalized_text]])
-
-        rf_prediction = rf_model.predict(encoded_text)[0]
-        isolation_score = isolation_forest.decision_function(encoded_text)[0]
-        nn_output = model(torch.LongTensor(encoded_text.toarray()).to(device, non_blocking=True))
-        nn_prediction = nn_output.argmax(dim=1).item()
-
-        predicted_emotion = emotion_classes[rf_prediction]
-        sentiment_score = isolation_score
-        distilgpt3_generated_text = generate_text(normalized_text, model_type='distilgpt3')
-        bloom_generated_text = generate_text(normalized_text, model_type='bloom')
-
-        historical_data = load_historical_data()
-        historical_data.append({
-            'context': text,
-            'predicted_emotion': predicted_emotion,
-            'sentiment_score': sentiment_score,
-            'distilgpt3_generated_text': distilgpt3_generated_text,
-            'bloom_generated_text': bloom_generated_text
-        })
-        save_historical_data(historical_data)
-
-        return predicted_emotion, sentiment_score, distilgpt3_generated_text, bloom_generated_text
-    except Exception as e:
-        error_message = f"An error occurred: {str(e)}"
-        print(error_message)  # Logging the error
-        return error_message, error_message, error_message, error_message
-
-iface = gr.Interface(
-    fn=process_input,
-    inputs="text",
-    outputs=[
-        gr.Textbox(label="Emotional Response"),
-        gr.Textbox(label="Sentiment Response"),
-        gr.Textbox(label="DistilGPT-3 Generated Text"),
-        gr.Textbox(label="BLOOM Generated Text")
-    ],
-    live=True
-)
+def generate_response(context, emotion=None):
+    prompt = context
+    generated_text = generate_text(prompt, emotion=emotion)
+    return generated_text
 
-
+with gr.Blocks() as demo:
+    gr.Markdown("# Emotion-Aware Language Model")
+
+    context_input = gr.Textbox(label="Enter a context")
+    predict_btn = gr.Button("Predict Emotion and Generate Text")
+
+    with gr.Row():
+        emotion_output = gr.Textbox(label="Predicted Emotion")
+        generated_text_output = gr.Textbox(label="Generated Text")
+
+    def predict_and_generate(context):
+        # Predict the emotion once and reuse it for generation.
+        emotion = predict_emotion(context)
+        return emotion, generate_response(context, emotion=emotion)
+
+    predict_btn.click(fn=predict_and_generate, inputs=context_input, outputs=[emotion_output, generated_text_output])
+
+demo.launch()
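
A quick sanity check of the intensity-to-sampling mapping in generate_text (a standalone sketch; it assumes, as the formulas imply, that 'intensity' sits on a roughly 0-10 scale):

    # Hypothetical check, not part of app.py: tabulate the mapping.
    for intensity in (0, 1, 5, 9):
        top_p = 0.95 - intensity / 10       # 0.95, 0.85, 0.45, 0.05
        temperature = 0.7 + intensity / 5   # 0.70, 0.90, 1.70, 2.50
        print(intensity, round(top_p, 2), round(temperature, 2))
    # Intensities above 9.5 push top_p to 0 or below, which is not a valid
    # nucleus-sampling value, so callers should clamp intensity first.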
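
A minimal end-to-end smoke test of the new flow (hypothetical; paste below the definitions in app.py, and note that DialoGPT-large downloads lazily on the first get_finetuned_lm_model() call):

    context = "I just got a promotion at work!"
    emotion = predict_emotion(context)  # e.g. a go-emotions label such as 'joy'
    print("predicted:", emotion)
    # If the predicted label has no entry in the emotions dict, fall back to neutral sampling:
    print(generate_text(context, max_length=60, emotion=emotion if emotion in emotions else None))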