Sephfox committed
Commit aaf362d · verified · 1 Parent(s): 1136b8e

Update app.py
Files changed (1): app.py (+59 -169)
app.py CHANGED
@@ -38,75 +38,23 @@ data = {
 }
 df = pd.DataFrame(data)
 
-# Encoding the contexts using One-Hot Encoding (memory-efficient)
-encoder = OneHotEncoder(handle_unknown='ignore', sparse=True)
-contexts_encoded = encoder.fit_transform(df[['context']])
-
 # Encoding emotions
 emotions_target = pd.Categorical(df['emotion']).codes
 emotion_classes = pd.Categorical(df['emotion']).categories
 
-# Memory-efficient Neural Network with PyTorch
-class MemoryEfficientNN(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super(MemoryEfficientNN, self).__init__()
-        self.layers = nn.Sequential(
-            nn.Embedding(input_size, hidden_size),
-            nn.ReLU(),
-            nn.Dropout(0.2),
-            nn.Linear(hidden_size, hidden_size),
-            nn.ReLU(),
-            nn.Dropout(0.2),
-            nn.Linear(hidden_size, num_classes)
-        )
-
-    def forward(self, x):
-        return self.layers(x.long())
-
-# Memory-efficient dataset
-class MemoryEfficientDataset(IterableDataset):
-    def __init__(self, X, y, batch_size):
-        self.X = X
-        self.y = torch.LongTensor(y.unsqueeze(1))  # Convert labels to long tensors and add a new dimension
-        self.batch_size = batch_size
-
-    def __iter__(self):
-        for i in range(0, len(self.y), self.batch_size):
-            X_batch = self.X[i:i+self.batch_size].toarray()
-            y_batch = self.y[i:i+self.batch_size]
-            yield torch.FloatTensor(X_batch), y_batch
-# Train Memory-Efficient Neural Network
-X_train, X_test, y_train, y_test = train_test_split(contexts_encoded, emotions_target, test_size=0.2, random_state=42)
-input_size = X_train.shape[1]
-hidden_size = 64
-num_classes = len(emotion_classes)
-
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = MemoryEfficientNN(input_size, hidden_size, num_classes).to(device)
-criterion = nn.CrossEntropyLoss()
-optimizer = optim.Adam(model.parameters(), lr=0.001)
+# Load pre-trained BERT model for emotion prediction
+emotion_prediction_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-go-emotions-emotion")
 
-train_dataset = MemoryEfficientDataset(X_train, y_train, batch_size=32)
-train_loader = DataLoader(train_dataset, batch_size=None, num_workers=4, pin_memory=True)
-
-num_epochs = 100
-for epoch in range(num_epochs):
-    for batch_X, batch_y in train_loader:
-        batch_X, batch_y = batch_X.to(device, non_blocking=True), batch_y.to(device, non_blocking=True)
-        outputs = model(batch_X)
-        loss = criterion(outputs, batch_y)
-        optimizer.zero_grad()
-        loss.backward()
-        optimizer.step()
-    gc.collect()  # Garbage collection after each epoch
-
-# Ensemble with Random Forest (memory-efficient)
-rf_model = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
-rf_model.fit(X_train, y_train)
-
-# Isolation Forest Anomaly Detection Model (memory-efficient)
-isolation_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1, max_samples='auto')
-isolation_forest.fit(X_train)  # Fit the model before using it
+# Lazy loading for the fine-tuned language model
+_finetuned_lm_tokenizer = None
+_finetuned_lm_model = None
+def get_finetuned_lm_model():
+    global _finetuned_lm_tokenizer, _finetuned_lm_model
+    if _finetuned_lm_tokenizer is None or _finetuned_lm_model is None:
+        finetuned_lm_model_name = "microsoft/DialoGPT-large"  # Replace with your fine-tuned language model name
+        _finetuned_lm_tokenizer = AutoTokenizer.from_pretrained(finetuned_lm_model_name)
+        _finetuned_lm_model = AutoModelForCausalLM.from_pretrained(finetuned_lm_model_name, device_map="auto", low_cpu_mem_usage=True)
+    return _finetuned_lm_tokenizer, _finetuned_lm_model
 
 # Enhanced Emotional States
 emotions = {
@@ -199,112 +147,54 @@ def evolve_emotions():
 
     emotions['ideal_state']['percentage'] = ideal_state
 
-# Lazy loading for the language models
-_distilgpt3_tokenizer = None
-_distilgpt3_lm_model = None
-def get_distilgpt3_model():
-    global _distilgpt3_tokenizer, _distilgpt3_lm_model
-    if _distilgpt3_tokenizer is None or _distilgpt3_lm_model is None:
-        distilgpt3_model_name = 'distilgpt2'  # Replace with the fine-tuned DistilGPT-3 model name
-        _distilgpt3_tokenizer = AutoTokenizer.from_pretrained(distilgpt3_model_name)
-        _distilgpt3_lm_model = AutoModelForCausalLM.from_pretrained(distilgpt3_model_name, device_map="auto", low_cpu_mem_usage=True)
-    return _distilgpt3_tokenizer, _distilgpt3_lm_model
-
-_bloom_tokenizer = None
-_bloom_lm_model = None
-def get_bloom_model():
-    global _bloom_tokenizer, _bloom_lm_model
-    if _bloom_tokenizer is None or _bloom_lm_model is None:
-        bloom_model_name = 'bigscience/bloom-1b7'
-        _bloom_tokenizer = AutoTokenizer.from_pretrained(bloom_model_name)
-        _bloom_lm_model = AutoModelForCausalLM.from_pretrained(bloom_model_name, device_map="auto", low_cpu_mem_usage=True)
-    return _bloom_tokenizer, _bloom_lm_model
-
-def generate_text(prompt, max_length=100, model_type='distilgpt3'):
-    if model_type == 'distilgpt3':
-        distilgpt3_tokenizer, distilgpt3_lm_model = get_distilgpt3_model()
-        input_ids = distilgpt3_tokenizer.encode(prompt, return_tensors='pt').to(distilgpt3_lm_model.device)
-        with torch.no_grad():
-            output = distilgpt3_lm_model.generate(
-                input_ids,
-                max_length=max_length,
-                num_return_sequences=1,
-                no_repeat_ngram_size=2,
-                do_sample=True,
-                top_k=50,
-                top_p=0.95,
-                temperature=0.7
-            )
-        generated_text = distilgpt3_tokenizer.decode(output[0], skip_special_tokens=True)
-    elif model_type == 'bloom':
-        bloom_tokenizer, bloom_lm_model = get_bloom_model()
-        input_ids = bloom_tokenizer.encode(prompt, return_tensors='pt').to(bloom_lm_model.device)
-        with torch.no_grad():
-            output = bloom_lm_model.generate(
-                input_ids,
-                max_length=max_length,
-                num_return_sequences=1,
-                no_repeat_ngram_size=2,
-                do_sample=True,
-                top_k=50,
-                top_p=0.95,
-                temperature=0.7
-            )
-        generated_text = bloom_tokenizer.decode(output[0], skip_special_tokens=True)
+def predict_emotion(context):
+    emotion_prediction_pipeline = pipeline('text-classification', model=emotion_prediction_model, return_all_scores=True)
+    predictions = emotion_prediction_pipeline(context)
+    emotion_scores = predictions[0]
+    emotion_pred = max(emotion_scores, key=emotion_scores.get)
+    return emotion_pred
+
+def generate_text(prompt, max_length=100, emotion=None):
+    finetuned_lm_tokenizer, finetuned_lm_model = get_finetuned_lm_model()
+    input_ids = finetuned_lm_tokenizer.encode(prompt, return_tensors='pt').to(finetuned_lm_model.device)
+
+    if emotion is not None:
+        emotion_intensity = emotions[emotion]['intensity']
+        top_p = 0.95 - (emotion_intensity / 10)  # Adjust top_p based on emotion intensity
+        temperature = 0.7 + (emotion_intensity / 5)  # Adjust temperature based on emotion intensity
     else:
-        raise ValueError("Invalid model type. Choose 'distilgpt3' or 'bloom'.")
-
+        top_p = 0.95
+        temperature = 0.7
+
+    with torch.no_grad():
+        output = finetuned_lm_model.generate(
+            input_ids,
+            max_length=max_length,
+            num_return_sequences=1,
+            no_repeat_ngram_size=2,
+            do_sample=True,
+            top_k=50,
+            top_p=top_p,
+            temperature=temperature
+        )
+    generated_text = finetuned_lm_tokenizer.decode(output[0], skip_special_tokens=True)
     return generated_text
 
-model_name = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
-sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
-def get_sentiment(text):
-    result = sentiment_pipeline(text)[0]
-    return f"Sentiment: {result['label']}, Score: {result['score']:.4f}"
-
-def process_input(text):
-    try:
-        normalized_text = normalize_context(text)
-        encoded_text = encoder.transform([[normalized_text]])
-
-        rf_prediction = rf_model.predict(encoded_text)[0]
-        isolation_score = isolation_forest.decision_function(encoded_text)[0]
-        nn_output = model(torch.LongTensor(encoded_text.toarray()).to(device, non_blocking=True))
-        nn_prediction = nn_output.argmax(dim=1).item()
-
-        predicted_emotion = emotion_classes[rf_prediction]
-        sentiment_score = isolation_score
-        distilgpt3_generated_text = generate_text(normalized_text, model_type='distilgpt3')
-        bloom_generated_text = generate_text(normalized_text, model_type='bloom')
-
-        historical_data = load_historical_data()
-        historical_data.append({
-            'context': text,
-            'predicted_emotion': predicted_emotion,
-            'sentiment_score': sentiment_score,
-            'distilgpt3_generated_text': distilgpt3_generated_text,
-            'bloom_generated_text': bloom_generated_text
-        })
-        save_historical_data(historical_data)
-
-        return predicted_emotion, sentiment_score, distilgpt3_generated_text, bloom_generated_text
-    except Exception as e:
-        error_message = f"An error occurred: {str(e)}"
-        print(error_message)  # Logging the error
-        return error_message, error_message, error_message, error_message
-
-iface = gr.Interface(
-    fn=process_input,
-    inputs="text",
-    outputs=[
-        gr.Textbox(label="Emotional Response"),
-        gr.Textbox(label="Sentiment Response"),
-        gr.Textbox(label="DistilGPT-3 Generated Text"),
-        gr.Textbox(label="BLOOM Generated Text")
-    ],
-    live=True
-)
-
-iface.launch(share=True)
+def generate_response(context, emotion=None):
+    prompt = context
+    generated_text = generate_text(prompt, emotion=emotion)
+    return generated_text
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Emotion-Aware Language Model")
+
+    context_input = gr.Textbox(label="Enter a context")
+    predict_btn = gr.Button("Predict Emotion and Generate Text")
+
+    with gr.Row():
+        emotion_output = gr.Textbox(label="Predicted Emotion")
+        generated_text_output = gr.Textbox(label="Generated Text")
+
+    predict_btn.click(fn=lambda context: (predict_emotion(context), generate_response(context, emotion=predict_emotion(context))), inputs=context_input, outputs=[emotion_output, generated_text_output])
+
+demo.launch()
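Review note on the new predict_emotion: with return_all_scores=True, predictions[0] is a list of {'label': ..., 'score': ...} dicts, so max(emotion_scores, key=emotion_scores.get) raises an AttributeError (a list has no .get), and pipeline(...) also needs a tokenizer when it is handed a model object rather than a checkpoint name. A minimal corrected sketch, assuming the same go-emotions checkpoint and building the pipeline once at module level instead of on every call:

    from transformers import pipeline

    # Passing the checkpoint name lets the pipeline load its own tokenizer;
    # building it once avoids reloading weights per call.
    emotion_prediction_pipeline = pipeline(
        "text-classification",
        model="bhadresh-savani/distilbert-base-go-emotions-emotion",
        return_all_scores=True,
    )

    def predict_emotion(context):
        # predictions[0] is a list of {'label', 'score'} dicts for the one input
        emotion_scores = emotion_prediction_pipeline(context)[0]
        # Select the highest-scoring label explicitly rather than via dict.get
        return max(emotion_scores, key=lambda s: s["score"])["label"]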
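Review note on the emotion-conditioned sampling in generate_text: if emotions[emotion]['intensity'] runs on a 0-10 scale, top_p = 0.95 - intensity / 10 can reach zero or go negative and temperature = 0.7 + intensity / 5 can reach 2.7, which generate either rejects or turns into degenerate output. The predicted label also has to exist as a key in the custom emotions dict (go-emotions labels such as "admiration" may not), and max_length counts the prompt tokens, so max_new_tokens is usually the safer knob. A hedged sketch that clamps both values (the 0-10 scale and the bounds here are illustrative assumptions, not values stated in the commit):

    def emotion_sampling_params(emotion_intensity):
        """Map an assumed 0-10 intensity to in-range sampling parameters."""
        top_p = 0.95 - emotion_intensity / 10
        temperature = 0.7 + emotion_intensity / 5
        # Illustrative bounds: top_p must stay in (0, 1], and very high
        # temperatures tend to produce incoherent text.
        return min(max(top_p, 0.5), 0.95), min(max(temperature, 0.7), 1.5)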
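Review note on the click handler: the lambda wired to predict_btn calls predict_emotion(context) twice, so the classifier runs twice per click. A single-call handler with the same inputs and outputs (names taken from the diff; meant to replace the lambda line inside the gr.Blocks block) avoids the duplicate work:

    def on_predict(context):
        emotion = predict_emotion(context)  # classify once
        return emotion, generate_response(context, emotion=emotion)

    predict_btn.click(
        fn=on_predict,
        inputs=context_input,
        outputs=[emotion_output, generated_text_output],
    )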