Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -170,9 +170,9 @@ def classify_emotion(text, classifier):
     return "LABEL_2"
 
 def get_embedding_for_text(text, tokenizer, model):
-    """Get embedding for complete text."""
     chunks = split_text(text)
     chunk_embeddings = []
+    embedding_size = model.config.hidden_size
 
     for chunk in chunks:
         try:
@@ -189,18 +189,15 @@ def get_embedding_for_text(text, tokenizer, model):
             outputs = model(**inputs)
 
             embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
-            chunk_embeddings.append(embedding)
+            chunk_embeddings.append(embedding.reshape(-1))
         except Exception as e:
-            st.warning(f"Error processing chunk: {str(e)}")
             continue
 
     if chunk_embeddings:
-        #
-
-
-
-        return chunk_embeddings
-    return np.zeros((1, model.config.hidden_size))
+        # Ensure consistent shape
+        final_embedding = np.mean(chunk_embeddings, axis=0)
+        return final_embedding.reshape(-1)
+    return np.zeros(embedding_size)
 
 def format_topics(topic_model, topic_counts):
     """Format topics for display."""
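Why the reshape-and-mean matters: `outputs.last_hidden_state[:, 0, :]` has shape `(1, hidden_size)`, and the old code handed back a raw list of such arrays (or a 2-D zero array), so downstream consumers saw inconsistent shapes. A minimal numpy sketch of the new scheme, with a hypothetical hidden size of 768 (app.py reads the real value from `model.config.hidden_size`):

import numpy as np

hidden_size = 768  # assumed for illustration; app.py uses model.config.hidden_size

# Each chunk's CLS slice arrives as (1, hidden_size); reshape(-1) flattens it.
chunk_embeddings = [np.random.rand(1, hidden_size).reshape(-1) for _ in range(3)]

# Averaging equal-length 1-D vectors yields one (hidden_size,) vector, so a
# text maps to the same shape no matter how many chunks it was split into.
final_embedding = np.mean(chunk_embeddings, axis=0)
assert final_embedding.shape == (hidden_size,)

The empty-input fallback `np.zeros(embedding_size)` matches that 1-D shape, where the old `np.zeros((1, model.config.hidden_size))` did not.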
@@ -237,6 +234,7 @@ def format_emotions(emotion_counts):
 
 def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=3):
     summaries = []
+    embedding_size = bert_model.config.hidden_size
 
     topic_model_params = {
         "language": "arabic",
@@ -253,7 +251,7 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=3):
         topic_model_params["nr_topics"] = "auto"
 
     topic_model = BERTopic(
-        embedding_model=None,
+        embedding_model=None,
         **topic_model_params
     )
 
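For context on `embedding_model=None`: it tells BERTopic to skip its built-in embedding step, which only works if a precomputed `(n_docs, hidden_size)` matrix is passed to `fit_transform` alongside the documents. A minimal sketch of that pattern (the document list, sizes, and random matrix are placeholders, not the app's data):

import numpy as np
from bertopic import BERTopic

docs = [f"placeholder poem {i}" for i in range(20)]  # stand-ins for the cleaned texts
embeddings = np.random.rand(len(docs), 768)          # stand-in for the BERT embeddings

# With embedding_model=None, BERTopic does no embedding of its own;
# the matrix supplied here must already be 2-D and row-aligned with docs.
topic_model = BERTopic(embedding_model=None, min_topic_size=3)
topics, probs = topic_model.fit_transform(docs, embeddings)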
@@ -270,26 +268,29 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=3):
 
         texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
         all_emotions = []
+        embeddings = []
 
         # Generate embeddings
-        embeddings = []
         for i, text in enumerate(texts):
             try:
                 embedding = get_embedding_for_text(text, bert_tokenizer, bert_model)
-                if embedding is not None and
+                if embedding is not None and embedding.shape[0] == embedding_size:
                     embeddings.append(embedding)
-
                 progress = (i + 1) / len(texts) * 0.4
                 progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
             except Exception as e:
                 st.warning(f"Error processing poem {i+1} in {country}: {str(e)}")
                 continue
 
-        # Convert
-
-
+        # Convert to numpy array and ensure 2D shape
+        if embeddings:
+            embeddings = np.vstack(embeddings)
+        else:
+            st.warning(f"No valid embeddings generated for {country}")
+            continue
+
         # Process emotions
-        for i, text in enumerate(texts):
+        for i, text in enumerate(texts[:len(embeddings)]):
             try:
                 emotion = classify_emotion(text, emotion_classifier)
                 all_emotions.append(emotion)
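The width check plus `np.vstack` is what produces that 2-D, row-aligned matrix: vectors of the wrong width are dropped before stacking, because a single ragged entry would make `vstack` raise. A small sketch with the same assumed width of 768:

import numpy as np

embedding_size = 768  # assumed; app.py uses bert_model.config.hidden_size

candidates = [
    np.random.rand(embedding_size),     # valid (768,) vector -- kept
    np.random.rand(1, embedding_size),  # un-flattened 2-D array -- dropped
    np.random.rand(embedding_size),     # valid -- kept
]
embeddings = [e for e in candidates if e is not None and e.shape[0] == embedding_size]

# Stack the survivors into the (n_docs, 768) matrix fit_transform expects.
embeddings = np.vstack(embeddings)
print(embeddings.shape)  # (2, 768)

Truncating `texts` to `len(embeddings)` in the next hunk keeps the documents aligned with the surviving rows.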
@@ -305,8 +306,7 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=3):
                 continue
 
         # Ensure texts and embeddings match
-
-        texts = texts[:len(embeddings)]
+        texts = texts[:len(embeddings)]
 
         # Fit and transform the topic model
         topics, probs = topic_model.fit_transform(texts, embeddings)
@@ -329,6 +329,7 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=3):
 
     return summaries, topic_model
 
+
 try:
     bert_tokenizer, bert_model, emotion_classifier = load_models()
     st.success("Models loaded successfully!")