Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -229,12 +229,13 @@ def extract_problem_domains(df,
|
|
229 |
text_column='Problem_Description',
|
230 |
cluster_range=(10, 50),
|
231 |
top_words=17,
|
232 |
-
# method='sentence_transformers'
|
233 |
-
method='tfidf_kmeans'
|
|
|
234 |
):
|
235 |
|
236 |
|
237 |
-
|
238 |
|
239 |
if method == 'sentence_transformers':
|
240 |
# Sentence Transformers approach
|
@@ -294,6 +295,12 @@ def extract_problem_domains(df,
|
|
294 |
for i in range(optimal_n_clusters):
|
295 |
center = kmeans.cluster_centers_[i]
|
296 |
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
|
299 |
# top_word_indices = center.argsort()[-top_words:][::-1]
|
@@ -306,6 +313,7 @@ def extract_problem_domains(df,
|
|
306 |
df["Problem_Cluster"] = cluster_labels
|
307 |
df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
308 |
|
|
|
309 |
return df, optimal_n_clusters
|
310 |
|
311 |
|
@@ -338,7 +346,7 @@ def nlp_pipeline(original_df, console_messages):
|
|
338 |
|
339 |
# Domain Clustering
|
340 |
try:
|
341 |
-
domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
342 |
# print(f"Optimal clusters: {optimal_clusters}")
|
343 |
# print(result_df.head())
|
344 |
# console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
|
|
|
229 |
text_column='Problem_Description',
|
230 |
cluster_range=(10, 50),
|
231 |
top_words=17,
|
232 |
+
# method='sentence_transformers',
|
233 |
+
method='tfidf_kmeans',
|
234 |
+
console_messages
|
235 |
):
|
236 |
|
237 |
|
238 |
+
console_messages.append("Extracting Problem Domains...")
|
239 |
|
240 |
if method == 'sentence_transformers':
|
241 |
# Sentence Transformers approach
|
|
|
295 |
for i in range(optimal_n_clusters):
|
296 |
center = kmeans.cluster_centers_[i]
|
297 |
|
298 |
+
# print(f"top_words: {top_words}, type: {type(top_words)}")
|
299 |
+
# print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
300 |
+
|
301 |
+
console_messages.append(f"top_words: {top_words}, type: {type(top_words)}",
|
302 |
+
f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}"
|
303 |
+
)
|
304 |
|
305 |
|
306 |
# top_word_indices = center.argsort()[-top_words:][::-1]
|
|
|
313 |
df["Problem_Cluster"] = cluster_labels
|
314 |
df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
315 |
|
316 |
+
console_messages.append("Returning from Problem Domain Extraction function.")
|
317 |
return df, optimal_n_clusters
|
318 |
|
319 |
|
|
|
346 |
|
347 |
# Domain Clustering
|
348 |
try:
|
349 |
+
domain_df, optimal_n_clusters = extract_problem_domains(processed_df, console_messages)
|
350 |
# print(f"Optimal clusters: {optimal_clusters}")
|
351 |
# print(result_df.head())
|
352 |
# console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
|