Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -229,8 +229,8 @@ def extract_problem_domains(df,
|
|
229 |
text_column='Problem_Description',
|
230 |
cluster_range=(10, 50),
|
231 |
top_words=17,
|
232 |
-
|
233 |
-
method='tfidf_kmeans'
|
234 |
):
|
235 |
|
236 |
|
@@ -339,8 +339,8 @@ def nlp_pipeline(original_df, console_messages):
|
|
339 |
# Domain Clustering
|
340 |
try:
|
341 |
domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
342 |
-
print(f"Optimal clusters: {optimal_clusters}")
|
343 |
-
print(result_df.head())
|
344 |
# console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
|
345 |
|
346 |
console_messages.append("NLP pipeline completed.")
|
@@ -348,8 +348,8 @@ def nlp_pipeline(original_df, console_messages):
|
|
348 |
except Exception as e:
|
349 |
# print(f"Error in extract_problem_domains: {e}")
|
350 |
console_messages.append(f"Error in extract_problem_domains: {str(e)}")
|
351 |
-
|
352 |
-
return domain_df, console_messages
|
353 |
|
354 |
|
355 |
# problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
|
|
|
229 |
text_column='Problem_Description',
|
230 |
cluster_range=(10, 50),
|
231 |
top_words=17,
|
232 |
+
method='sentence_transformers'
|
233 |
+
# method='tfidf_kmeans'
|
234 |
):
|
235 |
|
236 |
|
|
|
339 |
# Domain Clustering
|
340 |
try:
|
341 |
domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
342 |
+
# print(f"Optimal clusters: {optimal_clusters}")
|
343 |
+
# print(result_df.head())
|
344 |
# console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
|
345 |
|
346 |
console_messages.append("NLP pipeline completed.")
|
|
|
348 |
except Exception as e:
|
349 |
# print(f"Error in extract_problem_domains: {e}")
|
350 |
console_messages.append(f"Error in extract_problem_domains: {str(e)}")
|
351 |
+
return processed_df, console_messages
|
352 |
+
# return domain_df, console_messages
|
353 |
|
354 |
|
355 |
# problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
|