Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -241,8 +241,8 @@ import numpy as np
|
|
241 |
def extract_problem_domains(df,
|
242 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
243 |
# text_column='Problem_Description',
|
244 |
-
cluster_range=(10,
|
245 |
-
top_words=
|
246 |
method='sentence_transformers'
|
247 |
# method='tfidf_kmeans'
|
248 |
):
|
@@ -375,10 +375,13 @@ def nlp_pipeline(original_df):
|
|
375 |
|
376 |
|
377 |
# Starting the Pipeline for Domain Extraction
|
378 |
-
console_messages.append("
|
379 |
# Apply the text_processing_for_domain function to the DataFrame
|
380 |
processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
|
381 |
-
|
|
|
|
|
|
|
382 |
|
383 |
|
384 |
# Domain Clustering
|
@@ -459,7 +462,7 @@ interface = gr.Interface(
|
|
459 |
|
460 |
outputs=[
|
461 |
gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
|
462 |
-
gr.Textbox(label="Console Messages", lines=
|
463 |
],
|
464 |
|
465 |
|
|
|
241 |
def extract_problem_domains(df,
|
242 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
243 |
# text_column='Problem_Description',
|
244 |
+
cluster_range=(10, 25),
|
245 |
+
top_words=17,
|
246 |
method='sentence_transformers'
|
247 |
# method='tfidf_kmeans'
|
248 |
):
|
|
|
375 |
|
376 |
|
377 |
# Starting the Pipeline for Domain Extraction
|
378 |
+
console_messages.append("Executing Text processing function for Domain identification")
|
379 |
# Apply the text_processing_for_domain function to the DataFrame
|
380 |
processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
|
381 |
+
|
382 |
+
console_messages.append("Removing entries which could not be allocated to any Problem Domain")
|
383 |
+
processed_df = processed_df.dropna(subset=['Processed_ProblemDescription_forDomainExtraction'], axis=0)
|
384 |
+
|
385 |
|
386 |
|
387 |
# Domain Clustering
|
|
|
462 |
|
463 |
outputs=[
|
464 |
gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
|
465 |
+
gr.Textbox(label="Console Messages", lines=25, interactive=False) # Console messages output
|
466 |
],
|
467 |
|
468 |
|