SantanuBanerjee committed on
Commit
cb90440
·
verified ·
1 Parent(s): 8c34617

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -241,8 +241,8 @@ import numpy as np
241
  def extract_problem_domains(df,
242
  text_column='Processed_ProblemDescription_forDomainExtraction',
243
  # text_column='Problem_Description',
244
- cluster_range=(10, 50),
245
- top_words=30,
246
  method='sentence_transformers'
247
  # method='tfidf_kmeans'
248
  ):
@@ -375,10 +375,13 @@ def nlp_pipeline(original_df):
375
 
376
 
377
  # Starting the Pipeline for Domain Extraction
378
- console_messages.append("Entering Text processing function for Domain identification")
379
  # Apply the text_processing_for_domain function to the DataFrame
380
  processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
381
- console_messages.append("Exiting Text processing function for Domain identification")
 
 
 
382
 
383
 
384
  # Domain Clustering
@@ -459,7 +462,7 @@ interface = gr.Interface(
459
 
460
  outputs=[
461
  gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
462
- gr.Textbox(label="Console Messages", lines=100, interactive=False) # Console messages output
463
  ],
464
 
465
 
 
241
  def extract_problem_domains(df,
242
  text_column='Processed_ProblemDescription_forDomainExtraction',
243
  # text_column='Problem_Description',
244
+ cluster_range=(10, 25),
245
+ top_words=17,
246
  method='sentence_transformers'
247
  # method='tfidf_kmeans'
248
  ):
 
375
 
376
 
377
  # Starting the Pipeline for Domain Extraction
378
+ console_messages.append("Executing Text processing function for Domain identification")
379
  # Apply the text_processing_for_domain function to the DataFrame
380
  processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
381
+
382
+ console_messages.append("Removing entries which could not be allocated to any Problem Domain")
383
+ processed_df = processed_df.dropna(subset=['Processed_ProblemDescription_forDomainExtraction'], axis=0)
384
+
385
 
386
 
387
  # Domain Clustering
 
462
 
463
  outputs=[
464
  gr.File(label="Download the processed Excel File containing the ** Project Proposals ** for each Location~Problem paired combination"), # File download output
465
+ gr.Textbox(label="Console Messages", lines=25, interactive=False) # Console messages output
466
  ],
467
 
468