SantanuBanerjee committed on
Commit
1ef648a
·
verified ·
1 Parent(s): a2fcce4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
 
@@ -229,11 +237,11 @@ import numpy as np
229
 
230
 
231
  def extract_problem_domains(df,
232
- text_column='Problem_Description',
233
  cluster_range=(10, 50),
234
  top_words=17,
235
- # method='sentence_transformers'
236
- method='tfidf_kmeans'
237
  ):
238
 
239
 
@@ -261,7 +269,7 @@ def extract_problem_domains(df,
261
 
262
  elif method == 'tfidf_kmeans':
263
  # TF-IDF Vectorization and K-Means approach
264
- vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
265
  X = vectorizer.fit_transform(df[text_column])
266
 
267
  # Perform K-Means clustering with Silhouette Analysis
 
1
+ import csv
2
+ import sys
3
+
4
+ # Increase CSV field size limit
5
+ csv.field_size_limit(sys.maxsize)
6
+
7
+
8
+
9
  import gradio as gr
10
  import pandas as pd
11
 
 
237
 
238
 
239
  def extract_problem_domains(df,
240
+ text_column='Processed_ProblemDescription_forDomainExtraction',
241
  cluster_range=(10, 50),
242
  top_words=17,
243
+ method='sentence_transformers'
244
+ # method='tfidf_kmeans'
245
  ):
246
 
247
 
 
269
 
270
  elif method == 'tfidf_kmeans':
271
  # TF-IDF Vectorization and K-Means approach
272
+ vectorizer = TfidfVectorizer(stop_words='english', max_features=3000)
273
  X = vectorizer.fit_transform(df[text_column])
274
 
275
  # Perform K-Means clustering with Silhouette Analysis