Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
|
@@ -229,11 +237,11 @@ import numpy as np
|
|
229 |
|
230 |
|
231 |
def extract_problem_domains(df,
|
232 |
-
text_column='
|
233 |
cluster_range=(10, 50),
|
234 |
top_words=17,
|
235 |
-
|
236 |
-
method='tfidf_kmeans'
|
237 |
):
|
238 |
|
239 |
|
@@ -261,7 +269,7 @@ def extract_problem_domains(df,
|
|
261 |
|
262 |
elif method == 'tfidf_kmeans':
|
263 |
# TF-IDF Vectorization and K-Means approach
|
264 |
-
vectorizer = TfidfVectorizer(stop_words='english', max_features=
|
265 |
X = vectorizer.fit_transform(df[text_column])
|
266 |
|
267 |
# Perform K-Means clustering with Silhouette Analysis
|
|
|
1 |
+
import csv
|
2 |
+
import sys
|
3 |
+
|
4 |
+
# Increase CSV field size limit
|
5 |
+
# The csv module keeps this limit in a C long, so passing sys.maxsize raises
# OverflowError on platforms where long is 32-bit (e.g. 64-bit Windows).
# Halve the requested value until the module accepts it, so the import of
# this file never fails and the limit is still as large as the platform allows.
_csv_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_csv_field_limit)
        break
    except OverflowError:
        _csv_field_limit //= 2
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
import gradio as gr
|
10 |
import pandas as pd
|
11 |
|
|
|
237 |
|
238 |
|
239 |
def extract_problem_domains(df,
|
240 |
+
text_column='Processed_ProblemDescription_forDomainExtraction',
|
241 |
cluster_range=(10, 50),
|
242 |
top_words=17,
|
243 |
+
method='sentence_transformers'
|
244 |
+
# method='tfidf_kmeans'
|
245 |
):
|
246 |
|
247 |
|
|
|
269 |
|
270 |
elif method == 'tfidf_kmeans':
|
271 |
# TF-IDF Vectorization and K-Means approach
|
272 |
+
vectorizer = TfidfVectorizer(stop_words='english', max_features=3000)
|
273 |
X = vectorizer.fit_transform(df[text_column])
|
274 |
|
275 |
# Perform K-Means clustering with Silhouette Analysis
|