separate function for cleaning
Browse files
app.py
CHANGED
@@ -185,9 +185,7 @@ def get_topic_value(row, i):
|
|
185 |
except Exception as e:
|
186 |
print(e)
|
187 |
|
188 |
-
def
|
189 |
-
|
190 |
-
print('cleaning')
|
191 |
df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
|
192 |
|
193 |
# Apply the function above and get tweets free of emojis
|
@@ -246,6 +244,13 @@ def full_lda(df):
|
|
246 |
# Apply tokenizer
|
247 |
df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
|
248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
print('base model setup')
|
250 |
# Create an id2word dictionary
|
251 |
global id2word
|
@@ -532,11 +537,13 @@ def main(dataset, model, progress=gr.Progress(track_tqdm=True)):
|
|
532 |
print(df)
|
533 |
|
534 |
if model == 'LDA':
|
|
|
535 |
print('doing lda')
|
536 |
top_tweets = full_lda(df)
|
537 |
print('done lda')
|
538 |
place_data = 'test'
|
539 |
else:
|
|
|
540 |
base_bertopic(df)
|
541 |
top_tweets = optimized_bertopic()
|
542 |
|
|
|
185 |
except Exception as e:
|
186 |
print(e)
|
187 |
|
188 |
+
def cleaning(df):
|
|
|
|
|
189 |
df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
|
190 |
|
191 |
# Apply the function above and get tweets free of emojis
|
|
|
244 |
# Apply tokenizer
|
245 |
df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
|
246 |
|
247 |
+
return df
|
248 |
+
|
249 |
+
def full_lda(df):
|
250 |
+
|
251 |
+
print('cleaning')
|
252 |
+
|
253 |
+
|
254 |
print('base model setup')
|
255 |
# Create an id2word dictionary
|
256 |
global id2word
|
|
|
537 |
print(df)
|
538 |
|
539 |
if model == 'LDA':
|
540 |
+
df = cleaning(df)
|
541 |
print('doing lda')
|
542 |
top_tweets = full_lda(df)
|
543 |
print('done lda')
|
544 |
place_data = 'test'
|
545 |
else:
|
546 |
+
df = cleaning(df)
|
547 |
base_bertopic(df)
|
548 |
top_tweets = optimized_bertopic()
|
549 |
|