ivanleomk commited on
Commit
cc4eb16
·
verified ·
1 Parent(s): a3e7bd3

Add BERTopic model

Browse files
Files changed (3) hide show
  1. config.json +1 -1
  2. topic_embeddings.safetensors +2 -2
  3. topics.json +247 -225
config.json CHANGED
@@ -7,7 +7,7 @@
7
  1,
8
  1
9
  ],
10
- "nr_topics": null,
11
  "seed_topic_list": null,
12
  "top_n_words": 10,
13
  "verbose": false,
 
7
  1,
8
  1
9
  ],
10
+ "nr_topics": "auto",
11
  "seed_topic_list": null,
12
  "top_n_words": 10,
13
  "verbose": false,
topic_embeddings.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6aa00888f04bf6b447c3be75f44c72ec99ded44a09c032fa9a38bd9b21eefdd
3
- size 7768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00b4eefa8073a699d5494b43dfc3ad289e11c9a073f3a3b24f9909e37148c821
3
+ size 9304
topics.json CHANGED
@@ -2,283 +2,371 @@
2
  "topic_representations": {
3
  "-1": [
4
  [
5
- "my",
6
- 0.0925146102747601
7
  ],
8
  [
9
  "to",
10
- 0.07215612754651507
11
  ],
12
  [
13
- "account",
14
- 0.06266361737679865
15
  ],
16
  [
17
- "email",
18
- 0.060877514443525814
19
  ],
20
  [
21
- "and",
22
- 0.056720612969736364
23
  ],
24
  [
25
- "im",
26
- 0.05091418911864891
27
  ],
28
  [
29
- "address",
30
- 0.04671754187618322
31
  ],
32
  [
33
- "update",
34
- 0.04293915885838518
35
  ],
36
  [
37
- "it",
38
- 0.04240601463471914
39
  ],
40
  [
41
- "on",
42
- 0.042249833419957056
43
  ]
44
  ],
45
  "0": [
46
  [
47
- "refund",
48
- 0.09780048031812771
49
  ],
50
  [
51
- "nike",
52
- 0.0771655880061528
53
  ],
54
  [
55
  "my",
56
- 0.07090100669378316
57
  ],
58
  [
59
- "for",
60
- 0.0702291981611251
61
  ],
62
  [
63
- "store",
64
- 0.06557694231173772
65
  ],
66
  [
67
- "returned",
68
- 0.06445310160942126
69
  ],
70
  [
71
- "to",
72
- 0.0569793641174838
73
  ],
74
  [
75
- "credit",
76
- 0.05509217281156294
77
  ],
78
  [
79
- "week",
80
- 0.05184008907974587
81
  ],
82
  [
83
- "but",
84
- 0.04735366909886878
85
  ]
86
  ],
87
  "1": [
88
  [
89
- "my",
90
- 0.08460328579766645
91
  ],
92
  [
93
- "the",
94
- 0.07094299263625117
95
  ],
96
  [
97
- "klarna",
98
- 0.06458022575274856
99
  ],
100
  [
101
- "payment",
102
- 0.062198495965973315
103
  ],
104
  [
105
  "for",
106
- 0.0600619168728834
107
  ],
108
  [
109
- "to",
110
- 0.05790585784297135
111
  ],
112
  [
113
- "can",
114
- 0.05343104679150231
115
  ],
116
  [
117
- "balance",
118
- 0.048286831003222515
119
  ],
120
  [
121
- "it",
122
- 0.048210808696525
123
  ],
124
  [
125
- "pay",
126
- 0.046543648095764396
127
  ]
128
  ],
129
  "2": [
130
  [
131
- "to",
132
- 0.07988684401413271
133
  ],
134
  [
135
- "email",
136
- 0.07818383125857495
137
  ],
138
  [
139
- "the",
140
- 0.0743833718778085
141
  ],
142
  [
143
- "log",
144
- 0.07325712128381653
145
  ],
146
  [
147
- "my",
148
- 0.06631520948060893
149
  ],
150
  [
151
- "code",
152
- 0.061557366382747544
153
  ],
154
  [
155
- "verification",
156
- 0.061557366382747544
157
  ],
158
  [
159
- "klarna",
160
- 0.06124442070643416
161
  ],
162
  [
163
- "account",
164
- 0.060358266904742525
165
  ],
166
  [
167
- "cant",
168
- 0.0582455608885341
169
  ]
170
  ],
171
  "3": [
172
  [
173
- "card",
174
- 0.19762271128630962
175
  ],
176
  [
177
- "klarna",
178
- 0.13679343967547672
179
  ],
180
  [
181
  "it",
182
- 0.10812688990525995
183
  ],
184
  [
185
- "to",
186
- 0.09897056977942115
187
  ],
188
  [
189
- "need",
190
- 0.08731103928150745
191
  ],
192
  [
193
- "details",
194
- 0.0872664512504428
195
  ],
196
  [
197
- "call",
198
- 0.0842952806456567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  ],
200
  [
201
- "do",
202
- 0.08388436429722848
203
  ],
204
  [
205
  "what",
206
- 0.08147993997513413
207
  ],
208
  [
209
- "my",
210
- 0.0789970025286935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  ]
212
  ]
213
  },
214
  "topics": [
 
 
 
215
  2,
216
  -1,
217
- -1,
218
  1,
219
  -1,
220
- 1,
 
 
221
  0,
 
 
 
 
 
 
222
  0,
223
  -1,
 
 
 
 
224
  1,
 
 
225
  1,
226
- 2,
227
- 2,
228
  0,
229
  1,
230
- -1,
231
  1,
232
  -1,
233
- 2,
 
 
 
 
 
234
  0,
235
- 2,
 
 
236
  1,
237
- 0,
 
 
 
238
  1,
 
239
  -1,
240
  1,
241
  -1,
 
 
242
  1,
243
- 0,
244
- 0,
245
- 0,
246
  2,
247
  1,
248
- 1,
249
- 1,
250
  0,
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  0,
 
 
 
 
 
 
 
 
252
  0,
253
- 1,
254
  2,
255
  -1,
 
 
256
  3,
257
- 0,
258
- 0,
259
- 0,
260
  -1,
 
261
  -1,
262
- 1,
263
- 2,
264
- 0,
265
  3,
266
- 1,
 
267
  3,
268
  -1,
269
  -1,
270
- 0,
271
- 0,
272
- 0,
273
- 0,
274
- 0,
275
  -1,
276
- 0,
277
- 0,
278
- 0,
279
- 0,
280
  -1,
281
  1,
 
 
 
 
 
282
  0,
283
  0,
284
  0,
@@ -288,100 +376,40 @@
288
  0,
289
  0,
290
  0,
 
291
  0,
 
 
 
292
  0,
293
  0,
294
  0,
295
  0,
296
  0,
297
  0,
298
- 0,
299
- 0,
300
- 0,
301
- 0,
302
- 0,
303
- 0,
304
- 0,
305
- 0,
306
- 0,
307
- 0,
308
- 0,
309
- 0,
310
- 0,
311
- 0,
312
- 0,
313
- 0,
314
- -1,
315
- 1,
316
  -1,
317
- 3,
318
- -1,
319
- 1,
320
  0,
321
- -1,
322
  0,
323
- 1,
324
- 2,
325
- 2,
326
- 1,
327
- 1,
328
- -1,
329
- 3,
330
- 1,
331
- 1,
332
- 3,
333
  0,
334
  -1,
335
- 1,
336
- 1,
337
- 1,
338
- 1,
339
- 3,
340
  0,
341
- 3,
342
  0,
343
- 2,
344
  0,
345
- 2,
346
  -1,
347
  0,
348
- 1,
349
- 2,
350
  0,
351
  0,
352
- -1,
353
  0,
354
- -1,
355
  0,
356
- 1,
357
  2,
358
  1,
359
- -1,
360
- -1,
361
- 0,
362
- -1,
363
- 1,
364
- 1,
365
- 3,
366
- 1,
367
- -1,
368
- 0,
369
- 1,
370
- 3,
371
- 3,
372
- 2,
373
  1,
374
  0,
375
- -1,
376
- -1,
377
- -1,
378
- 0,
379
  2,
380
- 0,
381
- 0,
382
- 0,
383
  -1,
384
  1,
 
385
  1,
386
  1,
387
  1,
@@ -389,40 +417,23 @@
389
  1,
390
  1,
391
  1,
392
- 3,
393
- 3,
394
- 1,
395
- 1,
396
  1,
397
  1,
398
  1,
399
  -1,
400
- 2,
401
- 2,
402
- -1,
403
- 2,
404
- 2,
405
- -1,
406
- 2,
407
- 2,
408
- 0,
409
- -1,
410
- -1,
411
- 3,
412
- -1,
413
- 3,
414
- 3,
415
- -1
416
  ],
417
  "topic_sizes": {
418
- "2": 22,
419
- "-1": 39,
420
- "1": 49,
421
- "0": 75,
422
- "3": 16
 
423
  },
424
  "topic_mapper": [
425
  [
 
426
  -1,
427
  -1,
428
  -1
@@ -430,30 +441,41 @@
430
  [
431
  0,
432
  0,
433
- 0
 
434
  ],
435
  [
436
  1,
437
  1,
438
- 1
 
439
  ],
440
  [
441
  2,
442
  2,
443
- 2
 
444
  ],
445
  [
446
  3,
447
  3,
448
- 3
 
 
 
 
 
 
 
449
  ]
450
  ],
451
  "topic_labels": {
452
- "-1": "-1_my_to_account_email",
453
- "0": "0_refund_nike_my_for",
454
- "1": "1_my_the_klarna_payment",
455
- "2": "2_to_email_the_log",
456
- "3": "3_card_klarna_it_to"
 
457
  },
458
  "custom_labels": null,
459
  "_outliers": 1,
 
2
  "topic_representations": {
3
  "-1": [
4
  [
5
+ "for",
6
+ 0.06950123787285631
7
  ],
8
  [
9
  "to",
10
+ 0.06358623890495364
11
  ],
12
  [
13
+ "klarna",
14
+ 0.056008605515195375
15
  ],
16
  [
17
+ "the",
18
+ 0.05526021854373463
19
  ],
20
  [
21
+ "this",
22
+ 0.047063617600156385
23
  ],
24
  [
25
+ "but",
26
+ 0.04485834467962027
27
  ],
28
  [
29
+ "my",
30
+ 0.044710255077474545
31
  ],
32
  [
33
+ "can",
34
+ 0.03997704935896341
35
  ],
36
  [
37
+ "card",
38
+ 0.03997704935896341
39
  ],
40
  [
41
+ "payment",
42
+ 0.0368765588447971
43
  ]
44
  ],
45
  "0": [
46
  [
47
+ "klarna",
48
+ 0.07428408310064095
49
  ],
50
  [
51
+ "declined",
52
+ 0.0665555736222138
53
  ],
54
  [
55
  "my",
56
+ 0.06438188783764985
57
  ],
58
  [
59
+ "in",
60
+ 0.06201746744326283
61
  ],
62
  [
63
+ "ve",
64
+ 0.05546297801851149
65
  ],
66
  [
67
+ "for",
68
+ 0.05420215921706332
69
  ],
70
  [
71
+ "was",
72
+ 0.04446664148367333
73
  ],
74
  [
75
+ "used",
76
+ 0.04444433077245707
77
  ],
78
  [
79
+ "at",
80
+ 0.04027858466271196
81
  ],
82
  [
83
+ "before",
84
+ 0.04027858466271196
85
  ]
86
  ],
87
  "1": [
88
  [
89
+ "payment",
90
+ 0.08536121708247815
91
  ],
92
  [
93
+ "to",
94
+ 0.07915650928595176
95
  ],
96
  [
97
+ "the",
98
+ 0.07130418857510153
99
  ],
100
  [
101
+ "my",
102
+ 0.06838030844100587
103
  ],
104
  [
105
  "for",
106
+ 0.05990222324342297
107
  ],
108
  [
109
+ "pay",
110
+ 0.05685945441389676
111
  ],
112
  [
113
+ "due",
114
+ 0.05297539266380401
115
  ],
116
  [
117
+ "card",
118
+ 0.05174832356094194
119
  ],
120
  [
121
+ "is",
122
+ 0.04960378880746989
123
  ],
124
  [
125
+ "bought",
126
+ 0.04678939009238022
127
  ]
128
  ],
129
  "2": [
130
  [
131
+ "my",
132
+ 0.1000157134590229
133
  ],
134
  [
135
+ "klarna",
136
+ 0.0698464820203958
137
  ],
138
  [
139
+ "details",
140
+ 0.069575448046544
141
  ],
142
  [
143
+ "and",
144
+ 0.06388479529999462
145
  ],
146
  [
147
+ "account",
148
+ 0.059278082016552026
149
  ],
150
  [
151
+ "email",
152
+ 0.054707554949117644
153
  ],
154
  [
155
+ "call",
156
+ 0.051694725032909686
157
  ],
158
  [
159
+ "to",
160
+ 0.04874838158686345
161
  ],
162
  [
163
+ "card",
164
+ 0.04768649011443565
165
  ],
166
  [
167
+ "someone",
168
+ 0.04573025310006154
169
  ]
170
  ],
171
  "3": [
172
  [
173
+ "store",
174
+ 0.07648671998618503
175
  ],
176
  [
177
+ "the",
178
+ 0.0758730667234483
179
  ],
180
  [
181
  "it",
182
+ 0.06819721096009933
183
  ],
184
  [
185
+ "my",
186
+ 0.06489569193906065
187
  ],
188
  [
189
+ "refund",
190
+ 0.06469806328347248
191
  ],
192
  [
193
+ "back",
194
+ 0.06469806328347248
195
  ],
196
  [
197
+ "ago",
198
+ 0.06033985798100565
199
+ ],
200
+ [
201
+ "days",
202
+ 0.058789069873903504
203
+ ],
204
+ [
205
+ "and",
206
+ 0.056881309470881554
207
+ ],
208
+ [
209
+ "credit",
210
+ 0.05631877029181582
211
+ ]
212
+ ],
213
+ "4": [
214
+ [
215
+ "ago",
216
+ 0.07768571396080334
217
+ ],
218
+ [
219
+ "the",
220
+ 0.07760470428230178
221
  ],
222
  [
223
+ "shoes",
224
+ 0.07362119482074926
225
  ],
226
  [
227
  "what",
228
+ 0.06773811759798386
229
  ],
230
  [
231
+ "days",
232
+ 0.06727921946803261
233
+ ],
234
+ [
235
+ "sneakers",
236
+ 0.06646629647052138
237
+ ],
238
+ [
239
+ "ordered",
240
+ 0.06646629647052138
241
+ ],
242
+ [
243
+ "they",
244
+ 0.06244569194233741
245
+ ],
246
+ [
247
+ "and",
248
+ 0.06196633297867347
249
+ ],
250
+ [
251
+ "adidas",
252
+ 0.0557160739140403
253
  ]
254
  ]
255
  },
256
  "topics": [
257
+ 2,
258
+ 3,
259
+ 0,
260
  2,
261
  -1,
262
+ 3,
263
  1,
264
  -1,
265
+ -1,
266
+ -1,
267
+ -1,
268
  0,
269
+ 1,
270
+ -1,
271
+ -1,
272
+ -1,
273
+ -1,
274
+ -1,
275
  0,
276
  -1,
277
+ 3,
278
+ -1,
279
+ -1,
280
+ -1,
281
  1,
282
+ -1,
283
+ -1,
284
  1,
 
 
285
  0,
286
  1,
287
+ 1,
288
  1,
289
  -1,
290
+ -1,
291
+ -1,
292
+ 3,
293
+ 3,
294
+ 3,
295
+ 3,
296
  0,
297
+ -1,
298
+ 4,
299
+ -1,
300
  1,
301
+ 3,
302
+ -1,
303
+ -1,
304
+ 2,
305
  1,
306
+ 4,
307
  -1,
308
  1,
309
  -1,
310
+ -1,
311
+ 3,
312
  1,
313
+ 2,
314
+ -1,
 
315
  2,
316
  1,
317
+ 4,
318
+ 2,
319
  0,
320
+ -1,
321
+ 4,
322
+ 2,
323
+ -1,
324
+ -1,
325
+ 2,
326
+ 2,
327
+ 2,
328
+ 2,
329
+ 2,
330
+ 2,
331
+ 2,
332
+ 2,
333
  0,
334
+ 2,
335
+ 2,
336
+ 2,
337
+ 4,
338
+ 2,
339
+ 2,
340
+ 2,
341
+ 2,
342
  0,
343
+ 2,
344
  2,
345
  -1,
346
+ -1,
347
+ -1,
348
  3,
349
+ 4,
 
 
350
  -1,
351
+ 4,
352
  -1,
 
 
 
353
  3,
354
+ 3,
355
+ 4,
356
  3,
357
  -1,
358
  -1,
359
+ 3,
 
 
 
 
360
  -1,
361
+ 4,
362
+ 4,
 
 
363
  -1,
364
  1,
365
+ 1,
366
+ 1,
367
+ 1,
368
+ 3,
369
+ 1,
370
  0,
371
  0,
372
  0,
 
376
  0,
377
  0,
378
  0,
379
+ -1,
380
  0,
381
+ -1,
382
+ 4,
383
+ 4,
384
  0,
385
  0,
386
  0,
387
  0,
388
  0,
389
  0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  -1,
 
 
 
391
  0,
 
392
  0,
393
+ 4,
 
 
 
 
 
 
 
 
 
394
  0,
395
  -1,
 
 
 
 
 
396
  0,
 
397
  0,
 
398
  0,
 
399
  -1,
400
  0,
 
 
401
  0,
402
  0,
 
403
  0,
 
404
  0,
 
405
  2,
406
  1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  1,
408
  0,
 
 
 
 
409
  2,
 
 
 
410
  -1,
411
  1,
412
+ -1,
413
  1,
414
  1,
415
  1,
 
417
  1,
418
  1,
419
  1,
 
 
 
 
420
  1,
421
  1,
422
  1,
423
  -1,
424
+ 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  ],
426
  "topic_sizes": {
427
+ "2": 26,
428
+ "3": 15,
429
+ "0": 36,
430
+ "-1": 47,
431
+ "1": 31,
432
+ "4": 13
433
  },
434
  "topic_mapper": [
435
  [
436
+ -1,
437
  -1,
438
  -1,
439
  -1
 
441
  [
442
  0,
443
  0,
444
+ 0,
445
+ 2
446
  ],
447
  [
448
  1,
449
  1,
450
+ 1,
451
+ 3
452
  ],
453
  [
454
  2,
455
  2,
456
+ 2,
457
+ 1
458
  ],
459
  [
460
  3,
461
  3,
462
+ 3,
463
+ 4
464
+ ],
465
+ [
466
+ 4,
467
+ 4,
468
+ 4,
469
+ 0
470
  ]
471
  ],
472
  "topic_labels": {
473
+ "-1": "-1_for_to_klarna_the",
474
+ "0": "0_klarna_declined_my_in",
475
+ "1": "1_payment_to_the_my",
476
+ "2": "2_my_klarna_details_and",
477
+ "3": "3_store_the_it_my",
478
+ "4": "4_ago_the_shoes_what"
479
  },
480
  "custom_labels": null,
481
  "_outliers": 1,