dna-casestudy / models /BERTopic /ctfidf_config.json
davidna22's picture
Upload folder using huggingface_hub
dad00c5 verified
raw
history blame
9.39 kB
{
"ctfidf_model": {
"bm25_weighting": false,
"reduce_frequent_words": false
},
"vectorizer_model": {
"params": {
"analyzer": "word",
"binary": false,
"decode_error": "strict",
"encoding": "utf-8",
"input": "content",
"lowercase": true,
"max_df": 1.0,
"max_features": null,
"min_df": 2,
"ngram_range": [
1,
2
],
"stop_words": "english",
"strip_accents": null,
"token_pattern": "(?u)\\b\\w\\w+\\b",
"vocabulary": null
},
"vocab": {
"request": 285,
"large": 197,
"collection": 63,
"posted": 261,
"parking": 245,
"sign": 312,
"violation": 365,
"working": 376,
"contrary": 83,
"stop": 328,
"work": 374,
"order": 241,
"dirty": 110,
"sidewalk": 311,
"access": 3,
"receipt": 273,
"site": 316,
"street": 331,
"condition": 76,
"ice": 169,
"non": 232,
"missed": 224,
"area": 20,
"license": 203,
"engine": 126,
"idling": 170,
"cond": 75,
"lead": 200,
"residential": 290,
"sewer": 309,
"use": 358,
"comments": 66,
"water": 370,
"meter": 221,
"broken": 34,
"leaking": 202,
"private": 263,
"residence": 288,
"refund": 277,
"return": 295,
"permit": 249,
"improper": 175,
"certificate": 50,
"occupancy": 236,
"illegal": 171,
"plumbing": 256,
"pedestrian": 248,
"signal": 313,
"defective": 101,
"inadequate": 179,
"heat": 159,
"new": 230,
"bus": 36,
"placement": 254,
"repair": 284,
"building": 35,
"damaged": 93,
"cracked": 88,
"bicycle": 28,
"flooding": 140,
"overnight": 242,
"commercial": 67,
"storage": 329,
"surveillance": 337,
"waste": 369,
"blocked": 30,
"construction": 79,
"school": 306,
"property": 270,
"cover": 86,
"noise": 231,
"gas": 147,
"problem": 265,
"delivery": 103,
"goods": 150,
"curb": 90,
"hitting": 162,
"phone": 252,
"c1": 40,
"trees": 351,
"rent": 283,
"unauthorized": 354,
"chronic": 55,
"hanging": 157,
"accident": 4,
"cleaning": 58,
"asp": 22,
"establishment": 129,
"public": 272,
"space": 320,
"dispute": 111,
"home": 163,
"electronics": 123,
"chemical": 53,
"chained": 51,
"smoking": 319,
"car": 42,
"general": 149,
"maintenance": 214,
"asbestos": 21,
"open": 239,
"missing": 226,
"emergency": 124,
"odor": 237,
"catch": 48,
"basin": 26,
"tax": 342,
"temporary": 345,
"failure": 134,
"debris": 98,
"falling": 136,
"danger": 95,
"air": 9,
"defect": 100,
"metal": 219,
"protruding": 271,
"information": 183,
"cut": 91,
"vacant": 360,
"lot": 211,
"resident": 289,
"pipe": 253,
"toilet": 349,
"button": 38,
"wiring": 373,
"buzzer": 39,
"vehicle": 361,
"carbon": 43,
"monoxide": 227,
"smoke": 318,
"audit": 24,
"damp": 94,
"leak": 201,
"st": 324,
"facility": 133,
"law": 198,
"cigarette": 56,
"sale": 303,
"minor": 222,
"pool": 258,
"graffiti": 151,
"speed": 322,
"scale": 305,
"hours": 165,
"safety": 301,
"equipment": 128,
"signs": 314,
"notice": 233,
"box": 31,
"weeds": 372,
"grating": 152,
"removal": 280,
"requested": 286,
"controller": 85,
"flasher": 139,
"loose": 210,
"time": 347,
"switch": 340,
"stump": 335,
"sampling": 304,
"required": 287,
"head": 158,
"card": 45,
"stuck": 333,
"commission": 68,
"lack": 195,
"litter": 207,
"comm": 65,
"bldg": 29,
"basket": 27,
"fallen": 135,
"bridge": 33,
"warning": 367,
"prohibited": 269,
"inspection": 187,
"roof": 298,
"illness": 174,
"injury": 185,
"ticket": 346,
"clear": 59,
"insects": 186,
"highway": 161,
"multiple": 229,
"devices": 107,
"animal": 13,
"lane": 196,
"control": 84,
"dirt": 108,
"clothing": 61,
"high": 160,
"pressure": 262,
"debt": 99,
"materials": 218,
"agency": 8,
"application": 17,
"station": 325,
"unguarded": 355,
"driveway": 117,
"gallons": 144,
"device": 106,
"service": 307,
"swimming": 338,
"coin": 62,
"tobacco": 348,
"taste": 341,
"filing": 138,
"technical": 343,
"issues": 192,
"rights": 297,
"miscellaneous": 223,
"color": 64,
"division": 112,
"retaining": 293,
"zoning": 378,
"lawn": 199,
"status": 326,
"enforcement": 125,
"excessive": 131,
"contractor": 82,
"dry": 118,
"complaince": 71,
"electrical": 121,
"amusement": 11,
"ride": 296,
"incident": 180,
"received": 274,
"program": 268,
"nypd": 235,
"issue": 191,
"electronic": 122,
"transfer": 350,
"eft": 120,
"address": 6,
"incorrect": 181,
"wrong": 377,
"paper": 244,
"list": 206,
"passenger": 247,
"guide": 154,
"assistance": 23,
"exemption": 132,
"sro": 323,
"truck": 352,
"driver": 115,
"city": 57,
"tunnel": 353,
"licensed": 204,
"improvement": 177,
"sticker": 327,
"animals": 14,
"company": 69,
"waterway": 371,
"abcs": 1,
"housing": 167,
"haitian": 155,
"creole": 89,
"apartment": 16,
"unit": 356,
"retail": 292,
"store": 330,
"initial": 184,
"fdny": 137,
"approved": 18,
"multi": 228,
"business": 37,
"annual": 15,
"related": 278,
"dep": 104,
"internal": 189,
"vending": 362,
"machine": 213,
"marine": 217,
"dumpster": 119,
"damage": 92,
"cable": 41,
"missed collection": 225,
"street cond": 332,
"use comments": 359,
"private residence": 264,
"improper use": 176,
"residential building": 291,
"plumbing work": 257,
"work illegal": 375,
"construction site": 80,
"cover missing": 87,
"odor sewer": 238,
"sewer catch": 310,
"catch basin": 49,
"danger falling": 96,
"metal protruding": 220,
"defective street": 102,
"carbon monoxide": 44,
"safety equipment": 302,
"permit license": 250,
"grating missing": 153,
"card stuck": 46,
"stuck meter": 334,
"warning signal": 368,
"clear water": 60,
"dirt litter": 109,
"litter debris": 208,
"open unguarded": 240,
"swimming pool": 339,
"amusement ride": 12,
"address incorrect": 7,
"incorrect status": 182,
"driver license": 116,
"home improvement": 164,
"improvement contractor": 178,
"company license": 70,
"abcs housing": 2,
"haitian creole": 156,
"dep internal": 105,
"internal use": 190,
"vending machine": 363,
"unknown": 357,
"line": 205,
"knocked": 193,
"post": 260,
"wall": 366,
"excavation": 130,
"support": 336,
"foreign": 142,
"dead": 97,
"contact": 81,
"installation": 188,
"break": 32,
"house": 166,
"change": 52,
"management": 215,
"conditioning": 77,
"condo": 78,
"foundation": 143,
"referral": 275,
"route": 299,
"concrete": 74,
"panel": 243,
"complaint": 72,
"basement": 25,
"garage": 145,
"sink": 315,
"reflected": 276,
"chinese": 54,
"spanish": 321,
"arabic": 19,
"hqs": 168,
"english": 127,
"russian": 300,
"portable": 259,
"korean": 194,
"10": 0,
"television": 344,
"retaining wall": 294,
"parking lot": 246,
"air conditioning": 10,
"location": 209,
"manufacturing": 216,
"care": 47,
"activity": 5,
"low": 212,
"food": 141,
"number": 234,
"remove": 281,
"pet": 251,
"compressed": 73,
"illegal use": 173,
"illegal improper": 172,
"sewage": 308,
"drinking": 113,
"garbage": 146,
"small": 317,
"removing": 282,
"plants": 255,
"problem use": 266,
"drinking water": 114,
"gas sewer": 148,
"ventilation": 364,
"problems": 267,
"related problems": 279
}
}
}