jayasuriyaK commited on
Commit
c2429ef
·
verified ·
1 Parent(s): 307a738

Upload 9 files

Browse files
NSFW_text_classifier/.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
NSFW_text_classifier/README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: "en"
3
+ tags:
4
+ - distilroberta
5
+ - sentiment
6
+ - NSFW
7
+ - inappropriate
8
+ - spam
9
+ - twitter
10
+ - reddit
11
+
12
+ widget:
13
+ - text: "I like you. You remind me of me when I was young and stupid."
14
+ - text: "I see you’ve set aside this special time to humiliate yourself in public."
15
+ - text: "Have a great weekend! See you next week!"
16
+
17
+ ---
18
+
19
+ # Fine-tuned DistilBERT (distilbert-base-uncased) for NSFW Classification
20
+
21
+ # Model Description
22
+
23
+ DistilBERT is a transformer model that can be fine-tuned for text classification tasks such as sentiment analysis. I fine-tuned the model on Reddit posts to classify not-safe-for-work (NSFW) content, specifically text that is considered inappropriate and unprofessional. The model predicts two classes: NSFW or safe for work (SFW).
24
+
25
+ The model is a fine-tuned version of [DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert).
26
+
27
+ It was fine-tuned on 14317 Reddit posts pulled from the [Reddit API](https://praw.readthedocs.io/en/stable/).
28
+
29
+ # How to Use
30
+
31
+ ```python
32
+ from transformers import pipeline
33
+ classifier = pipeline("sentiment-analysis", model="michellejieli/NSFW_text_classifier")
34
+ classifier("I see you’ve set aside this special time to humiliate yourself in public.")
35
+ ```
36
+
37
+ ```text
38
+ Output:
39
+ [{'label': 'NSFW', 'score': 0.998853325843811}]
40
+ ```
41
+
42
+ # Contact
43
+
44
+ Please reach out to [[email protected]](mailto:[email protected]) if you have any questions or feedback.
45
+
46
+ ---
NSFW_text_classifier/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "michellejieli/NSFW_text_classifier",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "SFW",
13
+ "1": "NSFW"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "label2id": {
17
+ "NSFW": 1,
18
+ "SFW": 0
19
+ },
20
+ "max_position_embeddings": 512,
21
+ "model_type": "distilbert",
22
+ "n_heads": 12,
23
+ "n_layers": 6,
24
+ "pad_token_id": 0,
25
+ "problem_type": "single_label_classification",
26
+ "qa_dropout": 0.1,
27
+ "seq_classif_dropout": 0.2,
28
+ "sinusoidal_pos_embds": false,
29
+ "tie_weights_": true,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.22.2",
32
+ "vocab_size": 30522
33
+ }
NSFW_text_classifier/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d26f8d631ca24eac50200b5b4e06fe970d5228f395f323cd9265b550a8621a
3
+ size 267855533
NSFW_text_classifier/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
NSFW_text_classifier/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
NSFW_text_classifier/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "distilbert-base-uncased",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "DistilBertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
NSFW_text_classifier/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcfe2407ff98dc74e55be325b81829653d7c461236aaae608148bbfc14ec5118
3
+ size 3323
NSFW_text_classifier/vocab.txt ADDED
The diff for this file is too large to render. See raw diff