quyip committed on
Commit
8025845
·
1 Parent(s): 1ec7483
models/test2tags/README.md DELETED
@@ -1,3 +0,0 @@
1
- ---
2
- license: unlicense
3
- ---
 
 
 
 
models/test2tags/config.json DELETED
@@ -1,61 +0,0 @@
1
- {
2
- "_name_or_path": "google-t5/t5-small",
3
- "architectures": [
4
- "T5ForConditionalGeneration"
5
- ],
6
- "classifier_dropout": 0.0,
7
- "d_ff": 2048,
8
- "d_kv": 64,
9
- "d_model": 512,
10
- "decoder_start_token_id": 0,
11
- "dense_act_fn": "relu",
12
- "dropout_rate": 0.1,
13
- "eos_token_id": 1,
14
- "feed_forward_proj": "relu",
15
- "initializer_factor": 1.0,
16
- "is_encoder_decoder": true,
17
- "is_gated_act": false,
18
- "layer_norm_epsilon": 1e-06,
19
- "model_type": "t5",
20
- "n_positions": 512,
21
- "num_decoder_layers": 6,
22
- "num_heads": 8,
23
- "num_layers": 6,
24
- "output_past": true,
25
- "pad_token_id": 0,
26
- "relative_attention_max_distance": 128,
27
- "relative_attention_num_buckets": 32,
28
- "task_specific_params": {
29
- "summarization": {
30
- "early_stopping": true,
31
- "length_penalty": 2.0,
32
- "max_length": 200,
33
- "min_length": 30,
34
- "no_repeat_ngram_size": 3,
35
- "num_beams": 4,
36
- "prefix": "summarize: "
37
- },
38
- "translation_en_to_de": {
39
- "early_stopping": true,
40
- "max_length": 300,
41
- "num_beams": 4,
42
- "prefix": "translate English to German: "
43
- },
44
- "translation_en_to_fr": {
45
- "early_stopping": true,
46
- "max_length": 300,
47
- "num_beams": 4,
48
- "prefix": "translate English to French: "
49
- },
50
- "translation_en_to_ro": {
51
- "early_stopping": true,
52
- "max_length": 300,
53
- "num_beams": 4,
54
- "prefix": "translate English to Romanian: "
55
- }
56
- },
57
- "torch_dtype": "float32",
58
- "transformers_version": "4.41.1",
59
- "use_cache": true,
60
- "vocab_size": 32128
61
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/test2tags/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "decoder_start_token_id": 0,
4
- "eos_token_id": 1,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.41.1"
7
- }
 
 
 
 
 
 
 
 
models/test2tags/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2491d3581a1d2646e28ef810ae61754e6eab5da0588486cec78edbb3b3baff48
3
- size 242041896
 
 
 
 
models/test2tags/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
1
- {
2
- "eos_token": {
3
- "content": "</s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "pad_token": {
10
- "content": "<pad>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "unk_token": {
17
- "content": "<unk>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- }
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/test2tags/spiece.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
 
 
 
models/test2tags/tokenizer_config.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<pad>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "</s>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "<unk>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "clean_up_tokenization_spaces": true,
30
- "eos_token": "</s>",
31
- "extra_ids": 100,
32
- "legacy": true,
33
- "model_max_length": 512,
34
- "pad_token": "<pad>",
35
- "sp_model_kwargs": {},
36
- "tokenizer_class": "T5Tokenizer",
37
- "unk_token": "<unk>"
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/summary_utils.py CHANGED
@@ -4,7 +4,7 @@ from transformers import pipeline
4
  AiSummaryVersion = 1
5
  summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum", max_length=512, min_length=50)
6
  en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
7
- text_to_tags_pipe = pipeline('text2text-generation', model='youfengy/t5-small-test-tags')
8
 
9
 
10
  def summarize(id: str, text: str):
 
4
  AiSummaryVersion = 1
5
  summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum", max_length=512, min_length=50)
6
  en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
7
+ text_to_tags_pipe = pipeline('text2text-generation', model='PageOrg/t5-small-tagging-text')
8
 
9
 
10
  def summarize(id: str, text: str):