Upload folder using huggingface_hub
Browse files- .gitattributes +3 -9
- README.md +17 -0
- alphabet.json +1 -0
- config.json +116 -0
- language_model/2gram_It_Ref.arpa +510 -0
- language_model/2gram_It_Ref.bin +3 -0
- language_model/attrs.json +1 -0
- language_model/unigrams.txt +241 -0
- preprocessor_config.json +10 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +6 -0
- tokenizer_config.json +48 -0
- transcriptions_cv7_test.json +3 -0
- transcriptions_cv7_validation.json +3 -0
- vocab.json +48 -0
.gitattributes
CHANGED
@@ -2,34 +2,28 @@
|
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
|
|
5 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
6 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
8 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
9 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
|
10 |
*.model filter=lfs diff=lfs merge=lfs -text
|
11 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
12 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
13 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
14 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
15 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
16 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
17 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
18 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
|
|
19 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
20 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
|
21 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
22 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
23 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
24 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
transcriptions_cv7_test.json filter=lfs diff=lfs merge=lfs -text
|
29 |
+
transcriptions_cv7_validation.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- it
|
4 |
+
license: apache-2.0
|
5 |
+
tags:
|
6 |
+
- automatic-speech-recognition
|
7 |
+
- it
|
8 |
+
datasets:
|
9 |
+
- mozilla-foundation/common_voice_7_0
|
10 |
+
---
|
11 |
+
# exp_w2v2t_it_vp-100k_s449
|
12 |
+
|
13 |
+
Fine-tuned [facebook/wav2vec2-large-100k-voxpopuli](https://huggingface.co/facebook/wav2vec2-large-100k-voxpopuli) for speech recognition using the train split of [Common Voice 7.0 (it)](https://huggingface.co/datasets/mozilla-foundation/common_voice_7_0).
|
14 |
+
When using this model, make sure that your speech input is sampled at 16 kHz.
|
15 |
+
|
16 |
+
This model was fine-tuned using the [HuggingSound](https://github.com/jonatasgrosman/huggingsound) tool.
|
17 |
+
|
alphabet.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"labels": ["", "<s>", "</s>", "\u2047", " ", "'", "-", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\u00e0", "\u00e1", "\u00e8", "\u00e9", "\u00ec", "\u00ed", "\u00f2", "\u00f3", "\u00f9", "\u00fa", "\u010d", "\u014d", "\u0161"], "is_bpe": false}
|
config.json
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "facebook/wav2vec2-large-100k-voxpopuli",
|
3 |
+
"activation_dropout": 0.05,
|
4 |
+
"adapter_kernel_size": 3,
|
5 |
+
"adapter_stride": 2,
|
6 |
+
"add_adapter": false,
|
7 |
+
"apply_spec_augment": true,
|
8 |
+
"architectures": [
|
9 |
+
"Wav2Vec2ForCTC"
|
10 |
+
],
|
11 |
+
"attention_dropout": 0.05,
|
12 |
+
"bos_token_id": 1,
|
13 |
+
"classifier_proj_size": 256,
|
14 |
+
"codevector_dim": 768,
|
15 |
+
"contrastive_logits_temperature": 0.1,
|
16 |
+
"conv_bias": true,
|
17 |
+
"conv_dim": [
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512,
|
21 |
+
512,
|
22 |
+
512,
|
23 |
+
512,
|
24 |
+
512
|
25 |
+
],
|
26 |
+
"conv_kernel": [
|
27 |
+
10,
|
28 |
+
3,
|
29 |
+
3,
|
30 |
+
3,
|
31 |
+
3,
|
32 |
+
2,
|
33 |
+
2
|
34 |
+
],
|
35 |
+
"conv_stride": [
|
36 |
+
5,
|
37 |
+
2,
|
38 |
+
2,
|
39 |
+
2,
|
40 |
+
2,
|
41 |
+
2,
|
42 |
+
2
|
43 |
+
],
|
44 |
+
"ctc_loss_reduction": "sum",
|
45 |
+
"ctc_zero_infinity": false,
|
46 |
+
"diversity_loss_weight": 0.1,
|
47 |
+
"do_stable_layer_norm": true,
|
48 |
+
"eos_token_id": 2,
|
49 |
+
"feat_extract_activation": "gelu",
|
50 |
+
"feat_extract_dropout": 0.0,
|
51 |
+
"feat_extract_norm": "layer",
|
52 |
+
"feat_proj_dropout": 0.05,
|
53 |
+
"feat_quantizer_dropout": 0.0,
|
54 |
+
"final_dropout": 0.05,
|
55 |
+
"gradient_checkpointing": false,
|
56 |
+
"hidden_act": "gelu",
|
57 |
+
"hidden_dropout": 0.05,
|
58 |
+
"hidden_size": 1024,
|
59 |
+
"initializer_range": 0.02,
|
60 |
+
"intermediate_size": 4096,
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.05,
|
63 |
+
"mask_channel_length": 10,
|
64 |
+
"mask_channel_min_space": 1,
|
65 |
+
"mask_channel_other": 0.0,
|
66 |
+
"mask_channel_prob": 0.0,
|
67 |
+
"mask_channel_selection": "static",
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_min_space": 1,
|
74 |
+
"mask_time_other": 0.0,
|
75 |
+
"mask_time_prob": 0.05,
|
76 |
+
"mask_time_selection": "static",
|
77 |
+
"model_type": "wav2vec2",
|
78 |
+
"num_adapter_layers": 3,
|
79 |
+
"num_attention_heads": 16,
|
80 |
+
"num_codevector_groups": 2,
|
81 |
+
"num_codevectors_per_group": 320,
|
82 |
+
"num_conv_pos_embedding_groups": 16,
|
83 |
+
"num_conv_pos_embeddings": 128,
|
84 |
+
"num_feat_extract_layers": 7,
|
85 |
+
"num_hidden_layers": 24,
|
86 |
+
"num_negatives": 100,
|
87 |
+
"output_hidden_size": 1024,
|
88 |
+
"pad_token_id": 0,
|
89 |
+
"proj_codevector_dim": 768,
|
90 |
+
"tdnn_dilation": [
|
91 |
+
1,
|
92 |
+
2,
|
93 |
+
3,
|
94 |
+
1,
|
95 |
+
1
|
96 |
+
],
|
97 |
+
"tdnn_dim": [
|
98 |
+
512,
|
99 |
+
512,
|
100 |
+
512,
|
101 |
+
512,
|
102 |
+
1500
|
103 |
+
],
|
104 |
+
"tdnn_kernel": [
|
105 |
+
5,
|
106 |
+
3,
|
107 |
+
3,
|
108 |
+
1,
|
109 |
+
1
|
110 |
+
],
|
111 |
+
"torch_dtype": "float32",
|
112 |
+
"transformers_version": "4.15.0",
|
113 |
+
"use_weighted_layer_sum": false,
|
114 |
+
"vocab_size": 46,
|
115 |
+
"xvector_output_dim": 512
|
116 |
+
}
|
language_model/2gram_It_Ref.arpa
ADDED
@@ -0,0 +1,510 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
\data\
|
2 |
+
ngram 1=244
|
3 |
+
ngram 2=257
|
4 |
+
|
5 |
+
\1-grams:
|
6 |
+
-2.4316506 <unk> 0
|
7 |
+
0 <s> -0.30103
|
8 |
+
0 </s> -0.30103
|
9 |
+
0 </s> -0.30103
|
10 |
+
-2.4094923 </s> 0
|
11 |
+
-1.9548386 mela -1.5642715
|
12 |
+
-2.4094923 pomodoro -1.5642715
|
13 |
+
-2.4094923 mondo -1.5642715
|
14 |
+
-2.4094923 tentacolo -1.5642715
|
15 |
+
-2.4094923 maschera -1.5642715
|
16 |
+
-2.1308527 cartapesta -1.3222193
|
17 |
+
-2.4094923 gioco -1.5642715
|
18 |
+
-2.4094923 polipo -1.5642715
|
19 |
+
-2.4094923 cuore -1.5642715
|
20 |
+
-2.4094923 prezzemolo -1.5642715
|
21 |
+
-2.4094923 margherita -1.5642715
|
22 |
+
-2.4094923 girotondo -1.2632414
|
23 |
+
-2.1308527 rofe -1.6163005
|
24 |
+
-2.4094923 vufebu -1.6163005
|
25 |
+
-2.4094923 costro -1.6163005
|
26 |
+
-2.4094923 lada -1.6163005
|
27 |
+
-2.4094923 nurto -1.6163005
|
28 |
+
-2.4094923 frimace -1.6163005
|
29 |
+
-2.4094923 canfrosto -1.6163005
|
30 |
+
-2.4094923 presemma -1.6163005
|
31 |
+
-2.4094923 oblimione -1.6163005
|
32 |
+
-2.4094923 ceglirate -1.6163005
|
33 |
+
-2.4094923 banzione -1.6163005
|
34 |
+
-2.4094923 alcelisto -1.2483237
|
35 |
+
-1.7240447 vaso -0.8239087
|
36 |
+
-2.4094923 pace -0.8239087
|
37 |
+
-2.4094923 camera -0.8239087
|
38 |
+
-2.4094923 genitore -0.8239087
|
39 |
+
-2.4094923 mese -0.8239087
|
40 |
+
-2.4094923 cava -0.8239087
|
41 |
+
-2.4094923 parola -0.8239087
|
42 |
+
-2.4094923 bambina -0.8239087
|
43 |
+
-2.4094923 oggi -0.8239087
|
44 |
+
-2.4094923 foca -0.8239087
|
45 |
+
-2.4094923 aprile -0.8239087
|
46 |
+
-2.4094923 carnevale -0.69897
|
47 |
+
-2.1308527 duna -0.8239087
|
48 |
+
-2.4094923 nonno -0.8239087
|
49 |
+
-2.4094923 canile -0.8239087
|
50 |
+
-2.4094923 binocolo -0.8239087
|
51 |
+
-2.4094923 bacio -0.8239087
|
52 |
+
-2.4094923 alga -0.8239087
|
53 |
+
-2.4094923 codice -0.8239087
|
54 |
+
-2.4094923 bandito -0.8239087
|
55 |
+
-2.4094923 vizio -0.8239087
|
56 |
+
-2.4094923 matto -0.8239087
|
57 |
+
-2.4094923 avviso -0.8239087
|
58 |
+
-2.4094923 carnivoro -0.69897
|
59 |
+
-2.1308527 pranzo -0.8239087
|
60 |
+
-2.4094923 foglio -0.8239087
|
61 |
+
-2.4094923 sinistra -0.8239087
|
62 |
+
-2.4094923 bersaglio -0.8239087
|
63 |
+
-2.4094923 strano -0.8239087
|
64 |
+
-2.4094923 prezzo -0.8239087
|
65 |
+
-2.4094923 montagna -0.8239087
|
66 |
+
-2.4094923 mezzogiorno -0.8239087
|
67 |
+
-2.4094923 maestra -0.8239087
|
68 |
+
-2.4094923 piastra -0.8239087
|
69 |
+
-2.4094923 fratello -0.8239087
|
70 |
+
-2.4094923 pescespada -0.8239087
|
71 |
+
-2.4094923 strato -0.8239087
|
72 |
+
-2.4094923 balestra -0.8239087
|
73 |
+
-2.4094923 appuntamento -0.8239087
|
74 |
+
-2.4094923 capoclasse -0.8239087
|
75 |
+
-2.4094923 veglia -0.8239087
|
76 |
+
-2.4094923 risposta -0.8239087
|
77 |
+
-2.4094923 bicicletta -0.8239087
|
78 |
+
-2.4094923 caffetteria -0.8239087
|
79 |
+
-2.4094923 strizza -0.8239087
|
80 |
+
-2.4094923 cassetto -0.8239087
|
81 |
+
-2.4094923 cartellino -0.8239087
|
82 |
+
-2.4094923 insegnamento -0.45593196
|
83 |
+
-1.9548386 vilo -0.72699875
|
84 |
+
-2.4094923 soce -0.72699875
|
85 |
+
-2.4094923 gloveno -0.72699875
|
86 |
+
-2.4094923 munviglio -0.72699875
|
87 |
+
-2.4094923 depe -0.72699875
|
88 |
+
-2.4094923 banse -0.72699875
|
89 |
+
-2.4094923 coritta -0.72699875
|
90 |
+
-2.4094923 prodeglia -0.72699875
|
91 |
+
-2.4094923 arvi -0.72699875
|
92 |
+
-2.4094923 bignalo -0.72699875
|
93 |
+
-2.4094923 flunestro -0.72699875
|
94 |
+
-2.4094923 risognefa -0.72699875
|
95 |
+
-2.4094923 ostro -0.72699875
|
96 |
+
-2.4094923 bismaggo -0.72699875
|
97 |
+
-2.4094923 sirvelone -0.72699875
|
98 |
+
-2.4094923 chedinare -0.72699875
|
99 |
+
-2.4094923 dorca -0.72699875
|
100 |
+
-2.4094923 boleggio -0.72699875
|
101 |
+
-2.4094923 gagliralo -0.72699875
|
102 |
+
-2.4094923 nachipante -0.72699875
|
103 |
+
-2.4094923 nutto -0.72699875
|
104 |
+
-2.4094923 egnomisa -0.72699875
|
105 |
+
-2.4094923 coragresto -0.72699875
|
106 |
+
-2.4094923 lesciacope -0.35902193
|
107 |
+
-2.1308527 caso -0.60206
|
108 |
+
-2.4094923 fame -0.60206
|
109 |
+
-2.4094923 vicino -0.60206
|
110 |
+
-2.4094923 domenica -0.60206
|
111 |
+
-2.4094923 meno -0.60206
|
112 |
+
-2.4094923 bava -0.60206
|
113 |
+
-2.4094923 nuvola -0.60206
|
114 |
+
-2.4094923 signora -0.60206
|
115 |
+
-2.4094923 ecco -0.60206
|
116 |
+
-2.4094923 foga -0.60206
|
117 |
+
-2.4094923 estate -0.60206
|
118 |
+
-2.4094923 ospedale -0.60206
|
119 |
+
-2.4094923 diva -0.60206
|
120 |
+
-2.4094923 notte -0.60206
|
121 |
+
-2.4094923 barile -0.60206
|
122 |
+
-2.4094923 basilico -0.60206
|
123 |
+
-2.4094923 radio -0.60206
|
124 |
+
-2.4094923 anca -0.60206
|
125 |
+
-2.4094923 cofano -0.60206
|
126 |
+
-2.4094923 candito -0.60206
|
127 |
+
-2.4094923 bivio -0.60206
|
128 |
+
-2.4094923 fatto -0.60206
|
129 |
+
-2.4094923 attesa -0.60206
|
130 |
+
-2.4094923 candidato -0.47712123
|
131 |
+
-2.1308527 scarpa -0.60206
|
132 |
+
-2.4094923 figlio -0.60206
|
133 |
+
-2.4094923 palestra -0.60206
|
134 |
+
-2.4094923 fermaglio -0.60206
|
135 |
+
-2.4094923 strada -0.60206
|
136 |
+
-2.4094923 pressa -0.60206
|
137 |
+
-2.4094923 compagno -0.60206
|
138 |
+
-2.4094923 soggiorno -0.60206
|
139 |
+
-2.4094923 giostra -0.60206
|
140 |
+
-2.4094923 mostra -0.60206
|
141 |
+
-2.4094923 castello -0.60206
|
142 |
+
-2.4094923 cassaforte -0.60206
|
143 |
+
-2.4094923 strage -0.60206
|
144 |
+
-2.4094923 minestra -0.60206
|
145 |
+
-2.4094923 appartamento -0.60206
|
146 |
+
-2.4094923 caposcuola -0.60206
|
147 |
+
-2.4094923 taglia -0.60206
|
148 |
+
-2.4094923 farfalla -0.60206
|
149 |
+
-2.4094923 medaglietta -0.60206
|
150 |
+
-2.4094923 caffettiera -0.60206
|
151 |
+
-2.4094923 stringa -0.60206
|
152 |
+
-2.4094923 gessetto -0.60206
|
153 |
+
-2.4094923 cartoncino -0.60206
|
154 |
+
-2.4094923 arrossamento -0.47712123
|
155 |
+
-2.4094923 vuso -0.5228787
|
156 |
+
-2.4094923 seca -0.5228787
|
157 |
+
-2.4094923 glofeno -0.5228787
|
158 |
+
-2.4094923 ranviglio -0.5228787
|
159 |
+
-2.4094923 dape -0.5228787
|
160 |
+
-2.4094923 larse -0.5228787
|
161 |
+
-2.4094923 dorizza -0.5228787
|
162 |
+
-2.4094923 pradeglia -0.5228787
|
163 |
+
-2.4094923 arlo -0.5228787
|
164 |
+
-2.4094923 begnole -0.5228787
|
165 |
+
-2.4094923 flavestro -0.5228787
|
166 |
+
-2.4094923 risagnera -0.5228787
|
167 |
+
-2.4094923 istro -0.5228787
|
168 |
+
-2.4094923 bismoggo -0.5228787
|
169 |
+
-2.4094923 sirfelone -0.5228787
|
170 |
+
-2.4094923 chinadire -0.5228787
|
171 |
+
-2.4094923 borte -0.5228787
|
172 |
+
-2.4094923 foneggio -0.5228787
|
173 |
+
-2.4094923 caglivaro -0.5228787
|
174 |
+
-2.4094923 pachetenta -0.5228787
|
175 |
+
-2.4094923 bitto -0.5228787
|
176 |
+
-2.4094923 egnamisa -0.5228787
|
177 |
+
-2.4094923 caregresto -0.5228787
|
178 |
+
-2.4094923 vusciacope -0.30103
|
179 |
+
-1.8242877 naso -0.72699875
|
180 |
+
-2.4094923 pane -0.72699875
|
181 |
+
-2.4094923 lavoro -0.72699875
|
182 |
+
-2.4094923 telefono -0.72699875
|
183 |
+
-2.4094923 mano -0.72699875
|
184 |
+
-2.4094923 fava -0.72699875
|
185 |
+
-2.4094923 natura -0.72699875
|
186 |
+
-2.4094923 persona -0.72699875
|
187 |
+
-2.4094923 anno -0.72699875
|
188 |
+
-2.4094923 foce -0.72699875
|
189 |
+
-2.4094923 angolo -0.72699875
|
190 |
+
-2.4094923 personale -0.72699875
|
191 |
+
-2.4094923 diga -0.72699875
|
192 |
+
-2.4094923 gatto -0.72699875
|
193 |
+
-2.4094923 badile -0.72699875
|
194 |
+
-2.4094923 bonifico -0.72699875
|
195 |
+
-2.4094923 linea -0.72699875
|
196 |
+
-2.4094923 alba -0.72699875
|
197 |
+
-2.4094923 cometa -0.72699875
|
198 |
+
-2.4094923 pentito -0.72699875
|
199 |
+
-2.4094923 tizio -0.72699875
|
200 |
+
-2.4094923 patto -0.72699875
|
201 |
+
-2.4094923 offesa -0.72699875
|
202 |
+
-2.4094923 calderone -0.72699875
|
203 |
+
-2.4094923 frutta -0.72699875
|
204 |
+
-2.4094923 foglia -0.72699875
|
205 |
+
-2.4094923 finestra -0.72699875
|
206 |
+
-2.4094923 ventaglio -0.72699875
|
207 |
+
-2.4094923 strega -0.72699875
|
208 |
+
-2.4094923 pronto -0.72699875
|
209 |
+
-2.4094923 campagna -0.72699875
|
210 |
+
-2.4094923 buongiorno -0.72699875
|
211 |
+
-2.4094923 destra -0.72699875
|
212 |
+
-2.4094923 lastra -0.72699875
|
213 |
+
-2.4094923 cappello -0.72699875
|
214 |
+
-2.4094923 strofa -0.72699875
|
215 |
+
-2.4094923 ministra -0.72699875
|
216 |
+
-2.4094923 inquinamento -0.72699875
|
217 |
+
-2.4094923 caposquadra -0.72699875
|
218 |
+
-2.4094923 tiglio -0.72699875
|
219 |
+
-2.4094923 fantasma -0.72699875
|
220 |
+
-2.4094923 motocicletta -0.72699875
|
221 |
+
-2.4094923 caffelatte -0.72699875
|
222 |
+
-2.4094923 strillo -0.72699875
|
223 |
+
-2.4094923 rossetto -0.72699875
|
224 |
+
-2.4094923 cartellone -0.72699875
|
225 |
+
-2.4094923 sollevamento -0.72699875
|
226 |
+
-2.4094923 valo -0.72699875
|
227 |
+
-2.4094923 save -0.72699875
|
228 |
+
-2.4094923 glovelo -0.72699875
|
229 |
+
-2.4094923 sonviglio -0.72699875
|
230 |
+
-2.4094923 dete -0.72699875
|
231 |
+
-2.4094923 farde -0.72699875
|
232 |
+
-2.4094923 poritta -0.72699875
|
233 |
+
-2.4094923 prodiglia -0.72699875
|
234 |
+
-2.4094923 anci -0.72699875
|
235 |
+
-2.4094923 bignaso -0.72699875
|
236 |
+
-2.4094923 flenestro -0.72699875
|
237 |
+
-2.4094923 risugnela -0.72699875
|
238 |
+
-2.4094923 ustro -0.72699875
|
239 |
+
-2.4094923 bismacco -0.72699875
|
240 |
+
-2.4094923 sarvelone -0.72699875
|
241 |
+
-2.4094923 chetinere -0.72699875
|
242 |
+
-2.4094923 binca -0.72699875
|
243 |
+
-2.4094923 lonaggio -0.72699875
|
244 |
+
-2.4094923 cagliralo -0.72699875
|
245 |
+
-2.4094923 machidante -0.72699875
|
246 |
+
-2.4094923 tucca -0.72699875
|
247 |
+
-2.4094923 ignemisa -0.72699875
|
248 |
+
-2.4094923 coregresta -0.72699875
|
249 |
+
-2.4094923 nisciacope -0.42596874
|
250 |
+
|
251 |
+
\2-grams:
|
252 |
+
-1.1940873 nisciacope </s>
|
253 |
+
-0.29623765 <s> mela
|
254 |
+
-0.25558913 girotondo mela
|
255 |
+
-0.47981584 alcelisto mela
|
256 |
+
-0.011961477 mela pomodoro
|
257 |
+
-0.011961477 pomodoro mondo
|
258 |
+
-0.011961477 mondo tentacolo
|
259 |
+
-0.011961477 tentacolo maschera
|
260 |
+
-0.011918825 maschera cartapesta
|
261 |
+
-0.08943576 cappello cartapesta
|
262 |
+
-0.07089193 cartapesta gioco
|
263 |
+
-0.011961477 gioco polipo
|
264 |
+
-0.011961477 polipo cuore
|
265 |
+
-0.011961477 cuore prezzemolo
|
266 |
+
-0.011961477 prezzemolo margherita
|
267 |
+
-0.011961477 margherita girotondo
|
268 |
+
-0.4074761 girotondo rofe
|
269 |
+
-0.21806063 alcelisto rofe
|
270 |
+
-0.010594367 rofe vufebu
|
271 |
+
-0.010594367 vufebu costro
|
272 |
+
-0.010594367 costro lada
|
273 |
+
-0.010594367 lada nurto
|
274 |
+
-0.010594367 nurto frimace
|
275 |
+
-0.010594367 frimace canfrosto
|
276 |
+
-0.010594367 canfrosto presemma
|
277 |
+
-0.010594367 presemma oblimione
|
278 |
+
-0.010594367 oblimione ceglirate
|
279 |
+
-0.010594367 ceglirate banzione
|
280 |
+
-0.010594367 banzione alcelisto
|
281 |
+
-2.0395193 alcelisto vaso
|
282 |
+
-0.87530303 lesciacope vaso
|
283 |
+
-1.047566 candidato vaso
|
284 |
+
-0.96082795 vusciacope vaso
|
285 |
+
-0.3520508 nisciacope vaso
|
286 |
+
-0.07028266 vaso pace
|
287 |
+
-0.07028266 pace camera
|
288 |
+
-0.07028266 camera genitore
|
289 |
+
-0.07028266 genitore mese
|
290 |
+
-0.07028266 mese cava
|
291 |
+
-0.07028266 cava parola
|
292 |
+
-0.07028266 parola bambina
|
293 |
+
-0.07028266 bambina oggi
|
294 |
+
-0.07028266 oggi foca
|
295 |
+
-0.07028266 foca aprile
|
296 |
+
-0.07028266 aprile carnevale
|
297 |
+
-0.12408276 carnevale duna
|
298 |
+
-1.279101 insegnamento duna
|
299 |
+
-0.07028266 duna nonno
|
300 |
+
-0.07028266 nonno canile
|
301 |
+
-0.07028266 canile binocolo
|
302 |
+
-0.07028266 binocolo bacio
|
303 |
+
-0.07028266 bacio alga
|
304 |
+
-0.07028266 alga codice
|
305 |
+
-0.07028266 codice bandito
|
306 |
+
-0.07028266 bandito vizio
|
307 |
+
-0.07028266 vizio matto
|
308 |
+
-0.07028266 matto avviso
|
309 |
+
-0.07028266 avviso carnivoro
|
310 |
+
-1.2883639 carnevale pranzo
|
311 |
+
-0.12408276 carnivoro pranzo
|
312 |
+
-0.07028266 pranzo foglio
|
313 |
+
-0.07028266 foglio sinistra
|
314 |
+
-0.07028266 sinistra bersaglio
|
315 |
+
-0.07028266 bersaglio strano
|
316 |
+
-0.07028266 strano prezzo
|
317 |
+
-0.07028266 prezzo montagna
|
318 |
+
-0.07028266 montagna mezzogiorno
|
319 |
+
-0.07028266 mezzogiorno maestra
|
320 |
+
-0.07028266 maestra piastra
|
321 |
+
-0.07028266 piastra fratello
|
322 |
+
-0.07028266 fratello pescespada
|
323 |
+
-0.07028266 pescespada strato
|
324 |
+
-0.07028266 strato balestra
|
325 |
+
-0.07028266 balestra appuntamento
|
326 |
+
-0.07028266 appuntamento capoclasse
|
327 |
+
-0.07028266 capoclasse veglia
|
328 |
+
-0.07028266 veglia risposta
|
329 |
+
-0.07028266 risposta bicicletta
|
330 |
+
-0.07028266 bicicletta caffetteria
|
331 |
+
-0.07028266 caffetteria strizza
|
332 |
+
-0.07028266 strizza cassetto
|
333 |
+
-0.07028266 cassetto cartellino
|
334 |
+
-0.07028266 cartellino insegnamento
|
335 |
+
-1.28217 carnivoro vilo
|
336 |
+
-0.34305558 insegnamento vilo
|
337 |
+
-1.0603212 arrossamento vilo
|
338 |
+
-0.08978643 vilo soce
|
339 |
+
-0.08978643 soce gloveno
|
340 |
+
-0.08978643 gloveno munviglio
|
341 |
+
-0.08978643 munviglio depe
|
342 |
+
-0.08978643 depe banse
|
343 |
+
-0.08978643 banse coritta
|
344 |
+
-0.08978643 coritta prodeglia
|
345 |
+
-0.08978643 prodeglia arvi
|
346 |
+
-0.08978643 arvi bignalo
|
347 |
+
-0.08978643 bignalo flunestro
|
348 |
+
-0.08978643 flunestro risognefa
|
349 |
+
-0.08978643 risognefa ostro
|
350 |
+
-0.08978643 ostro bismaggo
|
351 |
+
-0.08978643 bismaggo sirvelone
|
352 |
+
-0.08978643 sirvelone chedinare
|
353 |
+
-0.08978643 chedinare dorca
|
354 |
+
-0.08978643 dorca boleggio
|
355 |
+
-0.08978643 boleggio gagliralo
|
356 |
+
-0.08978643 gagliralo nachipante
|
357 |
+
-0.08978643 nachipante nutto
|
358 |
+
-0.08978643 nutto egnomisa
|
359 |
+
-0.08978643 egnomisa coragresto
|
360 |
+
-0.08978643 coragresto lesciacope
|
361 |
+
-0.5006747 lesciacope caso
|
362 |
+
-0.6910105 vusciacope caso
|
363 |
+
-0.12437523 caso fame
|
364 |
+
-0.12437523 fame vicino
|
365 |
+
-0.12437523 vicino domenica
|
366 |
+
-0.12437523 domenica meno
|
367 |
+
-0.12437523 meno bava
|
368 |
+
-0.12437523 bava nuvola
|
369 |
+
-0.12437523 nuvola signora
|
370 |
+
-0.12437523 signora ecco
|
371 |
+
-0.12437523 ecco foga
|
372 |
+
-0.12437523 foga estate
|
373 |
+
-0.12437523 estate ospedale
|
374 |
+
-0.12437523 ospedale diva
|
375 |
+
-0.12437523 diva notte
|
376 |
+
-0.12437523 notte barile
|
377 |
+
-0.12437523 barile basilico
|
378 |
+
-0.12437523 basilico radio
|
379 |
+
-0.12437523 radio anca
|
380 |
+
-0.12437523 anca cofano
|
381 |
+
-0.12437523 cofano candito
|
382 |
+
-0.12437523 candito bivio
|
383 |
+
-0.12437523 bivio fatto
|
384 |
+
-0.12437523 fatto attesa
|
385 |
+
-0.12437523 attesa candidato
|
386 |
+
-1.279101 insegnamento scarpa
|
387 |
+
-0.232251 candidato scarpa
|
388 |
+
-0.12437523 scarpa figlio
|
389 |
+
-0.12437523 figlio palestra
|
390 |
+
-0.12437523 palestra fermaglio
|
391 |
+
-0.12437523 fermaglio strada
|
392 |
+
-0.12437523 strada pressa
|
393 |
+
-0.12437523 pressa compagno
|
394 |
+
-0.12437523 compagno soggiorno
|
395 |
+
-0.12437523 soggiorno giostra
|
396 |
+
-0.12437523 giostra mostra
|
397 |
+
-0.12437523 mostra castello
|
398 |
+
-0.12437523 castello cassaforte
|
399 |
+
-0.12437523 cassaforte strage
|
400 |
+
-0.12437523 strage minestra
|
401 |
+
-0.12437523 minestra appartamento
|
402 |
+
-0.12437523 appartamento caposcuola
|
403 |
+
-0.12437523 caposcuola taglia
|
404 |
+
-0.12437523 taglia farfalla
|
405 |
+
-0.12437523 farfalla medaglietta
|
406 |
+
-0.12437523 medaglietta caffettiera
|
407 |
+
-0.12437523 caffettiera stringa
|
408 |
+
-0.12437523 stringa gessetto
|
409 |
+
-0.12437523 gessetto cartoncino
|
410 |
+
-0.12437523 cartoncino arrossamento
|
411 |
+
-0.2331177 arrossamento vuso
|
412 |
+
-0.15417762 vuso seca
|
413 |
+
-0.15417762 seca glofeno
|
414 |
+
-0.15417762 glofeno ranviglio
|
415 |
+
-0.15417762 ranviglio dape
|
416 |
+
-0.15417762 dape larse
|
417 |
+
-0.15417762 larse dorizza
|
418 |
+
-0.15417762 dorizza pradeglia
|
419 |
+
-0.15417762 pradeglia arlo
|
420 |
+
-0.15417762 arlo begnole
|
421 |
+
-0.15417762 begnole flavestro
|
422 |
+
-0.15417762 flavestro risagnera
|
423 |
+
-0.15417762 risagnera istro
|
424 |
+
-0.15417762 istro bismoggo
|
425 |
+
-0.15417762 bismoggo sirfelone
|
426 |
+
-0.15417762 sirfelone chinadire
|
427 |
+
-0.15417762 chinadire borte
|
428 |
+
-0.15417762 borte foneggio
|
429 |
+
-0.15417762 foneggio caglivaro
|
430 |
+
-0.15417762 caglivaro pachetenta
|
431 |
+
-0.15417762 pachetenta bitto
|
432 |
+
-0.15417762 bitto egnamisa
|
433 |
+
-0.15417762 egnamisa caregresto
|
434 |
+
-0.15417762 caregresto vusciacope
|
435 |
+
-0.9777968 insegnamento naso
|
436 |
+
-0.8808868 lesciacope naso
|
437 |
+
-0.68299556 vusciacope naso
|
438 |
+
-0.88399005 nisciacope naso
|
439 |
+
-0.08978643 naso pane
|
440 |
+
-0.08978643 pane lavoro
|
441 |
+
-0.08978643 lavoro telefono
|
442 |
+
-0.08978643 telefono mano
|
443 |
+
-0.08978643 mano fava
|
444 |
+
-0.08978643 fava natura
|
445 |
+
-0.08978643 natura persona
|
446 |
+
-0.08978643 persona anno
|
447 |
+
-0.08978643 anno foce
|
448 |
+
-0.08978643 foce angolo
|
449 |
+
-0.08978643 angolo personale
|
450 |
+
-0.08978643 personale diga
|
451 |
+
-0.08978643 diga gatto
|
452 |
+
-0.08978643 gatto badile
|
453 |
+
-0.08978643 badile bonifico
|
454 |
+
-0.08978643 bonifico linea
|
455 |
+
-0.08978643 linea alba
|
456 |
+
-0.08978643 alba cometa
|
457 |
+
-0.08978643 cometa pentito
|
458 |
+
-0.08978643 pentito tizio
|
459 |
+
-0.08978643 tizio patto
|
460 |
+
-0.08978643 patto offesa
|
461 |
+
-0.08978643 offesa calderone
|
462 |
+
-0.08978643 calderone frutta
|
463 |
+
-0.08978643 frutta foglia
|
464 |
+
-0.08978643 foglia finestra
|
465 |
+
-0.08978643 finestra ventaglio
|
466 |
+
-0.08978643 ventaglio strega
|
467 |
+
-0.08978643 strega pronto
|
468 |
+
-0.08978643 pronto campagna
|
469 |
+
-0.08978643 campagna buongiorno
|
470 |
+
-0.08978643 buongiorno destra
|
471 |
+
-0.08978643 destra lastra
|
472 |
+
-0.08978643 lastra cappello
|
473 |
+
-0.98564714 cartapesta strofa
|
474 |
+
-0.08978643 strofa ministra
|
475 |
+
-0.08978643 ministra inquinamento
|
476 |
+
-0.08978643 inquinamento caposquadra
|
477 |
+
-0.08978643 caposquadra tiglio
|
478 |
+
-0.08978643 tiglio fantasma
|
479 |
+
-0.08978643 fantasma motocicletta
|
480 |
+
-0.08978643 motocicletta caffelatte
|
481 |
+
-0.08978643 caffelatte strillo
|
482 |
+
-0.08978643 strillo rossetto
|
483 |
+
-0.08978643 rossetto cartellone
|
484 |
+
-0.08978643 cartellone sollevamento
|
485 |
+
-0.08978643 sollevamento valo
|
486 |
+
-0.08978643 valo save
|
487 |
+
-0.08978643 save glovelo
|
488 |
+
-0.08978643 glovelo sonviglio
|
489 |
+
-0.08978643 sonviglio dete
|
490 |
+
-0.08978643 dete farde
|
491 |
+
-0.08978643 farde poritta
|
492 |
+
-0.08978643 poritta prodiglia
|
493 |
+
-0.08978643 prodiglia anci
|
494 |
+
-0.08978643 anci bignaso
|
495 |
+
-0.08978643 bignaso flenestro
|
496 |
+
-0.08978643 flenestro risugnela
|
497 |
+
-0.08978643 risugnela ustro
|
498 |
+
-0.08978643 ustro bismacco
|
499 |
+
-0.08978643 bismacco sarvelone
|
500 |
+
-0.08978643 sarvelone chetinere
|
501 |
+
-0.08978643 chetinere binca
|
502 |
+
-0.08978643 binca lonaggio
|
503 |
+
-0.08978643 lonaggio cagliralo
|
504 |
+
-0.08978643 cagliralo machidante
|
505 |
+
-0.08978643 machidante tucca
|
506 |
+
-0.08978643 tucca ignemisa
|
507 |
+
-0.08978643 ignemisa coregresta
|
508 |
+
-0.08978643 coregresta nisciacope
|
509 |
+
|
510 |
+
\end\
|
language_model/2gram_It_Ref.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f498476811af80757fe360dcae5de0ab06fa73fdec859cbc4e824160129e3540
|
3 |
+
size 13061
|
language_model/attrs.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
|
language_model/unigrams.txt
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
</s>
|
2 |
+
<s>
|
3 |
+
alba
|
4 |
+
alcelisto
|
5 |
+
alga
|
6 |
+
anca
|
7 |
+
anci
|
8 |
+
angolo
|
9 |
+
anno
|
10 |
+
appartamento
|
11 |
+
appuntamento
|
12 |
+
aprile
|
13 |
+
arlo
|
14 |
+
arrossamento
|
15 |
+
arvi
|
16 |
+
attesa
|
17 |
+
avviso
|
18 |
+
bacio
|
19 |
+
badile
|
20 |
+
balestra
|
21 |
+
bambina
|
22 |
+
bandito
|
23 |
+
banse
|
24 |
+
banzione
|
25 |
+
barile
|
26 |
+
basilico
|
27 |
+
bava
|
28 |
+
begnole
|
29 |
+
bersaglio
|
30 |
+
bicicletta
|
31 |
+
bignalo
|
32 |
+
bignaso
|
33 |
+
binca
|
34 |
+
binocolo
|
35 |
+
bismacco
|
36 |
+
bismaggo
|
37 |
+
bismoggo
|
38 |
+
bitto
|
39 |
+
bivio
|
40 |
+
boleggio
|
41 |
+
bonifico
|
42 |
+
borte
|
43 |
+
buongiorno
|
44 |
+
caffelatte
|
45 |
+
caffetteria
|
46 |
+
caffettiera
|
47 |
+
cagliralo
|
48 |
+
caglivaro
|
49 |
+
calderone
|
50 |
+
camera
|
51 |
+
campagna
|
52 |
+
candidato
|
53 |
+
candito
|
54 |
+
canfrosto
|
55 |
+
canile
|
56 |
+
capoclasse
|
57 |
+
caposcuola
|
58 |
+
caposquadra
|
59 |
+
cappello
|
60 |
+
caregresto
|
61 |
+
carnevale
|
62 |
+
carnivoro
|
63 |
+
cartapesta
|
64 |
+
cartellino
|
65 |
+
cartellone
|
66 |
+
cartoncino
|
67 |
+
caso
|
68 |
+
cassaforte
|
69 |
+
cassetto
|
70 |
+
castello
|
71 |
+
cava
|
72 |
+
ceglirate
|
73 |
+
chedinare
|
74 |
+
chetinere
|
75 |
+
chinadire
|
76 |
+
codice
|
77 |
+
cofano
|
78 |
+
cometa
|
79 |
+
compagno
|
80 |
+
coragresto
|
81 |
+
coregresta
|
82 |
+
coritta
|
83 |
+
costro
|
84 |
+
cuore
|
85 |
+
dape
|
86 |
+
depe
|
87 |
+
destra
|
88 |
+
dete
|
89 |
+
diga
|
90 |
+
diva
|
91 |
+
domenica
|
92 |
+
dorca
|
93 |
+
dorizza
|
94 |
+
duna
|
95 |
+
ecco
|
96 |
+
egnamisa
|
97 |
+
egnomisa
|
98 |
+
estate
|
99 |
+
fame
|
100 |
+
fantasma
|
101 |
+
farde
|
102 |
+
farfalla
|
103 |
+
fatto
|
104 |
+
fava
|
105 |
+
fermaglio
|
106 |
+
figlio
|
107 |
+
finestra
|
108 |
+
flavestro
|
109 |
+
flenestro
|
110 |
+
flunestro
|
111 |
+
foca
|
112 |
+
foce
|
113 |
+
foga
|
114 |
+
foglia
|
115 |
+
foglio
|
116 |
+
foneggio
|
117 |
+
fratello
|
118 |
+
frimace
|
119 |
+
frutta
|
120 |
+
gagliralo
|
121 |
+
gatto
|
122 |
+
genitore
|
123 |
+
gessetto
|
124 |
+
gioco
|
125 |
+
giostra
|
126 |
+
girotondo
|
127 |
+
glofeno
|
128 |
+
glovelo
|
129 |
+
gloveno
|
130 |
+
ignemisa
|
131 |
+
inquinamento
|
132 |
+
insegnamento
|
133 |
+
istro
|
134 |
+
lada
|
135 |
+
larse
|
136 |
+
lastra
|
137 |
+
lavoro
|
138 |
+
lesciacope
|
139 |
+
linea
|
140 |
+
lonaggio
|
141 |
+
machidante
|
142 |
+
maestra
|
143 |
+
mano
|
144 |
+
margherita
|
145 |
+
maschera
|
146 |
+
matto
|
147 |
+
medaglietta
|
148 |
+
mela
|
149 |
+
meno
|
150 |
+
mese
|
151 |
+
mezzogiorno
|
152 |
+
minestra
|
153 |
+
ministra
|
154 |
+
mondo
|
155 |
+
montagna
|
156 |
+
mostra
|
157 |
+
motocicletta
|
158 |
+
munviglio
|
159 |
+
nachipante
|
160 |
+
naso
|
161 |
+
natura
|
162 |
+
nisciacope
|
163 |
+
nonno
|
164 |
+
notte
|
165 |
+
nurto
|
166 |
+
nutto
|
167 |
+
nuvola
|
168 |
+
oblimione
|
169 |
+
offesa
|
170 |
+
oggi
|
171 |
+
ospedale
|
172 |
+
ostro
|
173 |
+
pace
|
174 |
+
pachetenta
|
175 |
+
palestra
|
176 |
+
pane
|
177 |
+
parola
|
178 |
+
patto
|
179 |
+
pentito
|
180 |
+
persona
|
181 |
+
personale
|
182 |
+
pescespada
|
183 |
+
piastra
|
184 |
+
polipo
|
185 |
+
pomodoro
|
186 |
+
poritta
|
187 |
+
pradeglia
|
188 |
+
pranzo
|
189 |
+
presemma
|
190 |
+
pressa
|
191 |
+
prezzemolo
|
192 |
+
prezzo
|
193 |
+
prodeglia
|
194 |
+
prodiglia
|
195 |
+
pronto
|
196 |
+
radio
|
197 |
+
ranviglio
|
198 |
+
risagnera
|
199 |
+
risognefa
|
200 |
+
risposta
|
201 |
+
risugnela
|
202 |
+
rofe
|
203 |
+
rossetto
|
204 |
+
sarvelone
|
205 |
+
save
|
206 |
+
scarpa
|
207 |
+
seca
|
208 |
+
signora
|
209 |
+
sinistra
|
210 |
+
sirfelone
|
211 |
+
sirvelone
|
212 |
+
soce
|
213 |
+
soggiorno
|
214 |
+
sollevamento
|
215 |
+
sonviglio
|
216 |
+
strada
|
217 |
+
strage
|
218 |
+
strano
|
219 |
+
strato
|
220 |
+
strega
|
221 |
+
strillo
|
222 |
+
stringa
|
223 |
+
strizza
|
224 |
+
strofa
|
225 |
+
taglia
|
226 |
+
telefono
|
227 |
+
tentacolo
|
228 |
+
tiglio
|
229 |
+
tizio
|
230 |
+
tucca
|
231 |
+
ustro
|
232 |
+
valo
|
233 |
+
vaso
|
234 |
+
veglia
|
235 |
+
ventaglio
|
236 |
+
vicino
|
237 |
+
vilo
|
238 |
+
vizio
|
239 |
+
vufebu
|
240 |
+
vusciacope
|
241 |
+
vuso
|
preprocessor_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"processor_class": "Wav2Vec2ProcessorWithLM",
|
8 |
+
"return_attention_mask": true,
|
9 |
+
"sampling_rate": 16000
|
10 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7281f836a6c24a0cb01dcd6abcaccc1d05af48807a5138b3f39c235904e0d7f
|
3 |
+
size 1262112241
|
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"eos_token": "</s>",
|
4 |
+
"pad_token": "<pad>",
|
5 |
+
"unk_token": "<unk>"
|
6 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "<pad>",
|
5 |
+
"lstrip": true,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": true,
|
8 |
+
"single_word": false,
|
9 |
+
"special": false
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<s>",
|
13 |
+
"lstrip": true,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": true,
|
16 |
+
"single_word": false,
|
17 |
+
"special": false
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "</s>",
|
21 |
+
"lstrip": true,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": true,
|
24 |
+
"single_word": false,
|
25 |
+
"special": false
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "<unk>",
|
29 |
+
"lstrip": true,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": true,
|
32 |
+
"single_word": false,
|
33 |
+
"special": false
|
34 |
+
}
|
35 |
+
},
|
36 |
+
"bos_token": "<s>",
|
37 |
+
"clean_up_tokenization_spaces": true,
|
38 |
+
"do_lower_case": false,
|
39 |
+
"eos_token": "</s>",
|
40 |
+
"model_max_length": 1000000000000000019884624838656,
|
41 |
+
"pad_token": "<pad>",
|
42 |
+
"processor_class": "Wav2Vec2ProcessorWithLM",
|
43 |
+
"replace_word_delimiter_char": " ",
|
44 |
+
"target_lang": null,
|
45 |
+
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
46 |
+
"unk_token": "<unk>",
|
47 |
+
"word_delimiter_token": "|"
|
48 |
+
}
|
transcriptions_cv7_test.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e7ccd1835ec95183cc10c41dddf4532f3700c79d1d9979197242a697c24ce54
|
3 |
+
size 78764122
|
transcriptions_cv7_validation.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13ec28e9aa51878172c99ac03e1d68611f1a37bc6acd12fa96b22a2f474e1a3c
|
3 |
+
size 78184898
|
vocab.json
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"'": 5,
|
3 |
+
"-": 6,
|
4 |
+
"</s>": 2,
|
5 |
+
"<pad>": 0,
|
6 |
+
"<s>": 1,
|
7 |
+
"<unk>": 3,
|
8 |
+
"a": 7,
|
9 |
+
"b": 8,
|
10 |
+
"c": 9,
|
11 |
+
"d": 10,
|
12 |
+
"e": 11,
|
13 |
+
"f": 12,
|
14 |
+
"g": 13,
|
15 |
+
"h": 14,
|
16 |
+
"i": 15,
|
17 |
+
"j": 16,
|
18 |
+
"k": 17,
|
19 |
+
"l": 18,
|
20 |
+
"m": 19,
|
21 |
+
"n": 20,
|
22 |
+
"o": 21,
|
23 |
+
"p": 22,
|
24 |
+
"q": 23,
|
25 |
+
"r": 24,
|
26 |
+
"s": 25,
|
27 |
+
"t": 26,
|
28 |
+
"u": 27,
|
29 |
+
"v": 28,
|
30 |
+
"w": 29,
|
31 |
+
"x": 30,
|
32 |
+
"y": 31,
|
33 |
+
"z": 32,
|
34 |
+
"|": 4,
|
35 |
+
"à": 33,
|
36 |
+
"á": 34,
|
37 |
+
"è": 35,
|
38 |
+
"é": 36,
|
39 |
+
"ì": 37,
|
40 |
+
"í": 38,
|
41 |
+
"ò": 39,
|
42 |
+
"ó": 40,
|
43 |
+
"ù": 41,
|
44 |
+
"ú": 42,
|
45 |
+
"č": 43,
|
46 |
+
"ō": 44,
|
47 |
+
"š": 45
|
48 |
+
}
|