gongting committed on Jan 18

Commit

a3caaef

verified ·

1 Parent(s): 50c56a5

Delete https:

Browse files

Files changed (20) hide show

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/config.json +0 -27
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/generation_config.json +0 -14
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/merges.txt +0 -0
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/model.safetensors +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/tokenizer_config.json +0 -40
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/vocab.json +0 -0
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/asset/dingding.png +0 -0
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/campplus.onnx +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/configuration.json +0 -1
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/cosyvoice.yaml +0 -140
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.decoder.estimator.fp16.a10.plan +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.decoder.estimator.fp16.l20.plan +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.decoder.estimator.fp16.v100.plan +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.decoder.estimator.fp32.onnx +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.encoder.fp16.zip +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.encoder.fp32.zip +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.pt +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/hift.pt +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/llm.pt +0 -3
https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/speech_tokenizer_v2.onnx +0 -3

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/config.json DELETED Viewed

@@ -1,27 +0,0 @@
-{
-  "architectures": [
-    "Qwen2ForCausalLM"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 151643,
-  "eos_token_id": 151645,
-  "hidden_act": "silu",
-  "hidden_size": 896,
-  "initializer_range": 0.02,
-  "intermediate_size": 4864,
-  "max_position_embeddings": 32768,
-  "max_window_layers": 24,
-  "model_type": "qwen2",
-  "num_attention_heads": 14,
-  "num_hidden_layers": 24,
-  "num_key_value_heads": 2,
-  "rms_norm_eps": 1e-06,
-  "rope_theta": 1000000.0,
-  "sliding_window": 32768,
-  "tie_word_embeddings": true,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.40.1",
-  "use_cache": true,
-  "use_sliding_window": false,
-  "vocab_size": 151936
-}

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/generation_config.json DELETED Viewed

@@ -1,14 +0,0 @@
-{
-  "bos_token_id": 151643,
-  "pad_token_id": 151643,
-  "do_sample": true,
-  "eos_token_id": [
-    151645,
-    151643
-  ],
-  "repetition_penalty": 1.1,
-  "temperature": 0.7,
-  "top_p": 0.8,
-  "top_k": 20,
-  "transformers_version": "4.37.0"
-}

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/merges.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:130282af0dfa9fe5840737cc49a0d339d06075f83c5a315c3372c9a0740d0b96
-size 988097824

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/tokenizer_config.json DELETED Viewed

@@ -1,40 +0,0 @@
-{
-  "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "151643": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151644": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151645": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
-  "bos_token": null,
-  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "errors": "replace",
-  "model_max_length": 32768,
-  "pad_token": "<|endoftext|>",
-  "split_special_tokens": false,
-  "tokenizer_class": "Qwen2Tokenizer",
-  "unk_token": null
-}

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/CosyVoice-BlankEN/vocab.json DELETED Viewed

The diff for this file is too large to render. See raw diff

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/asset/dingding.png DELETED Viewed

Binary file (96.4 kB)

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/campplus.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a6ac6a63997761ae2997373e2ee1c47040854b4b759ea41ec48e4e42df0f4d73
-size 28303423

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/configuration.json DELETED Viewed

@@ -1 +0,0 @@
-{"framework":"Pytorch","task":"text-to-speech"}

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/cosyvoice.yaml DELETED Viewed

@@ -1,140 +0,0 @@
-# set random seed, so that you may reproduce your result.
-__set_seed1: !apply:random.seed [1986]
-__set_seed2: !apply:numpy.random.seed [1986]
-__set_seed3: !apply:torch.manual_seed [1986]
-__set_seed4: !apply:torch.cuda.manual_seed_all [1986]
-# fixed params
-sample_rate: 24000
-llm_input_size: 896
-llm_output_size: 896
-spk_embed_dim: 192
-qwen_pretrain_path: ''
-# model params
-# for all class/function included in this repo, we use !<name> or !<new> for initialization, so that users may find all corresponding class/function according to one single yaml.
-# for system/third_party class/function, we do not require this.
-llm: !new:cosyvoice.llm.llm.Qwen2LM
-    llm_input_size: !ref <llm_input_size>
-    llm_output_size: !ref <llm_output_size>
-    speech_token_size: 6561
-    length_normalized_loss: True
-    lsm_weight: 0
-    llm: !new:cosyvoice.llm.llm.Qwen2Encoder
-        pretrain_path: !ref <qwen_pretrain_path>
-    sampling: !name:cosyvoice.utils.common.ras_sampling
-        top_p: 0.8
-        top_k: 25
-        win_size: 10
-        tau_r: 0.1
-flow: !new:cosyvoice.flow.flow.CausalMaskedDiffWithXvec
-    input_size: 512
-    output_size: 80
-    spk_embed_dim: !ref <spk_embed_dim>
-    output_type: 'mel'
-    vocab_size: 6561
-    input_frame_rate: 25
-    only_mask_loss: True
-    token_mel_ratio: 2
-    pre_lookahead_len: 3
-    encoder: !new:cosyvoice.transformer.upsample_encoder.UpsampleConformerEncoder
-        output_size: 512
-        attention_heads: 8
-        linear_units: 2048
-        num_blocks: 6
-        dropout_rate: 0.1
-        positional_dropout_rate: 0.1
-        attention_dropout_rate: 0.1
-        normalize_before: True
-        input_layer: 'linear'
-        pos_enc_layer_type: 'rel_pos_espnet'
-        selfattention_layer_type: 'rel_selfattn'
-        input_size: 512
-        use_cnn_module: False
-        macaron_style: False
-    decoder: !new:cosyvoice.flow.flow_matching.CausalConditionalCFM
-        in_channels: 240
-        n_spks: 1
-        spk_emb_dim: 80
-        cfm_params: !new:omegaconf.DictConfig
-            content:
-                sigma_min: 1e-06
-                solver: 'euler'
-                t_scheduler: 'cosine'
-                training_cfg_rate: 0.2
-                inference_cfg_rate: 0.7
-                reg_loss_type: 'l1'
-        estimator: !new:cosyvoice.flow.decoder.ConditionalDecoder
-            in_channels: 320
-            out_channels: 80
-            causal: True
-            channels: [256]
-            dropout: 0.0
-            attention_head_dim: 64
-            n_blocks: 4
-            num_mid_blocks: 12
-            num_heads: 8
-            act_fn: 'gelu'
-hift: !new:cosyvoice.hifigan.generator.HiFTGenerator
-    in_channels: 80
-    base_channels: 512
-    nb_harmonics: 8
-    sampling_rate: !ref <sample_rate>
-    nsf_alpha: 0.1
-    nsf_sigma: 0.003
-    nsf_voiced_threshold: 10
-    upsample_rates: [8, 5, 3]
-    upsample_kernel_sizes: [16, 11, 7]
-    istft_params:
-        n_fft: 16
-        hop_len: 4
-    resblock_kernel_sizes: [3, 7, 11]
-    resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
-    source_resblock_kernel_sizes: [7, 7, 11]
-    source_resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
-    lrelu_slope: 0.1
-    audio_limit: 0.99
-    f0_predictor: !new:cosyvoice.hifigan.f0_predictor.ConvRNNF0Predictor
-        num_class: 1
-        in_channels: 80
-        cond_channels: 512
-# processor functions
-parquet_opener: !name:cosyvoice.dataset.processor.parquet_opener
-get_tokenizer: !name:cosyvoice.tokenizer.tokenizer.get_qwen_tokenizer
-    token_path: !ref <qwen_pretrain_path>
-    skip_special_tokens: True
-allowed_special: 'all'
-tokenize: !name:cosyvoice.dataset.processor.tokenize
-    get_tokenizer: !ref <get_tokenizer>
-    allowed_special: !ref <allowed_special>
-filter: !name:cosyvoice.dataset.processor.filter
-    max_length: 40960
-    min_length: 0
-    token_max_length: 200
-    token_min_length: 1
-resample: !name:cosyvoice.dataset.processor.resample
-    resample_rate: !ref <sample_rate>
-feat_extractor: !name:matcha.utils.audio.mel_spectrogram
-    n_fft: 1920
-    num_mels: 80
-    sampling_rate: !ref <sample_rate>
-    hop_size: 480
-    win_size: 1920
-    fmin: 0
-    fmax: 8000
-    center: False
-compute_fbank: !name:cosyvoice.dataset.processor.compute_fbank
-    feat_extractor: !ref <feat_extractor>
-parse_embedding: !name:cosyvoice.dataset.processor.parse_embedding
-    normalize: True
-shuffle: !name:cosyvoice.dataset.processor.shuffle
-    shuffle_size: 1000
-sort: !name:cosyvoice.dataset.processor.sort
-    sort_size: 500  # sort_size should be less than shuffle_size
-batch: !name:cosyvoice.dataset.processor.batch
-    batch_type: 'dynamic'
-    max_frames_in_batch: 2000
-padding: !name:cosyvoice.dataset.processor.padding

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.decoder.estimator.fp16.a10.plan DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6f6b9073bd9e7b8ac5bef0a21431391cbc32376b9265ec73935d6f28a0d32d01
-size 168597292

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.decoder.estimator.fp16.l20.plan DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:012df9e730e36e1cb61bf2780378c15ae92c536ae87518b7a54a90026cb99385
-size 166520788

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.decoder.estimator.fp16.v100.plan DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f231edf01451fafbc3dc0498a51feb3a264afad43275536c8151fff954ef3c56
-size 161799540

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.decoder.estimator.fp32.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:51aed3efa2c153898ea53a780893c920e968dab1d7aec25402bd6c9815d94702
-size 286521895

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.encoder.fp16.zip DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:46d2539ad8bdb90026cd50cb42e45bd389f10108111d742b912feddca105aeb6
-size 116703414

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.encoder.fp32.zip DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:32ac668741e1358123d3c066cfd1f68a81bd386028755be9831509e304bfd98c
-size 192365750

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/flow.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ff4c2f867674411e0a08cee702996df13fa67c1cd864c06108da88d16d088541
-size 450575567

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/hift.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1d4af0d661a416c69544eec83ff9c070dc80c37ee53ef44af3a37d910c95bc21
-size 83364158

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/llm.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b144ef55b51ce8cfb79a73c90dbba0bdaba4e451c0ebcfab20f769264f84a608
-size 2023316821

https:/huggingface.co/gongting/CosyVoice2-0.5B/tree/main/speech_tokenizer_v2.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d43342aa12163a80bf07bffb94c9de2e120a8df2f9917cd2f642e7f4219c6f71
-size 496082973