Oscar Wang commited on Jun 24, 2024

Commit

c430417

verified ·

1 Parent(s): 546ea4a

Upload 17 files

Browse files

Files changed (17) hide show

.DS_Store +0 -0
.gitattributes +35 -35
README.md +110 -0
base_speakers/.DS_Store +0 -0
base_speakers/ses/en-au.pth +3 -0
base_speakers/ses/en-br.pth +3 -0
base_speakers/ses/en-default.pth +3 -0
base_speakers/ses/en-india.pth +3 -0
base_speakers/ses/en-newest.pth +3 -0
base_speakers/ses/en-us.pth +3 -0
base_speakers/ses/es.pth +3 -0
base_speakers/ses/fr.pth +3 -0
base_speakers/ses/jp.pth +3 -0
base_speakers/ses/kr.pth +3 -0
base_speakers/ses/zh.pth +3 -0
converter/checkpoint.pth +3 -0
converter/config.json +57 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.gitattributes CHANGED Viewed

@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,113 @@
 ---
 license: mit
 ---

 ---
 license: mit
+tags:
+- audio
+- text-to-speech
+- instant-voice-cloning
+language:
+- en
+- zh
+inference: false
 ---
+# OpenVoice V2
+In April 2024, we release OpenVoice V2, which includes all features in V1 and has:
+1. Better Audio Quality. OpenVoice V2 adopts a different training strategy that delivers better audio quality.
+2. Native Multi-lingual Support. English, Spanish, French, Chinese, Japanese and Korean are natively supported in OpenVoice V2.
+3. Free Commercial Use. Starting from April 2024, both V2 and V1 are released under MIT License. Free for commercial use.
+<video controls autoplay src="https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/uCHTHD9OUotgOflqDu3QK.mp4"></video>
+### Features
+- **Accurate Tone Color Cloning.** OpenVoice can accurately clone the reference tone color and generate speech in multiple languages and accents.
+- **Flexible Voice Style Control.** OpenVoice enables granular control over voice styles, such as emotion and accent, as well as other style parameters including rhythm, pauses, and intonation.
+- **Zero-shot Cross-lingual Voice Cloning.** Neither of the language of the generated speech nor the language of the reference speech needs to be presented in the massive-speaker multi-lingual training dataset.
+### How to Use
+Please see [usage](https://github.com/myshell-ai/OpenVoice/blob/main/docs/USAGE.md) for detailed instructions.
+# Usage
+## Table of Content
+- [Quick Use](#quick-use): directly use OpenVoice without installation.
+- [Linux Install](#linux-install): for researchers and developers only.
+    - [V1](#openvoice-v1)
+    - [V2](#openvoice-v2)
+- [Install on Other Platforms](#install-on-other-platforms): unofficial installation guide contributed by the community
+## Quick Use
+The input speech audio of OpenVoice can be in **Any Language**. OpenVoice can clone the voice in that speech audio, and use the voice to speak in multiple languages. For quick use, we recommend you to try the already deployed services:
+- [British English](https://app.myshell.ai/widget/vYjqae)
+- [American English](https://app.myshell.ai/widget/nEFFJf)
+- [Indian English](https://app.myshell.ai/widget/V3iYze)
+- [Australian English](https://app.myshell.ai/widget/fM7JVf)
+- [Spanish](https://app.myshell.ai/widget/NNFFVz)
+- [French](https://app.myshell.ai/widget/z2uyUz)
+- [Chinese](https://app.myshell.ai/widget/fU7nUz)
+- [Japanese](https://app.myshell.ai/widget/IfIB3u)
+- [Korean](https://app.myshell.ai/widget/q6ZjIn)
+## Linux Install
+This section is only for developers and researchers who are familiar with Linux, Python and PyTorch. Clone this repo, and run
+```
+conda create -n openvoice python=3.9
+conda activate openvoice
+git clone [email protected]:myshell-ai/OpenVoice.git
+cd OpenVoice
+pip install -e .
+```
+No matter if you are using V1 or V2, the above installation is the same.
+### OpenVoice V1
+Download the checkpoint from [here](https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_1226.zip) and extract it to the `checkpoints` folder.
+**1. Flexible Voice Style Control.**
+Please see [`demo_part1.ipynb`](https://github.com/myshell-ai/OpenVoice/blob/main/demo_part1.ipynb) for an example usage of how OpenVoice enables flexible style control over the cloned voice.
+**2. Cross-Lingual Voice Cloning.**
+Please see [`demo_part2.ipynb`](https://github.com/myshell-ai/OpenVoice/blob/main/demo_part2.ipynb) for an example for languages seen or unseen in the MSML training set.
+**3. Gradio Demo.**. We provide a minimalist local gradio demo here. We strongly suggest the users to look into `demo_part1.ipynb`, `demo_part2.ipynb` and the [QnA](QA.md) if they run into issues with the gradio demo. Launch a local gradio demo with `python -m openvoice_app --share`.
+### OpenVoice V2
+Download the checkpoint from [here](https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip) and extract it to the `checkpoints_v2` folder.
+Install [MeloTTS](https://github.com/myshell-ai/MeloTTS):
+```
+pip install git+https://github.com/myshell-ai/MeloTTS.git
+python -m unidic download
+```
+**Demo Usage.** Please see [`demo_part3.ipynb`](https://github.com/myshell-ai/OpenVoice/blob/main/demo_part3.ipynb) for example usage of OpenVoice V2. Now it natively supports English, Spanish, French, Chinese, Japanese and Korean.
+## Install on Other Platforms
+This section provides the unofficial installation guides by open-source contributors in the community:
+- Windows
+  - [Guide](https://github.com/Alienpups/OpenVoice/blob/main/docs/USAGE_WINDOWS.md) by [@Alienpups](https://github.com/Alienpups)
+  - You are welcome to contribute if you have a better installation guide. We will list you here.
+- Docker
+  - [Guide](https://github.com/StevenJSCF/OpenVoice/blob/update-docs/docs/DF_USAGE.md) by [@StevenJSCF](https://github.com/StevenJSCF)
+  - You are welcome to contribute if you have a better installation guide. We will list you here.
+### Links
+- [Github](https://github.com/myshell-ai/OpenVoice)
+- [HFDemo](https://huggingface.co/spaces/myshell-ai/OpenVoiceV2)
+- [Discord](https://discord.gg/myshell)

base_speakers/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

base_speakers/ses/en-au.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e9782233deef51fc5289d05ad4dd4ce12b196e282eccf6b6db6256bbd02daaa
+size 1701

base_speakers/ses/en-br.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2bf5a88025cfd10473b25d65d5c0e608338ce4533059c5f9a3383e69c812d389
+size 1701

base_speakers/ses/en-default.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4139de3bc2ea162f45a5a5f9559b710686c9689749b5ab8945ee5e2a082d154
+size 1783

base_speakers/ses/en-india.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad03d946757e95fe9e13239aa4b11071d98f22316f604f34b1a0b4bdf41cda48
+size 1701

base_speakers/ses/en-newest.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a3798229b1114f0e9cc137b33211809def7dda5a8a9398d5a112c0b42699177
+size 1692

base_speakers/ses/en-us.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d092d4af0815a4bfbc6105b65621ab68dc4c61b2f55044d8a66968a34947c32
+size 1701

base_speakers/ses/es.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8cece8853fb75b9f5217a1f5cda9807bac92a3e4c4547fc651e404d05deff63
+size 1692

base_speakers/ses/fr.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a01f6d30a73efa368c288a542a522a2bcdd4e2ec5589d8646b307cf8e2ad9ae
+size 1692

base_speakers/ses/jp.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b645ff428de4a57a22122318968f1e6127ac81fda2e2aa66062deccd3864416
+size 1692

base_speakers/ses/kr.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f501479d6072741a396725bec79144653e9f4a5381b85901e29683aa169795df
+size 1692

base_speakers/ses/zh.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b353de562700c13faacf096ecfc0adcafd26e6704a9feef572be1279714e031
+size 1692

converter/checkpoint.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9652c27e92b6b2a91632590ac9962ef7ae2b712e5c5b7f4c34ec55ee2b37ab9e
+size 131320490

converter/config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "_version_": "v2",
+  "data": {
+    "sampling_rate": 22050,
+    "filter_length": 1024,
+    "hop_length": 256,
+    "win_length": 1024,
+    "n_speakers": 0
+  },
+  "model": {
+    "zero_g": true,
+    "inter_channels": 192,
+    "hidden_channels": 192,
+    "filter_channels": 768,
+    "n_heads": 2,
+    "n_layers": 6,
+    "kernel_size": 3,
+    "p_dropout": 0.1,
+    "resblock": "1",
+    "resblock_kernel_sizes": [
+      3,
+      7,
+      11
+    ],
+    "resblock_dilation_sizes": [
+      [
+        1,
+        3,
+        5
+      ],
+      [
+        1,
+        3,
+        5
+      ],
+      [
+        1,
+        3,
+        5
+      ]
+    ],
+    "upsample_rates": [
+      8,
+      8,
+      2,
+      2
+    ],
+    "upsample_initial_channel": 512,
+    "upsample_kernel_sizes": [
+      16,
+      16,
+      4,
+      4
+    ],
+    "gin_channels": 256
+  }
+}