// Schema for the CosyVoice gRPC service.
syntax = "proto3";

package cosyvoice;

// Go import path for the generated code. Changing it on a published API moves
// the generated package, so it is kept as-is.
option go_package = "protos/";
// -----------------------------------------------------------------------------
// Service
// -----------------------------------------------------------------------------
// CosyVoice exposes a single server-streaming RPC.
service CosyVoice {
  // Inference accepts one Request and returns a stream of Response messages.
  // The Request's oneof payload selects which request variant is carried.
  rpc Inference(Request) returns (stream Response) {}
}
// -----------------------------------------------------------------------------
// Top-level wrapper
// -----------------------------------------------------------------------------
// Request is the input message of CosyVoice.Inference. It wraps exactly one of
// the request variants below; setting one oneof member clears the others.
message Request {
  // NOTE(review): the protobuf style guide prefers lower_snake_case oneof
  // names, but renaming changes generated accessors, so the name is kept.
  oneof RequestPayload {
    // Carries an sftRequest (speaker id + text).
    sftRequest sft_request = 1;
    // Carries a zeroshotRequest (text + prompt text + prompt audio).
    zeroshotRequest zero_shot_request = 2;
    // Carries a crosslingualRequest (text + prompt audio).
    crosslingualRequest cross_lingual_request = 3;
    // Carries an instructRequest (text + instruction text + prompt audio).
    instructRequest instruct_request = 4;
  }
}
// -----------------------------------------------------------------------------
// Individual request messages
// -----------------------------------------------------------------------------
// sftRequest is the Request variant that carries a speaker id and input text.
message sftRequest {
  // Speaker identifier.
  // NOTE(review): the set of valid ids is not defined in this file; confirm
  // against the server.
  string spk_id = 1;
  // Input text for the request (presumably the text to synthesize).
  string tts_text = 2;
}
// zeroshotRequest is the Request variant that carries input text together
// with a text prompt and an audio prompt.
message zeroshotRequest {
  // Input text for the request (presumably the text to synthesize).
  string tts_text = 1;
  // Prompt text accompanying prompt_audio.
  string prompt_text = 2;
  // Either a URL (UTF-8 encoded) or the raw audio bytes.
  bytes prompt_audio = 3;
  // Optional speed factor; 1 = normal speed.
  // This proto3 scalar has implicit presence, so an unset value is read as 0.
  // NOTE(review): confirm the server treats 0 as "use the default speed".
  float speed = 4;
}
// crosslingualRequest is the Request variant that carries input text together
// with an audio prompt (no prompt text).
message crosslingualRequest {
  // Input text for the request (presumably the text to synthesize).
  string tts_text = 1;
  // Prompt audio payload.
  // NOTE(review): presumably URL-or-raw-bytes like the other request
  // variants; confirm against the server.
  bytes prompt_audio = 2;
  // Optional speed factor.
  // This proto3 scalar has implicit presence, so an unset value is read as 0.
  // NOTE(review): confirm the server treats 0 as "use the default speed".
  float speed = 3;
}
// instructRequest is the Request variant that carries input text together
// with an instruction text and an audio prompt.
message instructRequest {
  // Input text for the request (presumably the text to synthesize).
  string tts_text = 1;
  // Instruction text accompanying the request.
  string instruct_text = 2;
  // Either a URL (UTF-8 encoded) or the raw audio bytes.
  bytes prompt_audio = 3;
  // Optional speed factor.
  // This proto3 scalar has implicit presence, so an unset value is read as 0.
  // NOTE(review): confirm the server treats 0 as "use the default speed".
  float speed = 4;
}
// -----------------------------------------------------------------------------
// Response is one element of the stream returned by CosyVoice.Inference.
message Response {
  // Audio payload for this stream element.
  // NOTE(review): encoding, sample rate and chunking are not specified in this
  // file; confirm against the server.
  bytes tts_audio = 1;
}