# Spaces:
# Running
# Running
# File size: 6,354 Bytes
# 1f3bd14 |
# (line-number gutter 1-166 from the web file viewer)
from typing import Dict
from speakers.processors import BaseProcessor, get_processors, EdgeProcessorData, RvcProcessorData
from speakers.tasks import BaseTask, Runner, FlowData
from speakers.common.registry import registry
from speakers.server.model.flow_data import PayLoad
import traceback
import hashlib
def calculate_md5(input_string: str) -> str:
    """Return the hexadecimal MD5 digest of *input_string* encoded as UTF-8.

    The digest is used in this module as part of a task identifier, not for
    any security purpose.

    Args:
        input_string: Text to hash.

    Returns:
        The 32-character lowercase hex digest.
    """
    # ``surrogatepass`` keeps the bytes (and therefore the digest) of every
    # well-formed string unchanged, but lets text containing lone surrogates
    # (which JSON ``\udXXX`` escapes can produce) be hashed instead of raising
    # UnicodeEncodeError.
    encoded = input_string.encode('utf-8', errors='surrogatepass')
    return hashlib.md5(encoded).hexdigest()
class EdgeVoiceFlowData(FlowData):
    """Flow data that carries one edge processor input and one RVC processor input."""

    # Input for the edge processor.
    edge: EdgeProcessorData
    # Input for the RVC processor.
    rvc: RvcProcessorData

    @property
    def type(self) -> str:
        """Identifier of this FlowData message kind, used for serialization."""
        return "edge_voice"
@registry.register_task("edge_voice_task")
class EdgeVoiceTask(BaseTask):
    """Task that runs an edge processor and feeds its output to an RVC processor.

    Processors are looked up in ``preprocess_dict`` by the ``type`` of the
    processor data carried in the runner's flow data.
    """

    def __init__(self, preprocess_dict: Dict[str, BaseProcessor]):
        """Store the processors this task may dispatch to, keyed by processor name."""
        super().__init__(preprocess_dict=preprocess_dict)
        self._preprocess_dict = preprocess_dict

    @classmethod
    def from_config(cls, cfg=None):
        """Build the task from a config object exposing a ``preprocess`` list.

        Each entry of ``preprocess`` is a mapping whose values provide the
        attributes ``processor`` (passed to ``get_processors``) and
        ``processor_name`` (the key under which the processor is stored).

        Raises:
            ValueError: if ``cfg`` is ``None`` or has no ``preprocess`` section.
        """
        preprocess_cfg = None if cfg is None else cfg.get('preprocess')
        if preprocess_cfg is None:
            # Previously this surfaced as an opaque AttributeError/TypeError.
            raise ValueError("edge_voice_task config must provide a 'preprocess' section")

        preprocess_dict = {}
        for preprocess in preprocess_cfg:
            for _name, preprocess_info in preprocess.items():
                preprocess_object = get_processors(preprocess_info.processor)
                preprocess_dict[preprocess_info.processor_name] = preprocess_object

        return cls(preprocess_dict=preprocess_dict)

    @property
    def preprocess_dict(self) -> Dict[str, BaseProcessor]:
        """Processors available to this task, keyed by processor name."""
        return self._preprocess_dict

    @classmethod
    def prepare(cls, payload: PayLoad) -> Runner:
        """Build the Runner for a task from a request payload.

        Reads the ``edge`` and ``rvc`` sections of ``payload.payload``, wraps
        them in ``EdgeProcessorData`` / ``RvcProcessorData`` and returns a
        ``Runner`` whose ``task_id`` is derived from the text hash, speaker,
        rate, volume, model index and pitch shift.
        """
        params = payload.payload

        # Read the edge and rvc sections of the payload. ``or {}`` also covers
        # a section that is present but explicitly null.
        edge_data = params.get("edge") or {}
        rvc_data = params.get("rvc") or {}

        # edge speaker
        tts_speaker = edge_data.get("tts_speaker")
        text = edge_data.get("text")
        rate = edge_data.get("rate")
        volume = edge_data.get("volume")

        edge_processor_data = EdgeProcessorData(
            text=text,
            tts_speaker=tts_speaker,
            rate=rate,
            volume=volume,
        )

        model_index = rvc_data.get("model_index")
        # Pitch shift (integer number of semitones; +12 is one octave up,
        # -12 one octave down).
        f0_up_key = rvc_data.get("f0_up_key")

        rvc_processor_data = RvcProcessorData(
            model_index=model_index,
            f0_up_key=f0_up_key,
            f0_method=rvc_data.get("f0_method"),
            # Proportion of retrieved features.
            index_rate=rvc_data.get("index_rate"),
            # If >= 3, median filtering is applied to the harvest pitch
            # results; the value is the filter radius and can reduce muted
            # sounds.
            filter_radius=rvc_data.get("filter_radius"),
            # Mix ratio for replacing the output volume envelope with the
            # input source's envelope; the closer to 1, the more the output
            # envelope is used.
            rms_mix_rate=rvc_data.get("rms_mix_rate"),
            # Post-processing resample to the final sample rate; 0 disables
            # resampling.
            resample_sr=rvc_data.get("resample_sr"),
            f0_file=rvc_data.get("f0_file"),
            protect=rvc_data.get("protect"),
        )

        voice_flow_data = EdgeVoiceFlowData(edge=edge_processor_data,
                                            rvc=rvc_processor_data)

        task_id = f'{calculate_md5(text)}-{tts_speaker}'\
                  f'-{rate}-{volume}'\
                  f'-{model_index}-{f0_up_key}'
        return Runner(
            task_id=task_id,
            flow_data=voice_flow_data
        )

    def _matched_processor(self, processor_data) -> BaseProcessor:
        """Return the registered processor that accepts ``processor_data``.

        Raises:
            RuntimeError: if no processor is registered under
                ``processor_data.type`` or the processor rejects the data.
        """
        processor = self.preprocess_dict.get(processor_data.type)
        if processor is None or not processor.match(processor_data):
            raise RuntimeError('不支持的process')
        return processor

    async def dispatch(self, runner: Runner):
        """Run the edge processor, then the RVC processor, for ``runner``.

        Progress is reported through ``report_progress`` at start, on success
        and on failure.

        Returns:
            ``(out_sr, output_audio)`` as returned by the RVC processor, or
            ``(None, None)`` if any step failed (the error is logged and
            reported as state ``'error'``).
        """
        # Captured up front so the error path never depends on a local that
        # may have been unbound or rebound inside the try block.
        task_id = runner.task_id
        try:
            self.logger.info('dispatch')

            await self.report_progress(task_id=task_id, runner_stat='edge_voice_task',
                                       state='dispatch_edge_voice_task')
            data = runner.flow_data

            # Guard clauses: every unsupported case goes through the error
            # path below instead of falling through with unbound results.
            if 'edge_voice' not in data.type:
                raise RuntimeError(f'unsupported flow data type: {data.type}')
            if 'EDGE' not in data.edge.type:
                raise RuntimeError(f'unsupported edge processor type: {data.edge.type}')

            edge_preprocess_object = self._matched_processor(data.edge)
            tts_np, tts_sr = edge_preprocess_object(data.edge)
            if tts_np is None:
                raise RuntimeError('edge processor returned no audio')
            if 'RVC' not in data.rvc.type:
                raise RuntimeError(f'unsupported rvc processor type: {data.rvc.type}')

            # Hand the edge output to the RVC input; the samples are passed on
            # as a plain Python list via ``tolist()``.
            data.rvc.sample_rate = tts_sr
            data.rvc.audio_samples = tts_np.tolist()

            rvc_preprocess_object = self._matched_processor(data.rvc)
            out_sr, output_audio = rvc_preprocess_object(data.rvc)

            # Task complete: report it and return the result.
            await self.report_progress(task_id=task_id,
                                       runner_stat='edge_voice_task',
                                       state='finished',
                                       finished=True)
            return out_sr, output_audio

        except Exception as e:
            # Log first so the original failure is recorded even if the
            # progress report itself fails.
            self.logger.error(f'{e.__class__.__name__}: {e}',
                              exc_info=e)
            traceback.print_exc()
            await self.report_progress(task_id=task_id, runner_stat='edge_voice_task',
                                       state='error', finished=True)
            return None, None

    def complete(self, runner: Runner):
        """Completion hook; intentionally does nothing for this task."""
        pass
|