Spaces:

Kororinpa
/

Amadeus_Project

Build error

Amadeus_Project / api.py

johntito

```

2ef7c17 5 months ago

2.14 kB

	import torch
	from fastapi import FastAPI
	from pydantic import BaseModel
	import numpy as np
	import base64
	import io
	from scipy.io.wavfile import write
	import sounddevice as sd

	# 自定义模块
	import commons
	import utils
	from models import SynthesizerTrn
	from text.symbols import symbols
	from text import text_to_sequence

	# Startup diagnostics, printed one value per line:
	#   1. the installed PyTorch version,
	#   2. whether CUDA is available,
	#   3. the CUDA version reported by torch.version.cuda.
	print(torch.__version__, torch.cuda.is_available(), torch.version.cuda, sep="\n")

	# FastAPI application instance; the /synthesize route is registered on it.
	app = FastAPI()

	# Request body model
	class TextRequest(BaseModel):
	    """Request body accepted by the /synthesize endpoint."""

	    # Text that is passed to text_to_speech().
	    text: str

	# Load the hyperparameters, build the synthesizer and restore its weights.
	config_path = "configs/steins_gate_base.json"
	checkpoint_path = "G_265000.pth"
	hps = utils.get_hparams_from_file(config_path)

	# NOTE(review): the three positional arguments look like the symbol
	# vocabulary size, the number of spectrogram bins and the training segment
	# length in frames -- confirm against SynthesizerTrn's signature.
	net_g = SynthesizerTrn(
	    len(symbols),
	    hps.data.filter_length // 2 + 1,
	    hps.train.segment_size // hps.data.hop_length,
	    **hps.model,
	)
	# Keep whatever eval() returns bound to net_g, as the chained call did.
	net_g = net_g.eval()
	utils.load_checkpoint(checkpoint_path, net_g, None)

	# Text-to-speech synthesis
	def text_to_speech(content):
	    """Synthesize speech for ``content`` using the module-level ``net_g``.

	    The text is turned into a symbol-id sequence with the cleaners listed in
	    ``hps.data.text_cleaners``. When ``hps.data.add_blank`` is set, the ids
	    are passed through ``commons.intersperse(..., 0)``. Inference runs with
	    gradient tracking disabled.

	    Args:
	        content: Text to synthesize.

	    Returns:
	        A ``(sampling_rate, audio)`` tuple: ``hps.data.sampling_rate`` and a
	        float NumPy array taken from the ``[0, 0]`` slice of the first
	        output of ``net_g.infer``.
	    """
	    symbol_ids = text_to_sequence(content, hps.data.text_cleaners)
	    if hps.data.add_blank:
	        symbol_ids = commons.intersperse(symbol_ids, 0)
	    token_tensor = torch.LongTensor(symbol_ids)

	    with torch.no_grad():
	        # Add a leading batch dimension; the model also gets the length.
	        batch = token_tensor.unsqueeze(0)
	        batch_lengths = torch.LongTensor([token_tensor.size(0)])
	        outputs = net_g.infer(
	            batch,
	            batch_lengths,
	            noise_scale=0.667,
	            noise_scale_w=0.8,
	            length_scale=1,
	        )
	        waveform = outputs[0][0, 0].data.float().numpy()

	    return hps.data.sampling_rate, waveform

	# API route: text to speech
	@app.post("/synthesize")
	def synthesize(request: TextRequest):
	    """Synthesize ``request.text`` and return it as a Base64-encoded WAV.

	    Args:
	        request: Request body carrying the text to synthesize.

	    Returns:
	        ``{"audio": <str>}``, where the string is the Base64 encoding of a
	        16-bit PCM WAV file written at the sampling rate returned by
	        ``text_to_speech``.
	    """
	    sampling_rate, audio = text_to_speech(request.text)

	    # Saturate to [-1, 1] before scaling. Samples outside that range would
	    # overflow the int16 cast and come out as corrupted values (audible
	    # clicks) instead of being limited to full scale.
	    pcm16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

	    # Write the WAV container into an in-memory buffer.
	    wav_buffer = io.BytesIO()
	    write(wav_buffer, sampling_rate, pcm16)

	    # getvalue() returns the full buffer regardless of the stream position,
	    # so no seek(0) is needed before encoding.
	    audio_base64 = base64.b64encode(wav_buffer.getvalue()).decode("utf-8")
	    return {"audio": audio_base64}

	# Entry point: when run as a script, serve the app with uvicorn on
	# 127.0.0.1:8000.
	if __name__ == "__main__":
	    import uvicorn  # only imported when the file is run as a script
	    uvicorn.run(app, host="127.0.0.1", port=8000)