Spaces:

pineconeT94
/

fishspeech2

No application file

App Files Files Community

fishspeech2 / tools / msgpack_api.py

pineconeT94

first commit

8b14bed 4 days ago

raw

history blame contribute delete

2.62 kB

	import os
	from argparse import ArgumentParser
	from pathlib import Path

	import httpx
	import ormsgpack

	from tools.schema import ServeReferenceAudio, ServeTTSRequest

	# Token sent as the Bearer credential in request headers; a placeholder is
	# used when the FISH_API_KEY environment variable is not set.
	api_key = os.getenv("FISH_API_KEY", "YOUR_API_KEY")


	def audio_request():
	    """Send a streaming TTS request to the local server and save the audio.

	    Builds a ``ServeTTSRequest`` that uses ``lengyue.wav`` / ``lengyue.lab``
	    (resolved against the current working directory) as its single reference
	    sample, POSTs it msgpack-encoded to ``http://127.0.0.1:8080/v1/tts`` and
	    writes the streamed response bytes to ``hello.wav``.

	    Raises:
	        OSError: If a reference file cannot be read or ``hello.wav`` cannot
	            be written.
	        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
	    """
	    # priority: ref_id > references
	    request = ServeTTSRequest(
	        text="你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.",
	        # reference_id="114514",
	        references=[
	            ServeReferenceAudio(
	                # Path helpers close the files; bare open(...).read() leaked
	                # both handles.
	                audio=Path("lengyue.wav").read_bytes(),
	                text=Path("lengyue.lab").read_text(encoding="utf-8"),
	            )
	        ],
	        streaming=True,
	    )

	    with httpx.Client() as client:
	        with client.stream(
	            "POST",
	            "http://127.0.0.1:8080/v1/tts",
	            content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
	            headers={
	                # Uses the module-level api_key (same lookup and default as
	                # the local copy this function used to re-create).
	                "authorization": f"Bearer {api_key}",
	                "content-type": "application/msgpack",
	            },
	            # Disable httpx's default timeouts for this request.
	            timeout=None,
	        ) as response:
	            # Check the status before creating hello.wav so that an error
	            # body is never saved as if it were audio.
	            response.raise_for_status()

	            with open("hello.wav", "wb") as f:
	                for chunk in response.iter_bytes():
	                    f.write(chunk)


	def asr_request(audio_path: Path) -> None:
	    """Upload an audio file to the Fish Audio ASR endpoint and print the result.

	    The file is sent msgpack-encoded to ``https://api.fish.audio/v1/asr``
	    with ``language="en"`` and ``ignore_timestamps=False``. The JSON reply's
	    ``text``, ``duration`` and per-segment ``text`` / ``start`` / ``end``
	    fields are printed to stdout.

	    Args:
	        audio_path: Location of the audio file to transcribe.

	    Raises:
	        OSError: If the audio file cannot be read.
	        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
	    """
	    # Read the audio file (Path() also accepts a plain string path).
	    audio_data = Path(audio_path).read_bytes()

	    # Prepare the request data
	    request_data = {
	        "audio": audio_data,
	        "language": "en",  # Optional: specify the language
	        "ignore_timestamps": False,  # Optional: set to True to ignore precise timestamps
	    }

	    # Send the request
	    with httpx.Client() as client:
	        response = client.post(
	            "https://api.fish.audio/v1/asr",
	            headers={
	                "Authorization": f"Bearer {api_key}",
	                "Content-Type": "application/msgpack",
	            },
	            content=ormsgpack.packb(request_data),
	        )

	    # Surface HTTP failures (bad key, quota, server error) directly instead
	    # of as a confusing JSON/KeyError while parsing an error payload.
	    response.raise_for_status()

	    # Parse the response
	    result = response.json()

	    print(f"Transcribed text: {result['text']}")
	    print(f"Audio duration: {result['duration']} seconds")

	    for segment in result["segments"]:
	        print(f"Segment: {segment['text']}")
	        print(f"Start time: {segment['start']}, End time: {segment['end']}")


	def parse_args(argv: "list[str] | None" = None):
	    """Parse the script's command-line options.

	    Args:
	        argv: Argument strings to parse. ``None`` (the default) makes
	            argparse read ``sys.argv[1:]``, which is the original behavior.

	    Returns:
	        The argparse namespace; ``audio_path`` is a ``Path`` and defaults to
	        ``audio/ref/trump.mp3``.
	    """
	    parser = ArgumentParser()
	    # Explicit Path default rather than relying on argparse converting a
	    # string default through ``type``.
	    parser.add_argument(
	        "--audio_path",
	        type=Path,
	        default=Path("audio/ref/trump.mp3"),
	    )

	    return parser.parse_args(argv)


	if __name__ == "__main__":
	    # Script entry point: transcribe the file named by --audio_path.
	    asr_request(parse_args().audio_path)