Spaces:

ASLP-lab
/

OSUM

Running on Zero

OSUM / wenet /cli /transcribe.py

tomxxie

适配zeroGPU

568e264 12 days ago

3.35 kB

	# Copyright (c) 2023 Binbin Zhang ([email protected])
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import argparse

	from wenet.cli.paraformer_model import load_model as load_paraformer
	from wenet.cli.model import load_model


	def get_args():
	parser = argparse.ArgumentParser(description='')
	parser.add_argument('audio_file', help='audio file to transcribe')
	parser.add_argument('-l',
	'--language',
	choices=[
	'chinese',
	'english',
	],
	default='chinese',
	help='language type')
	parser.add_argument('-m',
	'--model_dir',
	default=None,
	help='specify your own model dir')
	parser.add_argument('-g',
	'--gpu',
	type=int,
	default='-1',
	help='gpu id to decode, default is cpu.')
	parser.add_argument('--device',
	type=str,
	default='cpu',
	choices=["cpu", "npu", "cuda"],
	help='accelerator to use')
	parser.add_argument('-t',
	'--show_tokens_info',
	action='store_true',
	help='whether to output token(word) level information'
	', such times/confidence')
	parser.add_argument('--align',
	action='store_true',
	help='force align the input audio and transcript')
	parser.add_argument('--label', type=str, help='the input label to align')
	parser.add_argument('--paraformer',
	action='store_true',
	help='whether to use the best chinese model')
	parser.add_argument('--beam', type=int, default=5, help="beam size")
	parser.add_argument('--context_path',
	type=str,
	default=None,
	help='context list file')
	parser.add_argument('--context_score',
	type=float,
	default=6.0,
	help='context score')
	args = parser.parse_args()
	return args


	def main():
	args = get_args()

	if args.paraformer:
	model = load_paraformer(args.model_dir, args.gpu, args.device)
	else:
	model = load_model(args.language, args.model_dir, args.gpu, args.beam,
	args.context_path, args.context_score, args.device)
	if args.align:
	result = model.align(args.audio_file, args.label)
	else:
	result = model.transcribe(args.audio_file, args.show_tokens_info)
	print(result)


	if __name__ == "__main__":
	main()