Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) 2023 Binbin Zhang ([email protected])
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse | |
from wenet.cli.paraformer_model import load_model as load_paraformer | |
from wenet.cli.model import load_model | |
def get_args(argv=None):
    """Parse the command-line options of the transcription CLI.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so a
            plain ``get_args()`` call behaves as before.

    Returns:
        The ``argparse.Namespace`` holding ``audio_file``, ``language``,
        ``model_dir``, ``gpu``, ``device``, ``show_tokens_info``, ``align``,
        ``label``, ``paraformer``, ``beam``, ``context_path`` and
        ``context_score``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including
            ``--align`` given without ``--label``.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('audio_file', help='audio file to transcribe')
    parser.add_argument('-l',
                        '--language',
                        choices=[
                            'chinese',
                            'english',
                        ],
                        default='chinese',
                        help='language type')
    parser.add_argument('-m',
                        '--model_dir',
                        default=None,
                        help='specify your own model dir')
    # The default is a real int so it matches ``type=int`` without relying
    # on argparse converting string defaults.
    parser.add_argument('-g',
                        '--gpu',
                        type=int,
                        default=-1,
                        help='gpu id to decode, default is cpu.')
    parser.add_argument('--device',
                        type=str,
                        default='cpu',
                        choices=["cpu", "npu", "cuda"],
                        help='accelerator to use')
    parser.add_argument('-t',
                        '--show_tokens_info',
                        action='store_true',
                        help='whether to output token(word) level information'
                        ', such times/confidence')
    parser.add_argument('--align',
                        action='store_true',
                        help='force align the input audio and transcript')
    parser.add_argument('--label', type=str, help='the input label to align')
    parser.add_argument('--paraformer',
                        action='store_true',
                        help='whether to use the best chinese model')
    parser.add_argument('--beam', type=int, default=5, help="beam size")
    parser.add_argument('--context_path',
                        type=str,
                        default=None,
                        help='context list file')
    parser.add_argument('--context_score',
                        type=float,
                        default=6.0,
                        help='context score')
    args = parser.parse_args(argv)
    # Without --label the namespace carries label=None; report that as a
    # usage error here instead of handing None to the aligner later.
    if args.align and args.label is None:
        parser.error('--label is required when --align is set')
    return args
def main():
    """Run the CLI: build a model, then align or transcribe and print.

    The model comes from ``load_paraformer`` when ``--paraformer`` is set,
    otherwise from ``load_model``. With ``--align`` the audio file is aligned
    against ``--label``; otherwise it is transcribed. The result is printed
    to stdout.
    """
    opts = get_args()

    # Pick the loader first; the generic loader takes the extra decoding
    # options (language, beam size, context biasing).
    if not opts.paraformer:
        recognizer = load_model(opts.language, opts.model_dir, opts.gpu,
                                opts.beam, opts.context_path,
                                opts.context_score, opts.device)
    else:
        recognizer = load_paraformer(opts.model_dir, opts.gpu, opts.device)

    # Alignment and transcription are mutually exclusive modes.
    if not opts.align:
        output = recognizer.transcribe(opts.audio_file, opts.show_tokens_info)
    else:
        output = recognizer.align(opts.audio_file, opts.label)

    print(output)
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()