# Autogenerated by nbdev
#
# nbdev module index: maps every exported symbol to a
# (documentation-page anchor, source-file path) pair so the generated docs
# can cross-link symbols to their source. Regenerated by `nbdev_export`;
# do not edit by hand.
# NOTE(review): the previous copy of this file was corrupted by a text
# extraction pass (stray "Spaces:/Paused" header lines and a trailing
# " | |" on every line) and did not parse; this restores valid Python
# with the identical data.

d = {
    'settings': {
        'branch': 'master',
        'doc_baseurl': '/WhisperSpeech',
        'doc_host': 'https://collabora.github.io',
        'git_url': 'https://github.com/collabora/WhisperSpeech',
        'lib_path': 'whisperspeech'},
    'syms': {
        'whisperspeech.a2wav': {
            'whisperspeech.a2wav.Vocoder': ('6. quality-boosting vocoder.html#vocoder', 'whisperspeech/a2wav.py'),
            'whisperspeech.a2wav.Vocoder.__init__': ('6. quality-boosting vocoder.html#vocoder.__init__', 'whisperspeech/a2wav.py'),
            'whisperspeech.a2wav.Vocoder.decode': ('6. quality-boosting vocoder.html#vocoder.decode', 'whisperspeech/a2wav.py'),
            'whisperspeech.a2wav.Vocoder.decode_to_file': ('6. quality-boosting vocoder.html#vocoder.decode_to_file', 'whisperspeech/a2wav.py'),
            'whisperspeech.a2wav.Vocoder.decode_to_notebook': ('6. quality-boosting vocoder.html#vocoder.decode_to_notebook', 'whisperspeech/a2wav.py')},
        'whisperspeech.extract_acoustic': {
            'whisperspeech.extract_acoustic.extract_Atoks': ('1. acoustic token extraction.html#extract_atoks', 'whisperspeech/extract_acoustic.py'),
            'whisperspeech.extract_acoustic.extract_acoustic': ('1. acoustic token extraction.html#extract_acoustic', 'whisperspeech/extract_acoustic.py'),
            'whisperspeech.extract_acoustic.load': ('1. acoustic token extraction.html#load', 'whisperspeech/extract_acoustic.py'),
            'whisperspeech.extract_acoustic.load_model': ('1. acoustic token extraction.html#load_model', 'whisperspeech/extract_acoustic.py')},
        'whisperspeech.extract_semb': {
            'whisperspeech.extract_semb.encode_semantic': ('2c. whisper semantic embedding extraction.html#encode_semantic', 'whisperspeech/extract_semb.py'),
            'whisperspeech.extract_semb.extract_semantic': ('2c. whisper semantic embedding extraction.html#extract_semantic', 'whisperspeech/extract_semb.py'),
            'whisperspeech.extract_semb.load_model': ('2c. whisper semantic embedding extraction.html#load_model', 'whisperspeech/extract_semb.py')},
        'whisperspeech.fetch_models': {
            'whisperspeech.fetch_models.main': ('0. download models.html#main', 'whisperspeech/fetch_models.py')},
        'whisperspeech.modules': {
            'whisperspeech.modules.Decoder': ('a. neural modules.html#decoder', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Decoder.__init__': ('a. neural modules.html#decoder.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Decoder.forward': ('a. neural modules.html#decoder.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Encoder': ('a. neural modules.html#encoder', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Encoder.__init__': ('a. neural modules.html#encoder.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Encoder.forward': ('a. neural modules.html#encoder.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.LayerNorm': ('a. neural modules.html#layernorm', 'whisperspeech/modules.py'),
            'whisperspeech.modules.LayerNorm.forward': ('a. neural modules.html#layernorm.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.LinearHead': ('a. neural modules.html#linearhead', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention': ('a. neural modules.html#multiheadattention', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.__init__': ('a. neural modules.html#multiheadattention.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.forward': ('a. neural modules.html#multiheadattention.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.qkv_attention_pth20': ('a. neural modules.html#multiheadattention.qkv_attention_pth20', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.qkv_attention_vanilla': ('a. neural modules.html#multiheadattention.qkv_attention_vanilla', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.qkv_attention_xformers': ('a. neural modules.html#multiheadattention.qkv_attention_xformers', 'whisperspeech/modules.py'),
            'whisperspeech.modules.QueryHead': ('a. neural modules.html#queryhead', 'whisperspeech/modules.py'),
            'whisperspeech.modules.ResidualAttentionBlock': ('a. neural modules.html#residualattentionblock', 'whisperspeech/modules.py'),
            'whisperspeech.modules.ResidualAttentionBlock.__init__': ('a. neural modules.html#residualattentionblock.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.ResidualAttentionBlock.forward': ('a. neural modules.html#residualattentionblock.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Rotary': ('a. neural modules.html#rotary', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Rotary.__init__': ('a. neural modules.html#rotary.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Rotary.forward': ('a. neural modules.html#rotary.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.SumDecoder': ('a. neural modules.html#sumdecoder', 'whisperspeech/modules.py'),
            'whisperspeech.modules.SumDecoder.__init__': ('a. neural modules.html#sumdecoder.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.SumDecoder.forward': ('a. neural modules.html#sumdecoder.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.apply_rotary_pos_emb': ('a. neural modules.html#apply_rotary_pos_emb', 'whisperspeech/modules.py'),
            'whisperspeech.modules.init_transformer': ('a. neural modules.html#init_transformer', 'whisperspeech/modules.py'),
            'whisperspeech.modules.rotate_half': ('a. neural modules.html#rotate_half', 'whisperspeech/modules.py'),
            'whisperspeech.modules.sinusoids': ('a. neural modules.html#sinusoids', 'whisperspeech/modules.py')},
        'whisperspeech.pipeline': {
            'whisperspeech.pipeline.Pipeline': ('7. pipeline.html#pipeline', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.__init__': ('7. pipeline.html#pipeline.__init__', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.generate': ('7. pipeline.html#pipeline.generate', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.generate_atoks': ('7. pipeline.html#pipeline.generate_atoks', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.generate_to_file': ('7. pipeline.html#pipeline.generate_to_file', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.generate_to_notebook': ('7. pipeline.html#pipeline.generate_to_notebook', 'whisperspeech/pipeline.py')},
        'whisperspeech.prepare_s2a_dataset': {
            'whisperspeech.prepare_s2a_dataset.flac_to_s2a_name': ('4a. s2a dataset preparation.html#flac_to_s2a_name', 'whisperspeech/prepare_s2a_dataset.py'),
            'whisperspeech.prepare_s2a_dataset.prepare_s2a': ('4a. s2a dataset preparation.html#prepare_s2a', 'whisperspeech/prepare_s2a_dataset.py'),
            'whisperspeech.prepare_s2a_dataset.resampler': ('4a. s2a dataset preparation.html#resampler', 'whisperspeech/prepare_s2a_dataset.py')},
        'whisperspeech.prepare_t2s_dataset': {
            'whisperspeech.prepare_t2s_dataset.Transcriber': ('5a. t2s dataset preparation.html#transcriber', 'whisperspeech/prepare_t2s_dataset.py'),
            'whisperspeech.prepare_t2s_dataset.Transcriber.__init__': ('5a. t2s dataset preparation.html#transcriber.__init__', 'whisperspeech/prepare_t2s_dataset.py'),
            'whisperspeech.prepare_t2s_dataset.Transcriber.transcribe': ('5a. t2s dataset preparation.html#transcriber.transcribe', 'whisperspeech/prepare_t2s_dataset.py'),
            'whisperspeech.prepare_t2s_dataset.flac_to_t2s_name': ('5a. t2s dataset preparation.html#flac_to_t2s_name', 'whisperspeech/prepare_t2s_dataset.py'),
            'whisperspeech.prepare_t2s_dataset.prepare_t2s': ('5a. t2s dataset preparation.html#prepare_t2s', 'whisperspeech/prepare_t2s_dataset.py')},
        'whisperspeech.s2a_delar_mup_wds': {
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual': ('4b. semantic to acoustic token modeling.html#cmlmvisual', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.__init__': ('4b. semantic to acoustic token modeling.html#cmlmvisual.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.add_data': ('4b. semantic to acoustic token modeling.html#cmlmvisual.add_data', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.add_table_row': ('4b. semantic to acoustic token modeling.html#cmlmvisual.add_table_row', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.hide': ('4b. semantic to acoustic token modeling.html#cmlmvisual.hide', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.on_iter': ('4b. semantic to acoustic token modeling.html#cmlmvisual.on_iter', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.plot': ('4b. semantic to acoustic token modeling.html#cmlmvisual.plot', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.show': ('4b. semantic to acoustic token modeling.html#cmlmvisual.show', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.DelSumDecoder': ('4b. semantic to acoustic token modeling.html#delsumdecoder', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.DelSumDecoder.__init__': ('4b. semantic to acoustic token modeling.html#delsumdecoder.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.DelSumDecoder.forward': ('4b. semantic to acoustic token modeling.html#delsumdecoder.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.EmbeddingProjector': ('4b. semantic to acoustic token modeling.html#embeddingprojector', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention': ('4b. semantic to acoustic token modeling.html#multiheadattention', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.__init__': ('4b. semantic to acoustic token modeling.html#multiheadattention.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.forward': ('4b. semantic to acoustic token modeling.html#multiheadattention.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.qkv_attention_pth20': ('4b. semantic to acoustic token modeling.html#multiheadattention.qkv_attention_pth20', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.qkv_attention_xformers': ('4b. semantic to acoustic token modeling.html#multiheadattention.qkv_attention_xformers', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock': ('4b. semantic to acoustic token modeling.html#residualattentionblock', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock.__init__': ('4b. semantic to acoustic token modeling.html#residualattentionblock.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock.forward': ('4b. semantic to acoustic token modeling.html#residualattentionblock.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Rotary': ('4b. semantic to acoustic token modeling.html#rotary', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Rotary.__init__': ('4b. semantic to acoustic token modeling.html#rotary.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Rotary.forward': ('4b. semantic to acoustic token modeling.html#rotary.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer': ('4b. semantic to acoustic token modeling.html#sadelartransformer', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.__init__': ('4b. semantic to acoustic token modeling.html#sadelartransformer.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.device': ('4b. semantic to acoustic token modeling.html#sadelartransformer.device', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.embed_stoks': ('4b. semantic to acoustic token modeling.html#sadelartransformer.embed_stoks', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.forward': ('4b. semantic to acoustic token modeling.html#sadelartransformer.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.generate': ('4b. semantic to acoustic token modeling.html#sadelartransformer.generate', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.get_extra_state': ('4b. semantic to acoustic token modeling.html#sadelartransformer.get_extra_state', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.get_metrics': ('4b. semantic to acoustic token modeling.html#sadelartransformer.get_metrics', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.init_transformer': ('4b. semantic to acoustic token modeling.html#sadelartransformer.init_transformer', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_checkpoint': ('4b. semantic to acoustic token modeling.html#sadelartransformer.load_checkpoint', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_frozen_semantic_embeddings': ('4b. semantic to acoustic token modeling.html#sadelartransformer.load_frozen_semantic_embeddings', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_model': ('4b. semantic to acoustic token modeling.html#sadelartransformer.load_model', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.save_model': ('4b. semantic to acoustic token modeling.html#sadelartransformer.save_model', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.set_extra_state': ('4b. semantic to acoustic token modeling.html#sadelartransformer.set_extra_state', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.setup': ('4b. semantic to acoustic token modeling.html#sadelartransformer.setup', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Tunables': ('4b. semantic to acoustic token modeling.html#tunables', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Tunables.__post_init__': ('4b. semantic to acoustic token modeling.html#tunables.__post_init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Tunables.upgrade': ('4b. semantic to acoustic token modeling.html#tunables.upgrade', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds._make_model': ('4b. semantic to acoustic token modeling.html#_make_model', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.apply_rotary_pos_emb': ('4b. semantic to acoustic token modeling.html#apply_rotary_pos_emb', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.load_datasets': ('4b. semantic to acoustic token modeling.html#load_datasets', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.make_model': ('4b. semantic to acoustic token modeling.html#make_model', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.pad_samples': ('4b. semantic to acoustic token modeling.html#pad_samples', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.rand': ('4b. semantic to acoustic token modeling.html#rand', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.random_trunc': ('4b. semantic to acoustic token modeling.html#random_trunc', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.rotate_half': ('4b. semantic to acoustic token modeling.html#rotate_half', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.speaker_id_extractor': ('4b. semantic to acoustic token modeling.html#speaker_id_extractor', 'whisperspeech/s2a_delar_mup_wds.py')},
        'whisperspeech.t2s_up_wds': {
            'whisperspeech.t2s_up_wds.CharTokenizer': ('5b. text to semantic token modeling.html#chartokenizer', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.CharTokenizer.decode': ('5b. text to semantic token modeling.html#chartokenizer.decode', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.CharTokenizer.encode': ('5b. text to semantic token modeling.html#chartokenizer.encode', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Decoder': ('5b. text to semantic token modeling.html#decoder', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Decoder.__init__': ('5b. text to semantic token modeling.html#decoder.__init__', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Decoder.forward': ('5b. text to semantic token modeling.html#decoder.forward', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.EmbeddingProjector': ('5b. text to semantic token modeling.html#embeddingprojector', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Encoder': ('5b. text to semantic token modeling.html#encoder', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Encoder.__init__': ('5b. text to semantic token modeling.html#encoder.__init__', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Encoder.forward': ('5b. text to semantic token modeling.html#encoder.forward', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer': ('5b. text to semantic token modeling.html#tsartransformer', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.__init__': ('5b. text to semantic token modeling.html#tsartransformer.__init__', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.device': ('5b. text to semantic token modeling.html#tsartransformer.device', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.ensure_tokenizer': ('5b. text to semantic token modeling.html#tsartransformer.ensure_tokenizer', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.forward': ('5b. text to semantic token modeling.html#tsartransformer.forward', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.generate': ('5b. text to semantic token modeling.html#tsartransformer.generate', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.generate_batch': ('5b. text to semantic token modeling.html#tsartransformer.generate_batch', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.init_transformer': ('5b. text to semantic token modeling.html#tsartransformer.init_transformer', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.load_checkpoint': ('5b. text to semantic token modeling.html#tsartransformer.load_checkpoint', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.load_frozen_semantic_embeddings': ('5b. text to semantic token modeling.html#tsartransformer.load_frozen_semantic_embeddings', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.load_model': ('5b. text to semantic token modeling.html#tsartransformer.load_model', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.save_model': ('5b. text to semantic token modeling.html#tsartransformer.save_model', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.setup': ('5b. text to semantic token modeling.html#tsartransformer.setup', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Tunables': ('5b. text to semantic token modeling.html#tunables', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Tunables.__post_init__': ('5b. text to semantic token modeling.html#tunables.__post_init__', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds._make_model': ('5b. text to semantic token modeling.html#_make_model', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.ar_padder': ('5b. text to semantic token modeling.html#ar_padder', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.build_speaker_map': ('5b. text to semantic token modeling.html#build_speaker_map', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.char_per_seconder': ('5b. text to semantic token modeling.html#char_per_seconder', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.load_datasets': ('5b. text to semantic token modeling.html#load_datasets', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.make_model': ('5b. text to semantic token modeling.html#make_model', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.rand': ('5b. text to semantic token modeling.html#rand', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.speaker_id_extractor': ('5b. text to semantic token modeling.html#speaker_id_extractor', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.tokenizer': ('5b. text to semantic token modeling.html#tokenizer', 'whisperspeech/t2s_up_wds.py')},
        'whisperspeech.train': {
            'whisperspeech.train.SimpleVisual': ('b1. training.html#simplevisual', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.__init__': ('b1. training.html#simplevisual.__init__', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.add_data': ('b1. training.html#simplevisual.add_data', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.add_table_row': ('b1. training.html#simplevisual.add_table_row', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.hide': ('b1. training.html#simplevisual.hide', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.on_iter': ('b1. training.html#simplevisual.on_iter', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.plot': ('b1. training.html#simplevisual.plot', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.show': ('b1. training.html#simplevisual.show', 'whisperspeech/train.py'),
            'whisperspeech.train.train': ('b1. training.html#train', 'whisperspeech/train.py'),
            'whisperspeech.train.validate': ('b1. training.html#validate', 'whisperspeech/train.py')},
        'whisperspeech.train_multi': {
            'whisperspeech.train_multi.TrainingTask': ('b2. training (lightning).html#trainingtask', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.__init__': ('b2. training (lightning).html#trainingtask.__init__', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.configure_optimizers': ('b2. training (lightning).html#trainingtask.configure_optimizers', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.on_fit_start': ('b2. training (lightning).html#trainingtask.on_fit_start', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.on_validation_epoch_end': ('b2. training (lightning).html#trainingtask.on_validation_epoch_end', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.test_step': ('b2. training (lightning).html#trainingtask.test_step', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.training_step': ('b2. training (lightning).html#trainingtask.training_step', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.validation_step': ('b2. training (lightning).html#trainingtask.validation_step', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.parse_and_call': ('b2. training (lightning).html#parse_and_call', 'whisperspeech/train_multi.py')},
        'whisperspeech.vad': {
            'whisperspeech.vad.extract_segments': ('1b. voice activity detection.html#extract_segments', 'whisperspeech/vad.py'),
            'whisperspeech.vad.fix_dots_in_names': ('1b. voice activity detection.html#fix_dots_in_names', 'whisperspeech/vad.py'),
            'whisperspeech.vad.flac_to_vad_name': ('1b. voice activity detection.html#flac_to_vad_name', 'whisperspeech/vad.py'),
            'whisperspeech.vad.load_dataset': ('1b. voice activity detection.html#load_dataset', 'whisperspeech/vad.py'),
            'whisperspeech.vad.process_shard': ('1b. voice activity detection.html#process_shard', 'whisperspeech/vad.py'),
            'whisperspeech.vad.segment_audio': ('1b. voice activity detection.html#segment_audio', 'whisperspeech/vad.py')},
        'whisperspeech.verify_wds': {
            'whisperspeech.verify_wds.process_shard': ('0. verify webdataset archives.html#process_shard', 'whisperspeech/verify_wds.py')},
        'whisperspeech.vq_stoks': {
            'whisperspeech.vq_stoks.RQBottleneckTransformer': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.__init__': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.__init__', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.decode_text': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.decode_text', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.dequantize': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.dequantize', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.device': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.device', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.downsample_embeddings': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.downsample_embeddings', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.encode_audio': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.encode_audio', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.encode_mel': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.encode_mel', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.ensure_whisper': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.ensure_whisper', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.extract_teacher': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.extract_teacher', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.forward': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.forward', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.get_metrics': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.get_metrics', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.init_transformer': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.init_transformer', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.load_checkpoint': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.load_checkpoint', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.load_model': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.load_model', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.quantize': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.quantize', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.save_model': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.save_model', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.setup': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.setup', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.Tunables': ('2b. whisper quantization (semantic token) model.html#tunables', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.Tunables.__post_init__': ('2b. whisper quantization (semantic token) model.html#tunables.__post_init__', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.Tunables.upgrade': ('2b. whisper quantization (semantic token) model.html#tunables.upgrade', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.add_masks': ('2b. whisper quantization (semantic token) model.html#add_masks', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.derived_dataset': ('2b. whisper quantization (semantic token) model.html#derived_dataset', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.load_datasets': ('2b. whisper quantization (semantic token) model.html#load_datasets', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.logrand': ('2b. whisper quantization (semantic token) model.html#logrand', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.make_model': ('2b. whisper quantization (semantic token) model.html#make_model', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.merge_in': ('2b. whisper quantization (semantic token) model.html#merge_in', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.rand': ('2b. whisper quantization (semantic token) model.html#rand', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.tokenize_text': ('2b. whisper quantization (semantic token) model.html#tokenize_text', 'whisperspeech/vq_stoks.py')},
        'whisperspeech.wer_metrics': {
            'whisperspeech.wer_metrics.DfBuilder': ('c. word error rate metrics.html#dfbuilder', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.DfBuilder.__init__': ('c. word error rate metrics.html#dfbuilder.__init__', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.DfBuilder.df': ('c. word error rate metrics.html#dfbuilder.df', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.DfBuilder.push': ('c. word error rate metrics.html#dfbuilder.push', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.WERStats': ('c. word error rate metrics.html#werstats', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.WERStats.__init__': ('c. word error rate metrics.html#werstats.__init__', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.WERStats.push_sample': ('c. word error rate metrics.html#werstats.push_sample', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.librispeech_data': ('c. word error rate metrics.html#librispeech_data', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.whisper_normalize': ('c. word error rate metrics.html#whisper_normalize', 'whisperspeech/wer_metrics.py')},
        'whisperspeech.wh_transcribe': {
            'whisperspeech.wh_transcribe.chunk_merger': ('2a. whisper quantization dataset preparation.html#chunk_merger', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.flac_to_txt_name': ('2a. whisper quantization dataset preparation.html#flac_to_txt_name', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.merge_in': ('2a. whisper quantization dataset preparation.html#merge_in', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.process_shard': ('2a. whisper quantization dataset preparation.html#process_shard', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.random_cutter': ('2a. whisper quantization dataset preparation.html#random_cutter', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.split_to_chunks': ('2a. whisper quantization dataset preparation.html#split_to_chunks', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.wds_compose': ('2a. whisper quantization dataset preparation.html#wds_compose', 'whisperspeech/wh_transcribe.py')}}}