johannaSommer committed on
Commit 2f59c4d · verified · 1 Parent(s): e87364b

Update README.md

Files changed (1):
  1. README.md +2 -73
README.md CHANGED
@@ -4,77 +4,6 @@ pipeline_tag: automatic-speech-recognition
  inference: true
  ---
 
- This model is for debugging. It is randomly initialized with the config from [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) but is of smaller size.
-
- Code:
- ```python
- import os
-
- import torch
- from datasets import load_dataset
- from huggingface_hub import create_repo, upload_folder
- from transformers import (
-     AutoConfig,
-     AutoModelForSpeechSeq2Seq,
-     AutoProcessor,
-     GenerationConfig,
-     pipeline,
-     set_seed,
- )
-
- model_id = "openai/whisper-large-v3"
- repo_id = "yujiepan/whisper-v3-tiny-random"
- save_path = f"/tmp/{repo_id}"
- os.system(f'rm -rf {save_path}')
- os.makedirs(save_path, exist_ok=True)
-
- device = "cuda"
- torch_dtype = torch.float16
-
- # Shrink the whisper-large-v3 config down to a tiny architecture.
- config = AutoConfig.from_pretrained(model_id)
- config.num_hidden_layers = 2
- config.d_model = 8
- config.decoder_attention_heads = 2
- config.decoder_ffn_dim = 16
- config.decoder_layers = 2
- config.encoder_ffn_dim = 16
- config.encoder_attention_heads = 2
- config.encoder_layers = 2
-
- # Build the tiny model from the shrunken config (weights are random),
- # but reuse the original generation config and processor.
- model = AutoModelForSpeechSeq2Seq.from_config(config)
- model.to(device).to(torch_dtype)
- model.generation_config = GenerationConfig.from_pretrained(model_id)
- processor = AutoProcessor.from_pretrained(model_id)
-
- # Re-initialize every parameter with a fixed seed so the checkpoint is reproducible.
- set_seed(42)
- num_params = 0
- with torch.no_grad():
-     for name, p in sorted(model.named_parameters()):
-         print(name, p.shape)
-         torch.nn.init.uniform_(p, -0.5, 0.5)
-         num_params += p.numel()
- print("Total number of parameters:", num_params)
-
- # Smoke-test the tiny model end to end.
- pipe = pipeline(
-     "automatic-speech-recognition",
-     model=model,
-     tokenizer=processor.tokenizer,
-     feature_extractor=processor.feature_extractor,
-     torch_dtype=torch_dtype,
-     device=device,
- )
-
- sample = load_dataset(
-     "distil-whisper/librispeech_long", "clean",
-     split="validation",
- )[0]["audio"]
- result = pipe(sample, return_timestamps=True)
- print(result["text"])
-
- # Save the tiny model and processor locally so the upload below has files to push.
- model.save_pretrained(save_path)
- processor.save_pretrained(save_path)
- create_repo(repo_id, exist_ok=True)
- upload_folder(repo_id=repo_id, folder_path=save_path, repo_type='model')
- ```
 
+ This model is a smaller, randomly initialized version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3). Its intended use is debugging, fast testing, and CI/CD pipelines.
+
+ This model was taken from [this repository](https://huggingface.co/yujiepan/whisper-v3-tiny-random) and is updated and maintained to work with newer transformers versions.
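A minimal smoke test against a tiny random checkpoint like this might look like the sketch below. The repo id is an assumption (the upstream yujiepan/whisper-v3-tiny-random copy); substitute the id of the repository you actually pull from.

```python
import torch
from datasets import load_dataset
from transformers import pipeline

# Assumed repo id: the upstream copy of the tiny random checkpoint.
# Replace with the id of this repository if you are using this copy.
model_id = "yujiepan/whisper-v3-tiny-random"

# The tiny model loads in seconds and fits comfortably on a CPU-only CI runner.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    torch_dtype=torch.float32,
    device="cpu",
)

# Any short audio clip works; here we reuse the sample from the snippet above.
sample = load_dataset(
    "distil-whisper/librispeech_long", "clean", split="validation"
)[0]["audio"]

# The transcription itself is gibberish (the weights are random); the point is
# only to check that the pipeline runs end to end and returns text.
result = pipe(sample, return_timestamps=True)
assert isinstance(result["text"], str)
```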