5roop committed on
Commit
ec1ce66
1 Parent(s): 15b4ff5

Update README.md

Browse files

Correct the use example.

Files changed (1) hide show
  1. README.md +12 -11
README.md CHANGED
@@ -38,28 +38,29 @@ Nikola Ljubešić, Danijel Koržinek, Peter Rupnik, Ivo-Pavao Jazbec. ParlaSpeec
38
  So far untested use from before:
39
 
40
  ```python
41
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
42
  import soundfile as sf
43
  import torch
44
  import os
45
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
46
  # load model and tokenizer
47
- processor = Wav2Vec2Processor.from_pretrained(
48
- "classla/wav2vec2-large-slavic-parlaspeech-hr")
49
- model = Wav2Vec2ForCTC.from_pretrained("classla/wav2vec2-large-slavic-parlaspeech-hr")
50
  # download the example wav files:
51
  os.system("wget https://huggingface.co/classla/wav2vec2-large-slavic-parlaspeech-hr/raw/main/00020570a.flac.wav")
52
  # read the wav file
53
  speech, sample_rate = sf.read("00020570a.flac.wav")
54
- input_values = processor(speech, sampling_rate=sample_rate, return_tensors="pt").input_values.to(device)
 
 
 
 
 
55
  # remove the raw wav file
56
  os.system("rm 00020570a.flac.wav")
57
- # retrieve logits
58
- logits = model.to(device)(input_values).logits
59
- # take argmax and decode
60
- predicted_ids = torch.argmax(logits, dim=-1)
61
- transcription = processor.decode(predicted_ids[0]).lower()
62
- # transcription: 'veliki broj poslovnih subjekata posluje sa minusom velik dio'
63
  ```
64
 
65
 
 
38
  So far untested use from before:
39
 
40
  ```python
41
+ from transformers import Wav2Vec2ProcessorWithLM, Wav2Vec2ForCTC
42
  import soundfile as sf
43
  import torch
44
  import os
45
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
46
  # load model and tokenizer
47
+ processor = Wav2Vec2ProcessorWithLM.from_pretrained(
48
+ "5roop/wav2vec2-large-slavic-parlaspeech-hr-lm")
49
+ model = Wav2Vec2ForCTC.from_pretrained("5roop/wav2vec2-large-slavic-parlaspeech-hr-lm")
50
  # download the example wav files:
51
  os.system("wget https://huggingface.co/classla/wav2vec2-large-slavic-parlaspeech-hr/raw/main/00020570a.flac.wav")
52
  # read the wav file
53
  speech, sample_rate = sf.read("00020570a.flac.wav")
54
+ input_values = processor(speech, sampling_rate=sample_rate, return_tensors="pt").input_values.cuda()
55
+ inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
56
+ with torch.no_grad():
57
+ logits = model(**inputs).logits
58
+ transcription = processor.batch_decode(logits.numpy()).text[0]
59
+
60
  # remove the raw wav file
61
  os.system("rm 00020570a.flac.wav")
62
+
63
+ transcription # 'velik broj poslovnih subjekata poslao je sa minusom velik dio'
 
 
 
 
64
  ```
65
 
66