Yehor
/

hubert-uk

Automatic Speech Recognition

Safetensors

Ukrainian

hubert

Eval Results

Model card Files Files and versions Community

Yehor commited on Aug 16, 2024

Commit

9081c0b

verified ·

1 Parent(s): 96269f9

Update README.md

Browse files

Files changed (1) hide show

README.md +11 -90

README.md CHANGED Viewed

@@ -32,100 +32,21 @@ model-index:
 - Speech Recognition: https://t.me/speech_recognition_uk
 - Speech Synthesis: https://t.me/speech_synthesis_uk
-## Usage
-```python
-import csv
-import torch
-import torchaudio
-import numpy as np
-import evaluate
-from transformers import HubertForCTC, Wav2Vec2Processor
-batch_size = 8
-device = "cuda:0"  # or cpu
-torch_dtype = torch.float16
-sampling_rate = 16_000
-model_name = "/home/yehor/ext-ml-disk/asr/hubert-training/models/final-85500"
-testset_file = "/home/yehor/ext-ml-disk/asr/w2v2-bert-training/eval/rows_no_defis.csv"
-# Load the test dataset
-with open(testset_file) as f:
-    samples = list(csv.DictReader(f))
-# Load the model
-asr_model = HubertForCTC.from_pretrained(
-    model_name,
-    device_map=device,
-    torch_dtype=torch_dtype,
-    # attn_implementation="flash_attention_2",
-)
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-# A util function to make batches
-def make_batches(iterable, n=1):
-    lx = len(iterable)
-    for ndx in range(0, lx, n):
-        yield iterable[ndx : min(ndx + n, lx)]
-# Temporary variables
-predictions_all = []
-references_all = []
-# Inference in the batched mode
-for batch in make_batches(samples, batch_size):
-    paths = [it["path"] for it in batch]
-    references = [it["text"] for it in batch]
-    # Extract audio
-    audio_inputs = []
-    for path in paths:
-        audio_input, sampling_rate = torchaudio.load(path, backend="sox")
-        audio_input = audio_input.squeeze(0).numpy()
-        audio_inputs.append(audio_input)
-    # Transcribe the audio
-    inputs = processor(audio_inputs, sampling_rate=16_000, padding=True).input_values
-    features = torch.tensor(np.array(inputs), dtype=torch_dtype).to(device)
-    with torch.inference_mode():
-        logits = asr_model(features).logits
-    predicted_ids = torch.argmax(logits, dim=-1)
-    predictions = processor.batch_decode(predicted_ids)
-    # Log outputs
-    print("---")
-    print("Predictions:")
-    print(predictions)
-    print("References:")
-    print(references)
-    print("---")
-    # Add predictions and references
-    predictions_all.extend(predictions)
-    references_all.extend(references)
-# Load evaluators
-wer = evaluate.load("wer")
-cer = evaluate.load("cer")
-# Evaluate
-wer_value = round(
-    wer.compute(predictions=predictions_all, references=references_all), 4
-)
-cer_value = round(
-    cer.compute(predictions=predictions_all, references=references_all), 4
-)
-# Print results
-print("Final:")
-print(f"WER: {wer_value} | CER: {cer_value}")
 ```

 - Speech Recognition: https://t.me/speech_recognition_uk
 - Speech Synthesis: https://t.me/speech_synthesis_uk
+## Install
+```text
+uv venv --python 3.12
+source .venv/bin/activate
+uv pip install -r requirements.txt
+# in development mode
+uv pip install -r requirements-dev.txt
+```
+## Usage
+```text
+python run_demo.py
 ```