{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Streaming VAD smoke test\n",
    "\n",
    "Feeds an audio file chunk by chunk through `FixedVADIterator` and prints every speech-boundary event it emits.\n",
    "The iterator is called with `return_seconds=False`, so the reported offsets are sample indices, not seconds."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "import IPython.display as ipd\n",
    "\n",
    "from audio_utils import audio_stream_generator\n",
    "\n",
    "# Make the parent directory importable before importing from `transcribe`.\n",
    "# The membership check keeps sys.path from growing when this cell is re-run.\n",
    "if \"..\" not in sys.path:\n",
    "    sys.path.append(\"..\")\n",
    "from transcribe.helpers.vadprocessor import FixedVADIterator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything a reader might want to tune lives in the next cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Audio file to analyse. Set the VAD_SAMPLE_FILE environment variable to use\n",
    "# another file; the fallback is the path this notebook was originally run with.\n",
    "SAMPLE_FILE_PATH = os.environ.get(\n",
    "    \"VAD_SAMPLE_FILE\",\n",
    "    \"/Users/david/Samples/Audio/zh/liyongle.wav\",\n",
    ")\n",
    "SAMPLING_RATE = 16000  # Hz, passed to the VAD iterator\n",
    "CHUNK_SIZE = 4096  # chunk_size argument of audio_stream_generator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the VAD iterator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# speech_pad_ms is not passed, so the class default applies.\n",
    "vac = FixedVADIterator(\n",
    "    threshold=0.5,\n",
    "    sampling_rate=SAMPLING_RATE,\n",
    "    min_silence_duration_ms=100,\n",
    "    max_speech_duration_s=5.0,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run the stream through the VAD\n",
    "\n",
    "The chunk stream is rebuilt and the VAD state is reset inside the cell, so the cell can be re-run on its own.\n",
    "The output stored below was recorded in an earlier run with the default sample file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "已加载音频文件: /Users/david/Samples/Audio/zh/liyongle.wav\n",
      "采样率: 16000 Hz\n",
      "音频长度: 64.00 秒\n",
      "{'start': 3616}\n",
      "{'end': 83968}\n",
      "{'end': 164352}\n",
      "{'end': 244736}\n",
      "{'end': 325120}\n",
      "{'end': 405504}\n",
      "{'end': 485888}\n",
      "{'end': 566272}\n",
      "{'end': 624608}\n",
      "{'start': 631328}\n",
      "{'end': 691168}\n",
      "{'start': 698912}\n",
      "{'end': 779264}\n",
      "{'end': 800736}\n",
      "{'start': 805920}\n",
      "{'end': 846816}\n",
      "{'start': 855072}\n",
      "{'end': 862176}\n",
      "{'start': 864288}\n",
      "{'end': 890336}\n",
      "{'start': 893984}\n",
      "{'end': 912352}\n",
      "{'start': 917536}\n",
      "{'end': 932320}\n",
      "{'start': 939040}\n",
      "{'end': 966112}\n",
      "{'start': 970784}\n",
      "{'end': 1015264}\n",
      "{'start': 1019424}\n",
      "音频流处理完成\n"
     ]
    }
   ],
   "source": [
    "# Reset the VAD and rebuild the chunk stream here rather than in an earlier\n",
    "# cell: a stream that was already consumed would make a re-run of this cell\n",
    "# print nothing, and leftover VAD state would shift the reported offsets.\n",
    "vac.reset_states()\n",
    "chunks_generator = audio_stream_generator(SAMPLE_FILE_PATH, chunk_size=CHUNK_SIZE)\n",
    "\n",
    "# Keep every emitted event so later cells can inspect them, not just read the log.\n",
    "vad_events = []\n",
    "for chunk in chunks_generator:\n",
    "    speech_dict = vac(chunk, return_seconds=False)\n",
    "    # The call returns something falsy when the chunk produced no boundary event.\n",
    "    if speech_dict:\n",
    "        print(speech_dict)\n",
    "        vad_events.append(speech_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Notes on the recorded output\n",
    "\n",
    "- Several `end` events follow one another without a `start` in between, about 80384 samples (roughly 5.02 s at 16 kHz) apart. These are presumably forced splits caused by `max_speech_duration_s=5.0`. **TODO:** confirm against `FixedVADIterator`.\n",
    "- The last event is a `start` with no matching `end`, although the file is reported as 64.00 s long. The audio apparently ended while speech was still active, so a consumer of these events has to close the final segment itself. **TODO:** check whether the iterator offers a way to flush an open segment."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}