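/**
 * @file
 * Audio silence detection filter.
 *
 * Scans incoming audio and reports silence_start, silence_end and
 * silence_duration both in the log and as lavfi.silence_* frame metadata.
 *
 * Illustrative invocation (option values here are arbitrary examples,
 * not the defaults):
 *   ffmpeg -i in.wav -af silencedetect=noise=-30dB:d=0.5 -f null -
 */
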
#include <float.h>

#include "libavutil/opt.h"
#include "libavutil/timestamp.h"
#include "audio.h"
#include "avfilter.h"
#include "internal.h"

typedef struct SilenceDetectContext {
    const AVClass *class;
    double noise;               ///< noise tolerance as an amplitude ratio (rescaled for integer formats)
    int64_t duration;           ///< minimum silence duration: microseconds until config_input(), samples afterwards
    int mono;                   ///< check each channel separately instead of all channels together
    int channels;               ///< number of input channels
    int independent_channels;   ///< number of per-channel state slots: channels in mono mode, otherwise 1
    int64_t *nb_null_samples;   ///< per-slot count of consecutive silent samples
    int64_t *start;             ///< per-slot silence start timestamp, INT64_MIN when no silence is pending
    int64_t frame_end;          ///< end pts of the last processed frame, used to close silence at EOF
    int last_sample_rate;       ///< last seen input sample rate, to rescale counters on change
    AVRational time_base;       ///< input time base, kept for the EOF flush in uninit()

    void (*silencedetect)(struct SilenceDetectContext *s, AVFrame *insamples,
                          int nb_samples, int64_t nb_samples_notify,
                          AVRational time_base);
} SilenceDetectContext;

#define MAX_DURATION (24*3600*1000000LL)
#define OFFSET(x) offsetof(SilenceDetectContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
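
/* Option defaults: noise tolerance 0.001 (amplitude ratio, about -60 dB) and a
 * minimum silence duration of 2 seconds. The duration is capped at 24 hours;
 * MAX_DURATION is expressed in microseconds, the AV_OPT_TYPE_DURATION unit. */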
static const AVOption silencedetect_options[] = {
    { "n",        "set noise tolerance",             OFFSET(noise),    AV_OPT_TYPE_DOUBLE,   {.dbl=0.001},   0, DBL_MAX,      FLAGS },
    { "noise",    "set noise tolerance",             OFFSET(noise),    AV_OPT_TYPE_DOUBLE,   {.dbl=0.001},   0, DBL_MAX,      FLAGS },
    { "d",        "set minimum duration in seconds", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=2000000}, 0, MAX_DURATION, FLAGS },
    { "duration", "set minimum duration in seconds", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=2000000}, 0, MAX_DURATION, FLAGS },
    { "mono",     "check each channel separately",   OFFSET(mono),     AV_OPT_TYPE_BOOL,     {.i64=0},       0, 1,            FLAGS },
    { "m",        "check each channel separately",   OFFSET(mono),     AV_OPT_TYPE_BOOL,     {.i64=0},       0, 1,            FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(silencedetect);
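
/* Export a detection event as frame metadata. Keys are "lavfi.<key>" when all
 * channels are considered together, or "lavfi.<key>.<N>" with a 1-based
 * channel index in mono mode. */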
static void set_meta(AVFrame *insamples, int channel, const char *key, char *value)
{
    char key2[128];

    if (channel)
        snprintf(key2, sizeof(key2), "lavfi.%s.%d", key, channel);
    else
        snprintf(key2, sizeof(key2), "lavfi.%s", key);
    av_dict_set(&insamples->metadata, key2, value, 0);
}
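
/*
 * Per-sample state machine. "channel" selects the state slot: the real channel
 * in mono mode, always 0 otherwise. While samples stay below the noise
 * threshold, the slot's counter grows; once it reaches nb_samples_notify, the
 * silence start is back-dated to where the silent run began (possibly in an
 * earlier frame) and reported. The first loud sample afterwards reports
 * silence_end and the resulting duration, then resets the slot.
 * Called with insamples == NULL from uninit() to flush a pending silence
 * interval at EOF, using frame_end as the end timestamp.
 */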
static av_always_inline void update(SilenceDetectContext *s, AVFrame *insamples,
                                    int is_silence, int current_sample, int64_t nb_samples_notify,
                                    AVRational time_base)
{
    int channel = current_sample % s->independent_channels;
    if (is_silence) {
        if (s->start[channel] == INT64_MIN) {
            s->nb_null_samples[channel]++;
            if (s->nb_null_samples[channel] >= nb_samples_notify) {
                s->start[channel] = insamples->pts + av_rescale_q(current_sample / s->channels + 1 - nb_samples_notify * s->independent_channels / s->channels,
                                                                  (AVRational){ 1, s->last_sample_rate }, time_base);
                set_meta(insamples, s->mono ? channel + 1 : 0, "silence_start",
                         av_ts2timestr(s->start[channel], &time_base));
                if (s->mono)
                    av_log(s, AV_LOG_INFO, "channel: %d | ", channel);
                av_log(s, AV_LOG_INFO, "silence_start: %s\n",
                       av_ts2timestr(s->start[channel], &time_base));
            }
        }
    } else {
        if (s->start[channel] > INT64_MIN) {
            int64_t end_pts = insamples ? insamples->pts + av_rescale_q(current_sample / s->channels,
                                              (AVRational){ 1, s->last_sample_rate }, time_base)
                                        : s->frame_end;
            int64_t duration_ts = end_pts - s->start[channel];
            if (insamples) {
                set_meta(insamples, s->mono ? channel + 1 : 0, "silence_end",
                         av_ts2timestr(end_pts, &time_base));
                set_meta(insamples, s->mono ? channel + 1 : 0, "silence_duration",
                         av_ts2timestr(duration_ts, &time_base));
            }
            if (s->mono)
                av_log(s, AV_LOG_INFO, "channel: %d | ", channel);
            av_log(s, AV_LOG_INFO, "silence_end: %s | silence_duration: %s\n",
                   av_ts2timestr(end_pts, &time_base),
                   av_ts2timestr(duration_ts, &time_base));
        }
        s->nb_null_samples[channel] = 0;
        s->start[channel] = INT64_MIN;
    }
}
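
/*
 * One detection routine is generated per input sample format. The interleaved
 * variants walk data[0] sample by sample; the planar variants iterate frames
 * and channels over extended_data[], passing channels * i + ch as the flat
 * sample index so update() sees the same ordering in both layouts. A sample
 * counts as silent when its value lies strictly inside (-noise, +noise); for
 * integer formats the threshold has already been scaled in config_input().
 */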
#define SILENCE_DETECT(name, type)                                              \
static void silencedetect_##name(SilenceDetectContext *s, AVFrame *insamples,   \
                                 int nb_samples, int64_t nb_samples_notify,     \
                                 AVRational time_base)                          \
{                                                                               \
    const type *p = (const type *)insamples->data[0];                           \
    const type noise = s->noise;                                                \
    int i;                                                                      \
                                                                                \
    for (i = 0; i < nb_samples; i++, p++)                                       \
        update(s, insamples, *p < noise && *p > -noise, i,                      \
               nb_samples_notify, time_base);                                   \
}

#define SILENCE_DETECT_PLANAR(name, type)                                       \
static void silencedetect_##name(SilenceDetectContext *s, AVFrame *insamples,   \
                                 int nb_samples, int64_t nb_samples_notify,     \
                                 AVRational time_base)                          \
{                                                                               \
    const int channels = insamples->ch_layout.nb_channels;                      \
    const type noise = s->noise;                                                \
                                                                                \
    nb_samples /= channels;                                                     \
    for (int i = 0; i < nb_samples; i++) {                                      \
        for (int ch = 0; ch < insamples->ch_layout.nb_channels; ch++) {         \
            const type *p = (const type *)insamples->extended_data[ch];         \
            update(s, insamples, p[i] < noise && p[i] > -noise,                 \
                   channels * i + ch,                                           \
                   nb_samples_notify, time_base);                               \
        }                                                                       \
    }                                                                           \
}

SILENCE_DETECT(dbl, double)
SILENCE_DETECT(flt, float)
SILENCE_DETECT(s32, int32_t)
SILENCE_DETECT(s16, int16_t)

SILENCE_DETECT_PLANAR(dblp, double)
SILENCE_DETECT_PLANAR(fltp, float)
SILENCE_DETECT_PLANAR(s32p, int32_t)
SILENCE_DETECT_PLANAR(s16p, int16_t)
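
/*
 * Convert the user options to internal units once the input is known: the
 * minimum duration goes from AV_TIME_BASE units to a number of samples at the
 * input rate, and for integer sample formats the noise threshold is scaled
 * from the [0,1] amplitude ratio to the integer range. For example (values
 * chosen only for illustration), d=2 at 48 kHz becomes 96000 samples, and
 * noise=0.001 becomes roughly 33 for s16 input (0.001 * 32767).
 */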
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    SilenceDetectContext *s = ctx->priv;
    int c;

    s->channels = inlink->ch_layout.nb_channels;
    s->duration = av_rescale(s->duration, inlink->sample_rate, AV_TIME_BASE);
    s->independent_channels = s->mono ? s->channels : 1;
    s->nb_null_samples = av_calloc(s->independent_channels,
                                   sizeof(*s->nb_null_samples));
    if (!s->nb_null_samples)
        return AVERROR(ENOMEM);
    s->start = av_malloc_array(sizeof(*s->start), s->independent_channels);
    if (!s->start)
        return AVERROR(ENOMEM);
    for (c = 0; c < s->independent_channels; c++)
        s->start[c] = INT64_MIN;

    switch (inlink->format) {
    case AV_SAMPLE_FMT_DBL: s->silencedetect = silencedetect_dbl; break;
    case AV_SAMPLE_FMT_FLT: s->silencedetect = silencedetect_flt; break;
    case AV_SAMPLE_FMT_S32:
        s->noise *= INT32_MAX;
        s->silencedetect = silencedetect_s32;
        break;
    case AV_SAMPLE_FMT_S16:
        s->noise *= INT16_MAX;
        s->silencedetect = silencedetect_s16;
        break;
    case AV_SAMPLE_FMT_DBLP: s->silencedetect = silencedetect_dblp; break;
    case AV_SAMPLE_FMT_FLTP: s->silencedetect = silencedetect_fltp; break;
    case AV_SAMPLE_FMT_S32P:
        s->noise *= INT32_MAX;
        s->silencedetect = silencedetect_s32p;
        break;
    case AV_SAMPLE_FMT_S16P:
        s->noise *= INT16_MAX;
        s->silencedetect = silencedetect_s16p;
        break;
    default:
        return AVERROR_BUG;
    }

    return 0;
}
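
/*
 * Per-frame entry point. The detection threshold is expressed as a number of
 * consecutive samples: s->duration per-channel samples, multiplied by the
 * channel count when all channels are checked together. If the sample rate
 * changes mid-stream, the pending per-channel counters are rescaled so a
 * partially accumulated silence interval keeps its meaning.
 */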
static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
{
    SilenceDetectContext *s = inlink->dst->priv;
    const int nb_channels = inlink->ch_layout.nb_channels;
    const int srate = inlink->sample_rate;
    const int nb_samples = insamples->nb_samples * nb_channels;
    const int64_t nb_samples_notify = s->duration * (s->mono ? 1 : nb_channels);
    int c;

    if (s->last_sample_rate && s->last_sample_rate != srate)
        for (c = 0; c < s->independent_channels; c++) {
            s->nb_null_samples[c] = srate * s->nb_null_samples[c] / s->last_sample_rate;
        }
    s->last_sample_rate = srate;
    s->time_base = inlink->time_base;
    s->frame_end = insamples->pts + av_rescale_q(insamples->nb_samples,
                       (AVRational){ 1, s->last_sample_rate }, inlink->time_base);

    s->silencedetect(s, insamples, nb_samples, nb_samples_notify,
                     inlink->time_base);

    return ff_filter_frame(inlink->dst->outputs[0], insamples);
}
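
/*
 * Close any silence interval that is still open when the stream ends, using
 * the end timestamp of the last processed frame, then release the per-channel
 * state arrays.
 */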
static av_cold void uninit(AVFilterContext *ctx)
{
    SilenceDetectContext *s = ctx->priv;
    int c;

    for (c = 0; c < s->independent_channels; c++)
        if (s->start[c] > INT64_MIN)
            update(s, NULL, 0, c, 0, s->time_base);
    av_freep(&s->nb_null_samples);
    av_freep(&s->start);
}

static const AVFilterPad silencedetect_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
};

const AVFilter ff_af_silencedetect = {
    .name          = "silencedetect",
    .description   = NULL_IF_CONFIG_SMALL("Detect silence."),
    .priv_size     = sizeof(SilenceDetectContext),
    .uninit        = uninit,
    FILTER_INPUTS(silencedetect_inputs),
    FILTER_OUTPUTS(ff_audio_default_filterpad),
    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBLP,
                      AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
                      AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32P,
                      AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P),
    .priv_class    = &silencedetect_class,
    .flags         = AVFILTER_FLAG_METADATA_ONLY,
};