|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "libavutil/opt.h" |
|
#include "libavutil/pixdesc.h" |
|
#include "libavutil/avassert.h" |
|
#include "libavutil/imgutils.h" |
|
#include "filters.h" |
|
#include "dnn_filter_common.h" |
|
#include "internal.h" |
|
#include "video.h" |
|
#include "libswscale/swscale.h" |
|
#include "libavutil/time.h" |
|
|
|
typedef struct DnnProcessingContext { |
|
const AVClass *class; |
|
DnnContext dnnctx; |
|
struct SwsContext *sws_uv_scale; |
|
int sws_uv_height; |
|
} DnnProcessingContext; |
|
|
|
#define OFFSET(x) offsetof(DnnProcessingContext, dnnctx.x) |
|
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM |
|
static const AVOption dnn_processing_options[] = { |
|
{ "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = DNN_TF }, INT_MIN, INT_MAX, FLAGS, "backend" }, |
|
#if (CONFIG_LIBTENSORFLOW == 1) |
|
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TF }, 0, 0, FLAGS, "backend" }, |
|
#endif |
|
#if (CONFIG_LIBOPENVINO == 1) |
|
{ "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, "backend" }, |
|
#endif |
|
DNN_COMMON_OPTIONS |
|
{ NULL } |
|
}; |
|
|
|
AVFILTER_DEFINE_CLASS(dnn_processing); |
|
|
|
/**
 * Filter init callback: initialise the embedded DNN context for
 * frame-processing models.
 *
 * @param context the filter instance
 * @return whatever ff_dnn_init() returns
 */
static av_cold int init(AVFilterContext *context)
{
    DnnProcessingContext *priv = context->priv;
    DnnContext *dnn            = &priv->dnnctx;

    return ff_dnn_init(dnn, DFT_PROCESS_FRAME, context);
}
|
|
|
static const enum AVPixelFormat pix_fmts[] = { |
|
AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, |
|
AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32, |
|
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, |
|
AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, |
|
AV_PIX_FMT_NV12, |
|
AV_PIX_FMT_NONE |
|
}; |
|
|
|
/*
 * Log that the link's pixel format does not fit the model's channel count.
 *
 * Expands in the caller's scope and expects `ctx`, `fmt` and `model_input`
 * to be defined there. Wrapped in do { } while (0) so that
 * `LOG_FORMAT_CHANNEL_MISMATCH();` is exactly one statement, even inside an
 * unbraced if/else.
 */
#define LOG_FORMAT_CHANNEL_MISMATCH()                       \
    do {                                                    \
        av_log(ctx, AV_LOG_ERROR,                           \
               "the frame's format %s does not match "      \
               "the model input channel %d\n",              \
               av_get_pix_fmt_name(fmt),                    \
               model_input->channels);                      \
    } while (0)
|
|
|
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink) |
|
{ |
|
AVFilterContext *ctx = inlink->dst; |
|
enum AVPixelFormat fmt = inlink->format; |
|
|
|
|
|
if (model_input->height != -1 && model_input->height != inlink->h) { |
|
av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n", |
|
model_input->height, inlink->h); |
|
return AVERROR(EIO); |
|
} |
|
if (model_input->width != -1 && model_input->width != inlink->w) { |
|
av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n", |
|
model_input->width, inlink->w); |
|
return AVERROR(EIO); |
|
} |
|
if (model_input->dt != DNN_FLOAT) { |
|
avpriv_report_missing_feature(ctx, "data type rather than DNN_FLOAT"); |
|
return AVERROR(EIO); |
|
} |
|
|
|
switch (fmt) { |
|
case AV_PIX_FMT_RGB24: |
|
case AV_PIX_FMT_BGR24: |
|
if (model_input->channels != 3) { |
|
LOG_FORMAT_CHANNEL_MISMATCH(); |
|
return AVERROR(EIO); |
|
} |
|
return 0; |
|
case AV_PIX_FMT_GRAY8: |
|
case AV_PIX_FMT_GRAYF32: |
|
case AV_PIX_FMT_YUV420P: |
|
case AV_PIX_FMT_YUV422P: |
|
case AV_PIX_FMT_YUV444P: |
|
case AV_PIX_FMT_YUV410P: |
|
case AV_PIX_FMT_YUV411P: |
|
case AV_PIX_FMT_NV12: |
|
if (model_input->channels != 1) { |
|
LOG_FORMAT_CHANNEL_MISMATCH(); |
|
return AVERROR(EIO); |
|
} |
|
return 0; |
|
default: |
|
avpriv_report_missing_feature(ctx, "%s", av_get_pix_fmt_name(fmt)); |
|
return AVERROR(EIO); |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
/**
 * Input pad configuration: query the model's input description and make
 * sure the negotiated link matches it.
 *
 * @param inlink the filter's input link
 * @return 0 on success; the error from ff_dnn_get_input() or from
 *         check_modelinput_inlink() otherwise
 */
static int config_input(AVFilterLink *inlink)
{
    DnnProcessingContext *ctx = inlink->dst->priv;
    DNNData model_input;
    int ret = ff_dnn_get_input(&ctx->dnnctx, &model_input);

    if (ret != 0) {
        av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
        return ret;
    }

    return check_modelinput_inlink(&model_input, inlink);
}
|
|
|
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) |
|
{ |
|
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
|
av_assert0(desc); |
|
return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3; |
|
} |
|
|
|
/**
 * Create the swscale context used to resize the chroma planes.
 *
 * A scaler is only created when the input format passes isPlanarYUV() and
 * the model changes the frame size; otherwise ctx->sws_uv_scale is left
 * NULL and copy_uv_planes() copies chroma unchanged.
 *
 * @param outlink the filter's output link (w/h must already be set)
 * @return 0 on success, AVERROR(ENOMEM) if the scaler could not be created
 */
static int prepare_uv_scale(AVFilterLink *outlink)
{
    AVFilterContext *context  = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    AVFilterLink *inlink      = context->inputs[0];
    enum AVPixelFormat fmt    = inlink->format;

    /* Drop a scaler left over from an earlier configuration so it is neither
     * leaked nor reused with stale dimensions. */
    sws_freeContext(ctx->sws_uv_scale);
    ctx->sws_uv_scale = NULL;

    if (!isPlanarYUV(fmt))
        return 0;
    if (inlink->w == outlink->w && inlink->h == outlink->h)
        return 0;

    if (fmt == AV_PIX_FMT_NV12) {
        /* The single chroma plane is scaled as a two-byte-per-sample image
         * (YA8) at half the luma dimensions.
         * NOTE(review): plain >> 1 rounds down, whereas the other branch uses
         * AV_CEIL_RSHIFT - confirm the intended handling of odd sizes. */
        ctx->sws_uv_scale = sws_getContext(inlink->w >> 1, inlink->h >> 1, AV_PIX_FMT_YA8,
                                           outlink->w >> 1, outlink->h >> 1, AV_PIX_FMT_YA8,
                                           SWS_BICUBIC, NULL, NULL, NULL);
        ctx->sws_uv_height = inlink->h >> 1;
    } else {
        /* Each chroma plane is scaled separately as an 8-bit gray image. */
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
        int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
        int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
        int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
        int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
        ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
                                           sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
                                           SWS_BICUBIC, NULL, NULL, NULL);
        ctx->sws_uv_height = sws_src_h;
    }

    /* Without a scaler copy_uv_planes() would take the plain-copy path and
     * trip its same-size assertion, so fail configuration instead. */
    if (!ctx->sws_uv_scale) {
        av_log(context, AV_LOG_ERROR, "could not create the chroma scaler\n");
        return AVERROR(ENOMEM);
    }

    return 0;
}

/**
 * Output pad configuration: ask the model for the output size that
 * corresponds to the input size and set up chroma scaling if needed.
 *
 * @param outlink the filter's output link; w and h are filled in
 * @return 0 on success, a non-zero error code otherwise
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *context  = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    AVFilterLink *inlink      = context->inputs[0];
    int result;

    result = ff_dnn_get_output(&ctx->dnnctx, inlink->w, inlink->h, &outlink->w, &outlink->h);
    if (result != 0) {
        av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
        return result;
    }

    /* Propagate scaler setup failures instead of discarding them. */
    result = prepare_uv_scale(outlink);
    if (result < 0)
        return result;

    return 0;
}
|
|
|
/**
 * Fill the chroma data of the output frame from the input frame.
 *
 * When a chroma scaler exists, plane 1 is scaled, and plane 2 as well unless
 * the format is NV12. When there is no scaler, input and output must have
 * the same size (asserted) and planes 1 and 2 are copied as they are.
 *
 * @param ctx filter private context (provides the scaler)
 * @param out destination frame
 * @param in  source frame
 * @return 0 on success, AVERROR(EINVAL) if a plane's line size cannot be
 *         computed
 */
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
{
    struct SwsContext *scaler = ctx->sws_uv_scale;
    const AVPixFmtDescriptor *desc;
    int chroma_h;
    int plane;

    if (scaler) {
        const int src_h = ctx->sws_uv_height;

        sws_scale(scaler, (const uint8_t **)(in->data + 1), in->linesize + 1,
                  0, src_h, out->data + 1, out->linesize + 1);
        /* NV12 keeps all chroma in plane 1; the other formats have a second
         * chroma plane. */
        if (in->format != AV_PIX_FMT_NV12) {
            sws_scale(scaler, (const uint8_t **)(in->data + 2), in->linesize + 2,
                      0, src_h, out->data + 2, out->linesize + 2);
        }
        return 0;
    }

    /* No scaler: a straight copy is only valid for equal frame sizes. */
    av_assert0(in->height == out->height && in->width == out->width);
    desc     = av_pix_fmt_desc_get(in->format);
    chroma_h = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h);

    for (plane = 1; plane <= 2; plane++) {
        int row_bytes = av_image_get_linesize(in->format, in->width, plane);

        if (row_bytes < 0)
            return AVERROR(EINVAL);

        av_image_copy_plane(out->data[plane], out->linesize[plane],
                            in->data[plane], in->linesize[plane],
                            row_bytes, chroma_h);
    }

    return 0;
}
|
|
|
/**
 * Flush the DNN backend and push every remaining result downstream.
 *
 * @param outlink the filter's output link
 * @param pts     value added to the last delivered frame's pts
 * @param out_pts if non-NULL, receives (pts of the last delivered frame + pts);
 *                left untouched when no frame is delivered
 * @return 0 on success, a negative error code otherwise
 */
static int flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
{
    DnnProcessingContext *ctx = outlink->src->priv;
    DNNAsyncStatusType async_state;
    int ret;

    ret = ff_dnn_flush(&ctx->dnnctx);
    if (ret != 0) {
        /* Hand the backend's own error code to the caller when it has one;
         * otherwise keep the historical -1. */
        return ret < 0 ? ret : -1;
    }

    do {
        AVFrame *in_frame  = NULL;
        AVFrame *out_frame = NULL;

        async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
        if (out_frame) {
            /* Read the timestamp now: ff_filter_frame() takes ownership of
             * out_frame, so it must not be dereferenced afterwards. */
            const int64_t frame_pts = out_frame->pts;

            if (isPlanarYUV(in_frame->format)) {
                ret = copy_uv_planes(ctx, out_frame, in_frame);
                if (ret < 0) {
                    av_frame_free(&in_frame);
                    av_frame_free(&out_frame);
                    return ret;
                }
            }
            av_frame_free(&in_frame);

            ret = ff_filter_frame(outlink, out_frame);
            if (ret < 0)
                return ret;
            if (out_pts)
                *out_pts = frame_pts + pts;
        }

        /* Only wait when the backend reports that it is still working;
         * after a delivered frame we poll again right away. */
        if (async_state == DAST_NOT_READY)
            av_usleep(5000);
    } while (async_state >= DAST_NOT_READY);

    return 0;
}
|
|
|
/**
 * Filter activation callback.
 *
 * 1. Consume every queued input frame and submit it, together with a fresh
 *    output buffer, to the DNN backend.
 * 2. Collect all results that are ready and send them downstream.
 * 3. If nothing was produced and the input reached EOF, flush the backend
 *    and forward the EOF status; otherwise forward the frame request.
 *
 * @param filter_ctx the filter instance
 * @return 0 on success, a negative error code otherwise
 */
static int activate(AVFilterContext *filter_ctx)
{
    AVFilterLink *inlink      = filter_ctx->inputs[0];
    AVFilterLink *outlink     = filter_ctx->outputs[0];
    DnnProcessingContext *ctx = filter_ctx->priv;
    AVFrame *in = NULL, *out = NULL;
    int64_t pts;
    int ret, status;
    int got_frame = 0;
    int async_state;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    /* Submit all available input frames. */
    do {
        ret = ff_inlink_consume_frame(inlink, &in);
        if (ret < 0)
            return ret;
        if (ret > 0) {
            int err; /* kept separate: `ret` drives the loop condition */

            out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
            if (!out) {
                av_frame_free(&in);
                return AVERROR(ENOMEM);
            }
            err = av_frame_copy_props(out, in);
            if (err < 0) {
                /* Both frames are still ours at this point. */
                av_frame_free(&in);
                av_frame_free(&out);
                return err;
            }
            if (ff_dnn_execute_model(&ctx->dnnctx, in, out) != 0) {
                /* NOTE(review): `in` and `out` are not freed here because it
                 * is not visible from this file whether the backend still
                 * references them after a failed submission - confirm the
                 * ownership rules and free them if they are leaked. */
                return AVERROR(EIO);
            }
        }
    } while (ret > 0);

    /* Drain every finished inference. */
    do {
        AVFrame *in_frame  = NULL;
        AVFrame *out_frame = NULL;

        async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
        if (out_frame) {
            if (isPlanarYUV(in_frame->format)) {
                ret = copy_uv_planes(ctx, out_frame, in_frame);
                if (ret < 0) {
                    av_frame_free(&in_frame);
                    av_frame_free(&out_frame);
                    return ret;
                }
            }
            av_frame_free(&in_frame);

            ret = ff_filter_frame(outlink, out_frame);
            if (ret < 0)
                return ret;
            got_frame = 1;
        }
    } while (async_state == DAST_SUCCESS);

    /* Something was output: nothing more to do in this activation. */
    if (got_frame)
        return 0;

    if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF) {
            int64_t out_pts = pts;

            ret = flush_frame(outlink, pts, &out_pts);
            ff_outlink_set_status(outlink, status, out_pts);
            return ret;
        }
    }

    FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return 0;
}
|
|
|
/**
 * Filter uninit callback: release the chroma scaler first, then the DNN
 * context.
 *
 * @param ctx the filter instance
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    DnnProcessingContext *priv   = ctx->priv;
    struct SwsContext *uv_scaler = priv->sws_uv_scale;

    sws_freeContext(uv_scaler);
    ff_dnn_uninit(&priv->dnnctx);
}
|
|
|
static const AVFilterPad dnn_processing_inputs[] = { |
|
{ |
|
.name = "default", |
|
.type = AVMEDIA_TYPE_VIDEO, |
|
.config_props = config_input, |
|
}, |
|
}; |
|
|
|
static const AVFilterPad dnn_processing_outputs[] = { |
|
{ |
|
.name = "default", |
|
.type = AVMEDIA_TYPE_VIDEO, |
|
.config_props = config_output, |
|
}, |
|
}; |
|
|
|
const AVFilter ff_vf_dnn_processing = { |
|
.name = "dnn_processing", |
|
.description = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."), |
|
.priv_size = sizeof(DnnProcessingContext), |
|
.init = init, |
|
.uninit = uninit, |
|
FILTER_INPUTS(dnn_processing_inputs), |
|
FILTER_OUTPUTS(dnn_processing_outputs), |
|
FILTER_PIXFMTS_ARRAY(pix_fmts), |
|
.priv_class = &dnn_processing_class, |
|
.activate = activate, |
|
}; |
|
|