LinuxLists.cc - [RFC] METADATA design using V4l2 Request API

[permalink] [raw]

Subject: [RFC PATCH 2/3] Enable Request API for output buffers

Change-Id: Ic09179f503bf7d4bb41522fb70207728405cc7b3
Signed-off-by: Dikshita Agarwal <[email protected]>
---
drivers/media/platform/Kconfig | 2 +-
drivers/media/platform/qcom/venus/core.h | 3 ++
drivers/media/platform/qcom/venus/helpers.c | 75 +++++++++++++++++++++++++-
drivers/media/platform/qcom/venus/venc.c | 25 ++++++++-
drivers/media/platform/qcom/venus/venc_ctrls.c | 61 ++++++++++++++++++++-
drivers/media/v4l2-core/v4l2-ctrls.c | 10 ++++
include/media/v4l2-ctrls.h | 1 +
include/media/venus-ctrls.h | 22 ++++++++
8 files changed, 195 insertions(+), 4 deletions(-)
create mode 100644 include/media/venus-ctrls.h

diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index a51f76e..064b57b 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -481,7 +481,7 @@ config VIDEO_TI_VPE_DEBUG

config VIDEO_QCOM_VENUS
tristate "Qualcomm Venus V4L2 encoder/decoder driver"
- depends on VIDEO_DEV && VIDEO_V4L2 && MEDIA_CONTROLLER
+ depends on VIDEO_DEV && VIDEO_V4L2 && MEDIA_CONTROLLER && MEDIA_CONTROLLER_REQUEST_API
depends on (ARCH_QCOM && IOMMU_DMA) || COMPILE_TEST
select QCOM_MDT_LOADER if ARCH_QCOM
select QCOM_SCM if ARCH_QCOM
diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
index 91ff08d..9535d33 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -11,6 +11,8 @@
#include <media/videobuf2-v4l2.h>
#include <media/v4l2-ctrls.h>
#include <media/v4l2-device.h>
+#include <linux/time.h>
+#include <linux/timer.h>

#include "hfi.h"

@@ -320,6 +322,7 @@ struct venus_ts_metadata {
struct venus_inst {
struct list_head list;
struct mutex lock;
+ struct mutex req_lock;
struct venus_core *core;
struct list_head dpbbufs;
struct list_head internalbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index 2c766cd..c966c24 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -27,6 +27,63 @@ struct intbuf {
unsigned long attrs;
};

+int venus_ctrl_request_process(struct media_request *req)
+{
+ struct v4l2_ext_control control;
+ struct v4l2_ext_controls controls;
+ int ret = 0;
+ //struct media_device mdev = inst->core->m_dev;
+
+ struct media_request_object *obj;
+ struct v4l2_ctrl_handler *parent_hdl, *hdl;
+ struct venus_inst *inst;
+ struct v4l2_ctrl *ctrl;
+ struct v4l2_ctrl_ref *ref;
+ unsigned int count;
+ list_for_each_entry(obj, &req->objects, list) {
+ struct vb2_buffer *vb;
+
+ if (vb2_request_object_is_buffer(obj)) {
+ vb = container_of(obj, struct vb2_buffer, req_obj);
+ inst = vb2_get_drv_priv(vb->vb2_queue);
+ //struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
+ break;
+ }
+ }
+
+ if (!inst) {
+ pr_debug("No buffer was provided with the request\n");
+ return -ENOENT;
+ }
+ struct video_device *vdev = inst->core->vdev_enc;
+ count = vb2_request_buffer_cnt(req);
+ if (!count) {
+ pr_debug("No buffer was provided with the request\n");
+ return -ENOENT;
+ } else if (count > 1) {
+ pr_debug("More than one buffer was provided with the request\n");
+ return -EINVAL;
+ }
+
+ parent_hdl = &inst->ctrl_handler;
+
+ hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl);
+ if (!hdl) {
+ pr_debug( "Missing control handler\n");
+ return -ENOENT;
+ }
+
+ ctrl = v4l2_ctrl_request_hdl_ctrl_find(hdl,
+ V4L2_CID_MPEG_VIDEO_VENUS_METADATA);
+
+ pr_debug("venus_ctrl_request_process ctrl id %d index %d",ctrl->id,ctrl->p_new.p_metadata->index);
+ return ret;
+}
+
+static int venus_helper_register_entity(struct media_device *mdev,
+ struct venus_media *media, enum venus_helper_entity_type type,
+ struct video_device *vdev, int function);
+
bool venus_helper_check_codec(struct venus_inst *inst, u32 v4l2_pixfmt)
{
struct venus_core *core = inst->core;
@@ -522,7 +579,10 @@ void venus_helper_get_ts_metadata(struct venus_inst *inst, u64 timestamp_us,
unsigned int type = vb->type;
struct hfi_frame_data fdata;
int ret;
-
+ struct media_request *req;
+ u64 ts,ts_ms;
+ struct timespec tv;
+ req = vb->req_obj.req;
memset(&fdata, 0, sizeof(fdata));
fdata.alloc_len = buf->size;
fdata.device_addr = buf->dma_addr;
@@ -549,7 +609,20 @@ void venus_helper_get_ts_metadata(struct venus_inst *inst, u64 timestamp_us,
fdata.filled_len = 0;
fdata.offset = 0;
}
+ if(req && type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
+ {
+ ret = v4l2_ctrl_request_setup(req, &inst->ctrl_handler);
+ if (ret)
+ pr_debug("v4l2_ctrl_request_setup retunred error");
+ ret = venus_ctrl_request_process(req);
+ }

+ pr_debug("buf type %d buf index %d",fdata.buffer_type,vbuf->vb2_buf.index);
+ if(type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
+ getnstimeofday(&tv);
+ ts = (tv.tv_sec * 1000000) + (tv.tv_nsec / 1000);
+ pr_debug("ETB: BUF %d queued at tv sec %ld ts ns %ld ts usec %lld",vbuf->vb2_buf.index,tv.tv_sec,tv.tv_nsec,ts);
+ }
ret = hfi_session_process_buf(inst, &fdata);
if (ret)
return ret;
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index f57542f..599cfae 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -1031,6 +1031,21 @@ static int venc_start_streaming(struct vb2_queue *q, unsigned int count)
return ret;
}

+static int venc_buf_out_validate(struct vb2_buffer *vb)
+{
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+
+ vbuf->field = V4L2_FIELD_NONE;
+ return 0;
+}
+
+static void venc_buf_request_complete(struct vb2_buffer *vb)
+{
+ struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
+
+ v4l2_ctrl_request_complete(vb->req_obj.req, &inst->ctrl_handler);
+}
+
static const struct vb2_ops venc_vb2_ops = {
.queue_setup = venc_queue_setup,
.buf_init = venus_helper_vb2_buf_init,
@@ -1038,6 +1053,8 @@ static int venc_start_streaming(struct vb2_queue *q, unsigned int count)
.start_streaming = venc_start_streaming,
.stop_streaming = venus_helper_vb2_stop_streaming,
.buf_queue = venus_helper_vb2_buf_queue,
+ .buf_out_validate = venc_buf_out_validate,
+ .buf_request_complete = venc_buf_request_complete,
};

static void venc_buf_done(struct venus_inst *inst, unsigned int buf_type,
@@ -1068,7 +1085,8 @@ static void venc_buf_done(struct venus_inst *inst, unsigned int buf_type,
} else {
vbuf->sequence = inst->sequence_out++;
}
-
+ if (buf_type == HFI_BUFFER_INPUT)
+ v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req, &inst->ctrl_handler);
v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE);
}

@@ -1109,6 +1127,9 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
src_vq->allow_zero_bytesused = 1;
src_vq->min_buffers_needed = 1;
src_vq->dev = inst->core->dev;
+ src_vq->supports_requests = 1;
+ src_vq->requires_requests = 1;
+ src_vq->lock = &inst->req_lock;
if (inst->core->res->hfi_version == HFI_VERSION_1XX)
src_vq->bidirectional = 1;
ret = vb2_queue_init(src_vq);
@@ -1125,6 +1146,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
dst_vq->allow_zero_bytesused = 1;
dst_vq->min_buffers_needed = 1;
dst_vq->dev = inst->core->dev;
+ dst_vq->lock = src_vq->lock;
ret = vb2_queue_init(dst_vq);
if (ret) {
vb2_queue_release(src_vq);
@@ -1163,6 +1185,7 @@ static int venc_open(struct file *file)
INIT_LIST_HEAD(&inst->internalbufs);
INIT_LIST_HEAD(&inst->list);
mutex_init(&inst->lock);
+ mutex_init(&inst->req_lock);

inst->core = core;
inst->session_type = VIDC_SESSION_TYPE_ENC;
diff --git a/drivers/media/platform/qcom/venus/venc_ctrls.c b/drivers/media/platform/qcom/venus/venc_ctrls.c
index 877c0b3..02921b5 100644
--- a/drivers/media/platform/qcom/venus/venc_ctrls.c
+++ b/drivers/media/platform/qcom/venus/venc_ctrls.c
@@ -199,6 +199,12 @@ static int venc_op_s_ctrl(struct v4l2_ctrl *ctrl)
}
mutex_unlock(&inst->lock);
break;
+ case V4L2_CID_MPEG_VIDEO_VENUS_METADATA: {
+ unsigned int i;
+ const struct v4l2_ctrl_venus_metadata *metadata = ctrl->p_new.p_metadata;
+ pr_debug("%s: VENUS_METADATA p_new set index %d \n", __func__,metadata->index);
+ break;
+ }
default:
return -EINVAL;
}
@@ -206,15 +212,66 @@ static int venc_op_s_ctrl(struct v4l2_ctrl *ctrl)
return 0;
}

+static int venc_op_try_ctrl(struct v4l2_ctrl *ctrl)
+{
+ switch (ctrl->id) {
+ case V4L2_CID_MPEG_VIDEO_VENUS_METADATA: {
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+static void meta_type_init(const struct v4l2_ctrl *ctrl, u32 idx,
+ union v4l2_ctrl_ptr ptr)
+{
+}
+
+static bool meta_type_equal(const struct v4l2_ctrl *ctrl, u32 idx,
+ union v4l2_ctrl_ptr ptr1,
+ union v4l2_ctrl_ptr ptr2)
+{
+ return true;
+}
+
+static void meta_type_log(const struct v4l2_ctrl *ctrl)
+{
+}
+
+static int meta_type_validate(const struct v4l2_ctrl *ctrl, u32 idx,
+ union v4l2_ctrl_ptr ptr)
+{
+ return 0;
+}
+
static const struct v4l2_ctrl_ops venc_ctrl_ops = {
.s_ctrl = venc_op_s_ctrl,
+ .try_ctrl = venc_op_try_ctrl,
+};
+
+static const struct v4l2_ctrl_type_ops venus_meta_type_ops = {
+ .equal = meta_type_equal,
+ .init = meta_type_init,
+ .log = meta_type_log,
+ .validate = meta_type_validate,
+};
+
+static const struct v4l2_ctrl_config venus_meta_cfg = {
+ .name = "Venus codec metadata",
+ .type = V4L2_CTRL_TYPE_VENUS_METADATA,
+ .ops = &venc_ctrl_ops,
+ .id = V4L2_CID_MPEG_VIDEO_VENUS_METADATA,
+ .flags = V4L2_CTRL_FLAG_EXECUTE_ON_WRITE,
+ .elem_size = sizeof(struct v4l2_ctrl_venus_metadata),
+ .type_ops = &venus_meta_type_ops,
};

int venc_ctrl_init(struct venus_inst *inst)
{
int ret;

- ret = v4l2_ctrl_handler_init(&inst->ctrl_handler, 30);
+ ret = v4l2_ctrl_handler_init(&inst->ctrl_handler, 30+1);
if (ret)
return ret;

@@ -351,6 +408,8 @@ int venc_ctrl_init(struct venus_inst *inst)
v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops,
V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME, 0, 0, 0, 0);

+ v4l2_ctrl_new_custom(&inst->ctrl_handler, &venus_meta_cfg, NULL);
+
ret = inst->ctrl_handler.error;
if (ret)
goto err;
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index cd84dbb..9f7f024 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -892,6 +892,7 @@ const char *v4l2_ctrl_get_name(u32 id)
case V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS: return "MPEG-2 Slice Parameters";
case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION: return "MPEG-2 Quantization Matrices";
case V4L2_CID_MPEG_VIDEO_FWHT_PARAMS: return "FWHT Stateless Parameters";
+ case V4L2_CID_MPEG_VIDEO_VENUS_METADATA: return "Venus codec metadata";
case V4L2_CID_FWHT_I_FRAME_QP: return "FWHT I-Frame QP Value";
case V4L2_CID_FWHT_P_FRAME_QP: return "FWHT P-Frame QP Value";

@@ -1357,6 +1358,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
case V4L2_CID_MPEG_VIDEO_FWHT_PARAMS:
*type = V4L2_CTRL_TYPE_FWHT_PARAMS;
break;
+ case V4L2_CID_MPEG_VIDEO_VENUS_METADATA:
+ *type = V4L2_CTRL_TYPE_VENUS_METADATA;
+ break;
case V4L2_CID_MPEG_VIDEO_H264_SPS:
*type = V4L2_CTRL_TYPE_H264_SPS;
break;
@@ -1820,6 +1824,9 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
return -ERANGE;
return 0;

+ case V4L2_CTRL_TYPE_VENUS_METADATA:
+ return 0;
+
default:
return std_validate_compound(ctrl, idx, ptr);
}
@@ -2403,6 +2410,9 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
case V4L2_CTRL_TYPE_FWHT_PARAMS:
elem_size = sizeof(struct v4l2_ctrl_fwht_params);
break;
+ case V4L2_CTRL_TYPE_VENUS_METADATA:
+ elem_size = sizeof(struct v4l2_ctrl_venus_metadata);
+ break;
case V4L2_CTRL_TYPE_H264_SPS:
elem_size = sizeof(struct v4l2_ctrl_h264_sps);
break;
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 570ff4b..c66b797 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -12,6 +12,7 @@
#include <linux/mutex.h>
#include <linux/videodev2.h>
#include <media/media-request.h>
+#include <media/venus-ctrls.h>

/*
* Include the stateless codec compound control definitions.
diff --git a/include/media/venus-ctrls.h b/include/media/venus-ctrls.h
new file mode 100644
index 0000000..9179ef0
--- /dev/null
+++ b/include/media/venus-ctrls.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+Copyright (c) 2020, The Linux Foundation. All rights reserved. */
+/*
+ * These are custom (compound) controls for Venus codec driver.
+ *
+ * It turns out that these structs are not stable yet and will undergo
+ * more changes. So keep them private until they are stable and ready to
+ * become part of the official public API.
+ */
+
+#ifndef _VENUS_CTRLS_H_
+#define _VENUS_CTRLS_H_
+
+#define V4L2_CTRL_TYPE_VENUS_METADATA 0x0107
+#define V4L2_CID_MPEG_VIDEO_VENUS_METADATA (V4L2_CID_MPEG_BASE + 293)
+
+struct v4l2_ctrl_venus_metadata {
+ unsigned int index;
+// __u8 rawdata[16];
+};
+
+#endif
\ No newline at end of file
--
1.9.1

2020-05-26 06:34:16

[permalink] [raw]

Subject: Re: [RFC PATCH 0/3] METADATA design using V4l2 Request API

Hi,

A gentle reminder for the review.

On 2020-05-08 11:51, Dikshita Agarwal wrote:
> There are many commercialized video use cases which needs metadata info
> to be circulated between v4l2 client and v4l2 driver.
>
> METADATA has following requirements associated:
> •Metadata is an optional info available for a buffer. It is not
> mandatorily for every buffer.
> For ex. consider metadata ROI (Region Of Interest). ROI is specified
> by clients to indicate
> the region where enhanced quality is desired. This metadata is given
> as an input information
> to encoder output plane. Client may or may not specify the ROI for a
> frame during encode as
> an input metadata. Also if the client has not provided ROI metadata
> for a given frame,
> it would be incorrect to take the metadata from previous frame. If the
> data and
> metadata is asynchronous, it would be difficult for hardware to decide
> if it
> needs to wait for metadata buffer or not before processing the input
> frame for encoding.
> •Synchronize the buffer requirement across both the video node/session
> (incase metadata is being processed as a separate v4l2 video
> node/session).
> This is to avoid buffer starvation.
> •Associate the metadata buffer with the data buffer without adding any
> pipeline delay
> in waiting for each other. This is applicable both at the hardware
> side (the processing end)
> and client side (the receiving end).
> •Low latency usecases like WFD/split rendering/game streaming/IMS have
> sub-50ms e2e latency
> requirements, and it is not practical to stall the pipeline due to
> inherent framework latencies.
> High performance usecase like high-frame rate playback/record can
> lead to frame loss during any pipeline latency.
>
> To address all above requirements, we used v4l2 Request API as
> interlace.
>
> As an experiment, We have introduced new control
> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> to contain the METADATA info. Exact controls can be finalized once the
> interface is discussed.
>
> For setting metadata from userspace to kernel, let say on encode output
> plane,
> following code sequence was followed
> 1. Video driver is registering for media device and creating a media
> node in /dev
> 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
> media fd.
> 3. METADATA configuration is being applied on request fd using
> VIDIOC_S_EXT_CTRLS IOCTL
> and the same request fd is added to buf structure structure before
> calling VIDIOC_QBUF on video fd.
> 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
> driver then, as a result
> to which METADATA control will be applied to buffer through S_CTRL.
> 5. Once control is applied and request is completed,
> MEDIA_REQUEST_IOC_REINIT IOCTL is called
> to re-initialize the request.
>
> We could achieve the same on capture plane as well by removing few
> checks present currently
> in v4l2 core which restrict the implementation to only output plane.
>
> We profiled below data with this implementation :
> 1. Total time taken ( round trip ) for setting up control data on video
> driver
> with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
> 2. Time taken for first QBUF on Output plane to reach driver with
> REQUEST API enabled (One way): 723us
> 3. Time taken for first QBUF on Output plane to reach driver without
> REQUEST API (One way) : 250us
> 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
> API enabled :
> a. VIDIOC_S_EXT_CTRLS : 201us
> b. VIDIOC_QBUF : 92us
> c. MEDIA_REQUEST_IOC_QUEUE: 386us
>
> Kindly share your feedback/comments on the design/call sequence.
> Also as we experimented and enabled the metadata on capture plane as
> well, please comment if any issue in
> allowing the metadata exchange on capture plane as well.
>
> Reference for client side implementation can be found at [1].
>
> Thanks,
> Dikshita
>
> [1]
> https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>
> Dikshita Agarwal (3):
> Register for media device
> - Initialize and register for media device
> - define venus_m2m_media_ops
> - Implement APIs to register/unregister media controller.
> Enable Request API for output buffers
> - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
> - Initialize vb2 ops buf_out_validate and buf_request_complete.
> - Add support for custom Metadata control
> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> - Implemeted/Integrated APIs for Request setup/complete.
> Enable Request API for Capture Buffers
>
> drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
> drivers/media/platform/Kconfig | 2 +-
> drivers/media/platform/qcom/venus/core.h | 36 ++++
> drivers/media/platform/qcom/venus/helpers.c | 247
> +++++++++++++++++++++++-
> drivers/media/platform/qcom/venus/helpers.h | 15 ++
> drivers/media/platform/qcom/venus/venc.c | 63 +++++-
> drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
> drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
> drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
> include/media/v4l2-ctrls.h | 1 +
> include/media/venus-ctrls.h | 22 +++
> 11 files changed, 465 insertions(+), 13 deletions(-)
> create mode 100644 include/media/venus-ctrls.h

2020-05-26 07:07:02

[permalink] [raw]

Subject: Re: [RFC PATCH 0/3] METADATA design using V4l2 Request API

On 26/05/2020 08:31, [email protected] wrote:
> Hi,
>
> A gentle reminder for the review.

It's on my TODO list. I hope to get to it this week or next week at the
latest.

Regards,

Hans

>
> On 2020-05-08 11:51, Dikshita Agarwal wrote:
>> There are many commercialized video use cases which needs metadata info
>> to be circulated between v4l2 client and v4l2 driver.
>>
>> METADATA has following requirements associated:
>> •Metadata is an optional info available for a buffer. It is not
>> mandatorily for every buffer.
>> For ex. consider metadata ROI (Region Of Interest). ROI is specified
>> by clients to indicate
>> the region where enhanced quality is desired. This metadata is given
>> as an input information
>> to encoder output plane. Client may or may not specify the ROI for a
>> frame during encode as
>> an input metadata. Also if the client has not provided ROI metadata
>> for a given frame,
>> it would be incorrect to take the metadata from previous frame. If the
>> data and
>> metadata is asynchronous, it would be difficult for hardware to decide
>> if it
>> needs to wait for metadata buffer or not before processing the input
>> frame for encoding.
>> •Synchronize the buffer requirement across both the video node/session
>> (incase metadata is being processed as a separate v4l2 video
>> node/session).
>> This is to avoid buffer starvation.
>> •Associate the metadata buffer with the data buffer without adding any
>> pipeline delay
>> in waiting for each other. This is applicable both at the hardware
>> side (the processing end)
>> and client side (the receiving end).
>> •Low latency usecases like WFD/split rendering/game streaming/IMS have
>> sub-50ms e2e latency
>> requirements, and it is not practical to stall the pipeline due to
>> inherent framework latencies.
>> High performance usecase like high-frame rate playback/record can
>> lead to frame loss during any pipeline latency.
>>
>> To address all above requirements, we used v4l2 Request API as
>> interlace.
>>
>> As an experiment, We have introduced new control
>> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>> to contain the METADATA info. Exact controls can be finalized once the
>> interface is discussed.
>>
>> For setting metadata from userspace to kernel, let say on encode output
>> plane,
>> following code sequence was followed
>> 1. Video driver is registering for media device and creating a media
>> node in /dev
>> 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>> media fd.
>> 3. METADATA configuration is being applied on request fd using
>> VIDIOC_S_EXT_CTRLS IOCTL
>> and the same request fd is added to buf structure structure before
>> calling VIDIOC_QBUF on video fd.
>> 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>> driver then, as a result
>> to which METADATA control will be applied to buffer through S_CTRL.
>> 5. Once control is applied and request is completed,
>> MEDIA_REQUEST_IOC_REINIT IOCTL is called
>> to re-initialize the request.
>>
>> We could achieve the same on capture plane as well by removing few
>> checks present currently
>> in v4l2 core which restrict the implementation to only output plane.
>>
>> We profiled below data with this implementation :
>> 1. Total time taken ( round trip ) for setting up control data on video
>> driver
>> with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>> 2. Time taken for first QBUF on Output plane to reach driver with
>> REQUEST API enabled (One way): 723us
>> 3. Time taken for first QBUF on Output plane to reach driver without
>> REQUEST API (One way) : 250us
>> 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>> API enabled :
>> a. VIDIOC_S_EXT_CTRLS : 201us
>> b. VIDIOC_QBUF : 92us
>> c. MEDIA_REQUEST_IOC_QUEUE: 386us
>>
>> Kindly share your feedback/comments on the design/call sequence.
>> Also as we experimented and enabled the metadata on capture plane as
>> well, please comment if any issue in
>> allowing the metadata exchange on capture plane as well.
>>
>> Reference for client side implementation can be found at [1].
>>
>> Thanks,
>> Dikshita
>>
>> [1]
>> https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>>
>> Dikshita Agarwal (3):
>> Register for media device
>> - Initialize and register for media device
>> - define venus_m2m_media_ops
>> - Implement APIs to register/unregister media controller.
>> Enable Request API for output buffers
>> - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>> - Initialize vb2 ops buf_out_validate and buf_request_complete.
>> - Add support for custom Metadata control
>> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>> - Implemeted/Integrated APIs for Request setup/complete.
>> Enable Request API for Capture Buffers
>>
>> drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
>> drivers/media/platform/Kconfig | 2 +-
>> drivers/media/platform/qcom/venus/core.h | 36 ++++
>> drivers/media/platform/qcom/venus/helpers.c | 247
>> +++++++++++++++++++++++-
>> drivers/media/platform/qcom/venus/helpers.h | 15 ++
>> drivers/media/platform/qcom/venus/venc.c | 63 +++++-
>> drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>> drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
>> drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
>> include/media/v4l2-ctrls.h | 1 +
>> include/media/venus-ctrls.h | 22 +++
>> 11 files changed, 465 insertions(+), 13 deletions(-)
>> create mode 100644 include/media/venus-ctrls.h

2020-05-26 10:59:59

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Hi Dikshita,

My apologies for the delay, this was (mostly) due to various vacation days.

On 08/05/2020 08:21, Dikshita Agarwal wrote:
> There are many commercialized video use cases which needs metadata info
> to be circulated between v4l2 client and v4l2 driver.
>
> METADATA has following requirements associated:
> •Metadata is an optional info available for a buffer. It is not mandatorily for every buffer.
> For ex. consider metadata ROI (Region Of Interest). ROI is specified by clients to indicate
> the region where enhanced quality is desired. This metadata is given as an input information
> to encoder output plane. Client may or may not specify the ROI for a frame during encode as
> an input metadata. Also if the client has not provided ROI metadata for a given frame,
> it would be incorrect to take the metadata from previous frame. If the data and
> metadata is asynchronous, it would be difficult for hardware to decide if it
> needs to wait for metadata buffer or not before processing the input frame for encoding.
> •Synchronize the buffer requirement across both the video node/session
> (incase metadata is being processed as a separate v4l2 video node/session).
> This is to avoid buffer starvation.
> •Associate the metadata buffer with the data buffer without adding any pipeline delay
> in waiting for each other. This is applicable both at the hardware side (the processing end)
> and client side (the receiving end).
> •Low latency usecases like WFD/split rendering/game streaming/IMS have sub-50ms e2e latency
> requirements, and it is not practical to stall the pipeline due to inherent framework latencies.
> High performance usecase like high-frame rate playback/record can lead to frame loss during any pipeline latency.
>
> To address all above requirements, we used v4l2 Request API as interlace.
>
> As an experiment, We have introduced new control V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> to contain the METADATA info. Exact controls can be finalized once the interface is discussed.
>
> For setting metadata from userspace to kernel, let say on encode output plane,
> following code sequence was followed
> 1. Video driver is registering for media device and creating a media node in /dev
> 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on media fd.
> 3. METADATA configuration is being applied on request fd using VIDIOC_S_EXT_CTRLS IOCTL
> and the same request fd is added to buf structure structure before calling VIDIOC_QBUF on video fd.
> 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to driver then, as a result
> to which METADATA control will be applied to buffer through S_CTRL.
> 5. Once control is applied and request is completed, MEDIA_REQUEST_IOC_REINIT IOCTL is called
> to re-initialize the request.

This is fine and should work well. It's what the Request API is for, so no problems here.

>
> We could achieve the same on capture plane as well by removing few checks present currently
> in v4l2 core which restrict the implementation to only output plane.

Why do you need the Request API for the capture side in a memory-to-memory driver? It is not
clear from this patch series what the use-case is. There are reasons why this is currently
not allowed. So I need to know more about this.

Regards,

Hans

>
> We profiled below data with this implementation :
> 1. Total time taken ( round trip ) for setting up control data on video driver
> with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
> 2. Time taken for first QBUF on Output plane to reach driver with REQUEST API enabled (One way): 723us
> 3. Time taken for first QBUF on Output plane to reach driver without REQUEST API (One way) : 250us
> 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST API enabled :
> a. VIDIOC_S_EXT_CTRLS : 201us
> b. VIDIOC_QBUF : 92us
> c. MEDIA_REQUEST_IOC_QUEUE: 386us
>
> Kindly share your feedback/comments on the design/call sequence.
> Also as we experimented and enabled the metadata on capture plane as well, please comment if any issue in
> allowing the metadata exchange on capture plane as well.
>
> Reference for client side implementation can be found at [1].
>
> Thanks,
> Dikshita
>
> [1] https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>
> Dikshita Agarwal (3):
> Register for media device
> - Initialize and register for media device
> - define venus_m2m_media_ops
> - Implement APIs to register/unregister media controller.
> Enable Request API for output buffers
> - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
> - Initialize vb2 ops buf_out_validate and buf_request_complete.
> - Add support for custom Metadata control V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> - Implemeted/Integrated APIs for Request setup/complete.
> Enable Request API for Capture Buffers
>
> drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
> drivers/media/platform/Kconfig | 2 +-
> drivers/media/platform/qcom/venus/core.h | 36 ++++
> drivers/media/platform/qcom/venus/helpers.c | 247 +++++++++++++++++++++++-
> drivers/media/platform/qcom/venus/helpers.h | 15 ++
> drivers/media/platform/qcom/venus/venc.c | 63 +++++-
> drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
> drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
> drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
> include/media/v4l2-ctrls.h | 1 +
> include/media/venus-ctrls.h | 22 +++
> 11 files changed, 465 insertions(+), 13 deletions(-)
> create mode 100644 include/media/venus-ctrls.h
>

2020-05-28 10:51:16

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Hi Hans,

Thanks for the review.

On 2020-05-26 16:27, Hans Verkuil wrote:
> Hi Dikshita,
>
> My apologies for the delay, this was (mostly) due to various vacation
> days.
>
> On 08/05/2020 08:21, Dikshita Agarwal wrote:
>> There are many commercialized video use cases which needs metadata
>> info
>> to be circulated between v4l2 client and v4l2 driver.
>>
>> METADATA has following requirements associated:
>> •Metadata is an optional info available for a buffer. It is not
>> mandatorily for every buffer.
>> For ex. consider metadata ROI (Region Of Interest). ROI is specified
>> by clients to indicate
>> the region where enhanced quality is desired. This metadata is given
>> as an input information
>> to encoder output plane. Client may or may not specify the ROI for a
>> frame during encode as
>> an input metadata. Also if the client has not provided ROI metadata
>> for a given frame,
>> it would be incorrect to take the metadata from previous frame. If
>> the data and
>> metadata is asynchronous, it would be difficult for hardware to
>> decide if it
>> needs to wait for metadata buffer or not before processing the input
>> frame for encoding.
>> •Synchronize the buffer requirement across both the video node/session
>> (incase metadata is being processed as a separate v4l2 video
>> node/session).
>> This is to avoid buffer starvation.
>> •Associate the metadata buffer with the data buffer without adding any
>> pipeline delay
>> in waiting for each other. This is applicable both at the hardware
>> side (the processing end)
>> and client side (the receiving end).
>> •Low latency usecases like WFD/split rendering/game streaming/IMS have
>> sub-50ms e2e latency
>> requirements, and it is not practical to stall the pipeline due to
>> inherent framework latencies.
>> High performance usecase like high-frame rate playback/record can
>> lead to frame loss during any pipeline latency.
>>
>> To address all above requirements, we used v4l2 Request API as
>> interlace.
>>
>> As an experiment, We have introduced new control
>> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>> to contain the METADATA info. Exact controls can be finalized once the
>> interface is discussed.
>>
>> For setting metadata from userspace to kernel, let say on encode
>> output plane,
>> following code sequence was followed
>> 1. Video driver is registering for media device and creating a media
>> node in /dev
>> 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>> media fd.
>> 3. METADATA configuration is being applied on request fd using
>> VIDIOC_S_EXT_CTRLS IOCTL
>> and the same request fd is added to buf structure structure before
>> calling VIDIOC_QBUF on video fd.
>> 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>> driver then, as a result
>> to which METADATA control will be applied to buffer through S_CTRL.
>> 5. Once control is applied and request is completed,
>> MEDIA_REQUEST_IOC_REINIT IOCTL is called
>> to re-initialize the request.
>
> This is fine and should work well. It's what the Request API is for,
> so no problems here.
>
>>
>> We could achieve the same on capture plane as well by removing few
>> checks present currently
>> in v4l2 core which restrict the implementation to only output plane.
>
> Why do you need the Request API for the capture side in a
> memory-to-memory driver? It is not
> clear from this patch series what the use-case is. There are reasons
> why this is currently
> not allowed. So I need to know more about this.
>
> Regards,
>
> Hans
>
we need this for use cases like HDR10+ where metadata info is part of
the bitstream.
To handle such frame specific data, support for request api on capture
plane would be needed.

Thanks,
Dikshita
>>
>> We profiled below data with this implementation :
>> 1. Total time taken ( round trip ) for setting up control data on
>> video driver
>> with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>> 2. Time taken for first QBUF on Output plane to reach driver with
>> REQUEST API enabled (One way): 723us
>> 3. Time taken for first QBUF on Output plane to reach driver without
>> REQUEST API (One way) : 250us
>> 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>> API enabled :
>> a. VIDIOC_S_EXT_CTRLS : 201us
>> b. VIDIOC_QBUF : 92us
>> c. MEDIA_REQUEST_IOC_QUEUE: 386us
>>
>> Kindly share your feedback/comments on the design/call sequence.
>> Also as we experimented and enabled the metadata on capture plane as
>> well, please comment if any issue in
>> allowing the metadata exchange on capture plane as well.
>>
>> Reference for client side implementation can be found at [1].
>>
>> Thanks,
>> Dikshita
>>
>> [1]
>> https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>>
>> Dikshita Agarwal (3):
>> Register for media device
>> - Initialize and register for media device
>> - define venus_m2m_media_ops
>> - Implement APIs to register/unregister media controller.
>> Enable Request API for output buffers
>> - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>> - Initialize vb2 ops buf_out_validate and buf_request_complete.
>> - Add support for custom Metadata control
>> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>> - Implemeted/Integrated APIs for Request setup/complete.
>> Enable Request API for Capture Buffers
>>
>> drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
>> drivers/media/platform/Kconfig | 2 +-
>> drivers/media/platform/qcom/venus/core.h | 36 ++++
>> drivers/media/platform/qcom/venus/helpers.c | 247
>> +++++++++++++++++++++++-
>> drivers/media/platform/qcom/venus/helpers.h | 15 ++
>> drivers/media/platform/qcom/venus/venc.c | 63 +++++-
>> drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>> drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
>> drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
>> include/media/v4l2-ctrls.h | 1 +
>> include/media/venus-ctrls.h | 22 +++
>> 11 files changed, 465 insertions(+), 13 deletions(-)
>> create mode 100644 include/media/venus-ctrls.h
>>

2020-05-28 11:27:00

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

On 28/05/2020 12:48, [email protected] wrote:
> Hi Hans,
>
> Thanks for the review.
>
> On 2020-05-26 16:27, Hans Verkuil wrote:
>> Hi Dikshita,
>>
>> My apologies for the delay, this was (mostly) due to various vacation
>> days.
>>
>> On 08/05/2020 08:21, Dikshita Agarwal wrote:
>>> There are many commercialized video use cases which needs metadata
>>> info
>>> to be circulated between v4l2 client and v4l2 driver.
>>>
>>> METADATA has following requirements associated:
>>> •Metadata is an optional info available for a buffer. It is not
>>> mandatorily for every buffer.
>>> For ex. consider metadata ROI (Region Of Interest). ROI is specified
>>> by clients to indicate
>>> the region where enhanced quality is desired. This metadata is given
>>> as an input information
>>> to encoder output plane. Client may or may not specify the ROI for a
>>> frame during encode as
>>> an input metadata. Also if the client has not provided ROI metadata
>>> for a given frame,
>>> it would be incorrect to take the metadata from previous frame. If
>>> the data and
>>> metadata is asynchronous, it would be difficult for hardware to
>>> decide if it
>>> needs to wait for metadata buffer or not before processing the input
>>> frame for encoding.
>>> •Synchronize the buffer requirement across both the video node/session
>>> (incase metadata is being processed as a separate v4l2 video
>>> node/session).
>>> This is to avoid buffer starvation.
>>> •Associate the metadata buffer with the data buffer without adding any
>>> pipeline delay
>>> in waiting for each other. This is applicable both at the hardware
>>> side (the processing end)
>>> and client side (the receiving end).
>>> •Low latency usecases like WFD/split rendering/game streaming/IMS have
>>> sub-50ms e2e latency
>>> requirements, and it is not practical to stall the pipeline due to
>>> inherent framework latencies.
>>> High performance usecase like high-frame rate playback/record can
>>> lead to frame loss during any pipeline latency.
>>>
>>> To address all above requirements, we used v4l2 Request API as
>>> interlace.
>>>
>>> As an experiment, We have introduced new control
>>> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>> to contain the METADATA info. Exact controls can be finalized once the
>>> interface is discussed.
>>>
>>> For setting metadata from userspace to kernel, let say on encode
>>> output plane,
>>> following code sequence was followed
>>> 1. Video driver is registering for media device and creating a media
>>> node in /dev
>>> 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>>> media fd.
>>> 3. METADATA configuration is being applied on request fd using
>>> VIDIOC_S_EXT_CTRLS IOCTL
>>> and the same request fd is added to buf structure structure before
>>> calling VIDIOC_QBUF on video fd.
>>> 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>>> driver then, as a result
>>> to which METADATA control will be applied to buffer through S_CTRL.
>>> 5. Once control is applied and request is completed,
>>> MEDIA_REQUEST_IOC_REINIT IOCTL is called
>>> to re-initialize the request.
>>
>> This is fine and should work well. It's what the Request API is for,
>> so no problems here.
>>
>>>
>>> We could achieve the same on capture plane as well by removing few
>>> checks present currently
>>> in v4l2 core which restrict the implementation to only output plane.
>>
>> Why do you need the Request API for the capture side in a
>> memory-to-memory driver? It is not
>> clear from this patch series what the use-case is. There are reasons
>> why this is currently
>> not allowed. So I need to know more about this.
>>
>> Regards,
>>
>> Hans
>>
> we need this for use cases like HDR10+ where metadata info is part of
> the bitstream.
> To handle such frame specific data, support for request api on capture
> plane would be needed.

That's for the decoder, right? So the decoder extracts the HDR10+ metadata
and fills in a control with the metadata?

If that's the case, then it matches a similar request I got from mediatek.
What is needed is support for 'read-only' requests: i.e. the driver can
associate controls with a capture buffer and return that to userspace. But
it is not possible to *set* controls in userspace when queuing the request.

If you think about it you'll see that setting controls in userspace for
a capture queue request makes no sense, but having the driver add set
read-only controls when the request is finished is fine and makes sense.

Implementing this shouldn't be a big job: you'd need a new V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
capability, a corresponding new flag in struct vb2_queue, a new ro_requests flag in
struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and refuse to
try/set any controls if it is true.

Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the case where both
capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.

And the documentation needs to be updated.

I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
this already.

Regards,

Hans

>
> Thanks,
> Dikshita
>>>
>>> We profiled below data with this implementation :
>>> 1. Total time taken ( round trip ) for setting up control data on
>>> video driver
>>> with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>>> 2. Time taken for first QBUF on Output plane to reach driver with
>>> REQUEST API enabled (One way): 723us
>>> 3. Time taken for first QBUF on Output plane to reach driver without
>>> REQUEST API (One way) : 250us
>>> 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>>> API enabled :
>>> a. VIDIOC_S_EXT_CTRLS : 201us
>>> b. VIDIOC_QBUF : 92us
>>> c. MEDIA_REQUEST_IOC_QUEUE: 386us
>>>
>>> Kindly share your feedback/comments on the design/call sequence.
>>> Also as we experimented and enabled the metadata on capture plane as
>>> well, please comment if any issue in
>>> allowing the metadata exchange on capture plane as well.
>>>
>>> Reference for client side implementation can be found at [1].
>>>
>>> Thanks,
>>> Dikshita
>>>
>>> [1]
>>> https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>>>
>>> Dikshita Agarwal (3):
>>> Register for media device
>>> - Initialize and register for media device
>>> - define venus_m2m_media_ops
>>> - Implement APIs to register/unregister media controller.
>>> Enable Request API for output buffers
>>> - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>>> - Initialize vb2 ops buf_out_validate and buf_request_complete.
>>> - Add support for custom Metadata control
>>> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>> - Implemeted/Integrated APIs for Request setup/complete.
>>> Enable Request API for Capture Buffers
>>>
>>> drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
>>> drivers/media/platform/Kconfig | 2 +-
>>> drivers/media/platform/qcom/venus/core.h | 36 ++++
>>> drivers/media/platform/qcom/venus/helpers.c | 247
>>> +++++++++++++++++++++++-
>>> drivers/media/platform/qcom/venus/helpers.h | 15 ++
>>> drivers/media/platform/qcom/venus/venc.c | 63 +++++-
>>> drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>>> drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
>>> drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
>>> include/media/v4l2-ctrls.h | 1 +
>>> include/media/venus-ctrls.h | 22 +++
>>> 11 files changed, 465 insertions(+), 13 deletions(-)
>>> create mode 100644 include/media/venus-ctrls.h
>>>

2020-05-29 02:11:05

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
> On 28/05/2020 12:48, [email protected] wrote:
> > Hi Hans,
> >
> > Thanks for the review.
> >
> > On 2020-05-26 16:27, Hans Verkuil wrote:
> > > Hi Dikshita,
> > >
> > > My apologies for the delay, this was (mostly) due to various vacation
> > > days.
> > >
> > > On 08/05/2020 08:21, Dikshita Agarwal wrote:
> > > > There are many commercialized video use cases which needs metadata
> > > > info
> > > > to be circulated between v4l2 client and v4l2 driver.
> > > >
> > > > METADATA has following requirements associated:
> > > > •Metadata is an optional info available for a buffer. It is not
> > > > mandatorily for every buffer.
> > > > For ex. consider metadata ROI (Region Of Interest). ROI is specified
> > > > by clients to indicate
> > > > the region where enhanced quality is desired. This metadata is given
> > > > as an input information
> > > > to encoder output plane. Client may or may not specify the ROI for a
> > > > frame during encode as
> > > > an input metadata. Also if the client has not provided ROI metadata
> > > > for a given frame,
> > > > it would be incorrect to take the metadata from previous frame. If
> > > > the data and
> > > > metadata is asynchronous, it would be difficult for hardware to
> > > > decide if it
> > > > needs to wait for metadata buffer or not before processing the input
> > > > frame for encoding.
> > > > •Synchronize the buffer requirement across both the video node/session
> > > > (incase metadata is being processed as a separate v4l2 video
> > > > node/session).
> > > > This is to avoid buffer starvation.
> > > > •Associate the metadata buffer with the data buffer without adding any
> > > > pipeline delay
> > > > in waiting for each other. This is applicable both at the hardware
> > > > side (the processing end)
> > > > and client side (the receiving end).
> > > > •Low latency usecases like WFD/split rendering/game streaming/IMS have
> > > > sub-50ms e2e latency
> > > > requirements, and it is not practical to stall the pipeline due to
> > > > inherent framework latencies.
> > > > High performance usecase like high-frame rate playback/record can
> > > > lead to frame loss during any pipeline latency.
> > > >
> > > > To address all above requirements, we used v4l2 Request API as
> > > > interlace.
> > > >
> > > > As an experiment, We have introduced new control
> > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> > > > to contain the METADATA info. Exact controls can be finalized once the
> > > > interface is discussed.
> > > >
> > > > For setting metadata from userspace to kernel, let say on encode
> > > > output plane,
> > > > following code sequence was followed
> > > > 1. Video driver is registering for media device and creating a media
> > > > node in /dev
> > > > 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
> > > > media fd.
> > > > 3. METADATA configuration is being applied on request fd using
> > > > VIDIOC_S_EXT_CTRLS IOCTL
> > > > and the same request fd is added to buf structure structure before
> > > > calling VIDIOC_QBUF on video fd.
> > > > 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
> > > > driver then, as a result
> > > > to which METADATA control will be applied to buffer through S_CTRL.
> > > > 5. Once control is applied and request is completed,
> > > > MEDIA_REQUEST_IOC_REINIT IOCTL is called
> > > > to re-initialize the request.
> > >
> > > This is fine and should work well. It's what the Request API is for,
> > > so no problems here.
> > >
> > > > We could achieve the same on capture plane as well by removing few
> > > > checks present currently
> > > > in v4l2 core which restrict the implementation to only output plane.
> > >
> > > Why do you need the Request API for the capture side in a
> > > memory-to-memory driver? It is not
> > > clear from this patch series what the use-case is. There are reasons
> > > why this is currently
> > > not allowed. So I need to know more about this.
> > >
> > > Regards,
> > >
> > > Hans
> > >
> > we need this for use cases like HDR10+ where metadata info is part of
> > the bitstream.
> > To handle such frame specific data, support for request api on capture
> > plane would be needed.
>
> That's for the decoder, right? So the decoder extracts the HDR10+ metadata
> and fills in a control with the metadata?
>
> If that's the case, then it matches a similar request I got from mediatek.
> What is needed is support for 'read-only' requests: i.e. the driver can
> associate controls with a capture buffer and return that to userspace. But
> it is not possible to *set* controls in userspace when queuing the request.
>
> If you think about it you'll see that setting controls in userspace for
> a capture queue request makes no sense, but having the driver add set
> read-only controls when the request is finished is fine and makes sense.

Hi Hans,

I'm not sure I follow you on what will this look like in userspace. Can you post
an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
how the request helps in synchronizing the read of the metadata controls (over
simply reading the control after a DQBUF, which we can match to a specific input
using the provided timestamp). I also fail to understand how this aligns with
Stanimir concern with the performance of the get control interface.

>
> Implementing this shouldn't be a big job: you'd need a new
> V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
> capability, a corresponding new flag in struct vb2_queue, a new ro_requests
> flag in
> struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
> refuse to
> try/set any controls if it is true.
>
> Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
> case where both
> capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
>
> And the documentation needs to be updated.
>
> I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
> this already.
>
> Regards,
>
> Hans
>
> > Thanks,
> > Dikshita
> > > > We profiled below data with this implementation :
> > > > 1. Total time taken ( round trip ) for setting up control data on
> > > > video driver
> > > > with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
> > > > 2. Time taken for first QBUF on Output plane to reach driver with
> > > > REQUEST API enabled (One way): 723us
> > > > 3. Time taken for first QBUF on Output plane to reach driver without
> > > > REQUEST API (One way) : 250us
> > > > 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
> > > > API enabled :
> > > > a. VIDIOC_S_EXT_CTRLS : 201us
> > > > b. VIDIOC_QBUF : 92us
> > > > c. MEDIA_REQUEST_IOC_QUEUE: 386us
> > > >
> > > > Kindly share your feedback/comments on the design/call sequence.
> > > > Also as we experimented and enabled the metadata on capture plane as
> > > > well, please comment if any issue in
> > > > allowing the metadata exchange on capture plane as well.
> > > >
> > > > Reference for client side implementation can be found at [1].
> > > >
> > > > Thanks,
> > > > Dikshita
> > > >
> > > > [1]
> > > > https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
> > > >
> > > > Dikshita Agarwal (3):
> > > > Register for media device
> > > > - Initialize and register for media device
> > > > - define venus_m2m_media_ops
> > > > - Implement APIs to register/unregister media controller.
> > > > Enable Request API for output buffers
> > > > - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
> > > > - Initialize vb2 ops buf_out_validate and buf_request_complete.
> > > > - Add support for custom Metadata control
> > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> > > > - Implemeted/Integrated APIs for Request setup/complete.
> > > > Enable Request API for Capture Buffers
> > > >
> > > > drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
> > > > drivers/media/platform/Kconfig | 2 +-
> > > > drivers/media/platform/qcom/venus/core.h | 36 ++++
> > > > drivers/media/platform/qcom/venus/helpers.c | 247
> > > > +++++++++++++++++++++++-
> > > > drivers/media/platform/qcom/venus/helpers.h | 15 ++
> > > > drivers/media/platform/qcom/venus/venc.c | 63 +++++-
> > > > drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
> > > > drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
> > > > drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
> > > > include/media/v4l2-ctrls.h | 1 +
> > > > include/media/venus-ctrls.h | 22 +++
> > > > 11 files changed, 465 insertions(+), 13 deletions(-)
> > > > create mode 100644 include/media/venus-ctrls.h
> > > >

2020-05-29 02:21:16

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Le jeudi 28 mai 2020 à 16:18 +0530, [email protected] a écrit :
> > not allowed. So I need to know more about this.
> > Regards,
> > Hans
>
> we need this for use cases like HDR10+ where metadata info is part of
> the bitstream.
>
> To handle such frame specific data, support for request api on capture
> plane would be needed.

I have a bit of a mixed impression here. Considering how large the ioctl
interface overhead is, relying on HW parser to extract this medata woud be the
last thing I would consider.

Instead, I'm quite convince we can achieve greater and likely zero-copy
perfromance by locating this header in userspace. So everytime I see this kind
of API, were the HW is *needed* to parse a trivial bit of bistream, I ended
thinking that we simply craft this API to expose this because the HW can do it,
but no further logical thinking or higher level design seems to have been
applied.

I'm sorry for this open critic, but are we designing this because the HW
designer exposed that feature? This is so low complexity to extract in
userspace, with the bonus that we are not forced into expanding the
representation to another form immediatly, as maybe the display will be able to
handle that form directly (where converting to a C structure and then back to
some binary format to satisfy DRM property seems very sub-optimal).

Nicolas

2020-05-29 07:34:03

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

On 29/05/2020 04:18, Nicolas Dufresne wrote:
> Le jeudi 28 mai 2020 à 16:18 +0530, [email protected] a écrit :
>>> not allowed. So I need to know more about this.
>>> Regards,
>>> Hans
>>
>> we need this for use cases like HDR10+ where metadata info is part of
>> the bitstream.
>>
>> To handle such frame specific data, support for request api on capture
>> plane would be needed.
>
> I have a bit of a mixed impression here. Considering how large the ioctl
> interface overhead is, relying on HW parser to extract this medata woud be the
> last thing I would consider.
>
> Instead, I'm quite convince we can achieve greater and likely zero-copy
> perfromance by locating this header in userspace. So everytime I see this kind
> of API, were the HW is *needed* to parse a trivial bit of bistream, I ended
> thinking that we simply craft this API to expose this because the HW can do it,
> but no further logical thinking or higher level design seems to have been
> applied.
>
> I'm sorry for this open critic, but are we designing this because the HW
> designer exposed that feature? This is so low complexity to extract in
> userspace, with the bonus that we are not forced into expanding the
> representation to another form immediatly, as maybe the display will be able to
> handle that form directly (where converting to a C structure and then back to
> some binary format to satisfy DRM property seems very sub-optimal).
>
> Nicolas
>

Nicolas raises good questions: it would help if you can give more detailed information
about the hardware. We had similar discussions with Xilinx about HDR10+ (see this
thread: https://www.spinics.net/lists/linux-media/msg163297.html).

There is no clear answer at the moment on how to handle dynamic HDR metadata.
It will help if we have some more information how different SoCs handle this
in hardware.

Regards,

Hans

2020-06-05 07:07:23

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Hi Hans, Nicolas,

On 2020-05-29 13:01, Hans Verkuil wrote:
> On 29/05/2020 04:18, Nicolas Dufresne wrote:
>> Le jeudi 28 mai 2020 à 16:18 +0530, [email protected] a écrit :
>>>> not allowed. So I need to know more about this.
>>>> Regards,
>>>> Hans
>>>
>>> we need this for use cases like HDR10+ where metadata info is part of
>>> the bitstream.
>>>
>>> To handle such frame specific data, support for request api on
>>> capture
>>> plane would be needed.
>>
>> I have a bit of a mixed impression here. Considering how large the
>> ioctl
>> interface overhead is, relying on HW parser to extract this medata
>> woud be the
>> last thing I would consider.
>>
>> Instead, I'm quite convince we can achieve greater and likely
>> zero-copy
>> perfromance by locating this header in userspace. So everytime I see
>> this kind
>> of API, were the HW is *needed* to parse a trivial bit of bistream, I
>> ended
>> thinking that we simply craft this API to expose this because the HW
>> can do it,
>> but no further logical thinking or higher level design seems to have
>> been
>> applied.
>>
>> I'm sorry for this open critic, but are we designing this because the
>> HW
>> designer exposed that feature? This is so low complexity to extract in
>> userspace, with the bonus that we are not forced into expanding the
>> representation to another form immediatly, as maybe the display will
>> be able to
>> handle that form directly (where converting to a C structure and then
>> back to
>> some binary format to satisfy DRM property seems very sub-optimal).
>>
>> Nicolas
>>
>
> Nicolas raises good questions: it would help if you can give more
> detailed information
> about the hardware. We had similar discussions with Xilinx about
> HDR10+ (see this
> thread: https://www.spinics.net/lists/linux-media/msg163297.html).
>
> There is no clear answer at the moment on how to handle dynamic HDR
> metadata.
> It will help if we have some more information how different SoCs handle
> this
> in hardware.
>
> Regards,
>
> Hans

As per Widevine Level 1 requirement, it needs “Hardware Protected Video
Path”.
Hence, in case of secure bitstream decoding, we need decoder metadata
delivered from HW.
CPU cannot parse secure bitstream and extract them.
Apart from this, there are other metadata like "histogram" which is not
part of the bitstream
and generated by hardware

Thanks,
Dikshita

2020-06-05 17:47:33

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Le vendredi 05 juin 2020 à 12:32 +0530, [email protected] a écrit :
> Hi Hans, Nicolas,
>
> On 2020-05-29 13:01, Hans Verkuil wrote:
> > On 29/05/2020 04:18, Nicolas Dufresne wrote:
> > > Le jeudi 28 mai 2020 à 16:18 +0530, [email protected] a écrit :
> > > > > not allowed. So I need to know more about this.
> > > > > Regards,
> > > > > Hans
> > > >
> > > > we need this for use cases like HDR10+ where metadata info is part of
> > > > the bitstream.
> > > >
> > > > To handle such frame specific data, support for request api on
> > > > capture
> > > > plane would be needed.
> > >
> > > I have a bit of a mixed impression here. Considering how large the
> > > ioctl
> > > interface overhead is, relying on HW parser to extract this medata
> > > woud be the
> > > last thing I would consider.
> > >
> > > Instead, I'm quite convince we can achieve greater and likely
> > > zero-copy
> > > perfromance by locating this header in userspace. So everytime I see
> > > this kind
> > > of API, were the HW is *needed* to parse a trivial bit of bistream, I
> > > ended
> > > thinking that we simply craft this API to expose this because the HW
> > > can do it,
> > > but no further logical thinking or higher level design seems to have
> > > been
> > > applied.
> > >
> > > I'm sorry for this open critic, but are we designing this because the
> > > HW
> > > designer exposed that feature? This is so low complexity to extract in
> > > userspace, with the bonus that we are not forced into expanding the
> > > representation to another form immediatly, as maybe the display will
> > > be able to
> > > handle that form directly (where converting to a C structure and then
> > > back to
> > > some binary format to satisfy DRM property seems very sub-optimal).
> > >
> > > Nicolas
> > >
> >
> > Nicolas raises good questions: it would help if you can give more
> > detailed information
> > about the hardware. We had similar discussions with Xilinx about
> > HDR10+ (see this
> > thread: https://www.spinics.net/lists/linux-media/msg163297.html).
> >
> > There is no clear answer at the moment on how to handle dynamic HDR
> > metadata.
> > It will help if we have some more information how different SoCs handle
> > this
> > in hardware.
> >
> > Regards,
> >
> > Hans
>
> As per Widevine Level 1 requirement, it needs “Hardware Protected Video
> Path”.
> Hence, in case of secure bitstream decoding, we need decoder metadata
> delivered from HW.
> CPU cannot parse secure bitstream and extract them.
> Apart from this, there are other metadata like "histogram" which is not
> part of the bitstream
> and generated by hardware

(I'm ignoring the bit about camera data + histogram, this was about
CODEC, and it also does not make much sense to me)

We extract this data from the bitstream, before the decoder. The
bitstream is not subject to secure buffer, because it's encrypted. Be
aware that the implementation does not encrypt all NALs, PPS/SPS SEIs,
are in clear, and NAL headers are most of the time in clear.

Going with full bitstream encryption would have required rewriting a
lot more HW, since you would not be able to handle the
depayload/demuxing in userspace or you'd need all this multimedia stuff
to be placed on in your secure firmware. Multimedia software these
days, ffmpeg, gstreamer, chromium internal stack etc. needs a lot of
information from the bitstream that would be very hard to pass
properly.

regards,
Nicolas

2020-06-11 15:12:50

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Le jeudi 28 mai 2020 à 22:08 -0400, Nicolas Dufresne a écrit :
> Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
> > On 28/05/2020 12:48, [email protected] wrote:
> > > Hi Hans,
> > >
> > > Thanks for the review.
> > >
> > > On 2020-05-26 16:27, Hans Verkuil wrote:
> > > > Hi Dikshita,
> > > >
> > > > My apologies for the delay, this was (mostly) due to various vacation
> > > > days.
> > > >
> > > > On 08/05/2020 08:21, Dikshita Agarwal wrote:
> > > > > There are many commercialized video use cases which needs metadata
> > > > > info
> > > > > to be circulated between v4l2 client and v4l2 driver.
> > > > >
> > > > > METADATA has following requirements associated:
> > > > > •Metadata is an optional info available for a buffer. It is not
> > > > > mandatorily for every buffer.
> > > > > For ex. consider metadata ROI (Region Of Interest). ROI is specified
> > > > > by clients to indicate
> > > > > the region where enhanced quality is desired. This metadata is given
> > > > > as an input information
> > > > > to encoder output plane. Client may or may not specify the ROI for a
> > > > > frame during encode as
> > > > > an input metadata. Also if the client has not provided ROI metadata
> > > > > for a given frame,
> > > > > it would be incorrect to take the metadata from previous frame. If
> > > > > the data and
> > > > > metadata is asynchronous, it would be difficult for hardware to
> > > > > decide if it
> > > > > needs to wait for metadata buffer or not before processing the input
> > > > > frame for encoding.
> > > > > •Synchronize the buffer requirement across both the video node/session
> > > > > (incase metadata is being processed as a separate v4l2 video
> > > > > node/session).
> > > > > This is to avoid buffer starvation.
> > > > > •Associate the metadata buffer with the data buffer without adding any
> > > > > pipeline delay
> > > > > in waiting for each other. This is applicable both at the hardware
> > > > > side (the processing end)
> > > > > and client side (the receiving end).
> > > > > •Low latency usecases like WFD/split rendering/game streaming/IMS have
> > > > > sub-50ms e2e latency
> > > > > requirements, and it is not practical to stall the pipeline due to
> > > > > inherent framework latencies.
> > > > > High performance usecase like high-frame rate playback/record can
> > > > > lead to frame loss during any pipeline latency.
> > > > >
> > > > > To address all above requirements, we used v4l2 Request API as
> > > > > interlace.
> > > > >
> > > > > As an experiment, We have introduced new control
> > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> > > > > to contain the METADATA info. Exact controls can be finalized once the
> > > > > interface is discussed.
> > > > >
> > > > > For setting metadata from userspace to kernel, let say on encode
> > > > > output plane,
> > > > > following code sequence was followed
> > > > > 1. Video driver is registering for media device and creating a media
> > > > > node in /dev
> > > > > 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
> > > > > media fd.
> > > > > 3. METADATA configuration is being applied on request fd using
> > > > > VIDIOC_S_EXT_CTRLS IOCTL
> > > > > and the same request fd is added to buf structure structure before
> > > > > calling VIDIOC_QBUF on video fd.
> > > > > 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
> > > > > driver then, as a result
> > > > > to which METADATA control will be applied to buffer through S_CTRL.
> > > > > 5. Once control is applied and request is completed,
> > > > > MEDIA_REQUEST_IOC_REINIT IOCTL is called
> > > > > to re-initialize the request.
> > > >
> > > > This is fine and should work well. It's what the Request API is for,
> > > > so no problems here.
> > > >
> > > > > We could achieve the same on capture plane as well by removing few
> > > > > checks present currently
> > > > > in v4l2 core which restrict the implementation to only output plane.
> > > >
> > > > Why do you need the Request API for the capture side in a
> > > > memory-to-memory driver? It is not
> > > > clear from this patch series what the use-case is. There are reasons
> > > > why this is currently
> > > > not allowed. So I need to know more about this.
> > > >
> > > > Regards,
> > > >
> > > > Hans
> > > >
> > > we need this for use cases like HDR10+ where metadata info is part of
> > > the bitstream.
> > > To handle such frame specific data, support for request api on capture
> > > plane would be needed.
> >
> > That's for the decoder, right? So the decoder extracts the HDR10+ metadata
> > and fills in a control with the metadata?
> >
> > If that's the case, then it matches a similar request I got from mediatek.
> > What is needed is support for 'read-only' requests: i.e. the driver can
> > associate controls with a capture buffer and return that to userspace. But
> > it is not possible to *set* controls in userspace when queuing the request.
> >
> > If you think about it you'll see that setting controls in userspace for
> > a capture queue request makes no sense, but having the driver add set
> > read-only controls when the request is finished is fine and makes sense.
>
> Hi Hans,
>
> I'm not sure I follow you on what will this look like in userspace. Can you post
> an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
> how the request helps in synchronizing the read of the metadata controls (over
> simply reading the control after a DQBUF, which we can match to a specific input
> using the provided timestamp). I also fail to understand how this aligns with
> Stanimir concern with the performance of the get control interface.

As there was no answer, I'll try and ask a more precise question. While
I still believe it's better done in userspace for M2M decoder, there is
a need for HDMI receivers (hence adding direct CC to Dylan Yip from
Xilinx).

Would this match your expected userspace workflow ?

1. Allocate capture buffers
2. Allocate the same number of request fd (ro request)
3. For each capture buffer
3.1 Queue the capture buffer (passing the request)
3.2 Queue the request
4. Wait for capture buffer to be ready
5. Dequeue a capture buffer, and lookup it's request FD
6. Wait for the request to complete (needed ?)
7. Read the meta control passing the request

I'm not sure if 6. is required, driver could just to things in the
right order (store the meta in the request before marking the buffer
done). Now this is pretty heavy, but does not seems broken. The
question is how to you avoid reading the control if it haven't changed
? Can driver guaranty to send the control change event before marking a
buffer done ? (this is for static HDR meta, which will change when
stream is change, e.g. starting/ending of adds on TV). While for HDR10+
and similar dynamic meta, we expect a new value for every run, HDR10+
data is much much bigger. Do we really want that to be model around a
control ?

>
> > Implementing this shouldn't be a big job: you'd need a new
> > V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
> > capability, a corresponding new flag in struct vb2_queue, a new ro_requests
> > flag in
> > struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
> > refuse to
> > try/set any controls if it is true.
> >
> > Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
> > case where both
> > capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
> >
> > And the documentation needs to be updated.
> >
> > I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
> > this already.
> >
> > Regards,
> >
> > Hans
> >
> > > Thanks,
> > > Dikshita
> > > > > We profiled below data with this implementation :
> > > > > 1. Total time taken ( round trip ) for setting up control data on
> > > > > video driver
> > > > > with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
> > > > > 2. Time taken for first QBUF on Output plane to reach driver with
> > > > > REQUEST API enabled (One way): 723us
> > > > > 3. Time taken for first QBUF on Output plane to reach driver without
> > > > > REQUEST API (One way) : 250us
> > > > > 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
> > > > > API enabled :
> > > > > a. VIDIOC_S_EXT_CTRLS : 201us
> > > > > b. VIDIOC_QBUF : 92us
> > > > > c. MEDIA_REQUEST_IOC_QUEUE: 386us
> > > > >
> > > > > Kindly share your feedback/comments on the design/call sequence.
> > > > > Also as we experimented and enabled the metadata on capture plane as
> > > > > well, please comment if any issue in
> > > > > allowing the metadata exchange on capture plane as well.
> > > > >
> > > > > Reference for client side implementation can be found at [1].
> > > > >
> > > > > Thanks,
> > > > > Dikshita
> > > > >
> > > > > [1]
> > > > > https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
> > > > >
> > > > > Dikshita Agarwal (3):
> > > > > Register for media device
> > > > > - Initialize and register for media device
> > > > > - define venus_m2m_media_ops
> > > > > - Implement APIs to register/unregister media controller.
> > > > > Enable Request API for output buffers
> > > > > - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
> > > > > - Initialize vb2 ops buf_out_validate and buf_request_complete.
> > > > > - Add support for custom Metadata control
> > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> > > > > - Implemeted/Integrated APIs for Request setup/complete.
> > > > > Enable Request API for Capture Buffers
> > > > >
> > > > > drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
> > > > > drivers/media/platform/Kconfig | 2 +-
> > > > > drivers/media/platform/qcom/venus/core.h | 36 ++++
> > > > > drivers/media/platform/qcom/venus/helpers.c | 247
> > > > > +++++++++++++++++++++++-
> > > > > drivers/media/platform/qcom/venus/helpers.h | 15 ++
> > > > > drivers/media/platform/qcom/venus/venc.c | 63 +++++-
> > > > > drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
> > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
> > > > > drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
> > > > > include/media/v4l2-ctrls.h | 1 +
> > > > > include/media/venus-ctrls.h | 22 +++
> > > > > 11 files changed, 465 insertions(+), 13 deletions(-)
> > > > > create mode 100644 include/media/venus-ctrls.h
> > > > >

2020-06-12 10:09:51

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

On 11/06/2020 17:06, Nicolas Dufresne wrote:
> Le jeudi 28 mai 2020 à 22:08 -0400, Nicolas Dufresne a écrit :
>> Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
>>> On 28/05/2020 12:48, [email protected] wrote:
>>>> Hi Hans,
>>>>
>>>> Thanks for the review.
>>>>
>>>> On 2020-05-26 16:27, Hans Verkuil wrote:
>>>>> Hi Dikshita,
>>>>>
>>>>> My apologies for the delay, this was (mostly) due to various vacation
>>>>> days.
>>>>>
>>>>> On 08/05/2020 08:21, Dikshita Agarwal wrote:
>>>>>> There are many commercialized video use cases which needs metadata
>>>>>> info
>>>>>> to be circulated between v4l2 client and v4l2 driver.
>>>>>>
>>>>>> METADATA has following requirements associated:
>>>>>> •Metadata is an optional info available for a buffer. It is not
>>>>>> mandatorily for every buffer.
>>>>>> For ex. consider metadata ROI (Region Of Interest). ROI is specified
>>>>>> by clients to indicate
>>>>>> the region where enhanced quality is desired. This metadata is given
>>>>>> as an input information
>>>>>> to encoder output plane. Client may or may not specify the ROI for a
>>>>>> frame during encode as
>>>>>> an input metadata. Also if the client has not provided ROI metadata
>>>>>> for a given frame,
>>>>>> it would be incorrect to take the metadata from previous frame. If
>>>>>> the data and
>>>>>> metadata is asynchronous, it would be difficult for hardware to
>>>>>> decide if it
>>>>>> needs to wait for metadata buffer or not before processing the input
>>>>>> frame for encoding.
>>>>>> •Synchronize the buffer requirement across both the video node/session
>>>>>> (incase metadata is being processed as a separate v4l2 video
>>>>>> node/session).
>>>>>> This is to avoid buffer starvation.
>>>>>> •Associate the metadata buffer with the data buffer without adding any
>>>>>> pipeline delay
>>>>>> in waiting for each other. This is applicable both at the hardware
>>>>>> side (the processing end)
>>>>>> and client side (the receiving end).
>>>>>> •Low latency usecases like WFD/split rendering/game streaming/IMS have
>>>>>> sub-50ms e2e latency
>>>>>> requirements, and it is not practical to stall the pipeline due to
>>>>>> inherent framework latencies.
>>>>>> High performance usecase like high-frame rate playback/record can
>>>>>> lead to frame loss during any pipeline latency.
>>>>>>
>>>>>> To address all above requirements, we used v4l2 Request API as
>>>>>> interlace.
>>>>>>
>>>>>> As an experiment, We have introduced new control
>>>>>> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>>>>> to contain the METADATA info. Exact controls can be finalized once the
>>>>>> interface is discussed.
>>>>>>
>>>>>> For setting metadata from userspace to kernel, let say on encode
>>>>>> output plane,
>>>>>> following code sequence was followed
>>>>>> 1. Video driver is registering for media device and creating a media
>>>>>> node in /dev
>>>>>> 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>>>>>> media fd.
>>>>>> 3. METADATA configuration is being applied on request fd using
>>>>>> VIDIOC_S_EXT_CTRLS IOCTL
>>>>>> and the same request fd is added to buf structure structure before
>>>>>> calling VIDIOC_QBUF on video fd.
>>>>>> 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>>>>>> driver then, as a result
>>>>>> to which METADATA control will be applied to buffer through S_CTRL.
>>>>>> 5. Once control is applied and request is completed,
>>>>>> MEDIA_REQUEST_IOC_REINIT IOCTL is called
>>>>>> to re-initialize the request.
>>>>>
>>>>> This is fine and should work well. It's what the Request API is for,
>>>>> so no problems here.
>>>>>
>>>>>> We could achieve the same on capture plane as well by removing few
>>>>>> checks present currently
>>>>>> in v4l2 core which restrict the implementation to only output plane.
>>>>>
>>>>> Why do you need the Request API for the capture side in a
>>>>> memory-to-memory driver? It is not
>>>>> clear from this patch series what the use-case is. There are reasons
>>>>> why this is currently
>>>>> not allowed. So I need to know more about this.
>>>>>
>>>>> Regards,
>>>>>
>>>>> Hans
>>>>>
>>>> we need this for use cases like HDR10+ where metadata info is part of
>>>> the bitstream.
>>>> To handle such frame specific data, support for request api on capture
>>>> plane would be needed.
>>>
>>> That's for the decoder, right? So the decoder extracts the HDR10+ metadata
>>> and fills in a control with the metadata?
>>>
>>> If that's the case, then it matches a similar request I got from mediatek.
>>> What is needed is support for 'read-only' requests: i.e. the driver can
>>> associate controls with a capture buffer and return that to userspace. But
>>> it is not possible to *set* controls in userspace when queuing the request.
>>>
>>> If you think about it you'll see that setting controls in userspace for
>>> a capture queue request makes no sense, but having the driver add set
>>> read-only controls when the request is finished is fine and makes sense.
>>
>> Hi Hans,
>>
>> I'm not sure I follow you on what will this look like in userspace. Can you post
>> an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
>> how the request helps in synchronizing the read of the metadata controls (over
>> simply reading the control after a DQBUF, which we can match to a specific input
>> using the provided timestamp). I also fail to understand how this aligns with
>> Stanimir concern with the performance of the get control interface.

Hmm, I think I missed this email.

Note that there is no guarantee that reading a control after a DQBUF will give
you the value for that dequeued frame. The driver may already have generated
multiple capture frames by the time userspace dequeues the oldest buffer.

>
> As there was no answer, I'll try and ask a more precise question. While
> I still believe it's better done in userspace for M2M decoder, there is
> a need for HDMI receivers (hence adding direct CC to Dylan Yip from
> Xilinx).
>
> Would this match your expected userspace workflow ?
>
> 1. Allocate capture buffers
> 2. Allocate the same number of request fd (ro request)
> 3. For each capture buffer
> 3.1 Queue the capture buffer (passing the request)
> 3.2 Queue the request
> 4. Wait for capture buffer to be ready
> 5. Dequeue a capture buffer, and lookup it's request FD
> 6. Wait for the request to complete (needed ?)
> 7. Read the meta control passing the request

Right.

>
> I'm not sure if 6. is required, driver could just to things in the
> right order (store the meta in the request before marking the buffer
> done).

For HDR information you don't need 6. For histogram/statistics such
information might become available after the capture buffer was ready.

I would just implement this as follows:

4. Wait for the request to complete
5. Dequeue a capture buffer, and lookup it's request FD
6. Read the meta control passing the request

No need to wait for both the request and the capture buffer.

Now this is pretty heavy, but does not seems broken. The
> question is how to you avoid reading the control if it haven't changed
> ? Can driver guaranty to send the control change event before marking a
> buffer done ? (this is for static HDR meta, which will change when
> stream is change, e.g. starting/ending of adds on TV).

There are two possible mechanisms today:

1) use requests for the capture queue, and then you need to query the
request for the static HDR control. The advantage is that this is a
1-to-1 mapping of the control value with the buffer. The disadvantage is that
it adds overhead.

2) don't use requests, instead just subscribe to the control event.
Whenever the static HDR meta data changes, you'll get an event. But
then it isn't linked to a specific frame.

A third option would be to extend struct v4l2_event_ctrl with a __u32
buffer_index field to associate a capture buffer with this change. You'd
need an addition control flag as well to indicate that this control is
associated with a specific buffer.

(Just a brainstorm, this no doubt would need some refinement)

While for HDR10+
> and similar dynamic meta, we expect a new value for every run, HDR10+
> data is much much bigger. Do we really want that to be model around a
> control ?

No, we don't. For that you would want a metadata video device.

Dynamic metadata is a separate problem. I would also really like to know
a bit more about the various HW implementations for this before deciding
on a specific API. E.g., can all HW pass this data to a separate DMA engine?
Would it only split off dynamic metadata or just all InfoFrames?

Regards,

Hans

>
>>
>>> Implementing this shouldn't be a big job: you'd need a new
>>> V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
>>> capability, a corresponding new flag in struct vb2_queue, a new ro_requests
>>> flag in
>>> struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
>>> refuse to
>>> try/set any controls if it is true.
>>>
>>> Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
>>> case where both
>>> capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
>>>
>>> And the documentation needs to be updated.
>>>
>>> I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
>>> this already.
>>>
>>> Regards,
>>>
>>> Hans
>>>
>>>> Thanks,
>>>> Dikshita
>>>>>> We profiled below data with this implementation :
>>>>>> 1. Total time taken ( round trip ) for setting up control data on
>>>>>> video driver
>>>>>> with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>>>>>> 2. Time taken for first QBUF on Output plane to reach driver with
>>>>>> REQUEST API enabled (One way): 723us
>>>>>> 3. Time taken for first QBUF on Output plane to reach driver without
>>>>>> REQUEST API (One way) : 250us
>>>>>> 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>>>>>> API enabled :
>>>>>> a. VIDIOC_S_EXT_CTRLS : 201us
>>>>>> b. VIDIOC_QBUF : 92us
>>>>>> c. MEDIA_REQUEST_IOC_QUEUE: 386us
>>>>>>
>>>>>> Kindly share your feedback/comments on the design/call sequence.
>>>>>> Also as we experimented and enabled the metadata on capture plane as
>>>>>> well, please comment if any issue in
>>>>>> allowing the metadata exchange on capture plane as well.
>>>>>>
>>>>>> Reference for client side implementation can be found at [1].
>>>>>>
>>>>>> Thanks,
>>>>>> Dikshita
>>>>>>
>>>>>> [1]
>>>>>> https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>>>>>>
>>>>>> Dikshita Agarwal (3):
>>>>>> Register for media device
>>>>>> - Initialize and register for media device
>>>>>> - define venus_m2m_media_ops
>>>>>> - Implement APIs to register/unregister media controller.
>>>>>> Enable Request API for output buffers
>>>>>> - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>>>>>> - Initialize vb2 ops buf_out_validate and buf_request_complete.
>>>>>> - Add support for custom Metadata control
>>>>>> V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>>>>> - Implemeted/Integrated APIs for Request setup/complete.
>>>>>> Enable Request API for Capture Buffers
>>>>>>
>>>>>> drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
>>>>>> drivers/media/platform/Kconfig | 2 +-
>>>>>> drivers/media/platform/qcom/venus/core.h | 36 ++++
>>>>>> drivers/media/platform/qcom/venus/helpers.c | 247
>>>>>> +++++++++++++++++++++++-
>>>>>> drivers/media/platform/qcom/venus/helpers.h | 15 ++
>>>>>> drivers/media/platform/qcom/venus/venc.c | 63 +++++-
>>>>>> drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>>>>>> drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
>>>>>> drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
>>>>>> include/media/v4l2-ctrls.h | 1 +
>>>>>> include/media/venus-ctrls.h | 22 +++
>>>>>> 11 files changed, 465 insertions(+), 13 deletions(-)
>>>>>> create mode 100644 include/media/venus-ctrls.h
>>>>>>
>

2020-06-12 16:40:03

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Le vendredi 12 juin 2020 à 12:05 +0200, Hans Verkuil a écrit :
> On 11/06/2020 17:06, Nicolas Dufresne wrote:
> > Le jeudi 28 mai 2020 à 22:08 -0400, Nicolas Dufresne a écrit :
> > > Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
> > > > On 28/05/2020 12:48, [email protected] wrote:
> > > > > Hi Hans,
> > > > >
> > > > > Thanks for the review.
> > > > >
> > > > > On 2020-05-26 16:27, Hans Verkuil wrote:
> > > > > > Hi Dikshita,
> > > > > >
> > > > > > My apologies for the delay, this was (mostly) due to various vacation
> > > > > > days.
> > > > > >
> > > > > > On 08/05/2020 08:21, Dikshita Agarwal wrote:
> > > > > > > There are many commercialized video use cases which needs metadata
> > > > > > > info
> > > > > > > to be circulated between v4l2 client and v4l2 driver.
> > > > > > >
> > > > > > > METADATA has following requirements associated:
> > > > > > > •Metadata is an optional info available for a buffer. It is not
> > > > > > > mandatorily for every buffer.
> > > > > > > For ex. consider metadata ROI (Region Of Interest). ROI is specified
> > > > > > > by clients to indicate
> > > > > > > the region where enhanced quality is desired. This metadata is given
> > > > > > > as an input information
> > > > > > > to encoder output plane. Client may or may not specify the ROI for a
> > > > > > > frame during encode as
> > > > > > > an input metadata. Also if the client has not provided ROI metadata
> > > > > > > for a given frame,
> > > > > > > it would be incorrect to take the metadata from previous frame. If
> > > > > > > the data and
> > > > > > > metadata is asynchronous, it would be difficult for hardware to
> > > > > > > decide if it
> > > > > > > needs to wait for metadata buffer or not before processing the input
> > > > > > > frame for encoding.
> > > > > > > •Synchronize the buffer requirement across both the video node/session
> > > > > > > (incase metadata is being processed as a separate v4l2 video
> > > > > > > node/session).
> > > > > > > This is to avoid buffer starvation.
> > > > > > > •Associate the metadata buffer with the data buffer without adding any
> > > > > > > pipeline delay
> > > > > > > in waiting for each other. This is applicable both at the hardware
> > > > > > > side (the processing end)
> > > > > > > and client side (the receiving end).
> > > > > > > •Low latency usecases like WFD/split rendering/game streaming/IMS have
> > > > > > > sub-50ms e2e latency
> > > > > > > requirements, and it is not practical to stall the pipeline due to
> > > > > > > inherent framework latencies.
> > > > > > > High performance usecase like high-frame rate playback/record can
> > > > > > > lead to frame loss during any pipeline latency.
> > > > > > >
> > > > > > > To address all above requirements, we used v4l2 Request API as
> > > > > > > interlace.
> > > > > > >
> > > > > > > As an experiment, We have introduced new control
> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> > > > > > > to contain the METADATA info. Exact controls can be finalized once the
> > > > > > > interface is discussed.
> > > > > > >
> > > > > > > For setting metadata from userspace to kernel, let say on encode
> > > > > > > output plane,
> > > > > > > following code sequence was followed
> > > > > > > 1. Video driver is registering for media device and creating a media
> > > > > > > node in /dev
> > > > > > > 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
> > > > > > > media fd.
> > > > > > > 3. METADATA configuration is being applied on request fd using
> > > > > > > VIDIOC_S_EXT_CTRLS IOCTL
> > > > > > > and the same request fd is added to buf structure structure before
> > > > > > > calling VIDIOC_QBUF on video fd.
> > > > > > > 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
> > > > > > > driver then, as a result
> > > > > > > to which METADATA control will be applied to buffer through S_CTRL.
> > > > > > > 5. Once control is applied and request is completed,
> > > > > > > MEDIA_REQUEST_IOC_REINIT IOCTL is called
> > > > > > > to re-initialize the request.
> > > > > >
> > > > > > This is fine and should work well. It's what the Request API is for,
> > > > > > so no problems here.
> > > > > >
> > > > > > > We could achieve the same on capture plane as well by removing few
> > > > > > > checks present currently
> > > > > > > in v4l2 core which restrict the implementation to only output plane.
> > > > > >
> > > > > > Why do you need the Request API for the capture side in a
> > > > > > memory-to-memory driver? It is not
> > > > > > clear from this patch series what the use-case is. There are reasons
> > > > > > why this is currently
> > > > > > not allowed. So I need to know more about this.
> > > > > >
> > > > > > Regards,
> > > > > >
> > > > > > Hans
> > > > > >
> > > > > we need this for use cases like HDR10+ where metadata info is part of
> > > > > the bitstream.
> > > > > To handle such frame specific data, support for request api on capture
> > > > > plane would be needed.
> > > >
> > > > That's for the decoder, right? So the decoder extracts the HDR10+ metadata
> > > > and fills in a control with the metadata?
> > > >
> > > > If that's the case, then it matches a similar request I got from mediatek.
> > > > What is needed is support for 'read-only' requests: i.e. the driver can
> > > > associate controls with a capture buffer and return that to userspace. But
> > > > it is not possible to *set* controls in userspace when queuing the request.
> > > >
> > > > If you think about it you'll see that setting controls in userspace for
> > > > a capture queue request makes no sense, but having the driver add set
> > > > read-only controls when the request is finished is fine and makes sense.
> > >
> > > Hi Hans,
> > >
> > > I'm not sure I follow you on what will this look like in userspace. Can you post
> > > an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
> > > how the request helps in synchronizing the read of the metadata controls (over
> > > simply reading the control after a DQBUF, which we can match to a specific input
> > > using the provided timestamp). I also fail to understand how this aligns with
> > > Stanimir concern with the performance of the get control interface.
>
> Hmm, I think I missed this email.
>
> Note that there is no guarantee that reading a control after a DQBUF will give
> you the value for that dequeued frame. The driver may already have generated
> multiple capture frames by the time userspace dequeues the oldest buffer.
>
> > As there was no answer, I'll try and ask a more precise question. While
> > I still believe it's better done in userspace for M2M decoder, there is
> > a need for HDMI receivers (hence adding direct CC to Dylan Yip from
> > Xilinx).
> >
> > Would this match your expected userspace workflow ?
> >
> > 1. Allocate capture buffers
> > 2. Allocate the same number of request fd (ro request)
> > 3. For each capture buffer
> > 3.1 Queue the capture buffer (passing the request)
> > 3.2 Queue the request
> > 4. Wait for capture buffer to be ready
> > 5. Dequeue a capture buffer, and lookup it's request FD
> > 6. Wait for the request to complete (needed ?)
> > 7. Read the meta control passing the request
>
> Right.
>
> > I'm not sure if 6. is required, driver could just to things in the
> > right order (store the meta in the request before marking the buffer
> > done).
>
> For HDR information you don't need 6. For histogram/statistics such
> information might become available after the capture buffer was ready.
>
> I would just implement this as follows:
>
> 4. Wait for the request to complete

I should have clarified my thought, I first wrote it like this, but
then realized that it forces the driver into picking up capture buffer
in the same order they were queued. For an encoder, it might be a
problem as in practice it does not encode in the same order it will
produce (I think this depends on internal encoder implementation
details though).

So in fact, I was waiting on the queue, to pop a Capture buffer to
figure-out which request was being involved. And then I'd get the
control value for that request (which may need waiting further on the
request, the "heavy" reference is there).

But I think we can assume this is all in order for HDMI/SDI/CSI capture
(well raw/bayer capture mostly) and keep it a bit more lightweight
there.

> 5. Dequeue a capture buffer, and lookup it's request FD
> 6. Read the meta control passing the request
>
> No need to wait for both the request and the capture buffer.

Ack.

>
> Now this is pretty heavy, but does not seems broken. The
> > question is how to you avoid reading the control if it haven't changed
> > ? Can driver guaranty to send the control change event before marking a
> > buffer done ? (this is for static HDR meta, which will change when
> > stream is change, e.g. starting/ending of adds on TV).
>
> There are two possible mechanisms today:
>
> 1) use requests for the capture queue, and then you need to query the
> request for the static HDR control. The advantage is that this is a
> 1-to-1 mapping of the control value with the buffer. The disadvantage is that
> it adds overhead.
>
> 2) don't use requests, instead just subscribe to the control event.
> Whenever the static HDR meta data changes, you'll get an event. But
> then it isn't linked to a specific frame.
>
> A third option would be to extend struct v4l2_event_ctrl with a __u32
> buffer_index field to associate a capture buffer with this change. You'd
> need an addition control flag as well to indicate that this control is
> associated with a specific buffer.
>
> (Just a brainstorm, this no doubt would need some refinement)

Or extend with a target sequence number, as we already have this unique
number handy.

In fact I was already thinking about extending the event with a target
v4l2_buffer sequence. But then I realized that if you have two
consecutive changes, there is still no way to ensure you can read the
control value matching the event you are processing.

So this idea would conceptually mean that instead of using the
"request" as a control storage, we'd be using per "buffer" control
storage. I wonder how realistic this is in the existing framework. But
could be a thread to explore.

To add to the brainstorm, we could introduce a meta type for controls.
Can't think of a name now, but basically we would "pop" a control
value. That control would contain the information needed to identify
the buffer it applied to (like a sequence number). Fifo would be leaky
at the tail, and sized around the size of the queue. This way,
userspace can ignore it without causing any leaks, which also allow for
backward compatibility.

The "pop" ioctl could be extended with the target sequence number, so
that you get exactly the control you are looking for (ignoring the
older one, and preventing the newer one from being extracted). Of
course, the event is needed to tel when something get queued for static
meta.

>
> While for HDR10+
> > and similar dynamic meta, we expect a new value for every run, HDR10+
> > data is much much bigger. Do we really want that to be model around a
> > control ?
>
> No, we don't. For that you would want a metadata video device.
>
> Dynamic metadata is a separate problem. I would also really like to know
> a bit more about the various HW implementations for this before deciding
> on a specific API. E.g., can all HW pass this data to a separate DMA engine?
> Would it only split off dynamic metadata or just all InfoFrames?

That seems like a HW question, I'll pass. Though, in my mind, the
separated metadata video device are only usable if there is a 1:1
relationship between buffers and metadata. For other use cases, it's
missing a notification mechanism (like described above) to avoid having
to introduce arbitrary (and likely bogus) latency that ensure we didn't
proceed without the associate metadata. To I think it's not a very
powerful way to pass around metadata, unless it's 1:1 with the buffers,
which for HDR10+ fits.

>
> Regards,
>
> Hans
>
> > > > Implementing this shouldn't be a big job: you'd need a new
> > > > V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
> > > > capability, a corresponding new flag in struct vb2_queue, a new ro_requests
> > > > flag in
> > > > struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
> > > > refuse to
> > > > try/set any controls if it is true.
> > > >
> > > > Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
> > > > case where both
> > > > capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
> > > >
> > > > And the documentation needs to be updated.
> > > >
> > > > I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
> > > > this already.
> > > >
> > > > Regards,
> > > >
> > > > Hans
> > > >
> > > > > Thanks,
> > > > > Dikshita
> > > > > > > We profiled below data with this implementation :
> > > > > > > 1. Total time taken ( round trip ) for setting up control data on
> > > > > > > video driver
> > > > > > > with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
> > > > > > > 2. Time taken for first QBUF on Output plane to reach driver with
> > > > > > > REQUEST API enabled (One way): 723us
> > > > > > > 3. Time taken for first QBUF on Output plane to reach driver without
> > > > > > > REQUEST API (One way) : 250us
> > > > > > > 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
> > > > > > > API enabled :
> > > > > > > a. VIDIOC_S_EXT_CTRLS : 201us
> > > > > > > b. VIDIOC_QBUF : 92us
> > > > > > > c. MEDIA_REQUEST_IOC_QUEUE: 386us
> > > > > > >
> > > > > > > Kindly share your feedback/comments on the design/call sequence.
> > > > > > > Also as we experimented and enabled the metadata on capture plane as
> > > > > > > well, please comment if any issue in
> > > > > > > allowing the metadata exchange on capture plane as well.
> > > > > > >
> > > > > > > Reference for client side implementation can be found at [1].
> > > > > > >
> > > > > > > Thanks,
> > > > > > > Dikshita
> > > > > > >
> > > > > > > [1]
> > > > > > > https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
> > > > > > >
> > > > > > > Dikshita Agarwal (3):
> > > > > > > Register for media device
> > > > > > > - Initialize and register for media device
> > > > > > > - define venus_m2m_media_ops
> > > > > > > - Implement APIs to register/unregister media controller.
> > > > > > > Enable Request API for output buffers
> > > > > > > - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
> > > > > > > - Initialize vb2 ops buf_out_validate and buf_request_complete.
> > > > > > > - Add support for custom Metadata control
> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
> > > > > > > - Implemeted/Integrated APIs for Request setup/complete.
> > > > > > > Enable Request API for Capture Buffers
> > > > > > >
> > > > > > > drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
> > > > > > > drivers/media/platform/Kconfig | 2 +-
> > > > > > > drivers/media/platform/qcom/venus/core.h | 36 ++++
> > > > > > > drivers/media/platform/qcom/venus/helpers.c | 247
> > > > > > > +++++++++++++++++++++++-
> > > > > > > drivers/media/platform/qcom/venus/helpers.h | 15 ++
> > > > > > > drivers/media/platform/qcom/venus/venc.c | 63 +++++-
> > > > > > > drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
> > > > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
> > > > > > > drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
> > > > > > > include/media/v4l2-ctrls.h | 1 +
> > > > > > > include/media/venus-ctrls.h | 22 +++
> > > > > > > 11 files changed, 465 insertions(+), 13 deletions(-)
> > > > > > > create mode 100644 include/media/venus-ctrls.h
> > > > > > >

2020-06-16 13:04:39

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Hi Nicolas, Hans,

Thanks for your comments and sorry for the delayed response.

On 2020-06-12 22:07, Nicolas Dufresne wrote:
> Le vendredi 12 juin 2020 à 12:05 +0200, Hans Verkuil a écrit :
>> On 11/06/2020 17:06, Nicolas Dufresne wrote:
>> > Le jeudi 28 mai 2020 à 22:08 -0400, Nicolas Dufresne a écrit :
>> > > Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
>> > > > On 28/05/2020 12:48, [email protected] wrote:
>> > > > > Hi Hans,
>> > > > >
>> > > > > Thanks for the review.
>> > > > >
>> > > > > On 2020-05-26 16:27, Hans Verkuil wrote:
>> > > > > > Hi Dikshita,
>> > > > > >
>> > > > > > My apologies for the delay, this was (mostly) due to various vacation
>> > > > > > days.
>> > > > > >
>> > > > > > On 08/05/2020 08:21, Dikshita Agarwal wrote:
>> > > > > > > There are many commercialized video use cases which needs metadata
>> > > > > > > info
>> > > > > > > to be circulated between v4l2 client and v4l2 driver.
>> > > > > > >
>> > > > > > > METADATA has following requirements associated:
>> > > > > > > •Metadata is an optional info available for a buffer. It is not
>> > > > > > > mandatorily for every buffer.
>> > > > > > > For ex. consider metadata ROI (Region Of Interest). ROI is specified
>> > > > > > > by clients to indicate
>> > > > > > > the region where enhanced quality is desired. This metadata is given
>> > > > > > > as an input information
>> > > > > > > to encoder output plane. Client may or may not specify the ROI for a
>> > > > > > > frame during encode as
>> > > > > > > an input metadata. Also if the client has not provided ROI metadata
>> > > > > > > for a given frame,
>> > > > > > > it would be incorrect to take the metadata from previous frame. If
>> > > > > > > the data and
>> > > > > > > metadata is asynchronous, it would be difficult for hardware to
>> > > > > > > decide if it
>> > > > > > > needs to wait for metadata buffer or not before processing the input
>> > > > > > > frame for encoding.
>> > > > > > > •Synchronize the buffer requirement across both the video node/session
>> > > > > > > (incase metadata is being processed as a separate v4l2 video
>> > > > > > > node/session).
>> > > > > > > This is to avoid buffer starvation.
>> > > > > > > •Associate the metadata buffer with the data buffer without adding any
>> > > > > > > pipeline delay
>> > > > > > > in waiting for each other. This is applicable both at the hardware
>> > > > > > > side (the processing end)
>> > > > > > > and client side (the receiving end).
>> > > > > > > •Low latency usecases like WFD/split rendering/game streaming/IMS have
>> > > > > > > sub-50ms e2e latency
>> > > > > > > requirements, and it is not practical to stall the pipeline due to
>> > > > > > > inherent framework latencies.
>> > > > > > > High performance usecase like high-frame rate playback/record can
>> > > > > > > lead to frame loss during any pipeline latency.
>> > > > > > >
>> > > > > > > To address all above requirements, we used v4l2 Request API as
>> > > > > > > interlace.
>> > > > > > >
>> > > > > > > As an experiment, We have introduced new control
>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>> > > > > > > to contain the METADATA info. Exact controls can be finalized once the
>> > > > > > > interface is discussed.
>> > > > > > >
>> > > > > > > For setting metadata from userspace to kernel, let say on encode
>> > > > > > > output plane,
>> > > > > > > following code sequence was followed
>> > > > > > > 1. Video driver is registering for media device and creating a media
>> > > > > > > node in /dev
>> > > > > > > 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>> > > > > > > media fd.
>> > > > > > > 3. METADATA configuration is being applied on request fd using
>> > > > > > > VIDIOC_S_EXT_CTRLS IOCTL
>> > > > > > > and the same request fd is added to buf structure structure before
>> > > > > > > calling VIDIOC_QBUF on video fd.
>> > > > > > > 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>> > > > > > > driver then, as a result
>> > > > > > > to which METADATA control will be applied to buffer through S_CTRL.
>> > > > > > > 5. Once control is applied and request is completed,
>> > > > > > > MEDIA_REQUEST_IOC_REINIT IOCTL is called
>> > > > > > > to re-initialize the request.
>> > > > > >
>> > > > > > This is fine and should work well. It's what the Request API is for,
>> > > > > > so no problems here.
>> > > > > >
>> > > > > > > We could achieve the same on capture plane as well by removing few
>> > > > > > > checks present currently
>> > > > > > > in v4l2 core which restrict the implementation to only output plane.
>> > > > > >
>> > > > > > Why do you need the Request API for the capture side in a
>> > > > > > memory-to-memory driver? It is not
>> > > > > > clear from this patch series what the use-case is. There are reasons
>> > > > > > why this is currently
>> > > > > > not allowed. So I need to know more about this.
>> > > > > >
>> > > > > > Regards,
>> > > > > >
>> > > > > > Hans
>> > > > > >
>> > > > > we need this for use cases like HDR10+ where metadata info is part of
>> > > > > the bitstream.
>> > > > > To handle such frame specific data, support for request api on capture
>> > > > > plane would be needed.
>> > > >
>> > > > That's for the decoder, right? So the decoder extracts the HDR10+ metadata
>> > > > and fills in a control with the metadata?
>> > > >
>> > > > If that's the case, then it matches a similar request I got from mediatek.
>> > > > What is needed is support for 'read-only' requests: i.e. the driver can
>> > > > associate controls with a capture buffer and return that to userspace. But
>> > > > it is not possible to *set* controls in userspace when queuing the request.
>> > > >
>> > > > If you think about it you'll see that setting controls in userspace for
>> > > > a capture queue request makes no sense, but having the driver add set
>> > > > read-only controls when the request is finished is fine and makes sense.

I tried an experiment where I removed set control from the client for
metadata,
queued buffer and request to driver and when capture buffer was ready, I
dequeued the buffer,
extracted the request fd and called get_ext_control IOCTL on driver.
this resulted in error from
https://elixir.bootlin.com/linux/latest/source/drivers/media/v4l2-core/v4l2-ctrls.c#L3678
based on what I understood, since there was no set control for the
request,
there was no v4l2 ctrl handler object associated with it so no obj was
found and
hence the error(please correct if my understanding is wrong).

So if client is not supposed to call set control on capture plane then
1. how to avoid the above error?
2. should driver call v4l2_s_ext_ctrls() with the request once buffer
and request are
queued to driver? Driver should be able to extract the request fd
from request to
achieve the same right? is that possible?

>> > >
>> > > Hi Hans,
>> > >
>> > > I'm not sure I follow you on what will this look like in userspace. Can you post
>> > > an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
>> > > how the request helps in synchronizing the read of the metadata controls (over
>> > > simply reading the control after a DQBUF, which we can match to a specific input
>> > > using the provided timestamp). I also fail to understand how this aligns with
>> > > Stanimir concern with the performance of the get control interface.
>>
>> Hmm, I think I missed this email.
>>
>> Note that there is no guarantee that reading a control after a DQBUF
>> will give
>> you the value for that dequeued frame. The driver may already have
>> generated
>> multiple capture frames by the time userspace dequeues the oldest
>> buffer.
>>
>> > As there was no answer, I'll try and ask a more precise question. While
>> > I still believe it's better done in userspace for M2M decoder, there is
>> > a need for HDMI receivers (hence adding direct CC to Dylan Yip from
>> > Xilinx).
>> >
>> > Would this match your expected userspace workflow ?
>> >
>> > 1. Allocate capture buffers
>> > 2. Allocate the same number of request fd (ro request)
>> > 3. For each capture buffer
>> > 3.1 Queue the capture buffer (passing the request)
>> > 3.2 Queue the request
>> > 4. Wait for capture buffer to be ready
>> > 5. Dequeue a capture buffer, and lookup it's request FD
>> > 6. Wait for the request to complete (needed ?)
>> > 7. Read the meta control passing the request
>>
>> Right.
>>
>> > I'm not sure if 6. is required, driver could just to things in the
>> > right order (store the meta in the request before marking the buffer
>> > done).
>>
>> For HDR information you don't need 6. For histogram/statistics such
>> information might become available after the capture buffer was ready.
>>
>> I would just implement this as follows:
>>
>> 4. Wait for the request to complete
>
> I should have clarified my thought, I first wrote it like this, but
> then realized that it forces the driver into picking up capture buffer
> in the same order they were queued. For an encoder, it might be a
> problem as in practice it does not encode in the same order it will
> produce (I think this depends on internal encoder implementation
> details though).
>
> So in fact, I was waiting on the queue, to pop a Capture buffer to
> figure-out which request was being involved. And then I'd get the
> control value for that request (which may need waiting further on the
> request, the "heavy" reference is there).
>
> But I think we can assume this is all in order for HDMI/SDI/CSI capture
> (well raw/bayer capture mostly) and keep it a bit more lightweight
> there.
>
>> 5. Dequeue a capture buffer, and lookup it's request FD
>> 6. Read the meta control passing the request
>>
>> No need to wait for both the request and the capture buffer.
>
> Ack.

The sequence which I followed for implementing this is.
1. Allocate capture buffers
2. Allocate the same number of request fd
3. For each capture buffer
3.1 call VIDIOC_S_EXT_CTRLS IOCTL to the driver with metadata info
and request fd
3.2 Queue the capture buffer (passing the request)
3.3 Queue the request
4. Wait for capture buffer to be ready
5. Dequeue a capture buffer, and lookup it's request FD
6. Call VIDIOC_G_EXT_CTRLS passing the request for the dequeued buffer.

Qualcomm H/W generates both data buffer and metadata buffer at the same
time and
provides this info to the driver in a single response, there won't be
separate responses for
data buffer and metadata buffer.
so there is no need to wait for request completion.
The driver will write meta info in the control before sending buffer
done to the client.
so when the buffer is available for deque, meta info will also be
available.

but I agree that the wait for the request might be required for HW that
generates
data buffer and metadata buffer separately.

>>
>> Now this is pretty heavy, but does not seems broken. The
>> > question is how to you avoid reading the control if it haven't changed
>> > ? Can driver guaranty to send the control change event before marking a
>> > buffer done ? (this is for static HDR meta, which will change when
>> > stream is change, e.g. starting/ending of adds on TV).
>>
>> There are two possible mechanisms today:
>>
>> 1) use requests for the capture queue, and then you need to query the
>> request for the static HDR control. The advantage is that this is a
>> 1-to-1 mapping of the control value with the buffer. The disadvantage
>> is that
>> it adds overhead.

Since we need 1-to-1 mapping of buffer and metadata info using this
approach for
capture queue as well should be fine, right? or there are any concerns?

>>
>> 2) don't use requests, instead just subscribe to the control event.
>> Whenever the static HDR meta data changes, you'll get an event. But
>> then it isn't linked to a specific frame.
>>
>> A third option would be to extend struct v4l2_event_ctrl with a __u32
>> buffer_index field to associate a capture buffer with this change.
>> You'd
>> need an addition control flag as well to indicate that this control is
>> associated with a specific buffer.
>>
>> (Just a brainstorm, this no doubt would need some refinement)
>
> Or extend with a target sequence number, as we already have this unique
> number handy.
>
> In fact I was already thinking about extending the event with a target
> v4l2_buffer sequence. But then I realized that if you have two
> consecutive changes, there is still no way to ensure you can read the
> control value matching the event you are processing.
>
> So this idea would conceptually mean that instead of using the
> "request" as a control storage, we'd be using per "buffer" control
> storage. I wonder how realistic this is in the existing framework. But
> could be a thread to explore.
>
> To add to the brainstorm, we could introduce a meta type for controls.
> Can't think of a name now, but basically we would "pop" a control
> value. That control would contain the information needed to identify
> the buffer it applied to (like a sequence number). Fifo would be leaky
> at the tail, and sized around the size of the queue. This way,
> userspace can ignore it without causing any leaks, which also allow for
> backward compatibility.
>
> The "pop" ioctl could be extended with the target sequence number, so
> that you get exactly the control you are looking for (ignoring the
> older one, and preventing the newer one from being extracted). Of
> course, the event is needed to tel when something get queued for static
> meta.
>
>>
>> While for HDR10+
>> > and similar dynamic meta, we expect a new value for every run, HDR10+
>> > data is much much bigger. Do we really want that to be model around a
>> > control ?
>>
>> No, we don't. For that you would want a metadata video device.
>>
>> Dynamic metadata is a separate problem. I would also really like to
>> know
>> a bit more about the various HW implementations for this before
>> deciding
>> on a specific API. E.g., can all HW pass this data to a separate DMA
>> engine?
>> Would it only split off dynamic metadata or just all InfoFrames?

Qualcomm HW has a single DMA engine that stores both data and metadata
buffer and
the driver is notified by HW in a single response with both data buffer
and metadata buffer.
So using a request based approach for HDR10+ metadata or any dynamic
data as well should be fine?

If not, what could be the other way to achieve this?

>
> That seems like a HW question, I'll pass. Though, in my mind, the
> separated metadata video device are only usable if there is a 1:1
> relationship between buffers and metadata. For other use cases, it's
> missing a notification mechanism (like described above) to avoid having
> to introduce arbitrary (and likely bogus) latency that ensure we didn't
> proceed without the associate metadata. To I think it's not a very
> powerful way to pass around metadata, unless it's 1:1 with the buffers,
> which for HDR10+ fits.
>
>>
>> Regards,
>>
>> Hans
>>
>> > > > Implementing this shouldn't be a big job: you'd need a new
>> > > > V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
>> > > > capability, a corresponding new flag in struct vb2_queue, a new ro_requests
>> > > > flag in
>> > > > struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
>> > > > refuse to
>> > > > try/set any controls if it is true.
>> > > >
>> > > > Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
>> > > > case where both
>> > > > capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
>> > > >
>> > > > And the documentation needs to be updated.
>> > > >
>> > > > I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
>> > > > this already.
>> > > >
>> > > > Regards,
>> > > >
>> > > > Hans
>> > > >
>> > > > > Thanks,
>> > > > > Dikshita
>> > > > > > > We profiled below data with this implementation :
>> > > > > > > 1. Total time taken ( round trip ) for setting up control data on
>> > > > > > > video driver
>> > > > > > > with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>> > > > > > > 2. Time taken for first QBUF on Output plane to reach driver with
>> > > > > > > REQUEST API enabled (One way): 723us
>> > > > > > > 3. Time taken for first QBUF on Output plane to reach driver without
>> > > > > > > REQUEST API (One way) : 250us
>> > > > > > > 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>> > > > > > > API enabled :
>> > > > > > > a. VIDIOC_S_EXT_CTRLS : 201us
>> > > > > > > b. VIDIOC_QBUF : 92us
>> > > > > > > c. MEDIA_REQUEST_IOC_QUEUE: 386us
>> > > > > > >
>> > > > > > > Kindly share your feedback/comments on the design/call sequence.
>> > > > > > > Also as we experimented and enabled the metadata on capture plane as
>> > > > > > > well, please comment if any issue in
>> > > > > > > allowing the metadata exchange on capture plane as well.
>> > > > > > >
>> > > > > > > Reference for client side implementation can be found at [1].
>> > > > > > >
>> > > > > > > Thanks,
>> > > > > > > Dikshita
>> > > > > > >
>> > > > > > > [1]
>> > > > > > > https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>> > > > > > >
>> > > > > > > Dikshita Agarwal (3):
>> > > > > > > Register for media device
>> > > > > > > - Initialize and register for media device
>> > > > > > > - define venus_m2m_media_ops
>> > > > > > > - Implement APIs to register/unregister media controller.
>> > > > > > > Enable Request API for output buffers
>> > > > > > > - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>> > > > > > > - Initialize vb2 ops buf_out_validate and buf_request_complete.
>> > > > > > > - Add support for custom Metadata control
>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>> > > > > > > - Implemeted/Integrated APIs for Request setup/complete.
>> > > > > > > Enable Request API for Capture Buffers
>> > > > > > >
>> > > > > > > drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
>> > > > > > > drivers/media/platform/Kconfig | 2 +-
>> > > > > > > drivers/media/platform/qcom/venus/core.h | 36 ++++
>> > > > > > > drivers/media/platform/qcom/venus/helpers.c | 247
>> > > > > > > +++++++++++++++++++++++-
>> > > > > > > drivers/media/platform/qcom/venus/helpers.h | 15 ++
>> > > > > > > drivers/media/platform/qcom/venus/venc.c | 63 +++++-
>> > > > > > > drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>> > > > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
>> > > > > > > drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
>> > > > > > > include/media/v4l2-ctrls.h | 1 +
>> > > > > > > include/media/venus-ctrls.h | 22 +++
>> > > > > > > 11 files changed, 465 insertions(+), 13 deletions(-)
>> > > > > > > create mode 100644 include/media/venus-ctrls.h
>> > > > > > >

2020-06-23 12:50:38

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Hi Hans,

On 2020-06-16 18:30, [email protected] wrote:
> Hi Nicolas, Hans,
>
> Thanks for your comments and sorry for the delayed response.
>
> On 2020-06-12 22:07, Nicolas Dufresne wrote:
>> Le vendredi 12 juin 2020 à 12:05 +0200, Hans Verkuil a écrit :
>>> On 11/06/2020 17:06, Nicolas Dufresne wrote:
>>> > Le jeudi 28 mai 2020 à 22:08 -0400, Nicolas Dufresne a écrit :
>>> > > Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
>>> > > > On 28/05/2020 12:48, [email protected] wrote:
>>> > > > > Hi Hans,
>>> > > > >
>>> > > > > Thanks for the review.
>>> > > > >
>>> > > > > On 2020-05-26 16:27, Hans Verkuil wrote:
>>> > > > > > Hi Dikshita,
>>> > > > > >
>>> > > > > > My apologies for the delay, this was (mostly) due to various vacation
>>> > > > > > days.
>>> > > > > >
>>> > > > > > On 08/05/2020 08:21, Dikshita Agarwal wrote:
>>> > > > > > > There are many commercialized video use cases which needs metadata
>>> > > > > > > info
>>> > > > > > > to be circulated between v4l2 client and v4l2 driver.
>>> > > > > > >
>>> > > > > > > METADATA has following requirements associated:
>>> > > > > > > •Metadata is an optional info available for a buffer. It is not
>>> > > > > > > mandatorily for every buffer.
>>> > > > > > > For ex. consider metadata ROI (Region Of Interest). ROI is specified
>>> > > > > > > by clients to indicate
>>> > > > > > > the region where enhanced quality is desired. This metadata is given
>>> > > > > > > as an input information
>>> > > > > > > to encoder output plane. Client may or may not specify the ROI for a
>>> > > > > > > frame during encode as
>>> > > > > > > an input metadata. Also if the client has not provided ROI metadata
>>> > > > > > > for a given frame,
>>> > > > > > > it would be incorrect to take the metadata from previous frame. If
>>> > > > > > > the data and
>>> > > > > > > metadata is asynchronous, it would be difficult for hardware to
>>> > > > > > > decide if it
>>> > > > > > > needs to wait for metadata buffer or not before processing the input
>>> > > > > > > frame for encoding.
>>> > > > > > > •Synchronize the buffer requirement across both the video node/session
>>> > > > > > > (incase metadata is being processed as a separate v4l2 video
>>> > > > > > > node/session).
>>> > > > > > > This is to avoid buffer starvation.
>>> > > > > > > •Associate the metadata buffer with the data buffer without adding any
>>> > > > > > > pipeline delay
>>> > > > > > > in waiting for each other. This is applicable both at the hardware
>>> > > > > > > side (the processing end)
>>> > > > > > > and client side (the receiving end).
>>> > > > > > > •Low latency usecases like WFD/split rendering/game streaming/IMS have
>>> > > > > > > sub-50ms e2e latency
>>> > > > > > > requirements, and it is not practical to stall the pipeline due to
>>> > > > > > > inherent framework latencies.
>>> > > > > > > High performance usecase like high-frame rate playback/record can
>>> > > > > > > lead to frame loss during any pipeline latency.
>>> > > > > > >
>>> > > > > > > To address all above requirements, we used v4l2 Request API as
>>> > > > > > > interlace.
>>> > > > > > >
>>> > > > > > > As an experiment, We have introduced new control
>>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>> > > > > > > to contain the METADATA info. Exact controls can be finalized once the
>>> > > > > > > interface is discussed.
>>> > > > > > >
>>> > > > > > > For setting metadata from userspace to kernel, let say on encode
>>> > > > > > > output plane,
>>> > > > > > > following code sequence was followed
>>> > > > > > > 1. Video driver is registering for media device and creating a media
>>> > > > > > > node in /dev
>>> > > > > > > 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>>> > > > > > > media fd.
>>> > > > > > > 3. METADATA configuration is being applied on request fd using
>>> > > > > > > VIDIOC_S_EXT_CTRLS IOCTL
>>> > > > > > > and the same request fd is added to buf structure structure before
>>> > > > > > > calling VIDIOC_QBUF on video fd.
>>> > > > > > > 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>>> > > > > > > driver then, as a result
>>> > > > > > > to which METADATA control will be applied to buffer through S_CTRL.
>>> > > > > > > 5. Once control is applied and request is completed,
>>> > > > > > > MEDIA_REQUEST_IOC_REINIT IOCTL is called
>>> > > > > > > to re-initialize the request.
>>> > > > > >
>>> > > > > > This is fine and should work well. It's what the Request API is for,
>>> > > > > > so no problems here.
>>> > > > > >
>>> > > > > > > We could achieve the same on capture plane as well by removing few
>>> > > > > > > checks present currently
>>> > > > > > > in v4l2 core which restrict the implementation to only output plane.
>>> > > > > >
>>> > > > > > Why do you need the Request API for the capture side in a
>>> > > > > > memory-to-memory driver? It is not
>>> > > > > > clear from this patch series what the use-case is. There are reasons
>>> > > > > > why this is currently
>>> > > > > > not allowed. So I need to know more about this.
>>> > > > > >
>>> > > > > > Regards,
>>> > > > > >
>>> > > > > > Hans
>>> > > > > >
>>> > > > > we need this for use cases like HDR10+ where metadata info is part of
>>> > > > > the bitstream.
>>> > > > > To handle such frame specific data, support for request api on capture
>>> > > > > plane would be needed.
>>> > > >
>>> > > > That's for the decoder, right? So the decoder extracts the HDR10+ metadata
>>> > > > and fills in a control with the metadata?
>>> > > >
>>> > > > If that's the case, then it matches a similar request I got from mediatek.
>>> > > > What is needed is support for 'read-only' requests: i.e. the driver can
>>> > > > associate controls with a capture buffer and return that to userspace. But
>>> > > > it is not possible to *set* controls in userspace when queuing the request.
>>> > > >
>>> > > > If you think about it you'll see that setting controls in userspace for
>>> > > > a capture queue request makes no sense, but having the driver add set
>>> > > > read-only controls when the request is finished is fine and makes sense.
>
> I tried an experiment where I removed set control from the client for
> metadata,
> queued buffer and request to driver and when capture buffer was ready,
> I dequeued the buffer,
> extracted the request fd and called get_ext_control IOCTL on driver.
> this resulted in error from
> https://elixir.bootlin.com/linux/latest/source/drivers/media/v4l2-core/v4l2-ctrls.c#L3678
> based on what I understood, since there was no set control for the
> request,
> there was no v4l2 ctrl handler object associated with it so no obj was
> found and
> hence the error(please correct if my understanding is wrong).
>
> So if client is not supposed to call set control on capture plane then
> 1. how to avoid the above error?
> 2. should driver call v4l2_s_ext_ctrls() with the request once buffer
> and request are
> queued to driver? Driver should be able to extract the request fd
> from request to
> achieve the same right? is that possible?
>
>>> > >
>>> > > Hi Hans,
>>> > >
>>> > > I'm not sure I follow you on what will this look like in userspace. Can you post
>>> > > an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
>>> > > how the request helps in synchronizing the read of the metadata controls (over
>>> > > simply reading the control after a DQBUF, which we can match to a specific input
>>> > > using the provided timestamp). I also fail to understand how this aligns with
>>> > > Stanimir concern with the performance of the get control interface.
>>>
>>> Hmm, I think I missed this email.
>>>
>>> Note that there is no guarantee that reading a control after a DQBUF
>>> will give
>>> you the value for that dequeued frame. The driver may already have
>>> generated
>>> multiple capture frames by the time userspace dequeues the oldest
>>> buffer.
>>>
>>> > As there was no answer, I'll try and ask a more precise question. While
>>> > I still believe it's better done in userspace for M2M decoder, there is
>>> > a need for HDMI receivers (hence adding direct CC to Dylan Yip from
>>> > Xilinx).
>>> >
>>> > Would this match your expected userspace workflow ?
>>> >
>>> > 1. Allocate capture buffers
>>> > 2. Allocate the same number of request fd (ro request)
>>> > 3. For each capture buffer
>>> > 3.1 Queue the capture buffer (passing the request)
>>> > 3.2 Queue the request
>>> > 4. Wait for capture buffer to be ready
>>> > 5. Dequeue a capture buffer, and lookup it's request FD
>>> > 6. Wait for the request to complete (needed ?)
>>> > 7. Read the meta control passing the request
>>>
>>> Right.
>>>
>>> > I'm not sure if 6. is required, driver could just to things in the
>>> > right order (store the meta in the request before marking the buffer
>>> > done).
>>>
>>> For HDR information you don't need 6. For histogram/statistics such
>>> information might become available after the capture buffer was
>>> ready.
>>>
>>> I would just implement this as follows:
>>>
>>> 4. Wait for the request to complete
>>
>> I should have clarified my thought, I first wrote it like this, but
>> then realized that it forces the driver into picking up capture buffer
>> in the same order they were queued. For an encoder, it might be a
>> problem as in practice it does not encode in the same order it will
>> produce (I think this depends on internal encoder implementation
>> details though).
>>
>> So in fact, I was waiting on the queue, to pop a Capture buffer to
>> figure-out which request was being involved. And then I'd get the
>> control value for that request (which may need waiting further on the
>> request, the "heavy" reference is there).
>>
>> But I think we can assume this is all in order for HDMI/SDI/CSI
>> capture
>> (well raw/bayer capture mostly) and keep it a bit more lightweight
>> there.
>>
>>> 5. Dequeue a capture buffer, and lookup it's request FD
>>> 6. Read the meta control passing the request
>>>
>>> No need to wait for both the request and the capture buffer.
>>
>> Ack.
>
> The sequence which I followed for implementing this is.
> 1. Allocate capture buffers
> 2. Allocate the same number of request fd
> 3. For each capture buffer
> 3.1 call VIDIOC_S_EXT_CTRLS IOCTL to the driver with metadata info
> and request fd
> 3.2 Queue the capture buffer (passing the request)
> 3.3 Queue the request
> 4. Wait for capture buffer to be ready
> 5. Dequeue a capture buffer, and lookup it's request FD
> 6. Call VIDIOC_G_EXT_CTRLS passing the request for the dequeued buffer.
>
> Qualcomm H/W generates both data buffer and metadata buffer at the same
> time and
> provides this info to the driver in a single response, there won't be
> separate responses for
> data buffer and metadata buffer.
> so there is no need to wait for request completion.
> The driver will write meta info in the control before sending buffer
> done to the client.
> so when the buffer is available for deque, meta info will also be
> available.
>
> but I agree that the wait for the request might be required for HW
> that generates
> data buffer and metadata buffer separately.
>
>>>
>>> Now this is pretty heavy, but does not seems broken. The
>>> > question is how to you avoid reading the control if it haven't changed
>>> > ? Can driver guaranty to send the control change event before marking a
>>> > buffer done ? (this is for static HDR meta, which will change when
>>> > stream is change, e.g. starting/ending of adds on TV).
>>>
>>> There are two possible mechanisms today:
>>>
>>> 1) use requests for the capture queue, and then you need to query the
>>> request for the static HDR control. The advantage is that this is a
>>> 1-to-1 mapping of the control value with the buffer. The disadvantage
>>> is that
>>> it adds overhead.
>
> Since we need 1-to-1 mapping of buffer and metadata info using this
> approach for
> capture queue as well should be fine, right? or there are any concerns?
>
>>>
>>> 2) don't use requests, instead just subscribe to the control event.
>>> Whenever the static HDR meta data changes, you'll get an event. But
>>> then it isn't linked to a specific frame.
>>>
>>> A third option would be to extend struct v4l2_event_ctrl with a __u32
>>> buffer_index field to associate a capture buffer with this change.
>>> You'd
>>> need an addition control flag as well to indicate that this control
>>> is
>>> associated with a specific buffer.
>>>
>>> (Just a brainstorm, this no doubt would need some refinement)
>>
>> Or extend with a target sequence number, as we already have this
>> unique
>> number handy.
>>
>> In fact I was already thinking about extending the event with a target
>> v4l2_buffer sequence. But then I realized that if you have two
>> consecutive changes, there is still no way to ensure you can read the
>> control value matching the event you are processing.
>>
>> So this idea would conceptually mean that instead of using the
>> "request" as a control storage, we'd be using per "buffer" control
>> storage. I wonder how realistic this is in the existing framework. But
>> could be a thread to explore.
>>
>> To add to the brainstorm, we could introduce a meta type for controls.
>> Can't think of a name now, but basically we would "pop" a control
>> value. That control would contain the information needed to identify
>> the buffer it applied to (like a sequence number). Fifo would be leaky
>> at the tail, and sized around the size of the queue. This way,
>> userspace can ignore it without causing any leaks, which also allow
>> for
>> backward compatibility.
>>
>> The "pop" ioctl could be extended with the target sequence number, so
>> that you get exactly the control you are looking for (ignoring the
>> older one, and preventing the newer one from being extracted). Of
>> course, the event is needed to tel when something get queued for
>> static
>> meta.
>>
>>>
>>> While for HDR10+
>>> > and similar dynamic meta, we expect a new value for every run, HDR10+
>>> > data is much much bigger. Do we really want that to be model around a
>>> > control ?
>>>
>>> No, we don't. For that you would want a metadata video device.
>>>
>>> Dynamic metadata is a separate problem. I would also really like to
>>> know
>>> a bit more about the various HW implementations for this before
>>> deciding
>>> on a specific API. E.g., can all HW pass this data to a separate DMA
>>> engine?
>>> Would it only split off dynamic metadata or just all InfoFrames?
>
> Qualcomm HW has a single DMA engine that stores both data and metadata
> buffer and
> the driver is notified by HW in a single response with both data
> buffer and metadata buffer.
> So using a request based approach for HDR10+ metadata or any dynamic
> data as well should be fine?
>
> If not, what could be the other way to achieve this?
>
>>
>> That seems like a HW question, I'll pass. Though, in my mind, the
>> separated metadata video device are only usable if there is a 1:1
>> relationship between buffers and metadata. For other use cases, it's
>> missing a notification mechanism (like described above) to avoid
>> having
>> to introduce arbitrary (and likely bogus) latency that ensure we
>> didn't
>> proceed without the associate metadata. To I think it's not a very
>> powerful way to pass around metadata, unless it's 1:1 with the
>> buffers,
>> which for HDR10+ fits.
>>
>>>
>>> Regards,
>>>
>>> Hans
>>>
>>> > > > Implementing this shouldn't be a big job: you'd need a new
>>> > > > V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
>>> > > > capability, a corresponding new flag in struct vb2_queue, a new ro_requests
>>> > > > flag in
>>> > > > struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
>>> > > > refuse to
>>> > > > try/set any controls if it is true.
>>> > > >
>>> > > > Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
>>> > > > case where both
>>> > > > capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
>>> > > >
>>> > > > And the documentation needs to be updated.
>>> > > >
>>> > > > I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
>>> > > > this already.

I implemented 'read-only' request based on your recommendations.
I had to relax a few checks to make it work and also used
https://lkml.org/lkml/2020/6/21/359 as reference
for the implementation.
In addition to that, I made changes in vb2_core_queue_init() as well to
allow ro request.

In driver, I am initializing a new ro_ctrl_handler with read only
controls I want to support
and setting ro_request to true for this ctrl handler. I am also setting
supports_ro_requests to true
in driver while calling vb2_queue_init() for capture buffer.
In userspace,
1. Waiting for capture buffer to be ready
2. Dequeueing capture buffer, and extracting its request FD
3. Calling VIDIOC_G_EXT_CTRLS passing the request for the dequeued
buffer.
This will internally call g_volatile_ctrl to the driver and driver will
fill the ctrl data/payload.

I have one query here when g_volatile_ctrl is called on driver,
how we will make sure that the data which driver is filling in the ctrl
is actually for the buffer which was dequeued.
for eg: driver might have called buffer done for lets say two buffers
and have metadata associated with
those two buffers with it. Now when get_ctrl is called for one of those
buffers, how will the driver know
which metadata to fill in ctrl data.
Please let me know if I am missing something here.

Thanks,
Dikshita

>>> > > >
>>> > > > Regards,
>>> > > >
>>> > > > Hans
>>> > > >
>>> > > > > Thanks,
>>> > > > > Dikshita
>>> > > > > > > We profiled below data with this implementation :
>>> > > > > > > 1. Total time taken ( round trip ) for setting up control data on
>>> > > > > > > video driver
>>> > > > > > > with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>>> > > > > > > 2. Time taken for first QBUF on Output plane to reach driver with
>>> > > > > > > REQUEST API enabled (One way): 723us
>>> > > > > > > 3. Time taken for first QBUF on Output plane to reach driver without
>>> > > > > > > REQUEST API (One way) : 250us
>>> > > > > > > 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>>> > > > > > > API enabled :
>>> > > > > > > a. VIDIOC_S_EXT_CTRLS : 201us
>>> > > > > > > b. VIDIOC_QBUF : 92us
>>> > > > > > > c. MEDIA_REQUEST_IOC_QUEUE: 386us
>>> > > > > > >
>>> > > > > > > Kindly share your feedback/comments on the design/call sequence.
>>> > > > > > > Also as we experimented and enabled the metadata on capture plane as
>>> > > > > > > well, please comment if any issue in
>>> > > > > > > allowing the metadata exchange on capture plane as well.
>>> > > > > > >
>>> > > > > > > Reference for client side implementation can be found at [1].
>>> > > > > > >
>>> > > > > > > Thanks,
>>> > > > > > > Dikshita
>>> > > > > > >
>>> > > > > > > [1]
>>> > > > > > > https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>>> > > > > > >
>>> > > > > > > Dikshita Agarwal (3):
>>> > > > > > > Register for media device
>>> > > > > > > - Initialize and register for media device
>>> > > > > > > - define venus_m2m_media_ops
>>> > > > > > > - Implement APIs to register/unregister media controller.
>>> > > > > > > Enable Request API for output buffers
>>> > > > > > > - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>>> > > > > > > - Initialize vb2 ops buf_out_validate and buf_request_complete.
>>> > > > > > > - Add support for custom Metadata control
>>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>> > > > > > > - Implemeted/Integrated APIs for Request setup/complete.
>>> > > > > > > Enable Request API for Capture Buffers
>>> > > > > > >
>>> > > > > > > drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
>>> > > > > > > drivers/media/platform/Kconfig | 2 +-
>>> > > > > > > drivers/media/platform/qcom/venus/core.h | 36 ++++
>>> > > > > > > drivers/media/platform/qcom/venus/helpers.c | 247
>>> > > > > > > +++++++++++++++++++++++-
>>> > > > > > > drivers/media/platform/qcom/venus/helpers.h | 15 ++
>>> > > > > > > drivers/media/platform/qcom/venus/venc.c | 63 +++++-
>>> > > > > > > drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>>> > > > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
>>> > > > > > > drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
>>> > > > > > > include/media/v4l2-ctrls.h | 1 +
>>> > > > > > > include/media/venus-ctrls.h | 22 +++
>>> > > > > > > 11 files changed, 465 insertions(+), 13 deletions(-)
>>> > > > > > > create mode 100644 include/media/venus-ctrls.h
>>> > > > > > >

2020-07-02 06:34:28

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Hi,

A gentle reminder for review.

Thanks,
Dikshita

On 2020-06-23 18:17, [email protected] wrote:
> Hi Hans,
>
> On 2020-06-16 18:30, [email protected] wrote:
>> Hi Nicolas, Hans,
>>
>> Thanks for your comments and sorry for the delayed response.
>>
>> On 2020-06-12 22:07, Nicolas Dufresne wrote:
>>> Le vendredi 12 juin 2020 à 12:05 +0200, Hans Verkuil a écrit :
>>>> On 11/06/2020 17:06, Nicolas Dufresne wrote:
>>>> > Le jeudi 28 mai 2020 à 22:08 -0400, Nicolas Dufresne a écrit :
>>>> > > Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
>>>> > > > On 28/05/2020 12:48, [email protected] wrote:
>>>> > > > > Hi Hans,
>>>> > > > >
>>>> > > > > Thanks for the review.
>>>> > > > >
>>>> > > > > On 2020-05-26 16:27, Hans Verkuil wrote:
>>>> > > > > > Hi Dikshita,
>>>> > > > > >
>>>> > > > > > My apologies for the delay, this was (mostly) due to various vacation
>>>> > > > > > days.
>>>> > > > > >
>>>> > > > > > On 08/05/2020 08:21, Dikshita Agarwal wrote:
>>>> > > > > > > There are many commercialized video use cases which needs metadata
>>>> > > > > > > info
>>>> > > > > > > to be circulated between v4l2 client and v4l2 driver.
>>>> > > > > > >
>>>> > > > > > > METADATA has following requirements associated:
>>>> > > > > > > •Metadata is an optional info available for a buffer. It is not
>>>> > > > > > > mandatorily for every buffer.
>>>> > > > > > > For ex. consider metadata ROI (Region Of Interest). ROI is specified
>>>> > > > > > > by clients to indicate
>>>> > > > > > > the region where enhanced quality is desired. This metadata is given
>>>> > > > > > > as an input information
>>>> > > > > > > to encoder output plane. Client may or may not specify the ROI for a
>>>> > > > > > > frame during encode as
>>>> > > > > > > an input metadata. Also if the client has not provided ROI metadata
>>>> > > > > > > for a given frame,
>>>> > > > > > > it would be incorrect to take the metadata from previous frame. If
>>>> > > > > > > the data and
>>>> > > > > > > metadata is asynchronous, it would be difficult for hardware to
>>>> > > > > > > decide if it
>>>> > > > > > > needs to wait for metadata buffer or not before processing the input
>>>> > > > > > > frame for encoding.
>>>> > > > > > > •Synchronize the buffer requirement across both the video node/session
>>>> > > > > > > (incase metadata is being processed as a separate v4l2 video
>>>> > > > > > > node/session).
>>>> > > > > > > This is to avoid buffer starvation.
>>>> > > > > > > •Associate the metadata buffer with the data buffer without adding any
>>>> > > > > > > pipeline delay
>>>> > > > > > > in waiting for each other. This is applicable both at the hardware
>>>> > > > > > > side (the processing end)
>>>> > > > > > > and client side (the receiving end).
>>>> > > > > > > •Low latency usecases like WFD/split rendering/game streaming/IMS have
>>>> > > > > > > sub-50ms e2e latency
>>>> > > > > > > requirements, and it is not practical to stall the pipeline due to
>>>> > > > > > > inherent framework latencies.
>>>> > > > > > > High performance usecase like high-frame rate playback/record can
>>>> > > > > > > lead to frame loss during any pipeline latency.
>>>> > > > > > >
>>>> > > > > > > To address all above requirements, we used v4l2 Request API as
>>>> > > > > > > interlace.
>>>> > > > > > >
>>>> > > > > > > As an experiment, We have introduced new control
>>>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>>> > > > > > > to contain the METADATA info. Exact controls can be finalized once the
>>>> > > > > > > interface is discussed.
>>>> > > > > > >
>>>> > > > > > > For setting metadata from userspace to kernel, let say on encode
>>>> > > > > > > output plane,
>>>> > > > > > > following code sequence was followed
>>>> > > > > > > 1. Video driver is registering for media device and creating a media
>>>> > > > > > > node in /dev
>>>> > > > > > > 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>>>> > > > > > > media fd.
>>>> > > > > > > 3. METADATA configuration is being applied on request fd using
>>>> > > > > > > VIDIOC_S_EXT_CTRLS IOCTL
>>>> > > > > > > and the same request fd is added to buf structure structure before
>>>> > > > > > > calling VIDIOC_QBUF on video fd.
>>>> > > > > > > 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>>>> > > > > > > driver then, as a result
>>>> > > > > > > to which METADATA control will be applied to buffer through S_CTRL.
>>>> > > > > > > 5. Once control is applied and request is completed,
>>>> > > > > > > MEDIA_REQUEST_IOC_REINIT IOCTL is called
>>>> > > > > > > to re-initialize the request.
>>>> > > > > >
>>>> > > > > > This is fine and should work well. It's what the Request API is for,
>>>> > > > > > so no problems here.
>>>> > > > > >
>>>> > > > > > > We could achieve the same on capture plane as well by removing few
>>>> > > > > > > checks present currently
>>>> > > > > > > in v4l2 core which restrict the implementation to only output plane.
>>>> > > > > >
>>>> > > > > > Why do you need the Request API for the capture side in a
>>>> > > > > > memory-to-memory driver? It is not
>>>> > > > > > clear from this patch series what the use-case is. There are reasons
>>>> > > > > > why this is currently
>>>> > > > > > not allowed. So I need to know more about this.
>>>> > > > > >
>>>> > > > > > Regards,
>>>> > > > > >
>>>> > > > > > Hans
>>>> > > > > >
>>>> > > > > we need this for use cases like HDR10+ where metadata info is part of
>>>> > > > > the bitstream.
>>>> > > > > To handle such frame specific data, support for request api on capture
>>>> > > > > plane would be needed.
>>>> > > >
>>>> > > > That's for the decoder, right? So the decoder extracts the HDR10+ metadata
>>>> > > > and fills in a control with the metadata?
>>>> > > >
>>>> > > > If that's the case, then it matches a similar request I got from mediatek.
>>>> > > > What is needed is support for 'read-only' requests: i.e. the driver can
>>>> > > > associate controls with a capture buffer and return that to userspace. But
>>>> > > > it is not possible to *set* controls in userspace when queuing the request.
>>>> > > >
>>>> > > > If you think about it you'll see that setting controls in userspace for
>>>> > > > a capture queue request makes no sense, but having the driver add set
>>>> > > > read-only controls when the request is finished is fine and makes sense.
>>
>> I tried an experiment where I removed set control from the client for
>> metadata,
>> queued buffer and request to driver and when capture buffer was ready,
>> I dequeued the buffer,
>> extracted the request fd and called get_ext_control IOCTL on driver.
>> this resulted in error from
>> https://elixir.bootlin.com/linux/latest/source/drivers/media/v4l2-core/v4l2-ctrls.c#L3678
>> based on what I understood, since there was no set control for the
>> request,
>> there was no v4l2 ctrl handler object associated with it so no obj was
>> found and
>> hence the error(please correct if my understanding is wrong).
>>
>> So if client is not supposed to call set control on capture plane then
>> 1. how to avoid the above error?
>> 2. should driver call v4l2_s_ext_ctrls() with the request once buffer
>> and request are
>> queued to driver? Driver should be able to extract the request fd
>> from request to
>> achieve the same right? is that possible?
>>
>>>> > >
>>>> > > Hi Hans,
>>>> > >
>>>> > > I'm not sure I follow you on what will this look like in userspace. Can you post
>>>> > > an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
>>>> > > how the request helps in synchronizing the read of the metadata controls (over
>>>> > > simply reading the control after a DQBUF, which we can match to a specific input
>>>> > > using the provided timestamp). I also fail to understand how this aligns with
>>>> > > Stanimir concern with the performance of the get control interface.
>>>>
>>>> Hmm, I think I missed this email.
>>>>
>>>> Note that there is no guarantee that reading a control after a DQBUF
>>>> will give
>>>> you the value for that dequeued frame. The driver may already have
>>>> generated
>>>> multiple capture frames by the time userspace dequeues the oldest
>>>> buffer.
>>>>
>>>> > As there was no answer, I'll try and ask a more precise question. While
>>>> > I still believe it's better done in userspace for M2M decoder, there is
>>>> > a need for HDMI receivers (hence adding direct CC to Dylan Yip from
>>>> > Xilinx).
>>>> >
>>>> > Would this match your expected userspace workflow ?
>>>> >
>>>> > 1. Allocate capture buffers
>>>> > 2. Allocate the same number of request fd (ro request)
>>>> > 3. For each capture buffer
>>>> > 3.1 Queue the capture buffer (passing the request)
>>>> > 3.2 Queue the request
>>>> > 4. Wait for capture buffer to be ready
>>>> > 5. Dequeue a capture buffer, and lookup it's request FD
>>>> > 6. Wait for the request to complete (needed ?)
>>>> > 7. Read the meta control passing the request
>>>>
>>>> Right.
>>>>
>>>> > I'm not sure if 6. is required, driver could just to things in the
>>>> > right order (store the meta in the request before marking the buffer
>>>> > done).
>>>>
>>>> For HDR information you don't need 6. For histogram/statistics such
>>>> information might become available after the capture buffer was
>>>> ready.
>>>>
>>>> I would just implement this as follows:
>>>>
>>>> 4. Wait for the request to complete
>>>
>>> I should have clarified my thought, I first wrote it like this, but
>>> then realized that it forces the driver into picking up capture
>>> buffer
>>> in the same order they were queued. For an encoder, it might be a
>>> problem as in practice it does not encode in the same order it will
>>> produce (I think this depends on internal encoder implementation
>>> details though).
>>>
>>> So in fact, I was waiting on the queue, to pop a Capture buffer to
>>> figure-out which request was being involved. And then I'd get the
>>> control value for that request (which may need waiting further on the
>>> request, the "heavy" reference is there).
>>>
>>> But I think we can assume this is all in order for HDMI/SDI/CSI
>>> capture
>>> (well raw/bayer capture mostly) and keep it a bit more lightweight
>>> there.
>>>
>>>> 5. Dequeue a capture buffer, and lookup it's request FD
>>>> 6. Read the meta control passing the request
>>>>
>>>> No need to wait for both the request and the capture buffer.
>>>
>>> Ack.
>>
>> The sequence which I followed for implementing this is.
>> 1. Allocate capture buffers
>> 2. Allocate the same number of request fd
>> 3. For each capture buffer
>> 3.1 call VIDIOC_S_EXT_CTRLS IOCTL to the driver with metadata info
>> and request fd
>> 3.2 Queue the capture buffer (passing the request)
>> 3.3 Queue the request
>> 4. Wait for capture buffer to be ready
>> 5. Dequeue a capture buffer, and lookup it's request FD
>> 6. Call VIDIOC_G_EXT_CTRLS passing the request for the dequeued
>> buffer.
>>
>> Qualcomm H/W generates both data buffer and metadata buffer at the
>> same time and
>> provides this info to the driver in a single response, there won't be
>> separate responses for
>> data buffer and metadata buffer.
>> so there is no need to wait for request completion.
>> The driver will write meta info in the control before sending buffer
>> done to the client.
>> so when the buffer is available for deque, meta info will also be
>> available.
>>
>> but I agree that the wait for the request might be required for HW
>> that generates
>> data buffer and metadata buffer separately.
>>
>>>>
>>>> Now this is pretty heavy, but does not seems broken. The
>>>> > question is how to you avoid reading the control if it haven't changed
>>>> > ? Can driver guaranty to send the control change event before marking a
>>>> > buffer done ? (this is for static HDR meta, which will change when
>>>> > stream is change, e.g. starting/ending of adds on TV).
>>>>
>>>> There are two possible mechanisms today:
>>>>
>>>> 1) use requests for the capture queue, and then you need to query
>>>> the
>>>> request for the static HDR control. The advantage is that this is a
>>>> 1-to-1 mapping of the control value with the buffer. The
>>>> disadvantage is that
>>>> it adds overhead.
>>
>> Since we need 1-to-1 mapping of buffer and metadata info using this
>> approach for
>> capture queue as well should be fine, right? or there are any
>> concerns?
>>
>>>>
>>>> 2) don't use requests, instead just subscribe to the control event.
>>>> Whenever the static HDR meta data changes, you'll get an event. But
>>>> then it isn't linked to a specific frame.
>>>>
>>>> A third option would be to extend struct v4l2_event_ctrl with a
>>>> __u32
>>>> buffer_index field to associate a capture buffer with this change.
>>>> You'd
>>>> need an addition control flag as well to indicate that this control
>>>> is
>>>> associated with a specific buffer.
>>>>
>>>> (Just a brainstorm, this no doubt would need some refinement)
>>>
>>> Or extend with a target sequence number, as we already have this
>>> unique
>>> number handy.
>>>
>>> In fact I was already thinking about extending the event with a
>>> target
>>> v4l2_buffer sequence. But then I realized that if you have two
>>> consecutive changes, there is still no way to ensure you can read the
>>> control value matching the event you are processing.
>>>
>>> So this idea would conceptually mean that instead of using the
>>> "request" as a control storage, we'd be using per "buffer" control
>>> storage. I wonder how realistic this is in the existing framework.
>>> But
>>> could be a thread to explore.
>>>
>>> To add to the brainstorm, we could introduce a meta type for
>>> controls.
>>> Can't think of a name now, but basically we would "pop" a control
>>> value. That control would contain the information needed to identify
>>> the buffer it applied to (like a sequence number). Fifo would be
>>> leaky
>>> at the tail, and sized around the size of the queue. This way,
>>> userspace can ignore it without causing any leaks, which also allow
>>> for
>>> backward compatibility.
>>>
>>> The "pop" ioctl could be extended with the target sequence number, so
>>> that you get exactly the control you are looking for (ignoring the
>>> older one, and preventing the newer one from being extracted). Of
>>> course, the event is needed to tel when something get queued for
>>> static
>>> meta.
>>>
>>>>
>>>> While for HDR10+
>>>> > and similar dynamic meta, we expect a new value for every run, HDR10+
>>>> > data is much much bigger. Do we really want that to be model around a
>>>> > control ?
>>>>
>>>> No, we don't. For that you would want a metadata video device.
>>>>
>>>> Dynamic metadata is a separate problem. I would also really like to
>>>> know
>>>> a bit more about the various HW implementations for this before
>>>> deciding
>>>> on a specific API. E.g., can all HW pass this data to a separate DMA
>>>> engine?
>>>> Would it only split off dynamic metadata or just all InfoFrames?
>>
>> Qualcomm HW has a single DMA engine that stores both data and metadata
>> buffer and
>> the driver is notified by HW in a single response with both data
>> buffer and metadata buffer.
>> So using a request based approach for HDR10+ metadata or any dynamic
>> data as well should be fine?
>>
>> If not, what could be the other way to achieve this?
>>
>>>
>>> That seems like a HW question, I'll pass. Though, in my mind, the
>>> separated metadata video device are only usable if there is a 1:1
>>> relationship between buffers and metadata. For other use cases, it's
>>> missing a notification mechanism (like described above) to avoid
>>> having
>>> to introduce arbitrary (and likely bogus) latency that ensure we
>>> didn't
>>> proceed without the associate metadata. To I think it's not a very
>>> powerful way to pass around metadata, unless it's 1:1 with the
>>> buffers,
>>> which for HDR10+ fits.
>>>
>>>>
>>>> Regards,
>>>>
>>>> Hans
>>>>
>>>> > > > Implementing this shouldn't be a big job: you'd need a new
>>>> > > > V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
>>>> > > > capability, a corresponding new flag in struct vb2_queue, a new ro_requests
>>>> > > > flag in
>>>> > > > struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
>>>> > > > refuse to
>>>> > > > try/set any controls if it is true.
>>>> > > >
>>>> > > > Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
>>>> > > > case where both
>>>> > > > capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
>>>> > > >
>>>> > > > And the documentation needs to be updated.
>>>> > > >
>>>> > > > I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
>>>> > > > this already.
>
> I implemented 'read-only' request based on your recommendations.
> I had to relax a few checks to make it work and also used
> https://lkml.org/lkml/2020/6/21/359 as reference
> for the implementation.
> In addition to that, I made changes in vb2_core_queue_init() as well
> to allow ro request.
>
> In driver, I am initializing a new ro_ctrl_handler with read only
> controls I want to support
> and setting ro_request to true for this ctrl handler. I am also
> setting supports_ro_requests to true
> in driver while calling vb2_queue_init() for capture buffer.
> In userspace,
> 1. Waiting for capture buffer to be ready
> 2. Dequeueing capture buffer, and extracting its request FD
> 3. Calling VIDIOC_G_EXT_CTRLS passing the request for the dequeued
> buffer.
> This will internally call g_volatile_ctrl to the driver and driver
> will fill the ctrl data/payload.
>
> I have one query here when g_volatile_ctrl is called on driver,
> how we will make sure that the data which driver is filling in the
> ctrl is actually for the buffer which was dequeued.
> for eg: driver might have called buffer done for lets say two buffers
> and have metadata associated with
> those two buffers with it. Now when get_ctrl is called for one of
> those buffers, how will the driver know
> which metadata to fill in ctrl data.
> Please let me know if I am missing something here.
>
> Thanks,
> Dikshita
>
>>>> > > >
>>>> > > > Regards,
>>>> > > >
>>>> > > > Hans
>>>> > > >
>>>> > > > > Thanks,
>>>> > > > > Dikshita
>>>> > > > > > > We profiled below data with this implementation :
>>>> > > > > > > 1. Total time taken ( round trip ) for setting up control data on
>>>> > > > > > > video driver
>>>> > > > > > > with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>>>> > > > > > > 2. Time taken for first QBUF on Output plane to reach driver with
>>>> > > > > > > REQUEST API enabled (One way): 723us
>>>> > > > > > > 3. Time taken for first QBUF on Output plane to reach driver without
>>>> > > > > > > REQUEST API (One way) : 250us
>>>> > > > > > > 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>>>> > > > > > > API enabled :
>>>> > > > > > > a. VIDIOC_S_EXT_CTRLS : 201us
>>>> > > > > > > b. VIDIOC_QBUF : 92us
>>>> > > > > > > c. MEDIA_REQUEST_IOC_QUEUE: 386us
>>>> > > > > > >
>>>> > > > > > > Kindly share your feedback/comments on the design/call sequence.
>>>> > > > > > > Also as we experimented and enabled the metadata on capture plane as
>>>> > > > > > > well, please comment if any issue in
>>>> > > > > > > allowing the metadata exchange on capture plane as well.
>>>> > > > > > >
>>>> > > > > > > Reference for client side implementation can be found at [1].
>>>> > > > > > >
>>>> > > > > > > Thanks,
>>>> > > > > > > Dikshita
>>>> > > > > > >
>>>> > > > > > > [1]
>>>> > > > > > > https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>>>> > > > > > >
>>>> > > > > > > Dikshita Agarwal (3):
>>>> > > > > > > Register for media device
>>>> > > > > > > - Initialize and register for media device
>>>> > > > > > > - define venus_m2m_media_ops
>>>> > > > > > > - Implement APIs to register/unregister media controller.
>>>> > > > > > > Enable Request API for output buffers
>>>> > > > > > > - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>>>> > > > > > > - Initialize vb2 ops buf_out_validate and buf_request_complete.
>>>> > > > > > > - Add support for custom Metadata control
>>>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>>> > > > > > > - Implemeted/Integrated APIs for Request setup/complete.
>>>> > > > > > > Enable Request API for Capture Buffers
>>>> > > > > > >
>>>> > > > > > > drivers/media/common/videobuf2/videobuf2-v4l2.c | 4 +-
>>>> > > > > > > drivers/media/platform/Kconfig | 2 +-
>>>> > > > > > > drivers/media/platform/qcom/venus/core.h | 36 ++++
>>>> > > > > > > drivers/media/platform/qcom/venus/helpers.c | 247
>>>> > > > > > > +++++++++++++++++++++++-
>>>> > > > > > > drivers/media/platform/qcom/venus/helpers.h | 15 ++
>>>> > > > > > > drivers/media/platform/qcom/venus/venc.c | 63 +++++-
>>>> > > > > > > drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>>>> > > > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 10 +
>>>> > > > > > > drivers/media/v4l2-core/v4l2-mem2mem.c | 17 +-
>>>> > > > > > > include/media/v4l2-ctrls.h | 1 +
>>>> > > > > > > include/media/venus-ctrls.h | 22 +++
>>>> > > > > > > 11 files changed, 465 insertions(+), 13 deletions(-)
>>>> > > > > > > create mode 100644 include/media/venus-ctrls.h
>>>> > > > > > >

2020-08-03 11:35:39

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

On 16/06/2020 15:00, [email protected] wrote:
> Hi Nicolas, Hans,
>
> Thanks for your comments and sorry for the delayed response.
>
> On 2020-06-12 22:07, Nicolas Dufresne wrote:
>> Le vendredi 12 juin 2020 à 12:05 +0200, Hans Verkuil a écrit :
>>> On 11/06/2020 17:06, Nicolas Dufresne wrote:
>>> > Le jeudi 28 mai 2020 à 22:08 -0400, Nicolas Dufresne a écrit :
>>> > > Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
>>> > > > On 28/05/2020 12:48, [email protected] wrote:
>>> > > > > Hi Hans,
>>> > > > >
>>> > > > > Thanks for the review.
>>> > > > >
>>> > > > > On 2020-05-26 16:27, Hans Verkuil wrote:
>>> > > > > > Hi Dikshita,
>>> > > > > >
>>> > > > > > My apologies for the delay, this was (mostly) due to various vacation
>>> > > > > > days.
>>> > > > > >
>>> > > > > > On 08/05/2020 08:21, Dikshita Agarwal wrote:
>>> > > > > > > There are many commercialized video use cases which needs metadata
>>> > > > > > > info
>>> > > > > > > to be circulated between v4l2 client and v4l2 driver.
>>> > > > > > >
>>> > > > > > > METADATA has following requirements associated:
>>> > > > > > > •Metadata is an optional info available for a buffer. It is not
>>> > > > > > > mandatorily for every buffer.
>>> > > > > > > For ex. consider metadata ROI (Region Of Interest). ROI is specified
>>> > > > > > > by clients to indicate
>>> > > > > > > the region where enhanced quality is desired. This metadata is given
>>> > > > > > > as an input information
>>> > > > > > > to encoder output plane. Client may or may not specify the ROI for a
>>> > > > > > > frame during encode as
>>> > > > > > > an input metadata. Also if the client has not provided ROI metadata
>>> > > > > > > for a given frame,
>>> > > > > > > it would be incorrect to take the metadata from previous frame. If
>>> > > > > > > the data and
>>> > > > > > > metadata is asynchronous, it would be difficult for hardware to
>>> > > > > > > decide if it
>>> > > > > > > needs to wait for metadata buffer or not before processing the input
>>> > > > > > > frame for encoding.
>>> > > > > > > •Synchronize the buffer requirement across both the video node/session
>>> > > > > > > (incase metadata is being processed as a separate v4l2 video
>>> > > > > > > node/session).
>>> > > > > > > This is to avoid buffer starvation.
>>> > > > > > > •Associate the metadata buffer with the data buffer without adding any
>>> > > > > > > pipeline delay
>>> > > > > > > in waiting for each other. This is applicable both at the hardware
>>> > > > > > > side (the processing end)
>>> > > > > > > and client side (the receiving end).
>>> > > > > > > •Low latency usecases like WFD/split rendering/game streaming/IMS have
>>> > > > > > > sub-50ms e2e latency
>>> > > > > > > requirements, and it is not practical to stall the pipeline due to
>>> > > > > > > inherent framework latencies.
>>> > > > > > > High performance usecase like high-frame rate playback/record can
>>> > > > > > > lead to frame loss during any pipeline latency.
>>> > > > > > >
>>> > > > > > > To address all above requirements, we used v4l2 Request API as
>>> > > > > > > interlace.
>>> > > > > > >
>>> > > > > > > As an experiment, We have introduced new control
>>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>> > > > > > > to contain the METADATA info. Exact controls can be finalized once the
>>> > > > > > > interface is discussed.
>>> > > > > > >
>>> > > > > > > For setting metadata from userspace to kernel, let say on encode
>>> > > > > > > output plane,
>>> > > > > > > following code sequence was followed
>>> > > > > > > 1. Video driver is registering for media device and creating a media
>>> > > > > > > node in /dev
>>> > > > > > > 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>>> > > > > > > media fd.
>>> > > > > > > 3. METADATA configuration is being applied on request fd using
>>> > > > > > > VIDIOC_S_EXT_CTRLS IOCTL
>>> > > > > > >    and the same request fd is added to buf structure structure before
>>> > > > > > > calling VIDIOC_QBUF on video fd.
>>> > > > > > > 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>>> > > > > > > driver then, as a result
>>> > > > > > >    to which METADATA control will be applied to buffer through S_CTRL.
>>> > > > > > > 5. Once control is applied and request is completed,
>>> > > > > > > MEDIA_REQUEST_IOC_REINIT IOCTL is called
>>> > > > > > >    to re-initialize the request.
>>> > > > > >
>>> > > > > > This is fine and should work well. It's what the Request API is for,
>>> > > > > > so no problems here.
>>> > > > > >
>>> > > > > > > We could achieve the same on capture plane as well by removing few
>>> > > > > > > checks present currently
>>> > > > > > > in v4l2 core which restrict the implementation to only output plane.
>>> > > > > >
>>> > > > > > Why do you need the Request API for the capture side in a
>>> > > > > > memory-to-memory driver? It is not
>>> > > > > > clear from this patch series what the use-case is. There are reasons
>>> > > > > > why this is currently
>>> > > > > > not allowed. So I need to know more about this.
>>> > > > > >
>>> > > > > > Regards,
>>> > > > > >
>>> > > > > >     Hans
>>> > > > > >
>>> > > > > we need this for use cases like HDR10+ where metadata info is part of
>>> > > > > the bitstream.
>>> > > > > To handle such frame specific data, support for request api on capture
>>> > > > > plane would be needed.
>>> > > >
>>> > > > That's for the decoder, right? So the decoder extracts the HDR10+ metadata
>>> > > > and fills in a control with the metadata?
>>> > > >
>>> > > > If that's the case, then it matches a similar request I got from mediatek.
>>> > > > What is needed is support for 'read-only' requests: i.e. the driver can
>>> > > > associate controls with a capture buffer and return that to userspace. But
>>> > > > it is not possible to *set* controls in userspace when queuing the request.
>>> > > >
>>> > > > If you think about it you'll see that setting controls in userspace for
>>> > > > a capture queue request makes no sense, but having the driver add set
>>> > > > read-only controls when the request is finished is fine and makes sense.
>
> I tried an experiment where I removed set control from the client for metadata,
> queued buffer and request to driver and when capture buffer was ready, I dequeued the buffer,
> extracted the request fd and called get_ext_control IOCTL on driver.
> this resulted in error from https://elixir.bootlin.com/linux/latest/source/drivers/media/v4l2-core/v4l2-ctrls.c#L3678
> based on what I understood, since there was no set control for the request,
> there was no v4l2 ctrl handler object associated with it so no obj was found and
> hence the error(please correct if my understanding is wrong).
>
> So if client is not supposed to call set control on capture plane then
> 1. how to avoid the above error?
> 2. should driver call v4l2_s_ext_ctrls() with the request once buffer and request are
>    queued to driver? Driver should be able to extract the request fd from request to
>    achieve the same right? is that possible?

It's a bug. This RFC series provides a fix:

https://patchwork.linuxtv.org/project/linux-media/cover/[email protected]/

>
>>> > >
>>> > > Hi Hans,
>>> > >
>>> > > I'm not sure I follow you on what will this look like in userspace. Can you post
>>> > > an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
>>> > > how the request helps in synchronizing the read of the metadata controls (over
>>> > > simply reading the control after a DQBUF, which we can match to a specific input
>>> > > using the provided timestamp). I also fail to understand how this aligns with
>>> > > Stanimir concern with the performance of the get control interface.
>>>
>>> Hmm, I think I missed this email.
>>>
>>> Note that there is no guarantee that reading a control after a DQBUF will give
>>> you the value for that dequeued frame. The driver may already have generated
>>> multiple capture frames by the time userspace dequeues the oldest buffer.
>>>
>>> > As there was no answer, I'll try and ask a more precise question. While
>>> > I still believe it's better done in userspace for M2M decoder, there is
>>> > a need for HDMI receivers (hence adding direct CC to Dylan Yip from
>>> > Xilinx).
>>> >
>>> > Would this match your expected userspace workflow ?
>>> >
>>> > 1. Allocate capture buffers
>>> > 2. Allocate the same number of request fd (ro request)
>>> > 3. For each capture buffer
>>> >   3.1 Queue the capture buffer (passing the request)
>>> >   3.2 Queue the request
>>> > 4. Wait for capture buffer to be ready
>>> > 5. Dequeue a capture buffer, and lookup it's request FD
>>> > 6. Wait for the request to complete (needed ?)
>>> > 7. Read the meta control passing the request
>>>
>>> Right.
>>>
>>> > I'm not sure if 6. is required, driver could just to things in the
>>> > right order (store the meta in the request before marking the buffer
>>> > done).
>>>
>>> For HDR information you don't need 6. For histogram/statistics such
>>> information might become available after the capture buffer was ready.
>>>
>>> I would just implement this as follows:
>>>
>>> 4. Wait for the request to complete
>>
>> I should have clarified my thought, I first wrote it like this, but
>> then realized that it forces the driver into picking up capture buffer
>> in the same order they were queued. For an encoder, it might be a
>> problem as in practice it does not encode in the same order it will
>> produce (I think this depends on internal encoder implementation
>> details though).
>>
>> So in fact, I was waiting on the queue, to pop a Capture buffer to
>> figure-out which request was being involved. And then I'd get the
>> control value for that request (which may need waiting further on the
>> request, the "heavy" reference is there).
>>
>> But I think we can assume this is all in order for HDMI/SDI/CSI capture
>> (well raw/bayer capture mostly) and keep it a bit more lightweight
>> there.
>>
>>> 5. Dequeue a capture buffer, and lookup it's request FD
>>> 6. Read the meta control passing the request
>>>
>>> No need to wait for both the request and the capture buffer.
>>
>> Ack.
>
> The sequence which I followed for implementing this is.
> 1. Allocate capture buffers
> 2. Allocate the same number of request fd
> 3. For each capture buffer
>     3.1 call VIDIOC_S_EXT_CTRLS IOCTL to the driver with metadata info and request fd
>     3.2 Queue the capture buffer (passing the request)
>     3.3 Queue the request
> 4. Wait for capture buffer to be ready
> 5. Dequeue a capture buffer, and lookup it's request FD
> 6. Call VIDIOC_G_EXT_CTRLS passing the request for the dequeued buffer.
>
> Qualcomm H/W generates both data buffer and metadata buffer at the same time and
> provides this info to the driver in a single response, there won't be separate responses for
> data buffer and metadata buffer.
> so there is no need to wait for request completion.
> The driver will write meta info in the control before sending buffer done to the client.
> so when the buffer is available for deque, meta info will also be available.
>
> but I agree that the wait for the request might be required for HW that generates
> data buffer and metadata buffer separately.

It is safer to wait for the request event rather than assuming knowledge of
the driver implementation.

>
>>>
>>> Now this is pretty heavy, but does not seems broken. The
>>> > question is how to you avoid reading the control if it haven't changed
>>> > ? Can driver guaranty to send the control change event before marking a
>>> > buffer done ? (this is for static HDR meta, which will change when
>>> > stream is change, e.g. starting/ending of adds on TV).
>>>
>>> There are two possible mechanisms today:
>>>
>>> 1) use requests for the capture queue, and then you need to query the
>>> request for the static HDR control. The advantage is that this is a
>>> 1-to-1 mapping of the control value with the buffer. The disadvantage is that
>>> it adds overhead.
>
> Since we need 1-to-1 mapping of buffer and metadata info using this approach for
> capture queue as well should be fine, right? or there are any concerns?

Yes, that should be fine (you'd use read-only requests for this, as per the
RFC series linked to above).

>
>>>
>>> 2) don't use requests, instead just subscribe to the control event.
>>> Whenever the static HDR meta data changes, you'll get an event. But
>>> then it isn't linked to a specific frame.
>>>
>>> A third option would be to extend struct v4l2_event_ctrl with a __u32
>>> buffer_index field to associate a capture buffer with this change. You'd
>>> need an addition control flag as well to indicate that this control is
>>> associated with a specific buffer.
>>>
>>> (Just a brainstorm, this no doubt would need some refinement)
>>
>> Or extend with a target sequence number, as we already have this unique
>> number handy.
>>
>> In fact I was already thinking about extending the event with a target
>> v4l2_buffer sequence. But then I realized that if you have two
>> consecutive changes, there is still no way to ensure you can read the
>> control value matching the event you are processing.
>>
>> So this idea would conceptually mean that instead of using the
>> "request" as a control storage, we'd be using per "buffer" control
>> storage. I wonder how realistic this is in the existing framework. But
>> could be a thread to explore.
>>
>> To add to the brainstorm, we could introduce a meta type for controls.
>> Can't think of a name now, but basically we would "pop" a control
>> value. That control would contain the information needed to identify
>> the buffer it applied to (like a sequence number). Fifo would be leaky
>> at the tail, and sized around the size of the queue. This way,
>> userspace can ignore it without causing any leaks, which also allow for
>> backward compatibility.
>>
>> The "pop" ioctl could be extended with the target sequence number, so
>> that you get exactly the control you are looking for (ignoring the
>> older one, and preventing the newer one from being extracted). Of
>> course, the event is needed to tel when something get queued for static
>> meta.
>>
>>>
>>> While for HDR10+
>>> > and similar dynamic meta, we expect a new value for every run, HDR10+
>>> > data is much much bigger. Do we really want that to be model around a
>>> > control ?
>>>
>>> No, we don't. For that you would want a metadata video device.
>>>
>>> Dynamic metadata is a separate problem. I would also really like to know
>>> a bit more about the various HW implementations for this before deciding
>>> on a specific API. E.g., can all HW pass this data to a separate DMA engine?
>>> Would it only split off dynamic metadata or just all InfoFrames?
>
> Qualcomm HW has a single DMA engine that stores both data and metadata buffer and
> the driver is notified by HW in a single response with both data buffer and metadata buffer.

Are the video data and meta data buffers separate? Or are they all dumped in one
buffer? How does the HW know how many bytes should be reserved for the metadata?
Is there a hardware limit?

> So using a request based approach for HDR10+ metadata or any dynamic data as well should be fine?

Not quite sure what you mean. A request based approach is certainly valid, but
do you want to use a control for the dynamic metadata or a video device?

I would prefer the latter. But that also assumes that the video and metadata
arrive in different buffers.

Regards,

Hans

>
> If not, what could be the other way to achieve this?
>
>>
>> That seems like a HW question, I'll pass. Though, in my mind, the
>> separated metadata video device are only usable if there is a 1:1
>> relationship between buffers and metadata. For other use cases, it's
>> missing a notification mechanism (like described above) to avoid having
>> to introduce arbitrary (and likely bogus) latency that ensure we didn't
>> proceed without the associate metadata. To I think it's not a very
>> powerful way to pass around metadata, unless it's 1:1 with the buffers,
>> which for HDR10+ fits.
>>
>>>
>>> Regards,
>>>
>>>     Hans
>>>
>>> > > > Implementing this shouldn't be a big job: you'd need a new
>>> > > > V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
>>> > > > capability, a corresponding new flag in struct vb2_queue, a new ro_requests
>>> > > > flag in
>>> > > > struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
>>> > > > refuse to
>>> > > > try/set any controls if it is true.
>>> > > >
>>> > > > Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
>>> > > > case where both
>>> > > > capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
>>> > > >
>>> > > > And the documentation needs to be updated.
>>> > > >
>>> > > > I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
>>> > > > this already.
>>> > > >
>>> > > > Regards,
>>> > > >
>>> > > >     Hans
>>> > > >
>>> > > > > Thanks,
>>> > > > > Dikshita
>>> > > > > > > We profiled below data with this implementation :
>>> > > > > > > 1. Total time taken ( round trip ) for setting up control data on
>>> > > > > > > video driver
>>> > > > > > >    with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>>> > > > > > > 2. Time taken for first QBUF on Output plane to reach driver with
>>> > > > > > > REQUEST API enabled (One way): 723us
>>> > > > > > > 3. Time taken for first QBUF on Output plane to reach driver without
>>> > > > > > > REQUEST API (One way) : 250us
>>> > > > > > > 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>>> > > > > > > API enabled :
>>> > > > > > >     a. VIDIOC_S_EXT_CTRLS : 201us
>>> > > > > > >     b. VIDIOC_QBUF : 92us
>>> > > > > > >     c. MEDIA_REQUEST_IOC_QUEUE: 386us
>>> > > > > > >
>>> > > > > > > Kindly share your feedback/comments on the design/call sequence.
>>> > > > > > > Also as we experimented and enabled the metadata on capture plane as
>>> > > > > > > well, please comment if any issue in
>>> > > > > > > allowing the metadata exchange on capture plane as well.
>>> > > > > > >
>>> > > > > > > Reference for client side implementation can be found at [1].
>>> > > > > > >
>>> > > > > > > Thanks,
>>> > > > > > > Dikshita
>>> > > > > > >
>>> > > > > > > [1]
>>> > > > > > > https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>>> > > > > > >
>>> > > > > > > Dikshita Agarwal (3):
>>> > > > > > >   Register for media device
>>> > > > > > >     - Initialize and register for media device
>>> > > > > > >     - define venus_m2m_media_ops
>>> > > > > > >     - Implement APIs to register/unregister media controller.
>>> > > > > > >   Enable Request API for output buffers
>>> > > > > > >     - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>>> > > > > > >     - Initialize vb2 ops buf_out_validate and buf_request_complete.
>>> > > > > > >     - Add support for custom Metadata control
>>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>> > > > > > >     - Implemeted/Integrated APIs for Request setup/complete.
>>> > > > > > >   Enable Request API for Capture Buffers
>>> > > > > > >
>>> > > > > > > drivers/media/common/videobuf2/videobuf2-v4l2.c |   4 +-
>>> > > > > > > drivers/media/platform/Kconfig                  |   2 +-
>>> > > > > > > drivers/media/platform/qcom/venus/core.h        | 36 ++++
>>> > > > > > > drivers/media/platform/qcom/venus/helpers.c     | 247
>>> > > > > > > +++++++++++++++++++++++-
>>> > > > > > > drivers/media/platform/qcom/venus/helpers.h     | 15 ++
>>> > > > > > > drivers/media/platform/qcom/venus/venc.c        | 63 +++++-
>>> > > > > > > drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>>> > > > > > > drivers/media/v4l2-core/v4l2-ctrls.c            | 10 +
>>> > > > > > > drivers/media/v4l2-core/v4l2-mem2mem.c          | 17 +-
>>> > > > > > > include/media/v4l2-ctrls.h                      |   1 +
>>> > > > > > > include/media/venus-ctrls.h                     | 22 +++
>>> > > > > > > 11 files changed, 465 insertions(+), 13 deletions(-)
>>> > > > > > > create mode 100644 include/media/venus-ctrls.h
>>> > > > > > >

2020-09-14 10:24:40

[permalink] [raw]

Subject: Re: [RFC] METADATA design using V4l2 Request API

Hi Hans,

I picked your latest patches to add support for RO Request and
implemented the driver changes as required.
To set a ctrl value from driver I am getting the ctrl using
v4l2_ctrl_find()
by passing Main handler as a parameter as suggested by you and
then setting the ctrl value with __v4l2_ctrl_s_ctrl_compound() since I
am trying to update a compound type control.
And I can see value of ctrl is as expected when VIDIOC_G_EXT_CTRLS is
called from userspace.

There are few issues I observed though:
1. As a part of request reinit I saw crashes at below two places
a.
https://elixir.bootlin.com/linux/latest/source/drivers/media/common/videobuf2/videobuf2-core.c#L964
b.
https://elixir.bootlin.com/linux/latest/source/drivers/media/mc/mc-request.c#L50
2. when I was setting the volatile flag (V4L2_CTRL_FLAG_VOLATILE) for
the new ctrls I created,
ctrls were not getting updated properly through
__v4l2_ctrl_s_ctrl_compound().
Is there any issue when using volatile flag with compound
control?

Thanks,
Dikshita

On 2020-08-03 17:02, Hans Verkuil wrote:
> On 16/06/2020 15:00, [email protected] wrote:
>> Hi Nicolas, Hans,
>>
>> Thanks for your comments and sorry for the delayed response.
>>
>> On 2020-06-12 22:07, Nicolas Dufresne wrote:
>>> Le vendredi 12 juin 2020 à 12:05 +0200, Hans Verkuil a écrit :
>>>> On 11/06/2020 17:06, Nicolas Dufresne wrote:
>>>> > Le jeudi 28 mai 2020 à 22:08 -0400, Nicolas Dufresne a écrit :
>>>> > > Le jeudi 28 mai 2020 à 13:24 +0200, Hans Verkuil a écrit :
>>>> > > > On 28/05/2020 12:48, [email protected] wrote:
>>>> > > > > Hi Hans,
>>>> > > > >
>>>> > > > > Thanks for the review.
>>>> > > > >
>>>> > > > > On 2020-05-26 16:27, Hans Verkuil wrote:
>>>> > > > > > Hi Dikshita,
>>>> > > > > >
>>>> > > > > > My apologies for the delay, this was (mostly) due to various vacation
>>>> > > > > > days.
>>>> > > > > >
>>>> > > > > > On 08/05/2020 08:21, Dikshita Agarwal wrote:
>>>> > > > > > > There are many commercialized video use cases which needs metadata
>>>> > > > > > > info
>>>> > > > > > > to be circulated between v4l2 client and v4l2 driver.
>>>> > > > > > >
>>>> > > > > > > METADATA has following requirements associated:
>>>> > > > > > > •Metadata is an optional info available for a buffer. It is not
>>>> > > > > > > mandatorily for every buffer.
>>>> > > > > > > For ex. consider metadata ROI (Region Of Interest). ROI is specified
>>>> > > > > > > by clients to indicate
>>>> > > > > > > the region where enhanced quality is desired. This metadata is given
>>>> > > > > > > as an input information
>>>> > > > > > > to encoder output plane. Client may or may not specify the ROI for a
>>>> > > > > > > frame during encode as
>>>> > > > > > > an input metadata. Also if the client has not provided ROI metadata
>>>> > > > > > > for a given frame,
>>>> > > > > > > it would be incorrect to take the metadata from previous frame. If
>>>> > > > > > > the data and
>>>> > > > > > > metadata is asynchronous, it would be difficult for hardware to
>>>> > > > > > > decide if it
>>>> > > > > > > needs to wait for metadata buffer or not before processing the input
>>>> > > > > > > frame for encoding.
>>>> > > > > > > •Synchronize the buffer requirement across both the video node/session
>>>> > > > > > > (incase metadata is being processed as a separate v4l2 video
>>>> > > > > > > node/session).
>>>> > > > > > > This is to avoid buffer starvation.
>>>> > > > > > > •Associate the metadata buffer with the data buffer without adding any
>>>> > > > > > > pipeline delay
>>>> > > > > > > in waiting for each other. This is applicable both at the hardware
>>>> > > > > > > side (the processing end)
>>>> > > > > > > and client side (the receiving end).
>>>> > > > > > > •Low latency usecases like WFD/split rendering/game streaming/IMS have
>>>> > > > > > > sub-50ms e2e latency
>>>> > > > > > > requirements, and it is not practical to stall the pipeline due to
>>>> > > > > > > inherent framework latencies.
>>>> > > > > > > High performance usecase like high-frame rate playback/record can
>>>> > > > > > > lead to frame loss during any pipeline latency.
>>>> > > > > > >
>>>> > > > > > > To address all above requirements, we used v4l2 Request API as
>>>> > > > > > > interlace.
>>>> > > > > > >
>>>> > > > > > > As an experiment, We have introduced new control
>>>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>>> > > > > > > to contain the METADATA info. Exact controls can be finalized once the
>>>> > > > > > > interface is discussed.
>>>> > > > > > >
>>>> > > > > > > For setting metadata from userspace to kernel, let say on encode
>>>> > > > > > > output plane,
>>>> > > > > > > following code sequence was followed
>>>> > > > > > > 1. Video driver is registering for media device and creating a media
>>>> > > > > > > node in /dev
>>>> > > > > > > 2. Request fd is allocated by calling MEDIA_IOC_REQUEST_ALLOC IOCTL on
>>>> > > > > > > media fd.
>>>> > > > > > > 3. METADATA configuration is being applied on request fd using
>>>> > > > > > > VIDIOC_S_EXT_CTRLS IOCTL
>>>> > > > > > >    and the same request fd is added to buf structure structure before
>>>> > > > > > > calling VIDIOC_QBUF on video fd.
>>>> > > > > > > 4. The same request is queued through MEDIA_REQUEST_IOC_QUEUE IOCTL to
>>>> > > > > > > driver then, as a result
>>>> > > > > > >    to which METADATA control will be applied to buffer through S_CTRL.
>>>> > > > > > > 5. Once control is applied and request is completed,
>>>> > > > > > > MEDIA_REQUEST_IOC_REINIT IOCTL is called
>>>> > > > > > >    to re-initialize the request.
>>>> > > > > >
>>>> > > > > > This is fine and should work well. It's what the Request API is for,
>>>> > > > > > so no problems here.
>>>> > > > > >
>>>> > > > > > > We could achieve the same on capture plane as well by removing few
>>>> > > > > > > checks present currently
>>>> > > > > > > in v4l2 core which restrict the implementation to only output plane.
>>>> > > > > >
>>>> > > > > > Why do you need the Request API for the capture side in a
>>>> > > > > > memory-to-memory driver? It is not
>>>> > > > > > clear from this patch series what the use-case is. There are reasons
>>>> > > > > > why this is currently
>>>> > > > > > not allowed. So I need to know more about this.
>>>> > > > > >
>>>> > > > > > Regards,
>>>> > > > > >
>>>> > > > > >     Hans
>>>> > > > > >
>>>> > > > > we need this for use cases like HDR10+ where metadata info is part of
>>>> > > > > the bitstream.
>>>> > > > > To handle such frame specific data, support for request api on capture
>>>> > > > > plane would be needed.
>>>> > > >
>>>> > > > That's for the decoder, right? So the decoder extracts the HDR10+ metadata
>>>> > > > and fills in a control with the metadata?
>>>> > > >
>>>> > > > If that's the case, then it matches a similar request I got from mediatek.
>>>> > > > What is needed is support for 'read-only' requests: i.e. the driver can
>>>> > > > associate controls with a capture buffer and return that to userspace. But
>>>> > > > it is not possible to *set* controls in userspace when queuing the request.
>>>> > > >
>>>> > > > If you think about it you'll see that setting controls in userspace for
>>>> > > > a capture queue request makes no sense, but having the driver add set
>>>> > > > read-only controls when the request is finished is fine and makes sense.
>>
>> I tried an experiment where I removed set control from the client for
>> metadata,
>> queued buffer and request to driver and when capture buffer was ready,
>> I dequeued the buffer,
>> extracted the request fd and called get_ext_control IOCTL on driver.
>> this resulted in error from
>> https://elixir.bootlin.com/linux/latest/source/drivers/media/v4l2-core/v4l2-ctrls.c#L3678
>> based on what I understood, since there was no set control for the
>> request,
>> there was no v4l2 ctrl handler object associated with it so no obj was
>> found and
>> hence the error(please correct if my understanding is wrong).
>>
>> So if client is not supposed to call set control on capture plane then
>> 1. how to avoid the above error?
>> 2. should driver call v4l2_s_ext_ctrls() with the request once buffer
>> and request are
>>    queued to driver? Driver should be able to extract the request fd
>> from request to
>>    achieve the same right? is that possible?
>
> It's a bug. This RFC series provides a fix:
>
> https://patchwork.linuxtv.org/project/linux-media/cover/[email protected]/
>
>>
>>>> > >
>>>> > > Hi Hans,
>>>> > >
>>>> > > I'm not sure I follow you on what will this look like in userspace. Can you post
>>>> > > an RFC of your idea, describing the userspace flow ? Particularly, I'm not sure
>>>> > > how the request helps in synchronizing the read of the metadata controls (over
>>>> > > simply reading the control after a DQBUF, which we can match to a specific input
>>>> > > using the provided timestamp). I also fail to understand how this aligns with
>>>> > > Stanimir concern with the performance of the get control interface.
>>>>
>>>> Hmm, I think I missed this email.
>>>>
>>>> Note that there is no guarantee that reading a control after a DQBUF
>>>> will give
>>>> you the value for that dequeued frame. The driver may already have
>>>> generated
>>>> multiple capture frames by the time userspace dequeues the oldest
>>>> buffer.
>>>>
>>>> > As there was no answer, I'll try and ask a more precise question. While
>>>> > I still believe it's better done in userspace for M2M decoder, there is
>>>> > a need for HDMI receivers (hence adding direct CC to Dylan Yip from
>>>> > Xilinx).
>>>> >
>>>> > Would this match your expected userspace workflow ?
>>>> >
>>>> > 1. Allocate capture buffers
>>>> > 2. Allocate the same number of request fd (ro request)
>>>> > 3. For each capture buffer
>>>> >   3.1 Queue the capture buffer (passing the request)
>>>> >   3.2 Queue the request
>>>> > 4. Wait for capture buffer to be ready
>>>> > 5. Dequeue a capture buffer, and lookup it's request FD
>>>> > 6. Wait for the request to complete (needed ?)
>>>> > 7. Read the meta control passing the request
>>>>
>>>> Right.
>>>>
>>>> > I'm not sure if 6. is required, driver could just to things in the
>>>> > right order (store the meta in the request before marking the buffer
>>>> > done).
>>>>
>>>> For HDR information you don't need 6. For histogram/statistics such
>>>> information might become available after the capture buffer was
>>>> ready.
>>>>
>>>> I would just implement this as follows:
>>>>
>>>> 4. Wait for the request to complete
>>>
>>> I should have clarified my thought, I first wrote it like this, but
>>> then realized that it forces the driver into picking up capture
>>> buffer
>>> in the same order they were queued. For an encoder, it might be a
>>> problem as in practice it does not encode in the same order it will
>>> produce (I think this depends on internal encoder implementation
>>> details though).
>>>
>>> So in fact, I was waiting on the queue, to pop a Capture buffer to
>>> figure-out which request was being involved. And then I'd get the
>>> control value for that request (which may need waiting further on the
>>> request, the "heavy" reference is there).
>>>
>>> But I think we can assume this is all in order for HDMI/SDI/CSI
>>> capture
>>> (well raw/bayer capture mostly) and keep it a bit more lightweight
>>> there.
>>>
>>>> 5. Dequeue a capture buffer, and lookup it's request FD
>>>> 6. Read the meta control passing the request
>>>>
>>>> No need to wait for both the request and the capture buffer.
>>>
>>> Ack.
>>
>> The sequence which I followed for implementing this is.
>> 1. Allocate capture buffers
>> 2. Allocate the same number of request fd
>> 3. For each capture buffer
>>     3.1 call VIDIOC_S_EXT_CTRLS IOCTL to the driver with metadata info
>> and request fd
>>     3.2 Queue the capture buffer (passing the request)
>>     3.3 Queue the request
>> 4. Wait for capture buffer to be ready
>> 5. Dequeue a capture buffer, and lookup it's request FD
>> 6. Call VIDIOC_G_EXT_CTRLS passing the request for the dequeued
>> buffer.
>>
>> Qualcomm H/W generates both data buffer and metadata buffer at the
>> same time and
>> provides this info to the driver in a single response, there won't be
>> separate responses for
>> data buffer and metadata buffer.
>> so there is no need to wait for request completion.
>> The driver will write meta info in the control before sending buffer
>> done to the client.
>> so when the buffer is available for deque, meta info will also be
>> available.
>>
>> but I agree that the wait for the request might be required for HW
>> that generates
>> data buffer and metadata buffer separately.
>
> It is safer to wait for the request event rather than assuming
> knowledge of
> the driver implementation.
>
>>
>>>>
>>>> Now this is pretty heavy, but does not seems broken. The
>>>> > question is how to you avoid reading the control if it haven't changed
>>>> > ? Can driver guaranty to send the control change event before marking a
>>>> > buffer done ? (this is for static HDR meta, which will change when
>>>> > stream is change, e.g. starting/ending of adds on TV).
>>>>
>>>> There are two possible mechanisms today:
>>>>
>>>> 1) use requests for the capture queue, and then you need to query
>>>> the
>>>> request for the static HDR control. The advantage is that this is a
>>>> 1-to-1 mapping of the control value with the buffer. The
>>>> disadvantage is that
>>>> it adds overhead.
>>
>> Since we need 1-to-1 mapping of buffer and metadata info using this
>> approach for
>> capture queue as well should be fine, right? or there are any
>> concerns?
>
> Yes, that should be fine (you'd use read-only requests for this, as per
> the
> RFC series linked to above).
>
>>
>>>>
>>>> 2) don't use requests, instead just subscribe to the control event.
>>>> Whenever the static HDR meta data changes, you'll get an event. But
>>>> then it isn't linked to a specific frame.
>>>>
>>>> A third option would be to extend struct v4l2_event_ctrl with a
>>>> __u32
>>>> buffer_index field to associate a capture buffer with this change.
>>>> You'd
>>>> need an addition control flag as well to indicate that this control
>>>> is
>>>> associated with a specific buffer.
>>>>
>>>> (Just a brainstorm, this no doubt would need some refinement)
>>>
>>> Or extend with a target sequence number, as we already have this
>>> unique
>>> number handy.
>>>
>>> In fact I was already thinking about extending the event with a
>>> target
>>> v4l2_buffer sequence. But then I realized that if you have two
>>> consecutive changes, there is still no way to ensure you can read the
>>> control value matching the event you are processing.
>>>
>>> So this idea would conceptually mean that instead of using the
>>> "request" as a control storage, we'd be using per "buffer" control
>>> storage. I wonder how realistic this is in the existing framework.
>>> But
>>> could be a thread to explore.
>>>
>>> To add to the brainstorm, we could introduce a meta type for
>>> controls.
>>> Can't think of a name now, but basically we would "pop" a control
>>> value. That control would contain the information needed to identify
>>> the buffer it applied to (like a sequence number). Fifo would be
>>> leaky
>>> at the tail, and sized around the size of the queue. This way,
>>> userspace can ignore it without causing any leaks, which also allow
>>> for
>>> backward compatibility.
>>>
>>> The "pop" ioctl could be extended with the target sequence number, so
>>> that you get exactly the control you are looking for (ignoring the
>>> older one, and preventing the newer one from being extracted). Of
>>> course, the event is needed to tel when something get queued for
>>> static
>>> meta.
>>>
>>>>
>>>> While for HDR10+
>>>> > and similar dynamic meta, we expect a new value for every run, HDR10+
>>>> > data is much much bigger. Do we really want that to be model around a
>>>> > control ?
>>>>
>>>> No, we don't. For that you would want a metadata video device.
>>>>
>>>> Dynamic metadata is a separate problem. I would also really like to
>>>> know
>>>> a bit more about the various HW implementations for this before
>>>> deciding
>>>> on a specific API. E.g., can all HW pass this data to a separate DMA
>>>> engine?
>>>> Would it only split off dynamic metadata or just all InfoFrames?
>>
>> Qualcomm HW has a single DMA engine that stores both data and metadata
>> buffer and
>> the driver is notified by HW in a single response with both data
>> buffer and metadata buffer.
>
> Are the video data and meta data buffers separate? Or are they all
> dumped in one
> buffer? How does the HW know how many bytes should be reserved for the
> metadata?
> Is there a hardware limit?
>
>> So using a request based approach for HDR10+ metadata or any dynamic
>> data as well should be fine?
>
> Not quite sure what you mean. A request based approach is certainly
> valid, but
> do you want to use a control for the dynamic metadata or a video
> device?
>
> I would prefer the latter. But that also assumes that the video and
> metadata
> arrive in different buffers.
>
> Regards,
>
> Hans
>
>>
>> If not, what could be the other way to achieve this?
>>
>>>
>>> That seems like a HW question, I'll pass. Though, in my mind, the
>>> separated metadata video device are only usable if there is a 1:1
>>> relationship between buffers and metadata. For other use cases, it's
>>> missing a notification mechanism (like described above) to avoid
>>> having
>>> to introduce arbitrary (and likely bogus) latency that ensure we
>>> didn't
>>> proceed without the associate metadata. To I think it's not a very
>>> powerful way to pass around metadata, unless it's 1:1 with the
>>> buffers,
>>> which for HDR10+ fits.
>>>
>>>>
>>>> Regards,
>>>>
>>>>     Hans
>>>>
>>>> > > > Implementing this shouldn't be a big job: you'd need a new
>>>> > > > V4L2_BUF_CAP_SUPPORTS_RO_REQUESTS
>>>> > > > capability, a corresponding new flag in struct vb2_queue, a new ro_requests
>>>> > > > flag in
>>>> > > > struct v4l2_ctrl_handler, and try_set_ext_ctrls() should check that flag and
>>>> > > > refuse to
>>>> > > > try/set any controls if it is true.
>>>> > > >
>>>> > > > Finally, the v4l2_m2m_qbuf() function should be updated to just refuse the
>>>> > > > case where both
>>>> > > > capture and output queue set V4L2_BUF_CAP_SUPPORTS_REQUESTS.
>>>> > > >
>>>> > > > And the documentation needs to be updated.
>>>> > > >
>>>> > > > I've added Yunfei Dong to the CC list, perhaps mediatek did some work on
>>>> > > > this already.
>>>> > > >
>>>> > > > Regards,
>>>> > > >
>>>> > > >     Hans
>>>> > > >
>>>> > > > > Thanks,
>>>> > > > > Dikshita
>>>> > > > > > > We profiled below data with this implementation :
>>>> > > > > > > 1. Total time taken ( round trip ) for setting up control data on
>>>> > > > > > > video driver
>>>> > > > > > >    with VIDIOC_S_EXT_CTRLS, QBUF and Queue Request: 737us
>>>> > > > > > > 2. Time taken for first QBUF on Output plane to reach driver with
>>>> > > > > > > REQUEST API enabled (One way): 723us
>>>> > > > > > > 3. Time taken for first QBUF on Output plane to reach driver without
>>>> > > > > > > REQUEST API (One way) : 250us
>>>> > > > > > > 4. Time taken by each IOCTL to complete ( round trip ) with REQUEST
>>>> > > > > > > API enabled :
>>>> > > > > > >     a. VIDIOC_S_EXT_CTRLS : 201us
>>>> > > > > > >     b. VIDIOC_QBUF : 92us
>>>> > > > > > >     c. MEDIA_REQUEST_IOC_QUEUE: 386us
>>>> > > > > > >
>>>> > > > > > > Kindly share your feedback/comments on the design/call sequence.
>>>> > > > > > > Also as we experimented and enabled the metadata on capture plane as
>>>> > > > > > > well, please comment if any issue in
>>>> > > > > > > allowing the metadata exchange on capture plane as well.
>>>> > > > > > >
>>>> > > > > > > Reference for client side implementation can be found at [1].
>>>> > > > > > >
>>>> > > > > > > Thanks,
>>>> > > > > > > Dikshita
>>>> > > > > > >
>>>> > > > > > > [1]
>>>> > > > > > > https://git.linaro.org/people/stanimir.varbanov/v4l2-encode.git/log/?h=dikshita/request-api
>>>> > > > > > >
>>>> > > > > > > Dikshita Agarwal (3):
>>>> > > > > > >   Register for media device
>>>> > > > > > >     - Initialize and register for media device
>>>> > > > > > >     - define venus_m2m_media_ops
>>>> > > > > > >     - Implement APIs to register/unregister media controller.
>>>> > > > > > >   Enable Request API for output buffers
>>>> > > > > > >     - Add dependency on MEDIA_CONTROLLER_REQUEST_API in Kconfig.
>>>> > > > > > >     - Initialize vb2 ops buf_out_validate and buf_request_complete.
>>>> > > > > > >     - Add support for custom Metadata control
>>>> > > > > > > V4L2_CID_MPEG_VIDEO_VENUS_METADATA
>>>> > > > > > >     - Implemeted/Integrated APIs for Request setup/complete.
>>>> > > > > > >   Enable Request API for Capture Buffers
>>>> > > > > > >
>>>> > > > > > > drivers/media/common/videobuf2/videobuf2-v4l2.c |   4 +-
>>>> > > > > > > drivers/media/platform/Kconfig                  |   2 +-
>>>> > > > > > > drivers/media/platform/qcom/venus/core.h        | 36 ++++
>>>> > > > > > > drivers/media/platform/qcom/venus/helpers.c     | 247
>>>> > > > > > > +++++++++++++++++++++++-
>>>> > > > > > > drivers/media/platform/qcom/venus/helpers.h     | 15 ++
>>>> > > > > > > drivers/media/platform/qcom/venus/venc.c        | 63 +++++-
>>>> > > > > > > drivers/media/platform/qcom/venus/venc_ctrls.c | 61 +++++-
>>>> > > > > > > drivers/media/v4l2-core/v4l2-ctrls.c            | 10 +
>>>> > > > > > > drivers/media/v4l2-core/v4l2-mem2mem.c          | 17 +-
>>>> > > > > > > include/media/v4l2-ctrls.h                      |   1 +
>>>> > > > > > > include/media/venus-ctrls.h                     | 22 +++
>>>> > > > > > > 11 files changed, 465 insertions(+), 13 deletions(-)
>>>> > > > > > > create mode 100644 include/media/venus-ctrls.h
>>>> > > > > > >

2020-09-14 10:35:49