2022-05-18 04:32:28

by Benjamin Gaignard

[permalink] [raw]
Subject: [PATCH v2] media: hantro: Be more accurate on pixel formats step_width constraints

On Hantro G2 decoder on IMX8MQ strides requirements aren't the same
for NV12_4L4 and NV12 pixel formats. The first one use a 4 bytes padding
while the last one needs 8 bytes.
To be sure to provide the correct stride in all cases we need:
- to relax the constraints on codec formats so set step_width to 4
- use capture queue format and not the output queue format when applying
the pixel format constraints.
- put the correct step_width constraints on each pixel format.

Move HEVC SPS validation in hantro_hevc.c to be able to perform it
when setting sps control and when starting to decode the bitstream.
Add a new test in HEVC SPS validation function to check if resolution
is still matching the hardware constraints.

With this SAODBLK_A_MainConcept_4 and SAODBLK_B_MainConcept_4 conformance
tests files are correctly decoded with both NV12 and NV12_4L4 pixel formats.
These two files have a resolution of 1016x760.
If step_width = 16 for the both pixel formats the selected capture
resolution is 1024x768 which is wrong for NV12_4L4 (which expect 1016x760)
on Hantro G2 on IMX8MQ (but correct for NV12).

For other variants than Hantro G2 on IMX8M keep the same step_width to avoid
regressions.

Fluster HEVC test score is now 128/147 vs 126/147 with the both pixel
formats as decoder output.
Fluster VP9 test score stay at 147/303.

Signed-off-by: Benjamin Gaignard <[email protected]>
---
version 2:
- Add a HEVC SPS validation function to be used when
setting the control and start decoding.
I hope that is what Nicolas expects in his remark on v1.

drivers/staging/media/hantro/hantro_drv.c | 12 +++---
drivers/staging/media/hantro/hantro_hevc.c | 28 +++++++++++++
drivers/staging/media/hantro/hantro_hw.h | 2 +
drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
drivers/staging/media/hantro/imx8m_vpu_hw.c | 40 +++++++++++++++++--
.../staging/media/hantro/rockchip_vpu_hw.c | 32 +++++++++++++++
.../staging/media/hantro/sama5d4_vdec_hw.c | 16 ++++++++
drivers/staging/media/hantro/sunxi_vpu_hw.c | 16 ++++++++
8 files changed, 137 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 377dcc1d19de..5aac3a090480 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -253,6 +253,11 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)

static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
{
+ struct hantro_ctx *ctx;
+
+ ctx = container_of(ctrl->handler,
+ struct hantro_ctx, ctrl_handler);
+
if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) {
const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps;

@@ -268,12 +273,7 @@ static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
} else if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) {
const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps;

- if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
- /* Luma and chroma bit depth mismatch */
- return -EINVAL;
- if (sps->bit_depth_luma_minus8 != 0)
- /* Only 8-bit is supported */
- return -EINVAL;
+ return hantro_hevc_validate_sps(ctx, sps);
} else if (ctrl->id == V4L2_CID_STATELESS_VP9_FRAME) {
const struct v4l2_ctrl_vp9_frame *dec_params = ctrl->p_new.p_vp9_frame;

diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c
index 7fdec50dc853..6abef810b285 100644
--- a/drivers/staging/media/hantro/hantro_hevc.c
+++ b/drivers/staging/media/hantro/hantro_hevc.c
@@ -154,6 +154,30 @@ static int tile_buffer_reallocate(struct hantro_ctx *ctx)
return -ENOMEM;
}

+int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
+{
+ if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
+ /* Luma and chroma bit depth mismatch */
+ return -EINVAL;
+ if (sps->bit_depth_luma_minus8 != 0)
+ /* Only 8-bit is supported */
+ return -EINVAL;
+
+ /* for tile pixel format check if the width and height match
+ * hardware constraints */
+ if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
+ if (ctx->dst_fmt.width !=
+ ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
+ return -EINVAL;
+
+ if (ctx->dst_fmt.height !=
+ ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
{
struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
@@ -177,6 +201,10 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
if (WARN_ON(!ctrls->sps))
return -EINVAL;

+ ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
+ if (ret)
+ return ret;
+
ctrls->pps =
hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
if (WARN_ON(!ctrls->pps))
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 994547fe41b9..0bba6378212d 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -341,6 +341,8 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx);
void hantro_hevc_ref_init(struct hantro_ctx *ctx);
dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc);
int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr);
+int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps);
+

static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension)
{
diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
index 71a6279750bf..93d0dcf69f4a 100644
--- a/drivers/staging/media/hantro/hantro_v4l2.c
+++ b/drivers/staging/media/hantro/hantro_v4l2.c
@@ -260,7 +260,7 @@ static int hantro_try_fmt(const struct hantro_ctx *ctx,
} else if (ctx->is_encoder) {
vpu_fmt = ctx->vpu_dst_fmt;
} else {
- vpu_fmt = ctx->vpu_src_fmt;
+ vpu_fmt = fmt;
/*
* Width/height on the CAPTURE end of a decoder are ignored and
* replaced by the OUTPUT ones.
diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
index 9802508bade2..b6b2bf65e56d 100644
--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
+++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
@@ -83,6 +83,14 @@ static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = {
.fourcc = V4L2_PIX_FMT_YUYV,
.codec_mode = HANTRO_MODE_NONE,
.postprocessed = true,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 3840,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 2160,
+ .step_height = MB_DIM,
+ },
},
};

@@ -90,6 +98,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
{
.fourcc = V4L2_PIX_FMT_NV12,
.codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 3840,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 2160,
+ .step_height = MB_DIM,
+ },
},
{
.fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
@@ -137,6 +153,14 @@ static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
.fourcc = V4L2_PIX_FMT_NV12,
.codec_mode = HANTRO_MODE_NONE,
.postprocessed = true,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 3840,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 2160,
+ .step_height = MB_DIM,
+ },
},
};

@@ -144,6 +168,14 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
{
.fourcc = V4L2_PIX_FMT_NV12_4L4,
.codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 3840,
+ .step_width = 4,
+ .min_height = 48,
+ .max_height = 2160,
+ .step_height = 4,
+ },
},
{
.fourcc = V4L2_PIX_FMT_HEVC_SLICE,
@@ -152,10 +184,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
.frmsize = {
.min_width = 48,
.max_width = 3840,
- .step_width = MB_DIM,
+ .step_width = 4,
.min_height = 48,
.max_height = 2160,
- .step_height = MB_DIM,
+ .step_height = 4,
},
},
{
@@ -165,10 +197,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
.frmsize = {
.min_width = 48,
.max_width = 3840,
- .step_width = MB_DIM,
+ .step_width = 4,
.min_height = 48,
.max_height = 2160,
- .step_height = MB_DIM,
+ .step_height = 4,
},
},
};
diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
index fc96501f3bc8..efba7fcdf207 100644
--- a/drivers/staging/media/hantro/rockchip_vpu_hw.c
+++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
@@ -63,6 +63,14 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
.fourcc = V4L2_PIX_FMT_YUYV,
.codec_mode = HANTRO_MODE_NONE,
.postprocessed = true,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 1920,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 1088,
+ .step_height = MB_DIM,
+ },
},
};

@@ -70,6 +78,14 @@ static const struct hantro_fmt rk3066_vpu_dec_fmts[] = {
{
.fourcc = V4L2_PIX_FMT_NV12,
.codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 1920,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 1088,
+ .step_height = MB_DIM,
+ },
},
{
.fourcc = V4L2_PIX_FMT_H264_SLICE,
@@ -116,6 +132,14 @@ static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
{
.fourcc = V4L2_PIX_FMT_NV12,
.codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 4096,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 2304,
+ .step_height = MB_DIM,
+ },
},
{
.fourcc = V4L2_PIX_FMT_H264_SLICE,
@@ -162,6 +186,14 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = {
{
.fourcc = V4L2_PIX_FMT_NV12,
.codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 1920,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 1088,
+ .step_height = MB_DIM,
+ },
},
{
.fourcc = V4L2_PIX_FMT_H264_SLICE,
diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
index b2fc1c5613e1..07ee804e706b 100644
--- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c
+++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
@@ -16,6 +16,14 @@ static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = {
.fourcc = V4L2_PIX_FMT_YUYV,
.codec_mode = HANTRO_MODE_NONE,
.postprocessed = true,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 1280,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 720,
+ .step_height = MB_DIM,
+ },
},
};

@@ -23,6 +31,14 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = {
{
.fourcc = V4L2_PIX_FMT_NV12,
.codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 1280,
+ .step_width = MB_DIM,
+ .min_height = 48,
+ .max_height = 720,
+ .step_height = MB_DIM,
+ },
},
{
.fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
diff --git a/drivers/staging/media/hantro/sunxi_vpu_hw.c b/drivers/staging/media/hantro/sunxi_vpu_hw.c
index c0edd5856a0c..c2392c08febb 100644
--- a/drivers/staging/media/hantro/sunxi_vpu_hw.c
+++ b/drivers/staging/media/hantro/sunxi_vpu_hw.c
@@ -14,6 +14,14 @@ static const struct hantro_fmt sunxi_vpu_postproc_fmts[] = {
.fourcc = V4L2_PIX_FMT_NV12,
.codec_mode = HANTRO_MODE_NONE,
.postprocessed = true,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 3840,
+ .step_width = 32,
+ .min_height = 48,
+ .max_height = 2160,
+ .step_height = 32,
+ },
},
};

@@ -21,6 +29,14 @@ static const struct hantro_fmt sunxi_vpu_dec_fmts[] = {
{
.fourcc = V4L2_PIX_FMT_NV12_4L4,
.codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = 48,
+ .max_width = 3840,
+ .step_width = 32,
+ .min_height = 48,
+ .max_height = 2160,
+ .step_height = 32,
+ },
},
{
.fourcc = V4L2_PIX_FMT_VP9_FRAME,
--
2.32.0



2022-05-18 04:35:30

by Nicolas Dufresne

[permalink] [raw]
Subject: Re: [PATCH v2] media: hantro: Be more accurate on pixel formats step_width constraints

Le mardi 17 mai 2022 à 14:26 +0200, Benjamin Gaignard a écrit :
> On Hantro G2 decoder on IMX8MQ strides requirements aren't the same
> for NV12_4L4 and NV12 pixel formats. The first one use a 4 bytes padding
> while the last one needs 8 bytes.
> To be sure to provide the correct stride in all cases we need:
> - to relax the constraints on codec formats so set step_width to 4
> - use capture queue format and not the output queue format when applying
> the pixel format constraints.
> - put the correct step_width constraints on each pixel format.
>
> Move HEVC SPS validation in hantro_hevc.c to be able to perform it
> when setting sps control and when starting to decode the bitstream.
> Add a new test in HEVC SPS validation function to check if resolution
> is still matching the hardware constraints.
>
> With this SAODBLK_A_MainConcept_4 and SAODBLK_B_MainConcept_4 conformance
> tests files are correctly decoded with both NV12 and NV12_4L4 pixel formats.
> These two files have a resolution of 1016x760.
> If step_width = 16 for the both pixel formats the selected capture

Did you mean is instead of = ? Are you missing "and" somewhere in this sentence
?

> resolution is 1024x768 which is wrong for NV12_4L4 (which expect 1016x760)
> on Hantro G2 on IMX8MQ (but correct for NV12).
>
> For other variants than Hantro G2 on IMX8M keep the same step_width to avoid
> regressions.
>
> Fluster HEVC test score is now 128/147 vs 126/147 with the both pixel
> formats as decoder output.
> Fluster VP9 test score stay at 147/303.
>
> Signed-off-by: Benjamin Gaignard <[email protected]>
> ---
> version 2:
> - Add a HEVC SPS validation function to be used when
> setting the control and start decoding.
> I hope that is what Nicolas expects in his remark on v1.
>
> drivers/staging/media/hantro/hantro_drv.c | 12 +++---
> drivers/staging/media/hantro/hantro_hevc.c | 28 +++++++++++++
> drivers/staging/media/hantro/hantro_hw.h | 2 +
> drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
> drivers/staging/media/hantro/imx8m_vpu_hw.c | 40 +++++++++++++++++--
> .../staging/media/hantro/rockchip_vpu_hw.c | 32 +++++++++++++++
> .../staging/media/hantro/sama5d4_vdec_hw.c | 16 ++++++++
> drivers/staging/media/hantro/sunxi_vpu_hw.c | 16 ++++++++
> 8 files changed, 137 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
> index 377dcc1d19de..5aac3a090480 100644
> --- a/drivers/staging/media/hantro/hantro_drv.c
> +++ b/drivers/staging/media/hantro/hantro_drv.c
> @@ -253,6 +253,11 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
>
> static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
> {
> + struct hantro_ctx *ctx;
> +
> + ctx = container_of(ctrl->handler,
> + struct hantro_ctx, ctrl_handler);
> +
> if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) {
> const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps;
>
> @@ -268,12 +273,7 @@ static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
> } else if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) {
> const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps;
>
> - if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
> - /* Luma and chroma bit depth mismatch */
> - return -EINVAL;
> - if (sps->bit_depth_luma_minus8 != 0)
> - /* Only 8-bit is supported */
> - return -EINVAL;
> + return hantro_hevc_validate_sps(ctx, sps);
> } else if (ctrl->id == V4L2_CID_STATELESS_VP9_FRAME) {
> const struct v4l2_ctrl_vp9_frame *dec_params = ctrl->p_new.p_vp9_frame;
>
> diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c
> index 7fdec50dc853..6abef810b285 100644
> --- a/drivers/staging/media/hantro/hantro_hevc.c
> +++ b/drivers/staging/media/hantro/hantro_hevc.c
> @@ -154,6 +154,30 @@ static int tile_buffer_reallocate(struct hantro_ctx *ctx)
> return -ENOMEM;
> }
>
> +int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
> +{
> + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
> + /* Luma and chroma bit depth mismatch */
> + return -EINVAL;
> + if (sps->bit_depth_luma_minus8 != 0)
> + /* Only 8-bit is supported */
> + return -EINVAL;
> +
> + /* for tile pixel format check if the width and height match
> + * hardware constraints */
> + if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
> + if (ctx->dst_fmt.width !=
> + ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
> + return -EINVAL;
> +
> + if (ctx->dst_fmt.height !=
> + ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
> + return -EINVAL;

No action needed, just a question. Can we output tiled out of the PP ? If so,
perhaps we could handle the alignment difference by enabling the PP even if
tiled. That would greatly help to handle compatibility between decoders and
encoders in the future.

> + }
> +
> + return 0;
> +}
> +
> int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
> {
> struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
> @@ -177,6 +201,10 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
> if (WARN_ON(!ctrls->sps))
> return -EINVAL;
>
> + ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
> + if (ret)
> + return ret;
> +
> ctrls->pps =
> hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
> if (WARN_ON(!ctrls->pps))
> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
> index 994547fe41b9..0bba6378212d 100644
> --- a/drivers/staging/media/hantro/hantro_hw.h
> +++ b/drivers/staging/media/hantro/hantro_hw.h
> @@ -341,6 +341,8 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx);
> void hantro_hevc_ref_init(struct hantro_ctx *ctx);
> dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc);
> int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr);
> +int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps);
> +
>
> static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension)
> {
> diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
> index 71a6279750bf..93d0dcf69f4a 100644
> --- a/drivers/staging/media/hantro/hantro_v4l2.c
> +++ b/drivers/staging/media/hantro/hantro_v4l2.c
> @@ -260,7 +260,7 @@ static int hantro_try_fmt(const struct hantro_ctx *ctx,
> } else if (ctx->is_encoder) {
> vpu_fmt = ctx->vpu_dst_fmt;
> } else {
> - vpu_fmt = ctx->vpu_src_fmt;
> + vpu_fmt = fmt;
> /*
> * Width/height on the CAPTURE end of a decoder are ignored and
> * replaced by the OUTPUT ones.
> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
> index 9802508bade2..b6b2bf65e56d 100644
> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
> @@ -83,6 +83,14 @@ static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_YUYV,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,

I remember seeing limits to 4096 instead in VSI documentation (UWHD). Just like
many hardcoded limits in this driver, perhaps the limits was copied from the
white paper performance example. It would be weird that we can decode portrait
UHD/FHD.

I think for this patch, what I'd like to see is the UDH and FHD limits to be set
as a define. Since if we got them wrong, fixing them later after this patch
becomes a lot more work, as its copied over and over.

Everything else looks good to me know, thanks for the update.

> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = MB_DIM,
> + },
> },
> };
>
> @@ -90,6 +98,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
> @@ -137,6 +153,14 @@ static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = MB_DIM,
> + },
> },
> };
>
> @@ -144,6 +168,14 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12_4L4,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = 4,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = 4,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_HEVC_SLICE,
> @@ -152,10 +184,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> .frmsize = {
> .min_width = 48,
> .max_width = 3840,
> - .step_width = MB_DIM,
> + .step_width = 4,
> .min_height = 48,
> .max_height = 2160,
> - .step_height = MB_DIM,
> + .step_height = 4,
> },
> },
> {
> @@ -165,10 +197,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> .frmsize = {
> .min_width = 48,
> .max_width = 3840,
> - .step_width = MB_DIM,
> + .step_width = 4,
> .min_height = 48,
> .max_height = 2160,
> - .step_height = MB_DIM,
> + .step_height = 4,
> },
> },
> };
> diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
> index fc96501f3bc8..efba7fcdf207 100644
> --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c
> +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
> @@ -63,6 +63,14 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_YUYV,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1920,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 1088,
> + .step_height = MB_DIM,
> + },
> },
> };
>
> @@ -70,6 +78,14 @@ static const struct hantro_fmt rk3066_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1920,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 1088,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_H264_SLICE,
> @@ -116,6 +132,14 @@ static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 4096,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 2304,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_H264_SLICE,
> @@ -162,6 +186,14 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1920,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 1088,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_H264_SLICE,
> diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
> index b2fc1c5613e1..07ee804e706b 100644
> --- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c
> +++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
> @@ -16,6 +16,14 @@ static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_YUYV,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1280,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 720,
> + .step_height = MB_DIM,
> + },
> },
> };
>
> @@ -23,6 +31,14 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1280,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 720,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
> diff --git a/drivers/staging/media/hantro/sunxi_vpu_hw.c b/drivers/staging/media/hantro/sunxi_vpu_hw.c
> index c0edd5856a0c..c2392c08febb 100644
> --- a/drivers/staging/media/hantro/sunxi_vpu_hw.c
> +++ b/drivers/staging/media/hantro/sunxi_vpu_hw.c
> @@ -14,6 +14,14 @@ static const struct hantro_fmt sunxi_vpu_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = 32,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = 32,
> + },
> },
> };
>
> @@ -21,6 +29,14 @@ static const struct hantro_fmt sunxi_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12_4L4,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = 32,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = 32,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_VP9_FRAME,
> --
> 2.32.0
>
>


2022-05-18 06:39:49

by Benjamin Gaignard

[permalink] [raw]
Subject: Re: [PATCH v2] media: hantro: Be more accurate on pixel formats step_width constraints


Le 17/05/2022 à 19:42, Nicolas Dufresne a écrit :
> Le mardi 17 mai 2022 à 14:26 +0200, Benjamin Gaignard a écrit :
>> On Hantro G2 decoder on IMX8MQ strides requirements aren't the same
>> for NV12_4L4 and NV12 pixel formats. The first one use a 4 bytes padding
>> while the last one needs 8 bytes.
>> To be sure to provide the correct stride in all cases we need:
>> - to relax the constraints on codec formats so set step_width to 4
>> - use capture queue format and not the output queue format when applying
>> the pixel format constraints.
>> - put the correct step_width constraints on each pixel format.
>>
>> Move HEVC SPS validation in hantro_hevc.c to be able to perform it
>> when setting sps control and when starting to decode the bitstream.
>> Add a new test in HEVC SPS validation function to check if resolution
>> is still matching the hardware constraints.
>>
>> With this SAODBLK_A_MainConcept_4 and SAODBLK_B_MainConcept_4 conformance
>> tests files are correctly decoded with both NV12 and NV12_4L4 pixel formats.
>> These two files have a resolution of 1016x760.
>> If step_width = 16 for the both pixel formats the selected capture
> Did you mean is instead of = ? Are you missing "and" somewhere in this sentence
> ?

I mean 'equal' if step_width equal 16 the selected resolution is wrong for NV12_4L4.

>> resolution is 1024x768 which is wrong for NV12_4L4 (which expect 1016x760)
>> on Hantro G2 on IMX8MQ (but correct for NV12).
>>
>> For other variants than Hantro G2 on IMX8M keep the same step_width to avoid
>> regressions.
>>
>> Fluster HEVC test score is now 128/147 vs 126/147 with the both pixel
>> formats as decoder output.
>> Fluster VP9 test score stay at 147/303.
>>
>> Signed-off-by: Benjamin Gaignard <[email protected]>
>> ---
>> version 2:
>> - Add a HEVC SPS validation function to be used when
>> setting the control and start decoding.
>> I hope that is what Nicolas expects in his remark on v1.
>>
>> drivers/staging/media/hantro/hantro_drv.c | 12 +++---
>> drivers/staging/media/hantro/hantro_hevc.c | 28 +++++++++++++
>> drivers/staging/media/hantro/hantro_hw.h | 2 +
>> drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
>> drivers/staging/media/hantro/imx8m_vpu_hw.c | 40 +++++++++++++++++--
>> .../staging/media/hantro/rockchip_vpu_hw.c | 32 +++++++++++++++
>> .../staging/media/hantro/sama5d4_vdec_hw.c | 16 ++++++++
>> drivers/staging/media/hantro/sunxi_vpu_hw.c | 16 ++++++++
>> 8 files changed, 137 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
>> index 377dcc1d19de..5aac3a090480 100644
>> --- a/drivers/staging/media/hantro/hantro_drv.c
>> +++ b/drivers/staging/media/hantro/hantro_drv.c
>> @@ -253,6 +253,11 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
>>
>> static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
>> {
>> + struct hantro_ctx *ctx;
>> +
>> + ctx = container_of(ctrl->handler,
>> + struct hantro_ctx, ctrl_handler);
>> +
>> if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) {
>> const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps;
>>
>> @@ -268,12 +273,7 @@ static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
>> } else if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) {
>> const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps;
>>
>> - if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
>> - /* Luma and chroma bit depth mismatch */
>> - return -EINVAL;
>> - if (sps->bit_depth_luma_minus8 != 0)
>> - /* Only 8-bit is supported */
>> - return -EINVAL;
>> + return hantro_hevc_validate_sps(ctx, sps);
>> } else if (ctrl->id == V4L2_CID_STATELESS_VP9_FRAME) {
>> const struct v4l2_ctrl_vp9_frame *dec_params = ctrl->p_new.p_vp9_frame;
>>
>> diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c
>> index 7fdec50dc853..6abef810b285 100644
>> --- a/drivers/staging/media/hantro/hantro_hevc.c
>> +++ b/drivers/staging/media/hantro/hantro_hevc.c
>> @@ -154,6 +154,30 @@ static int tile_buffer_reallocate(struct hantro_ctx *ctx)
>> return -ENOMEM;
>> }
>>
>> +int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
>> +{
>> + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
>> + /* Luma and chroma bit depth mismatch */
>> + return -EINVAL;
>> + if (sps->bit_depth_luma_minus8 != 0)
>> + /* Only 8-bit is supported */
>> + return -EINVAL;
>> +
>> + /* for tile pixel format check if the width and height match
>> + * hardware constraints */
>> + if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
>> + if (ctx->dst_fmt.width !=
>> + ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
>> + return -EINVAL;
>> +
>> + if (ctx->dst_fmt.height !=
>> + ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
>> + return -EINVAL;
> No action needed, just a question. Can we output tiled out of the PP ? If so,
> perhaps we could handle the alignment difference by enabling the PP even if
> tiled. That would greatly help to handle compatibility between decoders and
> encoders in the future.

No tiled formats are produced by the decoder, PP produce NV12 format.

>
>> + }
>> +
>> + return 0;
>> +}
>> +
>> int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
>> {
>> struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
>> @@ -177,6 +201,10 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
>> if (WARN_ON(!ctrls->sps))
>> return -EINVAL;
>>
>> + ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
>> + if (ret)
>> + return ret;
>> +
>> ctrls->pps =
>> hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
>> if (WARN_ON(!ctrls->pps))
>> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
>> index 994547fe41b9..0bba6378212d 100644
>> --- a/drivers/staging/media/hantro/hantro_hw.h
>> +++ b/drivers/staging/media/hantro/hantro_hw.h
>> @@ -341,6 +341,8 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx);
>> void hantro_hevc_ref_init(struct hantro_ctx *ctx);
>> dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc);
>> int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr);
>> +int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps);
>> +
>>
>> static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension)
>> {
>> diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
>> index 71a6279750bf..93d0dcf69f4a 100644
>> --- a/drivers/staging/media/hantro/hantro_v4l2.c
>> +++ b/drivers/staging/media/hantro/hantro_v4l2.c
>> @@ -260,7 +260,7 @@ static int hantro_try_fmt(const struct hantro_ctx *ctx,
>> } else if (ctx->is_encoder) {
>> vpu_fmt = ctx->vpu_dst_fmt;
>> } else {
>> - vpu_fmt = ctx->vpu_src_fmt;
>> + vpu_fmt = fmt;
>> /*
>> * Width/height on the CAPTURE end of a decoder are ignored and
>> * replaced by the OUTPUT ones.
>> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
>> index 9802508bade2..b6b2bf65e56d 100644
>> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
>> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
>> @@ -83,6 +83,14 @@ static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_YUYV,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
> I remember seeing limits to 4096 instead in VSI documentation (UWHD). Just like
> many hardcoded limits in this driver, perhaps the limits was copied from the
> white paper performance example. It would be weird that we can decode portrait
> UHD/FHD.
>
> I think for this patch, what I'd like to see is the UDH and FHD limits to be set
> as a define. Since if we got them wrong, fixing them later after this patch
> becomes a lot more work, as its copied over and over.
>
> Everything else looks good to me know, thanks for the update.

I will do that in the next version

Benjamin

>
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = MB_DIM,
>> + },
>> },
>> };
>>
>> @@ -90,6 +98,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
>> @@ -137,6 +153,14 @@ static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = MB_DIM,
>> + },
>> },
>> };
>>
>> @@ -144,6 +168,14 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12_4L4,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = 4,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = 4,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_HEVC_SLICE,
>> @@ -152,10 +184,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
>> .frmsize = {
>> .min_width = 48,
>> .max_width = 3840,
>> - .step_width = MB_DIM,
>> + .step_width = 4,
>> .min_height = 48,
>> .max_height = 2160,
>> - .step_height = MB_DIM,
>> + .step_height = 4,
>> },
>> },
>> {
>> @@ -165,10 +197,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
>> .frmsize = {
>> .min_width = 48,
>> .max_width = 3840,
>> - .step_width = MB_DIM,
>> + .step_width = 4,
>> .min_height = 48,
>> .max_height = 2160,
>> - .step_height = MB_DIM,
>> + .step_height = 4,
>> },
>> },
>> };
>> diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
>> index fc96501f3bc8..efba7fcdf207 100644
>> --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c
>> +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
>> @@ -63,6 +63,14 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_YUYV,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1920,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 1088,
>> + .step_height = MB_DIM,
>> + },
>> },
>> };
>>
>> @@ -70,6 +78,14 @@ static const struct hantro_fmt rk3066_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1920,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 1088,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_H264_SLICE,
>> @@ -116,6 +132,14 @@ static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 4096,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 2304,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_H264_SLICE,
>> @@ -162,6 +186,14 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1920,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 1088,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_H264_SLICE,
>> diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
>> index b2fc1c5613e1..07ee804e706b 100644
>> --- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c
>> +++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
>> @@ -16,6 +16,14 @@ static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_YUYV,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1280,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 720,
>> + .step_height = MB_DIM,
>> + },
>> },
>> };
>>
>> @@ -23,6 +31,14 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1280,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 720,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
>> diff --git a/drivers/staging/media/hantro/sunxi_vpu_hw.c b/drivers/staging/media/hantro/sunxi_vpu_hw.c
>> index c0edd5856a0c..c2392c08febb 100644
>> --- a/drivers/staging/media/hantro/sunxi_vpu_hw.c
>> +++ b/drivers/staging/media/hantro/sunxi_vpu_hw.c
>> @@ -14,6 +14,14 @@ static const struct hantro_fmt sunxi_vpu_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = 32,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = 32,
>> + },
>> },
>> };
>>
>> @@ -21,6 +29,14 @@ static const struct hantro_fmt sunxi_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12_4L4,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = 32,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = 32,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_VP9_FRAME,
>> --
>> 2.32.0
>>
>>

2022-06-23 11:59:41

by Ezequiel Garcia

[permalink] [raw]
Subject: Re: [PATCH v2] media: hantro: Be more accurate on pixel formats step_width constraints

Hi Benjamin,

On Tue, May 17, 2022 at 02:26:29PM +0200, Benjamin Gaignard wrote:
> On Hantro G2 decoder on IMX8MQ strides requirements aren't the same
> for NV12_4L4 and NV12 pixel formats. The first one use a 4 bytes padding
> while the last one needs 8 bytes.
> To be sure to provide the correct stride in all cases we need:
> - to relax the constraints on codec formats so set step_width to 4
> - use capture queue format and not the output queue format when applying
> the pixel format constraints.
> - put the correct step_width constraints on each pixel format.
>
> Move HEVC SPS validation in hantro_hevc.c to be able to perform it
> when setting sps control and when starting to decode the bitstream.
> Add a new test in HEVC SPS validation function to check if resolution
> is still matching the hardware constraints.
>
> With this SAODBLK_A_MainConcept_4 and SAODBLK_B_MainConcept_4 conformance
> tests files are correctly decoded with both NV12 and NV12_4L4 pixel formats.
> These two files have a resolution of 1016x760.
> If step_width = 16 for the both pixel formats the selected capture
> resolution is 1024x768 which is wrong for NV12_4L4 (which expect 1016x760)
> on Hantro G2 on IMX8MQ (but correct for NV12).
>
> For other variants than Hantro G2 on IMX8M keep the same step_width to avoid
> regressions.
>
> Fluster HEVC test score is now 128/147 vs 126/147 with the both pixel
> formats as decoder output.
> Fluster VP9 test score stay at 147/303.
>
> Signed-off-by: Benjamin Gaignard <[email protected]>
> ---
> version 2:
> - Add a HEVC SPS validation function to be used when
> setting the control and start decoding.
> I hope that is what Nicolas expects in his remark on v1.
>
> drivers/staging/media/hantro/hantro_drv.c | 12 +++---
> drivers/staging/media/hantro/hantro_hevc.c | 28 +++++++++++++
> drivers/staging/media/hantro/hantro_hw.h | 2 +
> drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
> drivers/staging/media/hantro/imx8m_vpu_hw.c | 40 +++++++++++++++++--
> .../staging/media/hantro/rockchip_vpu_hw.c | 32 +++++++++++++++
> .../staging/media/hantro/sama5d4_vdec_hw.c | 16 ++++++++
> drivers/staging/media/hantro/sunxi_vpu_hw.c | 16 ++++++++
> 8 files changed, 137 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
> index 377dcc1d19de..5aac3a090480 100644
> --- a/drivers/staging/media/hantro/hantro_drv.c
> +++ b/drivers/staging/media/hantro/hantro_drv.c
> @@ -253,6 +253,11 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
>
> static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
> {
> + struct hantro_ctx *ctx;
> +
> + ctx = container_of(ctrl->handler,
> + struct hantro_ctx, ctrl_handler);
> +
> if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) {
> const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps;
>
> @@ -268,12 +273,7 @@ static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
> } else if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) {
> const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps;
>
> - if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
> - /* Luma and chroma bit depth mismatch */
> - return -EINVAL;
> - if (sps->bit_depth_luma_minus8 != 0)
> - /* Only 8-bit is supported */
> - return -EINVAL;
> + return hantro_hevc_validate_sps(ctx, sps);

This doesn't work. You cannot check the decoded format (vpu_dst_fmt) in .try_ctrl.

See:
https://www.kernel.org/doc/html/latest/userspace-api/media/v4l/dev-stateless-decoder.html.

The CAPTURE format is set (VIDIOC_S_FMT) only after SPS is set (VIDIOC_S_EXT_CTRLS).

> } else if (ctrl->id == V4L2_CID_STATELESS_VP9_FRAME) {
> const struct v4l2_ctrl_vp9_frame *dec_params = ctrl->p_new.p_vp9_frame;
>
> diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c
> index 7fdec50dc853..6abef810b285 100644
> --- a/drivers/staging/media/hantro/hantro_hevc.c
> +++ b/drivers/staging/media/hantro/hantro_hevc.c
> @@ -154,6 +154,30 @@ static int tile_buffer_reallocate(struct hantro_ctx *ctx)
> return -ENOMEM;
> }
>
> +int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
> +{
> + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
> + /* Luma and chroma bit depth mismatch */
> + return -EINVAL;
> + if (sps->bit_depth_luma_minus8 != 0)
> + /* Only 8-bit is supported */
> + return -EINVAL;
> +
> + /* for tile pixel format check if the width and height match
> + * hardware constraints */
> + if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
> + if (ctx->dst_fmt.width !=
> + ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
> + return -EINVAL;
> +
> + if (ctx->dst_fmt.height !=
> + ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
> {
> struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
> @@ -177,6 +201,10 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
> if (WARN_ON(!ctrls->sps))
> return -EINVAL;
>
> + ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
> + if (ret)

This doesn't look correct, validations shouldn't happen here,
since this is the .run callback.

All validations need to happen as a result of TRY_FMT, TRY_CTRLS.

> + return ret;
> +
> ctrls->pps =
> hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
> if (WARN_ON(!ctrls->pps))
> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
> index 994547fe41b9..0bba6378212d 100644
> --- a/drivers/staging/media/hantro/hantro_hw.h
> +++ b/drivers/staging/media/hantro/hantro_hw.h
> @@ -341,6 +341,8 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx);
> void hantro_hevc_ref_init(struct hantro_ctx *ctx);
> dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc);
> int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr);
> +int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps);
> +
>
> static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension)
> {
> diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
> index 71a6279750bf..93d0dcf69f4a 100644
> --- a/drivers/staging/media/hantro/hantro_v4l2.c
> +++ b/drivers/staging/media/hantro/hantro_v4l2.c
> @@ -260,7 +260,7 @@ static int hantro_try_fmt(const struct hantro_ctx *ctx,
> } else if (ctx->is_encoder) {
> vpu_fmt = ctx->vpu_dst_fmt;
> } else {
> - vpu_fmt = ctx->vpu_src_fmt;
> + vpu_fmt = fmt;

Seems this is the core of the fix, right?

Instead of using the OUTPUT queue always, we now use the right queue.
I wonder why we select any queue in the first place.

Thanks,
Ezequiel

> /*
> * Width/height on the CAPTURE end of a decoder are ignored and
> * replaced by the OUTPUT ones.
> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
> index 9802508bade2..b6b2bf65e56d 100644
> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
> @@ -83,6 +83,14 @@ static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_YUYV,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = MB_DIM,
> + },
> },
> };
>
> @@ -90,6 +98,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
> @@ -137,6 +153,14 @@ static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = MB_DIM,
> + },
> },
> };
>
> @@ -144,6 +168,14 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12_4L4,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = 4,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = 4,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_HEVC_SLICE,
> @@ -152,10 +184,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> .frmsize = {
> .min_width = 48,
> .max_width = 3840,
> - .step_width = MB_DIM,
> + .step_width = 4,
> .min_height = 48,
> .max_height = 2160,
> - .step_height = MB_DIM,
> + .step_height = 4,
> },
> },
> {
> @@ -165,10 +197,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> .frmsize = {
> .min_width = 48,
> .max_width = 3840,
> - .step_width = MB_DIM,
> + .step_width = 4,
> .min_height = 48,
> .max_height = 2160,
> - .step_height = MB_DIM,
> + .step_height = 4,
> },
> },
> };
> diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
> index fc96501f3bc8..efba7fcdf207 100644
> --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c
> +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
> @@ -63,6 +63,14 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_YUYV,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1920,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 1088,
> + .step_height = MB_DIM,
> + },
> },
> };
>
> @@ -70,6 +78,14 @@ static const struct hantro_fmt rk3066_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1920,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 1088,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_H264_SLICE,
> @@ -116,6 +132,14 @@ static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 4096,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 2304,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_H264_SLICE,
> @@ -162,6 +186,14 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1920,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 1088,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_H264_SLICE,
> diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
> index b2fc1c5613e1..07ee804e706b 100644
> --- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c
> +++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
> @@ -16,6 +16,14 @@ static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_YUYV,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1280,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 720,
> + .step_height = MB_DIM,
> + },
> },
> };
>
> @@ -23,6 +31,14 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 1280,
> + .step_width = MB_DIM,
> + .min_height = 48,
> + .max_height = 720,
> + .step_height = MB_DIM,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
> diff --git a/drivers/staging/media/hantro/sunxi_vpu_hw.c b/drivers/staging/media/hantro/sunxi_vpu_hw.c
> index c0edd5856a0c..c2392c08febb 100644
> --- a/drivers/staging/media/hantro/sunxi_vpu_hw.c
> +++ b/drivers/staging/media/hantro/sunxi_vpu_hw.c
> @@ -14,6 +14,14 @@ static const struct hantro_fmt sunxi_vpu_postproc_fmts[] = {
> .fourcc = V4L2_PIX_FMT_NV12,
> .codec_mode = HANTRO_MODE_NONE,
> .postprocessed = true,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = 32,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = 32,
> + },
> },
> };
>
> @@ -21,6 +29,14 @@ static const struct hantro_fmt sunxi_vpu_dec_fmts[] = {
> {
> .fourcc = V4L2_PIX_FMT_NV12_4L4,
> .codec_mode = HANTRO_MODE_NONE,
> + .frmsize = {
> + .min_width = 48,
> + .max_width = 3840,
> + .step_width = 32,
> + .min_height = 48,
> + .max_height = 2160,
> + .step_height = 32,
> + },
> },
> {
> .fourcc = V4L2_PIX_FMT_VP9_FRAME,
> --
> 2.32.0
>