2022-06-16 20:28:18

by Jernej Škrabec

[permalink] [raw]
Subject: [PATCH v2 3/7] media: hantro: postproc: Fix buffer size calculation

When allocating aux buffers for postprocessing, it's assumed that base
buffer size is the same as that of output. Coincidentally, that's true
most of the time, but not always. 10-bit source also needs aux buffer
size which is appropriate for 10-bit native format, even if the output
format is 8-bit. Similarly, mv sizes and other extra buffer size also
depends on source width/height, not destination.

Reviewed-by: Ezequiel Garcia <[email protected]>
Signed-off-by: Jernej Skrabec <[email protected]>
---
.../staging/media/hantro/hantro_postproc.c | 24 +++++++++++++------
drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
drivers/staging/media/hantro/hantro_v4l2.h | 2 ++
3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/staging/media/hantro/hantro_postproc.c
index ab168c1c0d28..b77cc55e43ea 100644
--- a/drivers/staging/media/hantro/hantro_postproc.c
+++ b/drivers/staging/media/hantro/hantro_postproc.c
@@ -12,6 +12,7 @@
#include "hantro_hw.h"
#include "hantro_g1_regs.h"
#include "hantro_g2_regs.h"
+#include "hantro_v4l2.h"

#define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
{ \
@@ -174,18 +175,27 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
unsigned int num_buffers = cap_queue->num_buffers;
+ struct v4l2_pix_format_mplane pix_mp;
+ const struct hantro_fmt *fmt;
unsigned int i, buf_size;

- buf_size = ctx->dst_fmt.plane_fmt[0].sizeimage;
+ /* this should always pick native format */
+ fmt = hantro_get_default_fmt(ctx, false);
+ if (!fmt)
+ return -EINVAL;
+ v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width,
+ ctx->src_fmt.height);
+
+ buf_size = pix_mp.plane_fmt[0].sizeimage;
if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
- buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
- ctx->dst_fmt.height);
+ buf_size += hantro_h264_mv_size(pix_mp.width,
+ pix_mp.height);
else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
- buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
- ctx->dst_fmt.height);
+ buf_size += hantro_vp9_mv_size(pix_mp.width,
+ pix_mp.height);
else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
- buf_size += hantro_hevc_mv_size(ctx->dst_fmt.width,
- ctx->dst_fmt.height);
+ buf_size += hantro_hevc_mv_size(pix_mp.width,
+ pix_mp.height);

for (i = 0; i < num_buffers; ++i) {
struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
index 334f18a4120d..2c7a805289e7 100644
--- a/drivers/staging/media/hantro/hantro_v4l2.c
+++ b/drivers/staging/media/hantro/hantro_v4l2.c
@@ -118,7 +118,7 @@ hantro_find_format(const struct hantro_ctx *ctx, u32 fourcc)
return NULL;
}

-static const struct hantro_fmt *
+const struct hantro_fmt *
hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream)
{
const struct hantro_fmt *formats;
diff --git a/drivers/staging/media/hantro/hantro_v4l2.h b/drivers/staging/media/hantro/hantro_v4l2.h
index b17e84c82582..64f6f57e9d7a 100644
--- a/drivers/staging/media/hantro/hantro_v4l2.h
+++ b/drivers/staging/media/hantro/hantro_v4l2.h
@@ -23,5 +23,7 @@ extern const struct vb2_ops hantro_queue_ops;

void hantro_reset_fmts(struct hantro_ctx *ctx);
int hantro_get_format_depth(u32 fourcc);
+const struct hantro_fmt *
+hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream);

#endif /* HANTRO_V4L2_H_ */
--
2.36.1


2022-06-28 16:30:29

by Ezequiel Garcia

[permalink] [raw]
Subject: Re: [PATCH v2 3/7] media: hantro: postproc: Fix buffer size calculation

Hi Jernej,

On Thu, Jun 16, 2022 at 10:25:09PM +0200, Jernej Skrabec wrote:
> When allocating aux buffers for postprocessing, it's assumed that base
> buffer size is the same as that of output. Coincidentally, that's true
> most of the time, but not always. 10-bit source also needs aux buffer
> size which is appropriate for 10-bit native format, even if the output
> format is 8-bit. Similarly, mv sizes and other extra buffer size also
> depends on source width/height, not destination.
>
> Reviewed-by: Ezequiel Garcia <[email protected]>

I took a new look at this patch.

> Signed-off-by: Jernej Skrabec <[email protected]>
> ---
> .../staging/media/hantro/hantro_postproc.c | 24 +++++++++++++------
> drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
> drivers/staging/media/hantro/hantro_v4l2.h | 2 ++
> 3 files changed, 20 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/staging/media/hantro/hantro_postproc.c
> index ab168c1c0d28..b77cc55e43ea 100644
> --- a/drivers/staging/media/hantro/hantro_postproc.c
> +++ b/drivers/staging/media/hantro/hantro_postproc.c
> @@ -12,6 +12,7 @@
> #include "hantro_hw.h"
> #include "hantro_g1_regs.h"
> #include "hantro_g2_regs.h"
> +#include "hantro_v4l2.h"
>
> #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
> { \
> @@ -174,18 +175,27 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
> struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
> unsigned int num_buffers = cap_queue->num_buffers;
> + struct v4l2_pix_format_mplane pix_mp;
> + const struct hantro_fmt *fmt;
> unsigned int i, buf_size;
>
> - buf_size = ctx->dst_fmt.plane_fmt[0].sizeimage;
> + /* this should always pick native format */
> + fmt = hantro_get_default_fmt(ctx, false);

Clearly this is correct.

When the driver enables the post-processor it decodes a coded format (H264, etc.)
to a native format (NV12_4L4 or P010_4L4) and feeds this into the postprocessor
engine to produce some other format (YUYV, NV12, etc.).

The buffers allocated here should be taken from the native format,
so it's correct to use hantro_get_default_fmt().

> + if (!fmt)
> + return -EINVAL;
> + v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width,
> + ctx->src_fmt.height);

The issue comes at this point, where we negotiate the buffer size based on the
source size (OUTPUT queue size), instead of negotiating based
on the Native size.

Coded -> [ Decoder ] -> Native -> [ Post-processor ] -> Decoded

So, while the patch is surely improving things, I wonder if it won't
cause other issues.

This reminds me we are still lacking a more complete test-suite for this
driver, so that we can validate changes and ensure there are no
regressions.

Perhaps we could hack Fluster to not only test the conformance,
but also test the post-processor?

Thanks,
Ezequiel


> +
> + buf_size = pix_mp.plane_fmt[0].sizeimage;
> if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
> - buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
> - ctx->dst_fmt.height);
> + buf_size += hantro_h264_mv_size(pix_mp.width,
> + pix_mp.height);
> else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
> - buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
> - ctx->dst_fmt.height);
> + buf_size += hantro_vp9_mv_size(pix_mp.width,
> + pix_mp.height);
> else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
> - buf_size += hantro_hevc_mv_size(ctx->dst_fmt.width,
> - ctx->dst_fmt.height);
> + buf_size += hantro_hevc_mv_size(pix_mp.width,
> + pix_mp.height);
>
> for (i = 0; i < num_buffers; ++i) {
> struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
> diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
> index 334f18a4120d..2c7a805289e7 100644
> --- a/drivers/staging/media/hantro/hantro_v4l2.c
> +++ b/drivers/staging/media/hantro/hantro_v4l2.c
> @@ -118,7 +118,7 @@ hantro_find_format(const struct hantro_ctx *ctx, u32 fourcc)
> return NULL;
> }
>
> -static const struct hantro_fmt *
> +const struct hantro_fmt *
> hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream)
> {
> const struct hantro_fmt *formats;
> diff --git a/drivers/staging/media/hantro/hantro_v4l2.h b/drivers/staging/media/hantro/hantro_v4l2.h
> index b17e84c82582..64f6f57e9d7a 100644
> --- a/drivers/staging/media/hantro/hantro_v4l2.h
> +++ b/drivers/staging/media/hantro/hantro_v4l2.h
> @@ -23,5 +23,7 @@ extern const struct vb2_ops hantro_queue_ops;
>
> void hantro_reset_fmts(struct hantro_ctx *ctx);
> int hantro_get_format_depth(u32 fourcc);
> +const struct hantro_fmt *
> +hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream);
>
> #endif /* HANTRO_V4L2_H_ */
> --
> 2.36.1
>

2022-06-28 16:32:26

by Jernej Škrabec

[permalink] [raw]
Subject: Re: [PATCH v2 3/7] media: hantro: postproc: Fix buffer size calculation

Dne torek, 28. junij 2022 ob 17:54:19 CEST je Ezequiel Garcia napisal(a):
> Hi Jernej,
>
> On Thu, Jun 16, 2022 at 10:25:09PM +0200, Jernej Skrabec wrote:
> > When allocating aux buffers for postprocessing, it's assumed that base
> > buffer size is the same as that of output. Coincidentally, that's true
> > most of the time, but not always. 10-bit source also needs aux buffer
> > size which is appropriate for 10-bit native format, even if the output
> > format is 8-bit. Similarly, mv sizes and other extra buffer size also
> > depends on source width/height, not destination.
> >
> > Reviewed-by: Ezequiel Garcia <[email protected]>
>
> I took a new look at this patch.
>
> > Signed-off-by: Jernej Skrabec <[email protected]>
> > ---
> >
> > .../staging/media/hantro/hantro_postproc.c | 24 +++++++++++++------
> > drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
> > drivers/staging/media/hantro/hantro_v4l2.h | 2 ++
> > 3 files changed, 20 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/staging/media/hantro/hantro_postproc.c
> > b/drivers/staging/media/hantro/hantro_postproc.c index
> > ab168c1c0d28..b77cc55e43ea 100644
> > --- a/drivers/staging/media/hantro/hantro_postproc.c
> > +++ b/drivers/staging/media/hantro/hantro_postproc.c
> > @@ -12,6 +12,7 @@
> >
> > #include "hantro_hw.h"
> > #include "hantro_g1_regs.h"
> > #include "hantro_g2_regs.h"
> >
> > +#include "hantro_v4l2.h"
> >
> > #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
> > { \
> >
> > @@ -174,18 +175,27 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
> >
> > struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
> > unsigned int num_buffers = cap_queue->num_buffers;
> >
> > + struct v4l2_pix_format_mplane pix_mp;
> > + const struct hantro_fmt *fmt;
> >
> > unsigned int i, buf_size;
> >
> > - buf_size = ctx->dst_fmt.plane_fmt[0].sizeimage;
> > + /* this should always pick native format */
> > + fmt = hantro_get_default_fmt(ctx, false);
>
> Clearly this is correct.
>
> When the driver enables the post-processor it decodes a coded format (H264,
> etc.) to a native format (NV12_4L4 or P010_4L4) and feeds this into the
> postprocessor engine to produce some other format (YUYV, NV12, etc.).
>
> The buffers allocated here should be taken from the native format,
> so it's correct to use hantro_get_default_fmt().
>
> > + if (!fmt)
> > + return -EINVAL;
> > + v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width,
> > + ctx->src_fmt.height);
>
> The issue comes at this point, where we negotiate the buffer size based on
> the source size (OUTPUT queue size), instead of negotiating based
> on the Native size.
>
> Coded -> [ Decoder ] -> Native -> [ Post-processor ] -> Decoded

I'm not sure what is the difference between source and native size? You mean
one coded in controls and one set via output format? IMO they should always be
the same, otherwise it can be considered a bug in userspace application.

Best regards,
Jernej

>
> So, while the patch is surely improving things, I wonder if it won't
> cause other issues.
>
> This reminds me we are still lacking a more complete test-suite for this
> driver, so that we can validate changes and ensure there are no
> regressions.
>
> Perhaps we could hack Fluster to not only test the conformance,
> but also test the post-processor?
>
> Thanks,
> Ezequiel
>
> > +
> > + buf_size = pix_mp.plane_fmt[0].sizeimage;
> >
> > if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
> >
> > - buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
> > - ctx-
>dst_fmt.height);
> > + buf_size += hantro_h264_mv_size(pix_mp.width,
> > +
pix_mp.height);
> >
> > else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
> >
> > - buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
> > - ctx-
>dst_fmt.height);
> > + buf_size += hantro_vp9_mv_size(pix_mp.width,
> > + pix_mp.height);
> >
> > else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
> >
> > - buf_size += hantro_hevc_mv_size(ctx->dst_fmt.width,
> > - ctx-
>dst_fmt.height);
> > + buf_size += hantro_hevc_mv_size(pix_mp.width,
> > +
pix_mp.height);
> >
> > for (i = 0; i < num_buffers; ++i) {
> >
> > struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
> >
> > diff --git a/drivers/staging/media/hantro/hantro_v4l2.c
> > b/drivers/staging/media/hantro/hantro_v4l2.c index
> > 334f18a4120d..2c7a805289e7 100644
> > --- a/drivers/staging/media/hantro/hantro_v4l2.c
> > +++ b/drivers/staging/media/hantro/hantro_v4l2.c
> > @@ -118,7 +118,7 @@ hantro_find_format(const struct hantro_ctx *ctx, u32
> > fourcc)>
> > return NULL;
> >
> > }
> >
> > -static const struct hantro_fmt *
> > +const struct hantro_fmt *
> >
> > hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream)
> > {
> >
> > const struct hantro_fmt *formats;
> >
> > diff --git a/drivers/staging/media/hantro/hantro_v4l2.h
> > b/drivers/staging/media/hantro/hantro_v4l2.h index
> > b17e84c82582..64f6f57e9d7a 100644
> > --- a/drivers/staging/media/hantro/hantro_v4l2.h
> > +++ b/drivers/staging/media/hantro/hantro_v4l2.h
> > @@ -23,5 +23,7 @@ extern const struct vb2_ops hantro_queue_ops;
> >
> > void hantro_reset_fmts(struct hantro_ctx *ctx);
> > int hantro_get_format_depth(u32 fourcc);
> >
> > +const struct hantro_fmt *
> > +hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream);
> >
> > #endif /* HANTRO_V4L2_H_ */




2022-06-28 21:33:52

by Nicolas Dufresne

[permalink] [raw]
Subject: Re: [PATCH v2 3/7] media: hantro: postproc: Fix buffer size calculation

Le mardi 28 juin 2022 à 18:13 +0200, Jernej Škrabec a écrit :
> Dne torek, 28. junij 2022 ob 17:54:19 CEST je Ezequiel Garcia napisal(a):
> > Hi Jernej,
> >
> > On Thu, Jun 16, 2022 at 10:25:09PM +0200, Jernej Skrabec wrote:
> > > When allocating aux buffers for postprocessing, it's assumed that base
> > > buffer size is the same as that of output. Coincidentally, that's true
> > > most of the time, but not always. 10-bit source also needs aux buffer
> > > size which is appropriate for 10-bit native format, even if the output
> > > format is 8-bit. Similarly, mv sizes and other extra buffer size also
> > > depends on source width/height, not destination.
> > >
> > > Reviewed-by: Ezequiel Garcia <[email protected]>
> >
> > I took a new look at this patch.
> >
> > > Signed-off-by: Jernej Skrabec <[email protected]>
> > > ---
> > >
> > > .../staging/media/hantro/hantro_postproc.c | 24 +++++++++++++------
> > > drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
> > > drivers/staging/media/hantro/hantro_v4l2.h | 2 ++
> > > 3 files changed, 20 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/staging/media/hantro/hantro_postproc.c
> > > b/drivers/staging/media/hantro/hantro_postproc.c index
> > > ab168c1c0d28..b77cc55e43ea 100644
> > > --- a/drivers/staging/media/hantro/hantro_postproc.c
> > > +++ b/drivers/staging/media/hantro/hantro_postproc.c
> > > @@ -12,6 +12,7 @@
> > >
> > > #include "hantro_hw.h"
> > > #include "hantro_g1_regs.h"
> > > #include "hantro_g2_regs.h"
> > >
> > > +#include "hantro_v4l2.h"
> > >
> > > #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
> > > { \
> > >
> > > @@ -174,18 +175,27 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
> > >
> > > struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
> > > unsigned int num_buffers = cap_queue->num_buffers;
> > >
> > > + struct v4l2_pix_format_mplane pix_mp;
> > > + const struct hantro_fmt *fmt;
> > >
> > > unsigned int i, buf_size;
> > >
> > > - buf_size = ctx->dst_fmt.plane_fmt[0].sizeimage;
> > > + /* this should always pick native format */
> > > + fmt = hantro_get_default_fmt(ctx, false);
> >
> > Clearly this is correct.
> >
> > When the driver enables the post-processor it decodes a coded format (H264,
> > etc.) to a native format (NV12_4L4 or P010_4L4) and feeds this into the
> > postprocessor engine to produce some other format (YUYV, NV12, etc.).
> >
> > The buffers allocated here should be taken from the native format,
> > so it's correct to use hantro_get_default_fmt().
> >
> > > + if (!fmt)
> > > + return -EINVAL;
> > > + v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width,
> > > + ctx->src_fmt.height);
> >
> > The issue comes at this point, where we negotiate the buffer size based on
> > the source size (OUTPUT queue size), instead of negotiating based
> > on the Native size.
> >
> > Coded -> [ Decoder ] -> Native -> [ Post-processor ] -> Decoded
>
> I'm not sure what is the difference between source and native size? You mean
> one coded in controls and one set via output format? IMO they should always be
> the same, otherwise it can be considered a bug in userspace application.

Indeed the src_fmt should use coded width/height (as per spec). The driver will
then adapt to its own requirement resulting into the "native" width height being
returned. Notice that s_ctrl() should fail in case of miss-match (this is CODEC
specific), or streamon() should fail if the codec specific control have never
been set (as we always initialise this, it will fail due to default being an
invalid value anyway).

As a side effect, when userland read the default format (G_FMT(CAPTURE), the
width/height should match the src_dst for this driver. This is the native size.
The optional path that this driver enables is enumeration of CAPTURE format and
frame sizes, combined with to select from these. The driver will create a
secondary set of buffers in the case.

Nicolas

>
> Best regards,
> Jernej
>
> >
> > So, while the patch is surely improving things, I wonder if it won't
> > cause other issues.
> >
> > This reminds me we are still lacking a more complete test-suite for this
> > driver, so that we can validate changes and ensure there are no
> > regressions.
> >
> > Perhaps we could hack Fluster to not only test the conformance,
> > but also test the post-processor?
> >
> > Thanks,
> > Ezequiel
> >
> > > +
> > > + buf_size = pix_mp.plane_fmt[0].sizeimage;
> > >
> > > if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
> > >
> > > - buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
> > > - ctx-
> > dst_fmt.height);
> > > + buf_size += hantro_h264_mv_size(pix_mp.width,
> > > +
> pix_mp.height);
> > >
> > > else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
> > >
> > > - buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
> > > - ctx-
> > dst_fmt.height);
> > > + buf_size += hantro_vp9_mv_size(pix_mp.width,
> > > + pix_mp.height);
> > >
> > > else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
> > >
> > > - buf_size += hantro_hevc_mv_size(ctx->dst_fmt.width,
> > > - ctx-
> > dst_fmt.height);
> > > + buf_size += hantro_hevc_mv_size(pix_mp.width,
> > > +
> pix_mp.height);
> > >
> > > for (i = 0; i < num_buffers; ++i) {
> > >
> > > struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
> > >
> > > diff --git a/drivers/staging/media/hantro/hantro_v4l2.c
> > > b/drivers/staging/media/hantro/hantro_v4l2.c index
> > > 334f18a4120d..2c7a805289e7 100644
> > > --- a/drivers/staging/media/hantro/hantro_v4l2.c
> > > +++ b/drivers/staging/media/hantro/hantro_v4l2.c
> > > @@ -118,7 +118,7 @@ hantro_find_format(const struct hantro_ctx *ctx, u32
> > > fourcc)>
> > > return NULL;
> > >
> > > }
> > >
> > > -static const struct hantro_fmt *
> > > +const struct hantro_fmt *
> > >
> > > hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream)
> > > {
> > >
> > > const struct hantro_fmt *formats;
> > >
> > > diff --git a/drivers/staging/media/hantro/hantro_v4l2.h
> > > b/drivers/staging/media/hantro/hantro_v4l2.h index
> > > b17e84c82582..64f6f57e9d7a 100644
> > > --- a/drivers/staging/media/hantro/hantro_v4l2.h
> > > +++ b/drivers/staging/media/hantro/hantro_v4l2.h
> > > @@ -23,5 +23,7 @@ extern const struct vb2_ops hantro_queue_ops;
> > >
> > > void hantro_reset_fmts(struct hantro_ctx *ctx);
> > > int hantro_get_format_depth(u32 fourcc);
> > >
> > > +const struct hantro_fmt *
> > > +hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream);
> > >
> > > #endif /* HANTRO_V4L2_H_ */
>
>
>
>

2022-06-29 17:36:13

by Ezequiel Garcia

[permalink] [raw]
Subject: Re: [PATCH v2 3/7] media: hantro: postproc: Fix buffer size calculation

Hi Nicolas, Jernej,

On Tue, Jun 28, 2022 at 04:06:13PM -0400, Nicolas Dufresne wrote:
> Le mardi 28 juin 2022 à 18:13 +0200, Jernej Škrabec a écrit :
> > Dne torek, 28. junij 2022 ob 17:54:19 CEST je Ezequiel Garcia napisal(a):
> > > Hi Jernej,
> > >
> > > On Thu, Jun 16, 2022 at 10:25:09PM +0200, Jernej Skrabec wrote:
> > > > When allocating aux buffers for postprocessing, it's assumed that base
> > > > buffer size is the same as that of output. Coincidentally, that's true
> > > > most of the time, but not always. 10-bit source also needs aux buffer
> > > > size which is appropriate for 10-bit native format, even if the output
> > > > format is 8-bit. Similarly, mv sizes and other extra buffer size also
> > > > depends on source width/height, not destination.
> > > >
> > > > Reviewed-by: Ezequiel Garcia <[email protected]>
> > >
> > > I took a new look at this patch.
> > >
> > > > Signed-off-by: Jernej Skrabec <[email protected]>
> > > > ---
> > > >
> > > > .../staging/media/hantro/hantro_postproc.c | 24 +++++++++++++------
> > > > drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
> > > > drivers/staging/media/hantro/hantro_v4l2.h | 2 ++
> > > > 3 files changed, 20 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/staging/media/hantro/hantro_postproc.c
> > > > b/drivers/staging/media/hantro/hantro_postproc.c index
> > > > ab168c1c0d28..b77cc55e43ea 100644
> > > > --- a/drivers/staging/media/hantro/hantro_postproc.c
> > > > +++ b/drivers/staging/media/hantro/hantro_postproc.c
> > > > @@ -12,6 +12,7 @@
> > > >
> > > > #include "hantro_hw.h"
> > > > #include "hantro_g1_regs.h"
> > > > #include "hantro_g2_regs.h"
> > > >
> > > > +#include "hantro_v4l2.h"
> > > >
> > > > #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
> > > > { \
> > > >
> > > > @@ -174,18 +175,27 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
> > > >
> > > > struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
> > > > unsigned int num_buffers = cap_queue->num_buffers;
> > > >
> > > > + struct v4l2_pix_format_mplane pix_mp;
> > > > + const struct hantro_fmt *fmt;
> > > >
> > > > unsigned int i, buf_size;
> > > >
> > > > - buf_size = ctx->dst_fmt.plane_fmt[0].sizeimage;
> > > > + /* this should always pick native format */
> > > > + fmt = hantro_get_default_fmt(ctx, false);
> > >
> > > Clearly this is correct.
> > >
> > > When the driver enables the post-processor it decodes a coded format (H264,
> > > etc.) to a native format (NV12_4L4 or P010_4L4) and feeds this into the
> > > postprocessor engine to produce some other format (YUYV, NV12, etc.).
> > >
> > > The buffers allocated here should be taken from the native format,
> > > so it's correct to use hantro_get_default_fmt().
> > >
> > > > + if (!fmt)
> > > > + return -EINVAL;
> > > > + v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width,
> > > > + ctx->src_fmt.height);
> > >
> > > The issue comes at this point, where we negotiate the buffer size based on
> > > the source size (OUTPUT queue size), instead of negotiating based
> > > on the Native size.
> > >
> > > Coded -> [ Decoder ] -> Native -> [ Post-processor ] -> Decoded
> >
> > I'm not sure what is the difference between source and native size? You mean
> > one coded in controls and one set via output format? IMO they should always be
> > the same, otherwise it can be considered a bug in userspace application.
>
> Indeed the src_fmt should use coded width/height (as per spec). The driver will
> then adapt to its own requirement resulting into the "native" width height being
> returned. Notice that s_ctrl() should fail in case of miss-match (this is CODEC
> specific), or streamon() should fail if the codec specific control have never
> been set (as we always initialise this, it will fail due to default being an
> invalid value anyway).
>
> As a side effect, when userland read the default format (G_FMT(CAPTURE), the
> width/height should match the src_dst for this driver. This is the native size.
> The optional path that this driver enables is enumeration of CAPTURE format and
> frame sizes, combined with to select from these. The driver will create a
> secondary set of buffers in the case.
>

OK, the patch looks good then.

Thanks,
Ezequiel

> Nicolas
>
> >
> > Best regards,
> > Jernej
> >
> > >
> > > So, while the patch is surely improving things, I wonder if it won't
> > > cause other issues.
> > >
> > > This reminds me we are still lacking a more complete test-suite for this
> > > driver, so that we can validate changes and ensure there are no
> > > regressions.
> > >
> > > Perhaps we could hack Fluster to not only test the conformance,
> > > but also test the post-processor?
> > >
> > > Thanks,
> > > Ezequiel
> > >
> > > > +
> > > > + buf_size = pix_mp.plane_fmt[0].sizeimage;
> > > >
> > > > if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
> > > >
> > > > - buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
> > > > - ctx-
> > > dst_fmt.height);
> > > > + buf_size += hantro_h264_mv_size(pix_mp.width,
> > > > +
> > pix_mp.height);
> > > >
> > > > else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
> > > >
> > > > - buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
> > > > - ctx-
> > > dst_fmt.height);
> > > > + buf_size += hantro_vp9_mv_size(pix_mp.width,
> > > > + pix_mp.height);
> > > >
> > > > else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
> > > >
> > > > - buf_size += hantro_hevc_mv_size(ctx->dst_fmt.width,
> > > > - ctx-
> > > dst_fmt.height);
> > > > + buf_size += hantro_hevc_mv_size(pix_mp.width,
> > > > +
> > pix_mp.height);
> > > >
> > > > for (i = 0; i < num_buffers; ++i) {
> > > >
> > > > struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
> > > >
> > > > diff --git a/drivers/staging/media/hantro/hantro_v4l2.c
> > > > b/drivers/staging/media/hantro/hantro_v4l2.c index
> > > > 334f18a4120d..2c7a805289e7 100644
> > > > --- a/drivers/staging/media/hantro/hantro_v4l2.c
> > > > +++ b/drivers/staging/media/hantro/hantro_v4l2.c
> > > > @@ -118,7 +118,7 @@ hantro_find_format(const struct hantro_ctx *ctx, u32
> > > > fourcc)>
> > > > return NULL;
> > > >
> > > > }
> > > >
> > > > -static const struct hantro_fmt *
> > > > +const struct hantro_fmt *
> > > >
> > > > hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream)
> > > > {
> > > >
> > > > const struct hantro_fmt *formats;
> > > >
> > > > diff --git a/drivers/staging/media/hantro/hantro_v4l2.h
> > > > b/drivers/staging/media/hantro/hantro_v4l2.h index
> > > > b17e84c82582..64f6f57e9d7a 100644
> > > > --- a/drivers/staging/media/hantro/hantro_v4l2.h
> > > > +++ b/drivers/staging/media/hantro/hantro_v4l2.h
> > > > @@ -23,5 +23,7 @@ extern const struct vb2_ops hantro_queue_ops;
> > > >
> > > > void hantro_reset_fmts(struct hantro_ctx *ctx);
> > > > int hantro_get_format_depth(u32 fourcc);
> > > >
> > > > +const struct hantro_fmt *
> > > > +hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream);
> > > >
> > > > #endif /* HANTRO_V4L2_H_ */
> >
> >
> >
> >
>