This series implements an AV1 stateless decoder for the RK3588 SoC.
The hardware supports 8-bit and 10-bit bitstreams up to 7680x4320.
AV1 features like film grain or scaling are handled by the postprocessor.
The driver can produce the NV12_4L4, NV12_10LE40_4L4, NV12 and P010 pixel formats.
Even though Rockchip has named the hardware VPU981, it looks like a VC9000 but
with a different register mapping.
It is based on Daniel's "[PATCH v4] media: Add AV1 uAPI" [1] patches.
The full branch can be found here:
https://gitlab.collabora.com/linux/for-upstream/-/commits/rk3588_av1_decoder_v3
Fluster score is: 200/239 while testing AV1-TEST-VECTORS with GStreamer-AV1-V4L2SL-Gst1.0.
The failing tests are:
- the 2 tests with 2 spatial layers: few errors in luma/chroma values
- tests with resolution < hardware limit (64x64)
- 10-bit film grain test: bad macroblocks while decoding; the same test in
8 bits works fine.
Changes in v3:
- Fix array loop limits.
- Remove unused field.
- Reset raw pixel formats list when bit depth or film grain feature
values change.
- Enable post-processor P010 support
Changes in v2:
- Remove useless +1 in sbs computation.
- Describe NV12_10LE40_4L4 pixel format.
- Post-processor could generate P010.
- Fix comments done on v1.
- The last patch makes sure that only post-processed formats are used when the
film grain feature is enabled.
Benjamin
[1] https://lore.kernel.org/linux-media/[email protected]/T/#u
Benjamin Gaignard (12):
dt-bindings: media: rockchip-vpu: Add rk3588 vpu compatible
media: Add NV12_10LE40_4L4 pixel format
media: verisilicon: Get bit depth for V4L2_PIX_FMT_NV12_10LE40_4L4
media: verisilicon: Add AV1 decoder mode and controls
media: verisilicon: Save bit depth for AV1 decoder
media: verisilicon: Check AV1 bitstreams bit depth
media: verisilicon: Compute motion vectors size for AV1 frames
media: verisilicon: Add AV1 entropy helpers
media: verisilicon: Add Rockchip AV1 decoder
media: verisilicon: Add film grain feature to AV1 driver
media: verisilicon: Enable AV1 decoder on rk3588
media: verisilicon: Conditionally ignore native formats
Nicolas Dufresne (1):
v4l2-common: Add support for fractional bpp
.../bindings/media/rockchip-vpu.yaml | 1 +
.../media/v4l/pixfmt-yuv-planar.rst | 4 +
drivers/media/platform/verisilicon/Makefile | 3 +
drivers/media/platform/verisilicon/hantro.h | 8 +
.../media/platform/verisilicon/hantro_drv.c | 69 +
.../media/platform/verisilicon/hantro_hw.h | 102 +
.../platform/verisilicon/hantro_postproc.c | 7 +
.../media/platform/verisilicon/hantro_v4l2.c | 22 +-
.../media/platform/verisilicon/hantro_v4l2.h | 1 +
.../verisilicon/rockchip_av1_entropymode.c | 4546 +++++++++++++++++
.../verisilicon/rockchip_av1_entropymode.h | 272 +
.../verisilicon/rockchip_av1_filmgrain.c | 401 ++
.../verisilicon/rockchip_av1_filmgrain.h | 36 +
.../verisilicon/rockchip_vpu981_hw_av1_dec.c | 2278 +++++++++
.../verisilicon/rockchip_vpu981_regs.h | 477 ++
.../platform/verisilicon/rockchip_vpu_hw.c | 134 +
drivers/media/v4l2-core/v4l2-common.c | 149 +-
drivers/media/v4l2-core/v4l2-ioctl.c | 1 +
include/media/v4l2-common.h | 2 +
include/uapi/linux/videodev2.h | 1 +
20 files changed, 8439 insertions(+), 75 deletions(-)
create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_entropymode.c
create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_entropymode.h
create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c
create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h
create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
--
2.34.1
Implement an AV1 stateless decoder for the Rockchip VPU981.
It decodes 8-bit and 10-bit AV1 bitstreams.
AV1 scaling feature is done by the postprocessor.
Signed-off-by: Benjamin Gaignard <[email protected]>
---
v3:
- Fix array loop limits.
- Remove unused field.
drivers/media/platform/verisilicon/Makefile | 1 +
.../media/platform/verisilicon/hantro_hw.h | 64 +-
.../verisilicon/rockchip_vpu981_hw_av1_dec.c | 2065 +++++++++++++++++
.../verisilicon/rockchip_vpu981_regs.h | 477 ++++
4 files changed, 2605 insertions(+), 2 deletions(-)
create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
index d2b2679c00eb..c9a9806ab8c5 100644
--- a/drivers/media/platform/verisilicon/Makefile
+++ b/drivers/media/platform/verisilicon/Makefile
@@ -18,6 +18,7 @@ hantro-vpu-y += \
rockchip_vpu2_hw_h264_dec.o \
rockchip_vpu2_hw_mpeg2_dec.o \
rockchip_vpu2_hw_vp8_dec.o \
+ rockchip_vpu981_hw_av1_dec.o \
rockchip_av1_entropymode.o \
hantro_jpeg.o \
hantro_h264.o \
diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
index c7438e197d85..1741ef939bf8 100644
--- a/drivers/media/platform/verisilicon/hantro_hw.h
+++ b/drivers/media/platform/verisilicon/hantro_hw.h
@@ -37,6 +37,8 @@
#define NUM_REF_PICTURES (V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
+#define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
+
struct hantro_dev;
struct hantro_ctx;
struct hantro_buf;
@@ -250,23 +252,81 @@ struct hantro_vp9_dec_hw_ctx {
};
/**
- * hantro_av1_dec_hw_ctx
+ * struct hantro_av1_dec_ctrls - pointers to the V4L2 AV1 controls of a run
+ * @sequence: AV1 Sequence
+ * @tile_group_entry: AV1 Tile Group entry
+ * @frame: AV1 Frame Header OBU
+ * @film_grain: AV1 Film Grain
+ */
+struct hantro_av1_dec_ctrls {
+ const struct v4l2_ctrl_av1_sequence *sequence;
+ const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
+ const struct v4l2_ctrl_av1_frame *frame;
+ const struct v4l2_ctrl_av1_film_grain *film_grain;
+};
+
+/**
+ * struct hantro_av1_frame_ref - bookkeeping for one reference frame slot
+ * @width: frame width, stored as frame_width_minus_1 + 1
+ * @height: frame height, stored as frame_height_minus_1 + 1
+ * @mi_cols: frame width divided by 8, rounded up
+ * @mi_rows: frame height divided by 8, rounded up
+ * @timestamp: timestamp of the frame, compared with reference_frame_ts[]
+ * @frame_type: frame type copied from the frame control
+ * @used: true while the slot is occupied
+ * @order_hint: order hint copied from the frame control
+ * @order_hints: order hints array copied from the frame control
+ * @vb2_ref: destination buffer attached to the slot
+ */
+struct hantro_av1_frame_ref {
+ int width;
+ int height;
+ int mi_cols;
+ int mi_rows;
+ u64 timestamp;
+ enum v4l2_av1_frame_type frame_type;
+ bool used;
+ u32 order_hint;
+ u32 order_hints[V4L2_AV1_TOTAL_REFS_PER_FRAME];
+ struct vb2_v4l2_buffer *vb2_ref;
+};
+
+/**
+ * struct hantro_av1_dec_hw_ctx
+ * @db_data_col: db tile col data buffer
+ * @db_ctrl_col: db tile col ctrl buffer
+ * @cdef_col: cdef tile col buffer
+ * @sr_col: sr tile col buffer
+ * @lr_col: lr tile col buffer
+ * @global_model: global model buffer
+ * @tile_info: tile info buffer
+ * @segment: segmentation info buffer
+ * @prob_tbl: probability table
+ * @prob_tbl_out: probability table output
+ * @tile_buf: tile buffer
+ * @ctrls: V4L2 controls attached to a run
+ * @frame_refs: reference frames info slots
+ * @ref_frame_sign_bias: array of sign bias
+ * @num_tile_cols_allocated: number of allocated tiles
* @cdfs: current probabilities structure
* @cdfs_ndvc: current mv probabilities structure
* @default_cdfs: default probabilities structure
* @default_cdfs_ndvc: default mv probabilties structure
* @cdfs_last: stored probabilities structures
* @cdfs_last_ndvc: stored mv probabilities structures
+ * @current_frame_index: index of the current frame in the frame_refs array
*/
struct hantro_av1_dec_hw_ctx {
+ struct hantro_aux_buf db_data_col;
+ struct hantro_aux_buf db_ctrl_col;
+ struct hantro_aux_buf cdef_col;
+ struct hantro_aux_buf sr_col;
+ struct hantro_aux_buf lr_col;
+ struct hantro_aux_buf global_model;
+ struct hantro_aux_buf tile_info;
+ struct hantro_aux_buf segment;
+ struct hantro_aux_buf prob_tbl;
+ struct hantro_aux_buf prob_tbl_out;
+ struct hantro_aux_buf tile_buf;
+ struct hantro_av1_dec_ctrls ctrls;
+ struct hantro_av1_frame_ref frame_refs[AV1_MAX_FRAME_BUF_COUNT];
+ uint32_t ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME];
+ unsigned int num_tile_cols_allocated;
struct av1cdfs *cdfs;
struct mvcdfs *cdfs_ndvc;
struct av1cdfs default_cdfs;
struct mvcdfs default_cdfs_ndvc;
struct av1cdfs cdfs_last[NUM_REF_FRAMES];
struct mvcdfs cdfs_last_ndvc[NUM_REF_FRAMES];
+ int current_frame_index;
};
-
/**
* struct hantro_postproc_ctx
*
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
new file mode 100644
index 000000000000..81aeb1d6b93f
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
@@ -0,0 +1,2065 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Collabora
+ *
+ * Author: Benjamin Gaignard <[email protected]>
+ */
+
+#include <media/v4l2-mem2mem.h>
+#include "hantro.h"
+#include "hantro_v4l2.h"
+#include "rockchip_vpu981_regs.h"
+
+#define AV1_DEC_MODE 17
+#define GM_GLOBAL_MODELS_PER_FRAME 7
+#define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
+#define GLOBAL_MODEL_SIZE ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
+#define AV1_MAX_TILES 128
+#define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
+#define AV1DEC_MAX_PIC_BUFFERS 24
+#define AV1_REF_SCALE_SHIFT 14
+#define AV1_INVALID_IDX -1
+#define MAX_FRAME_DISTANCE 31
+#define AV1_PRIMARY_REF_NONE 7
+#define AV1_TILE_SIZE ALIGN(32 * 128, 4096)
+/*
+ * These 3 values aren't defined in enum v4l2_av1_segment_feature because
+ * they are not part of the specification
+ */
+#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H 2
+#define V4L2_AV1_SEG_LVL_ALT_LF_U 3
+#define V4L2_AV1_SEG_LVL_ALT_LF_V 4
+
+#define SUPERRES_SCALE_BITS 3
+#define SCALE_NUMERATOR 8
+#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
+
+#define RS_SUBPEL_BITS 6
+#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
+#define RS_SCALE_SUBPEL_BITS 14
+#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
+#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
+#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
+
+#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
+
+#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
+
+#define DIV_LUT_PREC_BITS 14
+#define DIV_LUT_BITS 8
+#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
+#define WARP_PARAM_REDUCE_BITS 6
+#define WARPEDMODEL_PREC_BITS 16
+
+/*
+ * Divide @value by 2^@n with rounding: half of the divisor is added before
+ * the right shift. Both arguments are evaluated exactly once.
+ */
+#define AV1_DIV_ROUND_UP_POW2(value, n) \
+({ \
+ typeof(n) _n = n; \
+ typeof(value) _value = value; \
+ (_value + (BIT(_n) >> 1)) >> _n; \
+})
+
+/*
+ * Signed variant of AV1_DIV_ROUND_UP_POW2(): a negative @value is negated,
+ * rounded as a positive number and negated again, so the rounding is
+ * symmetric around zero. Uses distinct temporaries (_n_, _value_) so they do
+ * not collide with the ones declared by the nested macro.
+ */
+#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \
+({ \
+ typeof(n) _n_ = n; \
+ typeof(value) _value_ = value; \
+ (((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \
+ : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \
+})
+
+/*
+ * Film grain data: one 256-entry scaling lookup table per plane (Y, Cb, Cr)
+ * followed by the cropped luma and chroma grain blocks.
+ * NOTE(review): presumably this mirrors the memory layout consumed by the
+ * hardware; its users are not visible here - confirm before reordering.
+ */
+struct rockchip_av1_film_grain {
+ uint8_t scaling_lut_y[256];
+ uint8_t scaling_lut_cb[256];
+ uint8_t scaling_lut_cr[256];
+ int16_t cropped_luma_grain_block[4096];
+ int16_t cropped_chroma_grain_block[1024 * 2];
+};
+
+/*
+ * Table of reciprocals used by rockchip_vpu981_av1_dec_resolve_divisor_32().
+ * The entries match 2^(DIV_LUT_PREC_BITS + DIV_LUT_BITS) / (DIV_LUT_NUM + i)
+ * rounded to the nearest integer, for i in [0, DIV_LUT_NUM]: the first entry
+ * is 2^14 and the last one is 2^13.
+ */
+static const short div_lut[DIV_LUT_NUM + 1] = {
+ 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
+ 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
+ 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
+ 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
+ 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
+ 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
+ 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
+ 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
+ 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
+ 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
+ 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
+ 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
+ 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
+ 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
+ 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
+ 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
+ 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
+ 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
+ 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
+ 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
+ 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
+ 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
+ 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
+ 8240, 8224, 8208, 8192,
+};
+
+/*
+ * Find the frame_refs slot that backs reference @ref of the current frame.
+ *
+ * @ref selects an entry of the frame control's ref_frame_idx[]; that entry in
+ * turn selects a timestamp in reference_frame_ts[], which is searched for
+ * among the slots currently in use.
+ *
+ * Returns the slot index, or AV1_INVALID_IDX when ref_frame_idx[@ref] is out
+ * of range or no used slot carries the timestamp.
+ * NOTE(review): @ref itself is not range-checked here - callers must keep it
+ * within ref_frame_idx[].
+ */
+static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	int ts_idx = frame->ref_frame_idx[ref];
+	int slot;
+	u64 wanted;
+
+	if (ts_idx < 0 || ts_idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME)
+		return AV1_INVALID_IDX;
+
+	wanted = frame->reference_frame_ts[ts_idx];
+	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
+		const struct hantro_av1_frame_ref *fr = &av1_dec->frame_refs[slot];
+
+		if (fr->used && fr->timestamp == wanted)
+			return slot;
+	}
+
+	return AV1_INVALID_IDX;
+}
+
+/*
+ * Return the order hint stored for reference @ref of the current frame, or 0
+ * when no slot could be found for that reference.
+ */
+static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
+{
+	int slot = rockchip_vpu981_get_frame_index(ctx, ref);
+
+	if (slot == AV1_INVALID_IDX)
+		return 0;
+
+	return ctx->av1_dec.frame_refs[slot].order_hint;
+}
+
+/*
+ * Record the current frame in the first free frame_refs slot.
+ *
+ * The slot is filled from the frame control (dimensions, frame type, order
+ * hints) and tagged with @timestamp, then marked used and remembered as
+ * current_frame_index.
+ *
+ * Returns the slot index, or AV1_INVALID_IDX when every slot is in use.
+ */
+static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
+					     u64 timestamp)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	int slot;
+
+	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
+		struct hantro_av1_frame_ref *fr = &av1_dec->frame_refs[slot];
+		int k;
+
+		if (fr->used)
+			continue;
+
+		fr->width = frame->frame_width_minus_1 + 1;
+		fr->height = frame->frame_height_minus_1 + 1;
+		fr->mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
+		fr->mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
+		fr->timestamp = timestamp;
+		fr->frame_type = frame->frame_type;
+		fr->order_hint = frame->order_hint;
+		/*
+		 * NOTE(review): the buffer is only attached when the slot has
+		 * none yet, so a recycled slot keeps its previous vb2_ref -
+		 * confirm this is intended.
+		 */
+		if (!fr->vb2_ref)
+			fr->vb2_ref = hantro_get_dst_buf(ctx);
+
+		for (k = 0; k < V4L2_AV1_TOTAL_REFS_PER_FRAME; k++)
+			fr->order_hints[k] = frame->order_hints[k];
+
+		fr->used = true;
+		av1_dec->current_frame_index = slot;
+		return slot;
+	}
+
+	return AV1_INVALID_IDX;
+}
+
+/*
+ * Mark frame_refs slot @idx as free. Negative indexes (such as
+ * AV1_INVALID_IDX) are ignored; the rest of the slot content is left as is.
+ */
+static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
+{
+	if (idx >= 0)
+		ctx->av1_dec.frame_refs[idx].used = false;
+}
+
+/*
+ * Release every used frame_refs slot whose timestamp no longer appears in the
+ * frame control's reference_frame_ts[] array.
+ */
+static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	int slot;
+
+	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
+		const struct hantro_av1_frame_ref *fr = &av1_dec->frame_refs[slot];
+		bool referenced = false;
+		int k;
+
+		if (!fr->used)
+			continue;
+
+		for (k = 0; k < V4L2_AV1_TOTAL_REFS_PER_FRAME; k++) {
+			if (ctrls->frame->reference_frame_ts[k] == fr->timestamp) {
+				referenced = true;
+				break;
+			}
+		}
+
+		if (!referenced)
+			rockchip_vpu981_av1_dec_frame_unref(ctx, slot);
+	}
+}
+
+/*
+ * Size in bytes of the luma plane: destination width * height samples of
+ * ctx->bit_depth bits each. The result is also used as the offset of the
+ * chroma data inside a reference buffer.
+ */
+static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
+{
+ return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
+}
+
+/*
+ * Despite its name, this returns 1.5 times the luma size (that is luma plus
+ * chroma), aligned up to 64 bytes. rockchip_vpu981_av1_dec_set_ref() uses the
+ * value as the offset of the motion vector data inside a reference buffer.
+ */
+static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
+{
+ size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
+
+ return ALIGN((cr_offset * 3) / 2, 64);
+}
+
+/*
+ * Free one per-tile-column buffer and mark it as not allocated. The size is
+ * reset as well so that size-based "already big enough" checks cannot match
+ * a buffer that no longer exists.
+ */
+static void rockchip_vpu981_av1_dec_tile_buf_free(struct hantro_dev *vpu,
+						  struct hantro_aux_buf *buf)
+{
+	if (buf->cpu)
+		dma_free_coherent(vpu->dev, buf->size, buf->cpu, buf->dma);
+
+	buf->cpu = NULL;
+	buf->size = 0;
+}
+
+/*
+ * Release the buffers whose size depends on the frame height and the number
+ * of tile columns: db_data_col, db_ctrl_col, cdef_col, sr_col and lr_col.
+ *
+ * Safe to call when some or all of them are not allocated. After the call
+ * each buffer has cpu == NULL and size == 0, so a later
+ * rockchip_vpu981_av1_dec_tiles_reallocate() always allocates again, even
+ * when the previous attempt failed half-way.
+ */
+void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+
+	rockchip_vpu981_av1_dec_tile_buf_free(vpu, &av1_dec->db_data_col);
+	rockchip_vpu981_av1_dec_tile_buf_free(vpu, &av1_dec->db_ctrl_col);
+	rockchip_vpu981_av1_dec_tile_buf_free(vpu, &av1_dec->cdef_col);
+	rockchip_vpu981_av1_dec_tile_buf_free(vpu, &av1_dec->sr_col);
+	rockchip_vpu981_av1_dec_tile_buf_free(vpu, &av1_dec->lr_col);
+}
+
+/*
+ * Make sure the per-tile-column buffers (db_data_col, db_ctrl_col, cdef_col,
+ * sr_col, lr_col) are large enough for the current frame.
+ *
+ * Sizes are derived from the frame height aligned to 64, ctx->bit_depth and
+ * the number of tile columns. When the existing db_data_col buffer is already
+ * big enough nothing is done; otherwise all five buffers are freed and
+ * allocated again.
+ *
+ * Returns 0 on success or -ENOMEM, in which case all five buffers are freed.
+ */
+static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+ struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+ /*
+ * NOTE(review): the column count is taken as a power of two of the
+ * tile_col field of the first tile group entry - confirm this is the
+ * intended source and that the value is bounded (a shift count of 32
+ * or more is undefined).
+ */
+ unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
+ unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
+ unsigned int height_in_sb = height / 64;
+ unsigned int stripe_num = ((height + 8) + 63) / 64;
+ size_t size;
+
+ /*
+ * Only db_data_col is compared; the four other buffers are assumed to
+ * follow. This relies on db_data_col.size being reset to 0 whenever the
+ * buffers are freed - verify against
+ * rockchip_vpu981_av1_dec_tiles_free().
+ */
+ if (av1_dec->db_data_col.size >=
+ ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
+ return 0;
+
+ rockchip_vpu981_av1_dec_tiles_free(ctx);
+
+ size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
+ av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
+ &av1_dec->db_data_col.dma,
+ GFP_KERNEL);
+ if (!av1_dec->db_data_col.cpu)
+ goto buffer_allocation_error;
+ av1_dec->db_data_col.size = size;
+
+ size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
+ av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
+ &av1_dec->db_ctrl_col.dma,
+ GFP_KERNEL);
+ if (!av1_dec->db_ctrl_col.cpu)
+ goto buffer_allocation_error;
+ av1_dec->db_ctrl_col.size = size;
+
+ size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
+ av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
+ &av1_dec->cdef_col.dma,
+ GFP_KERNEL);
+ if (!av1_dec->cdef_col.cpu)
+ goto buffer_allocation_error;
+ av1_dec->cdef_col.size = size;
+
+ size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
+ av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
+ &av1_dec->sr_col.dma,
+ GFP_KERNEL);
+ if (!av1_dec->sr_col.cpu)
+ goto buffer_allocation_error;
+ av1_dec->sr_col.size = size;
+
+ size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
+ av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
+ &av1_dec->lr_col.dma,
+ GFP_KERNEL);
+ if (!av1_dec->lr_col.cpu)
+ goto buffer_allocation_error;
+ av1_dec->lr_col.size = size;
+
+ av1_dec->num_tile_cols_allocated = num_tile_cols;
+ return 0;
+
+buffer_allocation_error:
+ /* Drop whatever was allocated before the failing step. */
+ rockchip_vpu981_av1_dec_tiles_free(ctx);
+ return -ENOMEM;
+}
+
+/*
+ * Release every DMA buffer owned by the AV1 decoder context: the fixed-size
+ * ones (global model, tile info, probability tables, tile buffer) first, then
+ * the per-tile-column ones through rockchip_vpu981_av1_dec_tiles_free().
+ * Buffers that were never allocated are skipped and each cpu pointer is
+ * cleared afterwards.
+ */
+void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_aux_buf *fixed_bufs[] = {
+		&av1_dec->global_model,
+		&av1_dec->tile_info,
+		&av1_dec->prob_tbl,
+		&av1_dec->prob_tbl_out,
+		&av1_dec->tile_buf,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(fixed_bufs); i++) {
+		struct hantro_aux_buf *buf = fixed_bufs[i];
+
+		if (buf->cpu)
+			dma_free_coherent(vpu->dev, buf->size, buf->cpu,
+					  buf->dma);
+		buf->cpu = NULL;
+	}
+
+	rockchip_vpu981_av1_dec_tiles_free(ctx);
+}
+
+/*
+ * Reset the AV1 decoder context and allocate its fixed-size DMA buffers:
+ * global model, tile info, input/output probability tables and tile buffer.
+ * The current CDFs are pointed at the default ones, which are initialised
+ * with rockchip_av1_set_default_cdfs().
+ *
+ * The tile info buffer holds 16 bytes per tile (see
+ * rockchip_vpu981_av1_dec_set_tile_info()), hence AV1_TILE_INFO_SIZE rather
+ * than AV1_MAX_TILES.
+ *
+ * Returns 0 on success. On allocation failure every buffer allocated so far
+ * is released and -ENOMEM is returned.
+ */
+int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const size_t prob_tbl_size = ALIGN(sizeof(struct av1cdfs), 2048);
+
+	memset(av1_dec, 0, sizeof(*av1_dec));
+
+	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
+						       &av1_dec->global_model.dma,
+						       GFP_KERNEL);
+	if (!av1_dec->global_model.cpu)
+		goto err_free;
+	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
+
+	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_TILE_INFO_SIZE,
+						    &av1_dec->tile_info.dma,
+						    GFP_KERNEL);
+	if (!av1_dec->tile_info.cpu)
+		goto err_free;
+	av1_dec->tile_info.size = AV1_TILE_INFO_SIZE;
+
+	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev, prob_tbl_size,
+						   &av1_dec->prob_tbl.dma,
+						   GFP_KERNEL);
+	if (!av1_dec->prob_tbl.cpu)
+		goto err_free;
+	av1_dec->prob_tbl.size = prob_tbl_size;
+
+	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev, prob_tbl_size,
+						       &av1_dec->prob_tbl_out.dma,
+						       GFP_KERNEL);
+	if (!av1_dec->prob_tbl_out.cpu)
+		goto err_free;
+	av1_dec->prob_tbl_out.size = prob_tbl_size;
+
+	av1_dec->cdfs = &av1_dec->default_cdfs;
+	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
+
+	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
+
+	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev, AV1_TILE_SIZE,
+						   &av1_dec->tile_buf.dma,
+						   GFP_KERNEL);
+	if (!av1_dec->tile_buf.cpu)
+		goto err_free;
+	av1_dec->tile_buf.size = AV1_TILE_SIZE;
+
+	return 0;
+
+err_free:
+	/*
+	 * The exit helper skips buffers that were never allocated and clears
+	 * the pointers it frees, so it is safe on a partially initialised
+	 * context and harmless if the caller invokes it again.
+	 */
+	rockchip_vpu981_av1_dec_exit(ctx);
+	return -ENOMEM;
+}
+
+/*
+ * Fetch the AV1 controls needed for a decode run and make sure the
+ * per-tile-column buffers fit the frame.
+ *
+ * The sequence, tile group entry and frame controls are mandatory: a missing
+ * one triggers a WARN and -EINVAL. The film grain control is stored without
+ * being checked. Otherwise the result of
+ * rockchip_vpu981_av1_dec_tiles_reallocate() is returned.
+ */
+static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_ctrls *ctrls = &ctx->av1_dec.ctrls;
+
+	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
+	if (WARN_ON(!ctrls->sequence))
+		return -EINVAL;
+
+	ctrls->tile_group_entry = hantro_get_ctrl(ctx,
+						  V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
+	if (WARN_ON(!ctrls->tile_group_entry))
+		return -EINVAL;
+
+	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
+	if (WARN_ON(!ctrls->frame))
+		return -EINVAL;
+
+	ctrls->film_grain = hantro_get_ctrl(ctx,
+					    V4L2_CID_STATELESS_AV1_FILM_GRAIN);
+
+	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
+}
+
+/*
+ * Return the position (0-31) of the most significant set bit of @n.
+ * Zero has no set bit and yields 0, the same as n == 1.
+ */
+static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
+{
+	/* __builtin_clz() is undefined for 0, hence the explicit guard. */
+	return n ? (31 ^ __builtin_clz(n)) : 0;
+}
+
+/*
+ * Approximate 1 / @d as a multiplier and a right shift.
+ *
+ * @d is split into its most significant bit and the remainder below it; the
+ * top DIV_LUT_BITS bits of the remainder index div_lut[]. On return *@shift
+ * holds the bit position of the MSB plus DIV_LUT_PREC_BITS, so that
+ * x / d is approximately (x * multiplier) >> *shift.
+ *
+ * Returns the multiplier, or -1 (leaving *@shift at the MSB position) when
+ * the computed table index is out of range.
+ * NOTE(review): @d is expected to be non-zero; with d == 0 the remainder
+ * wraps around - confirm callers never pass 0.
+ */
+static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
+{
+ int f;
+ uint64_t e;
+
+ *shift = rockchip_vpu981_av1_dec_get_msb(d);
+ /* e is obtained from D after resetting the most significant 1 bit. */
+ e = d - ((u32)1 << *shift);
+ /* Get the most significant DIV_LUT_BITS (8) bits of e into f */
+ if (*shift > DIV_LUT_BITS)
+ f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
+ else
+ f = e << (DIV_LUT_BITS - *shift);
+ if (f > DIV_LUT_NUM)
+ return -1;
+ *shift += DIV_LUT_PREC_BITS;
+ /* Use f as lookup into the precomputed table of multipliers */
+ return div_lut[f];
+}
+
+/*
+ * Derive the four shear parameters (alpha, beta, gamma, delta) from the six
+ * warp matrix coefficients in @params.
+ *
+ * Each result is clamped to the s16 range and then rounded to a multiple of
+ * (1 << WARP_PARAM_REDUCE_BITS). When params[2], read as a signed value, is
+ * not strictly positive the function returns without writing any output, so
+ * callers must pre-initialise them.
+ */
+static void rockchip_vpu981_av1_dec_get_shear_params(const uint32_t *params,
+ int64_t *alpha, int64_t *beta, int64_t *gamma, int64_t *delta)
+{
+ /* The coefficients are interpreted as signed 32-bit values. */
+ const int *mat = params;
+ short shift;
+ short y;
+ long long gv, dv;
+
+ if (mat[2] <= 0)
+ return;
+
+ *alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
+ *beta = clamp_val(mat[3], S16_MIN, S16_MAX);
+
+ /*
+ * y approximates 1 / mat[2]. mat[2] is positive here, so the sign factor
+ * is always 1.
+ * NOTE(review): a -1 error return from the divisor helper is not
+ * checked.
+ */
+ y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
+
+ gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
+
+ *gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
+
+ dv = ((long long)mat[3] * mat[4]) * y;
+ *delta = clamp_val(
+ mat[5] -
+ (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
+ S16_MIN, S16_MAX);
+
+ /* Reduce precision: round each value to a multiple of 2^WARP_PARAM_REDUCE_BITS. */
+ *alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
+ * (1 << WARP_PARAM_REDUCE_BITS);
+ *beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
+ * (1 << WARP_PARAM_REDUCE_BITS);
+ *gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
+ * (1 << WARP_PARAM_REDUCE_BITS);
+ *delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
+ * (1 << WARP_PARAM_REDUCE_BITS);
+}
+
+/*
+ * Fill the global model buffer and hand its address to the hardware.
+ *
+ * For each of the V4L2_AV1_REFS_PER_FRAME references, starting at
+ * V4L2_AV1_REF_LAST_FRAME, the buffer receives six 32-bit warp parameters
+ * (with parameters 2 and 3 swapped) followed by four 16-bit shear values
+ * (alpha, beta, gamma, delta). The shear values stay 0 unless the model type
+ * is at most V4L2_AV1_WARP_MODEL_AFFINE and the shear computation writes
+ * them.
+ */
+static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
+{
+	/* Order in which the six warp parameters are stored in the buffer. */
+	static const int param_order[6] = { 0, 1, 3, 2, 4, 5 };
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_av1_global_motion *gm =
+		&av1_dec->ctrls.frame->global_motion;
+	struct hantro_dev *vpu = ctx->dev;
+	uint8_t *dst = av1_dec->global_model.cpu;
+	int ref_frame, k;
+
+	memset(dst, 0, GLOBAL_MODEL_SIZE);
+
+	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
+		const int gm_idx = V4L2_AV1_REF_LAST_FRAME + ref_frame;
+		int64_t shear[4] = { 0, 0, 0, 0 };
+
+		for (k = 0; k < 6; ++k) {
+			*(int32_t *)dst = gm->params[gm_idx][param_order[k]];
+			dst += 4;
+		}
+
+		if (gm->type[gm_idx] <= V4L2_AV1_WARP_MODEL_AFFINE)
+			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[gm_idx][0],
+								 &shear[0], &shear[1],
+								 &shear[2], &shear[3]);
+
+		for (k = 0; k < 4; ++k) {
+			*(int16_t *)dst = shear[k];
+			dst += 2;
+		}
+	}
+
+	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
+}
+
+/*
+ * Write the per-tile description table and program the tile registers.
+ *
+ * The table is filled column by column (the hardware is set up with
+ * av1_tile_transpose = 1). Each tile takes 16 bytes, stored as little-endian
+ * 32-bit words: width in superblocks, height in superblocks, start offset of
+ * the tile data relative to the first tile, and end offset (start + size).
+ *
+ * tile_cols comes from userspace and is used as a divisor, so a zero value
+ * is handled explicitly: the context update coordinates are then left at 0.
+ *
+ * NOTE(review): nothing here checks that tile_cols * tile_rows * 16 fits in
+ * tile_info.size, nor that tile_id stays within the tile group entry array -
+ * confirm both are guaranteed by the callers/control validation.
+ */
+static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	/* Use a pointer: the structure is too large to copy onto the stack. */
+	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
+	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
+		ctrls->tile_group_entry;
+	int context_update_y = 0;
+	int context_update_x = 0;
+	int context_update_tile_id;
+	uint8_t *dst = av1_dec->tile_info.cpu;
+	struct hantro_dev *vpu = ctx->dev;
+	int tile0, tile1;
+
+	if (tile_info->tile_cols) {
+		context_update_y =
+			tile_info->context_update_tile_id / tile_info->tile_cols;
+		context_update_x =
+			tile_info->context_update_tile_id % tile_info->tile_cols;
+	}
+	/* Tile id in the transposed (column-major) order used by the table. */
+	context_update_tile_id =
+		context_update_x * tile_info->tile_rows + context_update_y;
+
+	memset(dst, 0, av1_dec->tile_info.size);
+
+	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
+		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
+			int tile_id = tile1 * tile_info->tile_cols + tile0;
+			uint32_t start, end;
+			uint32_t y0 =
+				tile_info->height_in_sbs_minus_1[tile1] + 1;
+			uint32_t x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
+
+			/* tile size in SB units (width,height) */
+			*dst++ = x0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = y0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = 0;
+
+			/* tile start position */
+			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
+			*dst++ = start & 255;
+			*dst++ = (start >> 8) & 255;
+			*dst++ = (start >> 16) & 255;
+			*dst++ = (start >> 24) & 255;
+
+			/* end position: start + number of bytes in tile data */
+			end = start + group_entry[tile_id].tile_size;
+			*dst++ = end & 255;
+			*dst++ = (end >> 8) & 255;
+			*dst++ = (end >> 16) & 255;
+			*dst++ = (end >> 24) & 255;
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_multicore_expect_context_update,
+			 !!(context_update_x == 0));
+	hantro_reg_write(vpu, &av1_tile_enable,
+			 !!((tile_info->tile_cols > 1) ||
+			    (tile_info->tile_rows > 1)));
+	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
+	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
+	hantro_reg_write(vpu, &av1_context_update_tile_id,
+			 context_update_tile_id);
+	hantro_reg_write(vpu, &av1_tile_transpose, 1);
+	if (context_update_tile_id)
+		hantro_reg_write(vpu, &av1_dec_tile_size_mag,
+				 tile_info->tile_size_bytes);
+	else
+		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
+
+	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
+}
+
+/*
+ * Signed distance between order hints @a and @b, wrapped to the
+ * order_hint_bits width signalled in the sequence control: the difference is
+ * sign-extended from its top bit. Returns 0 when order_hint_bits is 0.
+ */
+static int rockchip_vpu981_av1_dec_get_relative_dist(struct hantro_ctx *ctx,
+						     int a, int b)
+{
+	const struct v4l2_ctrl_av1_sequence *seq = ctx->av1_dec.ctrls.sequence;
+	int delta, sign_bit;
+
+	if (!seq->order_hint_bits)
+		return 0;
+
+	sign_bit = 1 << (seq->order_hint_bits - 1);
+	delta = a - b;
+
+	return (delta & (sign_bit - 1)) - (delta & sign_bit);
+}
+
+/*
+ * Compute ref_frame_sign_bias[] for the current frame.
+ *
+ * When order hints are disabled or the frame is intra, every entry is
+ * cleared. Otherwise, for each reference that maps to a known slot, entry
+ * (ref + 1) is set to 1 if the reference's order hint is after the current
+ * frame's one and to 0 if not.
+ * NOTE(review): in that second case entries of references without a slot,
+ * and entry 0, keep their previous value - confirm this is intended.
+ */
+static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	const struct v4l2_ctrl_av1_sequence *sequence = av1_dec->ctrls.sequence;
+	int ref;
+
+	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
+		memset(av1_dec->ref_frame_sign_bias, 0,
+		       sizeof(av1_dec->ref_frame_sign_bias));
+		return;
+	}
+
+	/* Identify the nearest forward and backward references. */
+	for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ref++) {
+		int ref_hint, rel_off;
+
+		if (rockchip_vpu981_get_frame_index(ctx, ref) < 0)
+			continue;
+
+		ref_hint = rockchip_vpu981_get_order_hint(ctx, ref);
+		rel_off = rockchip_vpu981_av1_dec_get_relative_dist(ctx, ref_hint,
+								    frame->order_hint);
+		av1_dec->ref_frame_sign_bias[ref + 1] = rel_off > 0;
+	}
+}
+
+/*
+ * Program hardware reference @ref (0-6) from frame_refs slot @idx.
+ *
+ * Writes the reference dimensions and its scale factors relative to the
+ * current frame (fixed point with AV1_REF_SCALE_SHIFT fractional bits), then
+ * the luma, chroma and motion vector addresses inside the slot's buffer.
+ *
+ * Returns true when the reference needs scaling, i.e. when either scale
+ * factor differs from 1.0.
+ * NOTE(review): @idx is used unchecked to index frame_refs[] and its vb2_ref
+ * is dereferenced - callers must pass a valid, populated slot.
+ */
+static bool
+rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
+ int width, int height)
+{
+ struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+ struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+ const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_decoded_buffer *dst;
+ dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
+ size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
+ size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
+ int cur_width = frame->frame_width_minus_1 + 1;
+ int cur_height = frame->frame_height_minus_1 + 1;
+ /* Rounded ratio reference size / current size. */
+ int scale_width =
+ ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
+ int scale_height =
+ ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
+
+ /*
+ * NOTE(review): the *_ver_scale registers receive scale_width and the
+ * *_hor_scale ones receive scale_height - confirm against the register
+ * documentation that this pairing is intended.
+ */
+ switch (ref) {
+ case 0:
+ hantro_reg_write(vpu, &av1_ref0_height, height);
+ hantro_reg_write(vpu, &av1_ref0_width, width);
+ hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
+ hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
+ break;
+ case 1:
+ hantro_reg_write(vpu, &av1_ref1_height, height);
+ hantro_reg_write(vpu, &av1_ref1_width, width);
+ hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
+ hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
+ break;
+ case 2:
+ hantro_reg_write(vpu, &av1_ref2_height, height);
+ hantro_reg_write(vpu, &av1_ref2_width, width);
+ hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
+ hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
+ break;
+ case 3:
+ hantro_reg_write(vpu, &av1_ref3_height, height);
+ hantro_reg_write(vpu, &av1_ref3_width, width);
+ hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
+ hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
+ break;
+ case 4:
+ hantro_reg_write(vpu, &av1_ref4_height, height);
+ hantro_reg_write(vpu, &av1_ref4_width, width);
+ hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
+ hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
+ break;
+ case 5:
+ hantro_reg_write(vpu, &av1_ref5_height, height);
+ hantro_reg_write(vpu, &av1_ref5_width, width);
+ hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
+ hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
+ break;
+ case 6:
+ hantro_reg_write(vpu, &av1_ref6_height, height);
+ hantro_reg_write(vpu, &av1_ref6_width, width);
+ hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
+ hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
+ break;
+ default:
+ /* Only warns: the address registers below are still written. */
+ pr_warn("AV1 invalid reference frame index\n");
+ }
+
+ /* Chroma and motion vectors live in the same buffer, after the luma. */
+ dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
+ luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
+ chroma_addr = luma_addr + cr_offset;
+ mv_addr = luma_addr + mv_offset;
+
+ hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
+ hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
+ hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
+
+ return (scale_width != (1 << AV1_REF_SCALE_SHIFT))
+ || (scale_height != (1 << AV1_REF_SCALE_SHIFT));
+}
+
+/*
+ * Write @val into the sign bias register field of reference slot @ref.
+ *
+ * Slots 0 to 6 each have their own register field. Any other value of @ref
+ * only logs a warning and leaves the hardware untouched.
+ */
+static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
+						  int ref, int val)
+{
+	/* Sign bias register field of each reference slot, indexed by @ref. */
+	const struct hantro_reg *sign_bias_regs[] = {
+		&av1_ref0_sign_bias,
+		&av1_ref1_sign_bias,
+		&av1_ref2_sign_bias,
+		&av1_ref3_sign_bias,
+		&av1_ref4_sign_bias,
+		&av1_ref5_sign_bias,
+		&av1_ref6_sign_bias,
+	};
+	const int nb_regs = sizeof(sign_bias_regs) / sizeof(sign_bias_regs[0]);
+
+	if (ref < 0 || ref >= nb_regs) {
+		pr_warn("AV1 invalid sign bias index\n");
+		return;
+	}
+
+	hantro_reg_write(ctx->dev, sign_bias_regs[ref], val);
+}
+
+/*
+ * Program the segmentation registers from the current frame control.
+ *
+ * - When segmentation is enabled and the primary reference frame maps to a
+ *   known reference buffer, AV1_SEGMENTATION is pointed inside that buffer
+ *   (buffer address + rockchip_vpu981_av1_dec_chroma_size()) and
+ *   av1_use_temporal3_mvs is set.
+ * - The segmentation enable/update flags and the error resilient flag are
+ *   copied from the control; av1_use_temporal3_mvs is cleared again for
+ *   intra frames and in error resilient mode.
+ * - For every segment the enabled features are turned into register values
+ *   (quantizer magnitude and sign, loop filter deltas, reference frame,
+ *   skip and global MV flags) and written to the per-segment registers.
+ */
+static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
+	uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
+	struct hantro_dev *vpu = ctx->dev;
+	uint8_t segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
+
+	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
+	    && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
+		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
+
+		if (idx >= 0) {
+			dma_addr_t luma_addr, mv_addr;
+			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
+
+			luma_addr =
+			    hantro_get_dec_buf_addr(ctx,
+						    &av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
+			mv_addr = luma_addr + mv_offset;
+
+			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
+			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
+			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
+	hantro_reg_write(vpu, &av1_segment_upd_e,
+			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
+	hantro_reg_write(vpu, &av1_segment_e,
+			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
+
+	hantro_reg_write(vpu, &av1_error_resilient,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
+
+	/* Intra frames and error resilient frames override the setting above. */
+	if (IS_INTRA(frame->frame_type)
+	    || !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
+		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
+	}
+
+	/* Without segmentation every per-segment value stays at zero. */
+	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)) {
+		int s;
+
+		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
+			/* The quantizer is split into a magnitude and a sign bit. */
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
+				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
+				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
+					  0, 255);
+				segsign |=
+				    (seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
+			}
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
+				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
+				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
+					  -63, 63);
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
+				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
+				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
+					  -63, 63);
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
+				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
+				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
+					  -63, 63);
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
+				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
+				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
+					  -63, 63);
+
+			/* Only counted when frame_type is non-zero. */
+			if (frame->frame_type && seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
+				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
+				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
+				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
+		}
+	}
+
+	/*
+	 * last_active_seg is the highest segment with any feature enabled;
+	 * preskip_segid is set as soon as one enabled feature is
+	 * V4L2_AV1_SEG_LVL_REF_FRAME or above.
+	 */
+	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
+		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
+			if (seg->feature_enabled[i]
+			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
+				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
+				last_active_seg = max(i, last_active_seg);
+			}
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
+	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
+
+	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
+
+	/* Write QP, filter level, ref frame and skip for every segment */
+	hantro_reg_write(vpu, &av1_quant_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+}
+
+/*
+ * Tell whether the current frame is coded lossless.
+ *
+ * Returns true only when all the DC/AC delta quantizers checked below are
+ * zero and, for every segment, the quantizer index (base index plus the
+ * segment ALT_Q feature data when that feature is enabled, clamped to
+ * 0..255) is zero as well.
+ */
+static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
+{
+	const struct v4l2_ctrl_av1_frame *frame = ctx->av1_dec.ctrls.frame;
+	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
+	const struct v4l2_av1_quantization *quant = &frame->quantization;
+	int s;
+
+	/* The deltas are frame wide: a single non-zero one rules out lossless. */
+	if (quant->delta_q_y_dc ||
+	    quant->delta_q_u_dc || quant->delta_q_u_ac ||
+	    quant->delta_q_v_dc || quant->delta_q_v_ac)
+		return false;
+
+	for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
+		int qidx = quant->base_q_idx;
+
+		if (seg->feature_enabled[s] &
+		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q))
+			qidx += seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q];
+
+		if (clamp(qidx, 0, 255) != 0)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Program the loop filter registers from the frame control.
+ *
+ * The filter levels and sharpness are always written. The reference and
+ * mode delta registers receive the values from the control only when delta
+ * is enabled, the frame is not lossless and intra block copy is not
+ * allowed; otherwise they are all written with zero. The two deblocking
+ * column buffers addresses are written last.
+ */
+static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	const struct v4l2_av1_loop_filter *lf = &frame->loop_filter;
+	struct hantro_dev *vpu = ctx->dev;
+	int ref_adj[8] = { 0 };
+	int mb_adj[2] = { 0 };
+	bool apply_deltas;
+	int i;
+
+	hantro_reg_write(vpu, &av1_filtering_dis,
+			 !lf->level[0] && !lf->level[1]);
+	hantro_reg_write(vpu, &av1_filt_level_base_gt32, lf->level[0] > 32);
+	hantro_reg_write(vpu, &av1_filt_sharpness, lf->sharpness);
+
+	hantro_reg_write(vpu, &av1_filt_level0, lf->level[0]);
+	hantro_reg_write(vpu, &av1_filt_level1, lf->level[1]);
+	hantro_reg_write(vpu, &av1_filt_level2, lf->level[2]);
+	hantro_reg_write(vpu, &av1_filt_level3, lf->level[3]);
+
+	apply_deltas = (lf->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED) &&
+		       !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
+		       !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
+
+	/* The staging arrays stay zeroed when the deltas must not be applied. */
+	if (apply_deltas) {
+		for (i = 0; i < 8; i++)
+			ref_adj[i] = lf->ref_deltas[i];
+		for (i = 0; i < 2; i++)
+			mb_adj[i] = lf->mode_deltas[i];
+	}
+
+	hantro_reg_write(vpu, &av1_filt_ref_adj_0, ref_adj[0]);
+	hantro_reg_write(vpu, &av1_filt_ref_adj_1, ref_adj[1]);
+	hantro_reg_write(vpu, &av1_filt_ref_adj_2, ref_adj[2]);
+	hantro_reg_write(vpu, &av1_filt_ref_adj_3, ref_adj[3]);
+	hantro_reg_write(vpu, &av1_filt_ref_adj_4, ref_adj[4]);
+	hantro_reg_write(vpu, &av1_filt_ref_adj_5, ref_adj[5]);
+	hantro_reg_write(vpu, &av1_filt_ref_adj_6, ref_adj[6]);
+	hantro_reg_write(vpu, &av1_filt_ref_adj_7, ref_adj[7]);
+	hantro_reg_write(vpu, &av1_filt_mb_adj_0, mb_adj[0]);
+	hantro_reg_write(vpu, &av1_filt_mb_adj_1, mb_adj[1]);
+
+	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
+	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
+}
+
+/*
+ * Copy the CDFs found in the prob_tbl_out buffer back into the driver's
+ * CDF storage.
+ *
+ * Nothing is done when the frame disables the end of frame CDF update or
+ * when it refreshes no slot. Otherwise the CDFs of the lowest refreshed
+ * slot are loaded, overwritten with the output table and stored again for
+ * all the refreshed slots. For intra frames the previously loaded MV CDFs
+ * are kept and the MV CDFs of the output table go to cdfs_ndvc instead.
+ */
+static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	const struct av1cdfs *hw_cdfs =
+		(const struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
+	int slot;
+
+	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
+		return;
+
+	/* Look for the lowest slot refreshed by this frame. */
+	for (slot = 0; slot < NUM_REF_FRAMES; slot++) {
+		if (frame->refresh_frame_flags & (1 << slot))
+			break;
+	}
+
+	if (slot >= NUM_REF_FRAMES)
+		return;
+
+	rockchip_av1_get_cdfs(ctx, slot);
+
+	if (IS_INTRA(frame->frame_type)) {
+		struct mvcdfs kept_mv_cdf = av1_dec->cdfs->mv_cdf;
+
+		*av1_dec->cdfs = *hw_cdfs;
+		av1_dec->cdfs->mv_cdf = kept_mv_cdf;
+		*av1_dec->cdfs_ndvc = hw_cdfs->mv_cdf;
+	} else {
+		*av1_dec->cdfs = *hw_cdfs;
+	}
+
+	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
+}
+
+/*
+ * Non-static entry point that only forwards to
+ * rockchip_vpu981_av1_dec_update_prob() for @ctx.
+ * NOTE(review): presumably called once the hardware has finished decoding
+ * a frame - confirm against the codec ops table.
+ */
+void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
+{
+ rockchip_vpu981_av1_dec_update_prob(ctx);
+}
+
+/*
+ * Select the CDFs used to decode the current frame and hand them to the
+ * hardware.
+ *
+ * The default CDFs (with coefficient probabilities derived from base_q_idx)
+ * are used in error resilient mode, for intra frames and when the frame has
+ * no usable primary reference frame. Otherwise the CDFs saved for the
+ * primary reference frame are loaded. The selected CDFs are stored for the
+ * slots refreshed by this frame and copied into the prob_tbl buffer; for
+ * intra frames the MV area of that buffer is overwritten with cdfs_ndvc.
+ * Finally the input and output table addresses are written.
+ */
+static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	const struct v4l2_av1_quantization *quantization = &frame->quantization;
+	struct hantro_dev *vpu = ctx->dev;
+	bool error_resilient_mode =
+	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
+	bool frame_is_intra = IS_INTRA(frame->frame_type);
+	/*
+	 * primary_ref_frame comes from userspace and is used below to index
+	 * ref_frame_idx[]: treat any out of range value as "no primary
+	 * reference" instead of reading past the array.
+	 */
+	bool no_primary_ref =
+	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE ||
+	    frame->primary_ref_frame >= V4L2_AV1_REFS_PER_FRAME;
+
+	if (error_resilient_mode || frame_is_intra || no_primary_ref) {
+		av1_dec->cdfs = &av1_dec->default_cdfs;
+		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
+		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
+						 av1_dec->cdfs);
+	} else {
+		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
+	}
+	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
+
+	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
+
+	if (frame_is_intra) {
+		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
+		/* Overwrite MV context area with intrabc MV context */
+		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
+		       sizeof(struct mvcdfs));
+	}
+
+	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
+	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
+}
+
+/*
+ * Program the CDEF registers from the frame control.
+ *
+ * The strengths are packed into four words: 4 bits per entry for the
+ * primary strengths and 2 bits per entry for the secondary ones, where a
+ * secondary strength of 4 is written as 3. The number of packed entries is
+ * 1 << cdef->bits, limited to what the control arrays and the packed words
+ * can hold so that a bogus cdef->bits cannot cause an out of bounds read or
+ * an oversized shift. The CDEF column buffer address is written last.
+ */
+static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	const struct v4l2_av1_cdef *cdef = &frame->cdef;
+	struct hantro_dev *vpu = ctx->dev;
+	uint32_t luma_pri_strength = 0;
+	uint16_t luma_sec_strength = 0;
+	uint32_t chroma_pri_strength = 0;
+	uint16_t chroma_sec_strength = 0;
+	/* 32 bits / 4 bits per primary entry == 16 bits / 2 bits per secondary entry */
+	unsigned int nb_strengths = 8 * sizeof(luma_pri_strength) / 4;
+	unsigned int i;
+
+	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
+	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
+
+	/* Never read more entries than the control provides. */
+	if (nb_strengths > sizeof(cdef->y_pri_strength) / sizeof(cdef->y_pri_strength[0]))
+		nb_strengths = sizeof(cdef->y_pri_strength) / sizeof(cdef->y_pri_strength[0]);
+
+	/* Only evaluate the shift when it is well defined. */
+	if (cdef->bits < 8 * sizeof(nb_strengths) &&
+	    (1U << cdef->bits) < nb_strengths)
+		nb_strengths = 1U << cdef->bits;
+
+	for (i = 0; i < nb_strengths; i++) {
+		/* Shift as unsigned: entry 7 lands in bits 31:28. */
+		luma_pri_strength |= (uint32_t)cdef->y_pri_strength[i] << (i * 4);
+		if (cdef->y_sec_strength[i] == 4)
+			luma_sec_strength |= 3 << (i * 2);
+		else
+			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
+
+		chroma_pri_strength |= (uint32_t)cdef->uv_pri_strength[i] << (i * 4);
+		if (cdef->uv_sec_strength[i] == 4)
+			chroma_sec_strength |= 3 << (i * 2);
+		else
+			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
+	}
+
+	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
+			 luma_pri_strength);
+	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
+			 luma_sec_strength);
+	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
+			 chroma_pri_strength);
+	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
+			 chroma_sec_strength);
+
+	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
+}
+
+/*
+ * Program the loop restoration registers from the frame control.
+ *
+ * The restoration type and the unit size of each plane are packed two bits
+ * per plane. The unit sizes default to 3 and are derived from lr_unit_shift
+ * and lr_uv_shift only when the control flags loop restoration as used.
+ * The loop restoration column buffer address is written last.
+ */
+static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_av1_loop_restoration *lr =
+		&av1_dec->ctrls.frame->loop_restoration;
+	struct hantro_dev *vpu = ctx->dev;
+	uint8_t unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
+	uint16_t packed_type = 0;
+	uint16_t packed_size = 0;
+	int plane;
+
+	if (lr->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
+		/* Both chroma planes share the same unit size. */
+		uint8_t chroma_size = 1 + lr->lr_unit_shift - lr->lr_uv_shift;
+
+		unit_size[0] = 1 + lr->lr_unit_shift;
+		unit_size[1] = chroma_size;
+		unit_size[2] = chroma_size;
+	}
+
+	for (plane = 0; plane < V4L2_AV1_NUM_PLANES_MAX; plane++) {
+		int shift = plane * 2;
+
+		packed_type |= lr->frame_restoration_type[plane] << shift;
+		packed_size |= unit_size[plane] << shift;
+	}
+
+	hantro_reg_write(vpu, &av1_lr_type, packed_type);
+	hantro_reg_write(vpu, &av1_lr_unit_size, packed_size);
+	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
+}
+
+/*
+ * Ratio @num / @den rounded to nearest, expressed with
+ * RS_SCALE_SUBPEL_BITS fractional bits.
+ */
+static int rockchip_vpu981_av1_dec_superres_step(int num, int den)
+{
+	return ((num << RS_SCALE_SUBPEL_BITS) + (den / 2)) / den;
+}
+
+/*
+ * Initial sub-pixel position for a plane scaled from @downscaled to
+ * @upscaled samples with the given @step, masked with RS_SCALE_SUBPEL_MASK.
+ */
+static int rockchip_vpu981_av1_dec_superres_initial(int upscaled,
+						    int downscaled, int step)
+{
+	int err = (upscaled * step) - (downscaled << RS_SCALE_SUBPEL_BITS);
+
+	return ((-((upscaled - downscaled) << (RS_SCALE_SUBPEL_BITS - 1))
+		 + upscaled / 2)
+		/ upscaled + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err / 2)
+		& RS_SCALE_SUBPEL_MASK;
+}
+
+/*
+ * Program the super resolution registers from the frame control.
+ *
+ * The step and initial position registers keep their default values unless
+ * the frame uses superres with a denominator above SCALE_NUMERATOR and the
+ * resulting downscaled width differs from upscaled_width; only then the
+ * luma and chroma parameters are computed and av1_superres_is_scaled is
+ * set. The superres column buffer address is written last.
+ */
+static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	struct hantro_dev *vpu = ctx->dev;
+	bool use_superres = !!(frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES);
+	uint8_t denom = use_superres ? frame->superres_denom : SCALE_NUMERATOR;
+	int luma_step = RS_SCALE_SUBPEL_BITS;
+	int chroma_step = RS_SCALE_SUBPEL_BITS;
+	int luma_step_invra = RS_SCALE_SUBPEL_BITS;
+	int chroma_step_invra = RS_SCALE_SUBPEL_BITS;
+	int init_luma_subpel_x = 0;
+	int init_chroma_subpel_x = 0;
+	int is_scaled = 0;
+	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
+
+	if (denom > SCALE_NUMERATOR) {
+		int width = (frame->upscaled_width * SCALE_NUMERATOR +
+			     (denom / 2)) / denom;
+
+		if (width < min_w)
+			width = min_w;
+
+		if (width != frame->upscaled_width) {
+			int up_luma = frame->upscaled_width;
+			int down_luma = width;
+			int down_chroma = (down_luma + 1) >> 1;
+			int up_chroma = (up_luma + 1) >> 1;
+
+			is_scaled = 1;
+
+			luma_step =
+				rockchip_vpu981_av1_dec_superres_step(down_luma,
+								      up_luma);
+			chroma_step =
+				rockchip_vpu981_av1_dec_superres_step(down_chroma,
+								      up_chroma);
+			luma_step_invra =
+				rockchip_vpu981_av1_dec_superres_step(up_luma,
+								      down_luma);
+			chroma_step_invra =
+				rockchip_vpu981_av1_dec_superres_step(up_chroma,
+								      down_chroma);
+			init_luma_subpel_x =
+				rockchip_vpu981_av1_dec_superres_initial(up_luma,
+									 down_luma,
+									 luma_step);
+			init_chroma_subpel_x =
+				rockchip_vpu981_av1_dec_superres_initial(up_chroma,
+									 down_chroma,
+									 chroma_step);
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
+
+	if (use_superres)
+		hantro_reg_write(vpu, &av1_scale_denom_minus9,
+				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
+	else
+		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
+
+	hantro_reg_write(vpu, &av1_superres_luma_step, luma_step);
+	hantro_reg_write(vpu, &av1_superres_chroma_step, chroma_step);
+	hantro_reg_write(vpu, &av1_superres_luma_step_invra, luma_step_invra);
+	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
+			 chroma_step_invra);
+	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
+			 init_luma_subpel_x);
+	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
+			 init_chroma_subpel_x);
+	hantro_reg_write(vpu, &av1_superres_is_scaled, is_scaled);
+
+	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
+}
+
+/*
+ * Program the picture size registers: the frame width and height expressed
+ * in units of 8 samples (rounded up) and the padding needed to reach the
+ * next multiple of 8. The super resolution parameters are programmed
+ * afterwards.
+ */
+static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
+{
+	const struct v4l2_ctrl_av1_frame *frame = ctx->av1_dec.ctrls.frame;
+	struct hantro_dev *vpu = ctx->dev;
+	int width_in_cbs, height_in_cbs;
+	int width_pad, height_pad;
+
+	width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
+	height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
+
+	/* Distance between the coded size and its 8-aligned value. */
+	width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
+		    - (frame->frame_width_minus_1 + 1);
+	height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
+		     - (frame->frame_height_minus_1 + 1);
+
+	hantro_reg_write(vpu, &av1_pic_width_in_cbs, width_in_cbs);
+	hantro_reg_write(vpu, &av1_pic_height_in_cbs, height_in_cbs);
+	hantro_reg_write(vpu, &av1_pic_width_pad, width_pad);
+	hantro_reg_write(vpu, &av1_pic_height_pad, height_pad);
+
+	rockchip_vpu981_av1_dec_set_superres_params(ctx);
+}
+
+static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
+{
+ struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+ struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+ const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+ struct hantro_dev *vpu = ctx->dev;
+ bool use_ref_frame_mvs =
+ !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
+ int cur_frame_offset = frame->order_hint;
+ int alt_frame_offset = 0;
+ int gld_frame_offset = 0;
+ int bwd_frame_offset = 0;
+ int alt2_frame_offset = 0;
+ int refs_selected[3] = { 0, 0, 0 };
+ int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
+ int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
+ int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
+ int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
+ int mf_types[3] = { 0, 0, 0 };
+ int ref_stamp = 2;
+ int ref_ind = 0;
+ int rf, idx;
+
+ alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
+ gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
+ bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
+ alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
+
+ idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
+ if (idx >= 0) {
+ int alt_frame_offset_in_lst =
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
+ bool is_lst_overlay =
+ (alt_frame_offset_in_lst == gld_frame_offset);
+
+ if (!is_lst_overlay) {
+ int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
+ int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
+ bool lst_intra_only =
+ IS_INTRA(av1_dec->frame_refs[idx].frame_type);
+
+ if (lst_mi_cols == cur_mi_cols
+ && lst_mi_rows == cur_mi_rows && !lst_intra_only) {
+ mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
+ refs_selected[ref_ind++] = LST_BUF_IDX;
+ }
+ }
+ ref_stamp--;
+ }
+
+ idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
+ if (rockchip_vpu981_av1_dec_get_relative_dist
+ (ctx, bwd_frame_offset, cur_frame_offset) > 0) {
+ int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
+ int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
+ bool bwd_intra_only =
+ IS_INTRA(av1_dec->frame_refs[idx].frame_type);
+
+ if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
+ !bwd_intra_only) {
+ mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
+ refs_selected[ref_ind++] = BWD_BUF_IDX;
+ ref_stamp--;
+ }
+ }
+
+ idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
+ if (rockchip_vpu981_av1_dec_get_relative_dist
+ (ctx, alt2_frame_offset, cur_frame_offset) > 0) {
+ int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
+ int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
+ bool alt2_intra_only =
+ IS_INTRA(av1_dec->frame_refs[idx].frame_type);
+
+ if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows
+ && !alt2_intra_only) {
+ mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
+ refs_selected[ref_ind++] = ALT2_BUF_IDX;
+ ref_stamp--;
+ }
+ }
+
+ idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
+ if (rockchip_vpu981_av1_dec_get_relative_dist
+ (ctx, alt_frame_offset, cur_frame_offset) > 0 && ref_stamp >= 0) {
+ int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
+ int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
+ bool alt_intra_only =
+ IS_INTRA(av1_dec->frame_refs[idx].frame_type);
+
+ if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
+ !alt_intra_only) {
+ mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
+ refs_selected[ref_ind++] = ALT_BUF_IDX;
+ ref_stamp--;
+ }
+ }
+
+ idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
+ if (idx >= 0 && ref_stamp >= 0) {
+ int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
+ int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
+ bool lst2_intra_only =
+ IS_INTRA(av1_dec->frame_refs[idx].frame_type);
+
+ if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows
+ && !lst2_intra_only) {
+ mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
+ refs_selected[ref_ind++] = LST2_BUF_IDX;
+ ref_stamp--;
+ }
+ }
+
+ for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
+ idx = rockchip_vpu981_get_frame_index(ctx, rf);
+ if (idx >= 0) {
+ int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
+
+ cur_offset[rf] =
+ rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ cur_frame_offset,
+ rf_order_hint);
+ cur_roffset[rf] =
+ rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ cur_frame_offset);
+ } else {
+ cur_offset[rf] = 0;
+ cur_roffset[rf] = 0;
+ }
+ }
+
+ hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
+ hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
+ hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
+ hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
+
+ hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
+ hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
+ hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
+ hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
+ hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
+ hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
+ hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
+
+ if (use_ref_frame_mvs && ref_ind > 0 &&
+ cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
+ && cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
+ int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
+ int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
+ int val;
+
+ hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
+ hantro_reg_write(vpu, &av1_mf1_last_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
+ hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
+ hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
+ hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
+ hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
+ hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
+ hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
+ }
+
+ hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
+ hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
+ hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
+ hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
+ hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
+ hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
+ hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
+
+ if (use_ref_frame_mvs && ref_ind > 1 &&
+ cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
+ && cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
+ int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
+ int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
+ int val;
+
+ hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
+ hantro_reg_write(vpu, &av1_mf2_last_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
+ hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
+ hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
+ hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
+ hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
+ hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
+ hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
+ }
+
+ hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
+ hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
+ hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
+ hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
+ hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
+ hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
+ hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
+
+ if (use_ref_frame_mvs && ref_ind > 2 &&
+ cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
+ && cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
+ int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
+ int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
+ int val;
+
+ hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
+ hantro_reg_write(vpu, &av1_mf3_last_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
+ hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
+ hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
+ hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
+ hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
+ hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
+
+ val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+ rf_order_hint,
+ av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
+ hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
+ }
+
+ hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
+ hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
+ hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
+ hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
+ hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
+ hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
+ hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
+
+ hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
+ hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
+ hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
+ hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
+ hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
+ hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
+ hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
+
+ hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
+ hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
+ hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
+}
+
+/*
+ * Program the reference frame registers for the current frame: the number of
+ * distinct reference buffers, the buffer index, dimensions and sign bias of
+ * each reference, the reference scaling enable bit and the global motion
+ * type of each reference.
+ *
+ * Returns early, without writing any register, for intra frames that do not
+ * allow intra block copy.
+ */
+static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
+{
+ struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+ struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+ const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+ int frame_type = frame->frame_type;
+ bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
+ int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
+ struct hantro_dev *vpu = ctx->dev;
+ int i, ref_frames = 0;
+ bool scale_enable = false;
+
+ if (IS_INTRA(frame_type) && !allow_intrabc)
+ return;
+
+ if (!allow_intrabc) {
+ /*
+ * Count how many different buffer slots the references resolve
+ * to; references with a negative index are ignored.
+ */
+ for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
+ int idx = rockchip_vpu981_get_frame_index(ctx, i);
+
+ if (idx >= 0)
+ ref_count[idx]++;
+ }
+
+ for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
+ if (ref_count[i])
+ ref_frames++;
+ }
+ } else {
+ /* With intra block copy a single reference is reported. */
+ ref_frames = 1;
+ }
+ hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
+
+ rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
+
+ for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
+ /* Reference number passed to the helpers: one less than i. */
+ uint32_t ref = i - 1;
+ int idx = 0;
+ int width, height;
+
+ if (allow_intrabc) {
+ /* The current frame is used as the reference. */
+ idx = av1_dec->current_frame_index;
+ width = frame->frame_width_minus_1 + 1;
+ height = frame->frame_height_minus_1 + 1;
+ } else {
+ /*
+ * NOTE(review): the lookup is done twice, and any result
+ * <= 0 leaves idx at 0, so a reference that is not found
+ * (negative index) uses slot 0 - confirm this fallback is
+ * intended.
+ */
+ if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
+ idx = rockchip_vpu981_get_frame_index(ctx, ref);
+ width = av1_dec->frame_refs[idx].width;
+ height = av1_dec->frame_refs[idx].height;
+ }
+
+ /* Scaling is enabled as soon as one reference asks for it. */
+ scale_enable |=
+ rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
+ height);
+
+ rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
+ av1_dec->ref_frame_sign_bias[i]);
+ }
+ hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
+
+ /* Global motion type of each reference, LAST to ALTREF. */
+ hantro_reg_write(vpu, &av1_ref0_gm_mode,
+ frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
+ hantro_reg_write(vpu, &av1_ref1_gm_mode,
+ frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
+ hantro_reg_write(vpu, &av1_ref2_gm_mode,
+ frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
+ hantro_reg_write(vpu, &av1_ref3_gm_mode,
+ frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
+ hantro_reg_write(vpu, &av1_ref4_gm_mode,
+ frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
+ hantro_reg_write(vpu, &av1_ref5_gm_mode,
+ frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
+ hantro_reg_write(vpu, &av1_ref6_gm_mode,
+ frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
+
+ rockchip_vpu981_av1_dec_set_other_frames(ctx);
+}
+
+/*
+ * Copy the frame-level and sequence-level coding parameters (feature flags,
+ * quantizer settings, bit depth, block sizes, skip-mode references) into the
+ * decoder registers, then point both MC sync addresses at the tile buffer.
+ */
+static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_sequence *seq = av1_dec->ctrls.sequence;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	const struct v4l2_av1_quantization *quant = &frame->quantization;
+	struct hantro_dev *vpu = ctx->dev;
+	/* Integer MVs are only forced when screen content tools are allowed. */
+	bool force_int_mv =
+		(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS) &&
+		(frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV);
+	bool use_qmatrix = quant->flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX;
+
+	hantro_reg_write(vpu, &av1_skip_mode,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
+	hantro_reg_write(vpu, &av1_tempor_mvp_e,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
+
+	/* Loop filter deltas */
+	hantro_reg_write(vpu, &av1_delta_lf_res_log,
+			 frame->loop_filter.delta_lf_res);
+	hantro_reg_write(vpu, &av1_delta_lf_multi,
+			 !!(frame->loop_filter.flags &
+			    V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
+	hantro_reg_write(vpu, &av1_delta_lf_present,
+			 !!(frame->loop_filter.flags &
+			    V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
+
+	/* Per-frame tool flags */
+	hantro_reg_write(vpu, &av1_disable_cdf_update,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
+	hantro_reg_write(vpu, &av1_allow_warp,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
+	hantro_reg_write(vpu, &av1_show_frame,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
+	hantro_reg_write(vpu, &av1_switchable_motion_mode,
+			 !!(frame->flags &
+			    V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
+
+	/* Per-sequence tool flags */
+	hantro_reg_write(vpu, &av1_enable_cdef,
+			 !!(seq->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
+	hantro_reg_write(vpu, &av1_allow_masked_compound,
+			 !!(seq->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
+	hantro_reg_write(vpu, &av1_allow_interintra,
+			 !!(seq->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
+	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
+			 !!(seq->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
+	hantro_reg_write(vpu, &av1_allow_filter_intra,
+			 !!(seq->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
+	hantro_reg_write(vpu, &av1_enable_jnt_comp,
+			 !!(seq->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
+	hantro_reg_write(vpu, &av1_enable_dual_filter,
+			 !!(seq->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
+
+	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
+	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
+			 !!(frame->flags &
+			    V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
+	hantro_reg_write(vpu, &av1_allow_intrabc,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
+	hantro_reg_write(vpu, &av1_force_interger_mv, force_int_mv);
+
+	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
+	hantro_reg_write(vpu, &av1_delta_q_res_log, quant->delta_q_res);
+	hantro_reg_write(vpu, &av1_delta_q_present,
+			 !!(quant->flags &
+			    V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
+
+	/* The IDR bit is set when frame_type is zero. */
+	hantro_reg_write(vpu, &av1_idr_pic_e, !frame->frame_type);
+	hantro_reg_write(vpu, &av1_quant_base_qindex, quant->base_q_idx);
+	/* Luma and chroma share the context bit depth. */
+	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
+	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
+
+	hantro_reg_write(vpu, &av1_mcomp_filt_type, frame->interpolation_filter);
+	hantro_reg_write(vpu, &av1_high_prec_mv_e,
+			 !!(frame->flags &
+			    V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
+	hantro_reg_write(vpu, &av1_comp_pred_mode,
+			 (frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ?
+			 2 : 0);
+	hantro_reg_write(vpu, &av1_transform_mode,
+			 (frame->tx_mode == 1) ? 3 : 4);
+	hantro_reg_write(vpu, &av1_max_cb_size,
+			 (seq->flags &
+			  V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
+	hantro_reg_write(vpu, &av1_min_cb_size, 3);
+
+	/* Fields this function always clears */
+	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
+	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
+	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
+
+	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, quant->delta_q_y_dc);
+	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, quant->delta_q_u_dc);
+	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, quant->delta_q_u_ac);
+
+	/* Without quantizer matrices the level fields are written with 0xff. */
+	hantro_reg_write(vpu, &av1_qmlevel_y, use_qmatrix ? quant->qm_y : 0xff);
+	hantro_reg_write(vpu, &av1_qmlevel_u, use_qmatrix ? quant->qm_u : 0xff);
+	hantro_reg_write(vpu, &av1_qmlevel_v, use_qmatrix ? quant->qm_v : 0xff);
+
+	hantro_reg_write(vpu, &av1_lossless_e,
+			 rockchip_vpu981_av1_dec_is_lossless(ctx));
+	hantro_reg_write(vpu, &av1_quant_delta_v_dc, quant->delta_q_v_dc);
+	hantro_reg_write(vpu, &av1_quant_delta_v_ac, quant->delta_q_v_ac);
+
+	/* A skip-mode reference of zero is replaced by 1. */
+	hantro_reg_write(vpu, &av1_skip_ref0, frame->skip_mode_frame[0] ?: 1);
+	hantro_reg_write(vpu, &av1_skip_ref1, frame->skip_mode_frame[1] ?: 1);
+
+	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
+	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
+}
+
+/*
+ * Point the decoder at the bitstream held in vb2_src. The tile_offset of the
+ * first tile group entry is split in two: the part that is a multiple of 16
+ * bytes is added to the buffer DMA address, and the remaining bytes are
+ * programmed as a start position expressed in bits.
+ */
+static void
+rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
+					 struct vb2_v4l2_buffer *vb2_src)
+{
+	const struct v4l2_ctrl_av1_tile_group_entry *first_tile =
+		ctx->av1_dec.ctrls.tile_group_entry;
+	struct vb2_buffer *buf = &vb2_src->vb2_buf;
+	struct hantro_dev *vpu = ctx->dev;
+	dma_addr_t stream_dma = vb2_dma_contig_plane_dma_addr(buf, 0);
+	u32 payload = vb2_get_plane_payload(buf, 0);
+	u32 capacity = vb2_plane_size(buf, 0);
+	/* Low four bits of the byte offset, converted to a bit count. */
+	int first_bit = (first_tile->tile_offset & 0xf) * 8;
+	/* Byte offset rounded down to a multiple of 16. */
+	int aligned_offset = first_tile->tile_offset & ~0xf;
+
+	hantro_reg_write(vpu, &av1_strm_buffer_len, capacity);
+	hantro_reg_write(vpu, &av1_strm_start_bit, first_bit);
+	hantro_reg_write(vpu, &av1_stream_len, payload);
+	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
+	hantro_write_addr(vpu, AV1_INPUT_STREAM, stream_dma + aligned_offset);
+}
+
+/*
+ * Program the luma, chroma and motion-vector output addresses. All three are
+ * derived from the buffer of the current frame: the chroma address is the
+ * luma address plus the value of the luma-size helper, the MV address is the
+ * luma address plus the value of the chroma-size helper.
+ */
+static void
+rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_dev *vpu = ctx->dev;
+	size_t chroma_off = rockchip_vpu981_av1_dec_luma_size(ctx);
+	size_t mv_off = rockchip_vpu981_av1_dec_chroma_size(ctx);
+	struct vb2_v4l2_buffer *vb2_dst =
+		av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
+	struct hantro_decoded_buffer *dst =
+		vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
+	dma_addr_t base = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
+
+	hantro_write_addr(vpu, AV1_TILE_OUT_LU, base);
+	hantro_write_addr(vpu, AV1_TILE_OUT_CH, base + chroma_off);
+	hantro_write_addr(vpu, AV1_TILE_OUT_MV, base + mv_off);
+}
+
+/*
+ * Program the hardware for one AV1 decode job and start it.
+ *
+ * Returns 0 once the decoder enable bit has been written. If preparation
+ * fails or no source buffer is available, a negative error code is returned
+ * after the failure has been reported through hantro_irq_done() with
+ * VB2_BUF_STATE_ERROR.
+ */
+int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *vb2_src;
+ int ret;
+
+ hantro_start_prepare_run(ctx);
+
+ ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
+ if (ret)
+ goto prepare_error;
+
+ vb2_src = hantro_get_src_buf(ctx);
+ if (!vb2_src) {
+ ret = -EINVAL;
+ goto prepare_error;
+ }
+
+ /* Update the reference bookkeeping using the source buffer timestamp. */
+ rockchip_vpu981_av1_dec_clean_refs(ctx);
+ rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
+
+ /* Per-frame decoding parameters */
+ rockchip_vpu981_av1_dec_set_parameters(ctx);
+ rockchip_vpu981_av1_dec_set_global_model(ctx);
+ rockchip_vpu981_av1_dec_set_tile_info(ctx);
+ rockchip_vpu981_av1_dec_set_reference_frames(ctx);
+ rockchip_vpu981_av1_dec_set_segmentation(ctx);
+ rockchip_vpu981_av1_dec_set_loopfilter(ctx);
+ rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
+ rockchip_vpu981_av1_dec_set_cdef(ctx);
+ rockchip_vpu981_av1_dec_set_lr(ctx);
+ rockchip_vpu981_av1_dec_set_prob(ctx);
+
+ /* Fixed decoder configuration, identical for every frame */
+ hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
+ hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
+ hantro_reg_write(vpu, &av1_write_mvs_e, 1);
+ hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
+ hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
+
+ hantro_reg_write(vpu, &av1_dec_abort_e, 0);
+ hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
+
+ /* Bus and memory access settings */
+ hantro_reg_write(vpu, &av1_dec_alignment, 64);
+ hantro_reg_write(vpu, &av1_apf_disable, 0);
+ hantro_reg_write(vpu, &av1_apf_threshold, 8);
+ hantro_reg_write(vpu, &av1_dec_buswidth, 2);
+ hantro_reg_write(vpu, &av1_dec_max_burst, 16);
+ hantro_reg_write(vpu, &av1_error_conceal_e, 0);
+ hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
+ hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
+
+ /* Override both timeouts with 0xfffffff cycles. */
+ hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
+ hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
+ hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
+ hantro_reg_write(vpu, &av1_timeout_override_e, 1);
+
+ rockchip_vpu981_av1_dec_set_output_buffer(ctx);
+ rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
+
+ hantro_end_prepare_run(ctx);
+
+ /* The enable bit is written last, once everything else is programmed. */
+ hantro_reg_write(vpu, &av1_dec_e, 1);
+
+ return 0;
+
+prepare_error:
+ /* Close the prepare sequence and report the failure. */
+ hantro_end_prepare_run(ctx);
+ hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
+ return ret;
+}
+
+/*
+ * Configure and switch on the post-processor so that it writes into the
+ * current destination buffer, using the dimensions, line stride and pixel
+ * format found in ctx->dst_fmt. The chroma plane is placed right after
+ * bytesperline * height bytes of luma.
+ */
+static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
+{
+	const struct v4l2_pix_format_mplane *fmt = &ctx->dst_fmt;
+	struct hantro_dev *vpu = ctx->dev;
+	int out_w = fmt->width;
+	int out_h = fmt->height;
+	struct vb2_v4l2_buffer *vb2_dst = hantro_get_dst_buf(ctx);
+	dma_addr_t luma_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
+	size_t chroma_off = fmt->plane_fmt[0].bytesperline * fmt->height;
+	u32 out_format;
+
+	/* Turn the post-processor output on. */
+	hantro_reg_write(vpu, &av1_pp_out_e, 1);
+	hantro_reg_write(vpu, &av1_pp_in_format, 0);
+	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
+	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
+
+	/* Input size is programmed as half of the output size. */
+	hantro_reg_write(vpu, &av1_pp_in_height, out_h / 2);
+	hantro_reg_write(vpu, &av1_pp_in_width, out_w / 2);
+	hantro_reg_write(vpu, &av1_pp_out_height, out_h);
+	hantro_reg_write(vpu, &av1_pp_out_width, out_w);
+
+	/* Luma and chroma use the same line stride. */
+	hantro_reg_write(vpu, &av1_pp_out_y_stride,
+			 fmt->plane_fmt[0].bytesperline);
+	hantro_reg_write(vpu, &av1_pp_out_c_stride,
+			 fmt->plane_fmt[0].bytesperline);
+
+	switch (fmt->pixelformat) {
+	case V4L2_PIX_FMT_P010:
+		out_format = 1;
+		break;
+	case V4L2_PIX_FMT_NV12:
+		out_format = 3;
+		break;
+	default:
+		out_format = 0;
+		break;
+	}
+	hantro_reg_write(vpu, &av1_pp_out_format, out_format);
+
+	/* Remaining post-processor fields are cleared. */
+	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
+	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
+	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
+	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
+	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
+	hantro_reg_write(vpu, &av1_pp_up_level, 0);
+	hantro_reg_write(vpu, &av1_pp_down_level, 0);
+	hantro_reg_write(vpu, &av1_pp_exist, 0);
+
+	hantro_write_addr(vpu, AV1_PP_OUT_LU, luma_dma);
+	hantro_write_addr(vpu, AV1_PP_OUT_CH, luma_dma + chroma_off);
+}
+
+/* Switch the post-processor off by clearing its output enable bit. */
+static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
+{
+	hantro_reg_write(ctx->dev, &av1_pp_out_e, 0);
+}
+
+/* Post-processor enable/disable callbacks implemented in this file */
+const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
+ .enable = rockchip_vpu981_postproc_enable,
+ .disable = rockchip_vpu981_postproc_disable,
+};
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
new file mode 100644
index 000000000000..182e6c830ff6
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
@@ -0,0 +1,477 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022, Collabora
+ *
+ * Author: Benjamin Gaignard <[email protected]>
+ */
+
+#ifndef _ROCKCHIP_VPU981_REGS_H_
+#define _ROCKCHIP_VPU981_REGS_H_
+
+#include "hantro.h"
+
+/* Byte offset of register number nr (registers are 4 bytes apart) */
+#define AV1_SWREG(nr) ((nr) * 4)
+
+/*
+ * Build a struct hantro_reg describing one register field:
+ * b is the register number, s the bit position of the field inside that
+ * register and m the field mask before shifting.
+ */
+#define AV1_DEC_REG(b, s, m) \
+ ((const struct hantro_reg) { \
+ .base = AV1_SWREG(b), \
+ .shift = s, \
+ .mask = m, \
+ })
+
+/* Register 1 and its "decoder ready" interrupt bit */
+#define AV1_REG_INTERRUPT AV1_SWREG(1)
+#define AV1_REG_INTERRUPT_DEC_RDY_INT BIT(12)
+
+/* Register 2 and its clock gating enable bit */
+#define AV1_REG_CONFIG AV1_SWREG(2)
+#define AV1_REG_CONFIG_DEC_CLK_GATE_E BIT(10)
+
+#define av1_dec_e AV1_DEC_REG(1, 0, 0x1)
+#define av1_dec_abort_e AV1_DEC_REG(1, 5, 0x1)
+#define av1_dec_tile_int_e AV1_DEC_REG(1, 7, 0x1)
+
+#define av1_dec_clk_gate_e AV1_DEC_REG(2, 10, 0x1)
+
+#define av1_dec_out_ec_bypass AV1_DEC_REG(3, 8, 0x1)
+#define av1_write_mvs_e AV1_DEC_REG(3, 12, 0x1)
+#define av1_filtering_dis AV1_DEC_REG(3, 14, 0x1)
+#define av1_dec_out_dis AV1_DEC_REG(3, 15, 0x1)
+#define av1_dec_out_ec_byte_word AV1_DEC_REG(3, 16, 0x1)
+#define av1_skip_mode AV1_DEC_REG(3, 26, 0x1)
+#define av1_dec_mode AV1_DEC_REG(3, 27, 0x1f)
+
+#define av1_ref_frames AV1_DEC_REG(4, 0, 0xf)
+#define av1_pic_height_in_cbs AV1_DEC_REG(4, 6, 0x1fff)
+#define av1_pic_width_in_cbs AV1_DEC_REG(4, 19, 0x1fff)
+
+#define av1_ref_scaling_enable AV1_DEC_REG(5, 0, 0x1)
+#define av1_filt_level_base_gt32 AV1_DEC_REG(5, 1, 0x1)
+#define av1_error_resilient AV1_DEC_REG(5, 2, 0x1)
+#define av1_force_interger_mv AV1_DEC_REG(5, 3, 0x1)
+#define av1_allow_intrabc AV1_DEC_REG(5, 4, 0x1)
+#define av1_allow_screen_content_tools AV1_DEC_REG(5, 5, 0x1)
+#define av1_reduced_tx_set_used AV1_DEC_REG(5, 6, 0x1)
+#define av1_enable_dual_filter AV1_DEC_REG(5, 7, 0x1)
+#define av1_enable_jnt_comp AV1_DEC_REG(5, 8, 0x1)
+#define av1_allow_filter_intra AV1_DEC_REG(5, 9, 0x1)
+#define av1_enable_intra_edge_filter AV1_DEC_REG(5, 10, 0x1)
+#define av1_tempor_mvp_e AV1_DEC_REG(5, 11, 0x1)
+#define av1_allow_interintra AV1_DEC_REG(5, 12, 0x1)
+#define av1_allow_masked_compound AV1_DEC_REG(5, 13, 0x1)
+#define av1_enable_cdef AV1_DEC_REG(5, 14, 0x1)
+#define av1_switchable_motion_mode AV1_DEC_REG(5, 15, 0x1)
+#define av1_show_frame AV1_DEC_REG(5, 16, 0x1)
+#define av1_superres_is_scaled AV1_DEC_REG(5, 17, 0x1)
+#define av1_allow_warp AV1_DEC_REG(5, 18, 0x1)
+#define av1_disable_cdf_update AV1_DEC_REG(5, 19, 0x1)
+#define av1_preskip_segid AV1_DEC_REG(5, 20, 0x1)
+#define av1_delta_lf_present AV1_DEC_REG(5, 21, 0x1)
+#define av1_delta_lf_multi AV1_DEC_REG(5, 22, 0x1)
+#define av1_delta_lf_res_log AV1_DEC_REG(5, 23, 0x3)
+#define av1_strm_start_bit AV1_DEC_REG(5, 25, 0x7f)
+
+#define av1_stream_len AV1_DEC_REG(6, 0, 0xffffffff)
+
+#define av1_delta_q_present AV1_DEC_REG(7, 0, 0x1)
+#define av1_delta_q_res_log AV1_DEC_REG(7, 1, 0x3)
+#define av1_cdef_damping AV1_DEC_REG(7, 3, 0x3)
+#define av1_cdef_bits AV1_DEC_REG(7, 5, 0x3)
+#define av1_apply_grain AV1_DEC_REG(7, 7, 0x1)
+#define av1_num_y_points_b AV1_DEC_REG(7, 8, 0x1)
+#define av1_num_cb_points_b AV1_DEC_REG(7, 9, 0x1)
+#define av1_num_cr_points_b AV1_DEC_REG(7, 10, 0x1)
+#define av1_overlap_flag AV1_DEC_REG(7, 11, 0x1)
+#define av1_clip_to_restricted_range AV1_DEC_REG(7, 12, 0x1)
+#define av1_chroma_scaling_from_luma AV1_DEC_REG(7, 13, 0x1)
+#define av1_random_seed AV1_DEC_REG(7, 14, 0xffff)
+#define av1_blackwhite_e AV1_DEC_REG(7, 30, 0x1)
+
+#define av1_scaling_shift AV1_DEC_REG(8, 0, 0xf)
+#define av1_bit_depth_c_minus8 AV1_DEC_REG(8, 4, 0x3)
+#define av1_bit_depth_y_minus8 AV1_DEC_REG(8, 6, 0x3)
+#define av1_quant_base_qindex AV1_DEC_REG(8, 8, 0xff)
+#define av1_idr_pic_e AV1_DEC_REG(8, 16, 0x1)
+#define av1_superres_pic_width AV1_DEC_REG(8, 17, 0x7fff)
+
+#define av1_ref4_sign_bias AV1_DEC_REG(9, 2, 0x1)
+#define av1_ref5_sign_bias AV1_DEC_REG(9, 3, 0x1)
+#define av1_ref6_sign_bias AV1_DEC_REG(9, 4, 0x1)
+#define av1_mf1_type AV1_DEC_REG(9, 5, 0x7)
+#define av1_mf2_type AV1_DEC_REG(9, 8, 0x7)
+#define av1_mf3_type AV1_DEC_REG(9, 11, 0x7)
+#define av1_scale_denom_minus9 AV1_DEC_REG(9, 14, 0x7)
+#define av1_last_active_seg AV1_DEC_REG(9, 17, 0x7)
+#define av1_context_update_tile_id AV1_DEC_REG(9, 20, 0xfff)
+
+#define av1_tile_transpose AV1_DEC_REG(10, 0, 0x1)
+#define av1_tile_enable AV1_DEC_REG(10, 1, 0x1)
+#define av1_multicore_full_width AV1_DEC_REG(10, 2, 0xff)
+#define av1_num_tile_rows_8k AV1_DEC_REG(10, 10, 0x7f)
+#define av1_num_tile_cols_8k AV1_DEC_REG(10, 17, 0x7f)
+#define av1_multicore_tile_start_x AV1_DEC_REG(10, 24, 0xff)
+
+/* Fields of register 11 */
+#define av1_use_temporal3_mvs AV1_DEC_REG(11, 0, 0x1)
+#define av1_use_temporal2_mvs AV1_DEC_REG(11, 1, 0x1)
+#define av1_use_temporal1_mvs AV1_DEC_REG(11, 2, 0x1)
+#define av1_use_temporal0_mvs AV1_DEC_REG(11, 3, 0x1)
+#define av1_comp_pred_mode AV1_DEC_REG(11, 4, 0x3)
+#define av1_high_prec_mv_e AV1_DEC_REG(11, 7, 0x1)
+#define av1_mcomp_filt_type AV1_DEC_REG(11, 8, 0x7)
+#define av1_multicore_expect_context_update AV1_DEC_REG(11, 11, 0x1)
+#define av1_multicore_sbx_offset AV1_DEC_REG(11, 12, 0x7f)
+/* NOTE(review): "ulticore" looks like a typo of "multicore" - confirm before renaming */
+#define av1_ulticore_tile_col AV1_DEC_REG(11, 19, 0x7f)
+#define av1_transform_mode AV1_DEC_REG(11, 27, 0x7)
+#define av1_dec_tile_size_mag AV1_DEC_REG(11, 30, 0x3)
+
+#define av1_seg_quant_sign AV1_DEC_REG(12, 2, 0xff)
+#define av1_max_cb_size AV1_DEC_REG(12, 10, 0x7)
+#define av1_min_cb_size AV1_DEC_REG(12, 13, 0x7)
+#define av1_comp_pred_fixed_ref AV1_DEC_REG(12, 16, 0x7)
+#define av1_multicore_tile_width AV1_DEC_REG(12, 19, 0x7f)
+#define av1_pic_height_pad AV1_DEC_REG(12, 26, 0x7)
+#define av1_pic_width_pad AV1_DEC_REG(12, 29, 0x7)
+
+#define av1_segment_e AV1_DEC_REG(13, 0, 0x1)
+#define av1_segment_upd_e AV1_DEC_REG(13, 1, 0x1)
+#define av1_segment_temp_upd_e AV1_DEC_REG(13, 2, 0x1)
+#define av1_comp_pred_var_ref0_av1 AV1_DEC_REG(13, 3, 0x7)
+#define av1_comp_pred_var_ref1_av1 AV1_DEC_REG(13, 6, 0x7)
+#define av1_lossless_e AV1_DEC_REG(13, 9, 0x1)
+#define av1_qp_delta_ch_ac_av1 AV1_DEC_REG(13, 11, 0x7f)
+#define av1_qp_delta_ch_dc_av1 AV1_DEC_REG(13, 18, 0x7f)
+#define av1_qp_delta_y_dc_av1 AV1_DEC_REG(13, 25, 0x7f)
+
+#define av1_quant_seg0 AV1_DEC_REG(14, 0, 0xff)
+#define av1_filt_level_seg0 AV1_DEC_REG(14, 8, 0x3f)
+#define av1_skip_seg0 AV1_DEC_REG(14, 14, 0x1)
+#define av1_refpic_seg0 AV1_DEC_REG(14, 15, 0xf)
+#define av1_filt_level_delta0_seg0 AV1_DEC_REG(14, 19, 0x7f)
+#define av1_filt_level0 AV1_DEC_REG(14, 26, 0x3f)
+
+#define av1_quant_seg1 AV1_DEC_REG(15, 0, 0xff)
+#define av1_filt_level_seg1 AV1_DEC_REG(15, 8, 0x3f)
+#define av1_skip_seg1 AV1_DEC_REG(15, 14, 0x1)
+#define av1_refpic_seg1 AV1_DEC_REG(15, 15, 0xf)
+#define av1_filt_level_delta0_seg1 AV1_DEC_REG(15, 19, 0x7f)
+#define av1_filt_level1 AV1_DEC_REG(15, 26, 0x3f)
+
+#define av1_quant_seg2 AV1_DEC_REG(16, 0, 0xff)
+#define av1_filt_level_seg2 AV1_DEC_REG(16, 8, 0x3f)
+#define av1_skip_seg2 AV1_DEC_REG(16, 14, 0x1)
+#define av1_refpic_seg2 AV1_DEC_REG(16, 15, 0xf)
+#define av1_filt_level_delta0_seg2 AV1_DEC_REG(16, 19, 0x7f)
+#define av1_filt_level2 AV1_DEC_REG(16, 26, 0x3f)
+
+#define av1_quant_seg3 AV1_DEC_REG(17, 0, 0xff)
+#define av1_filt_level_seg3 AV1_DEC_REG(17, 8, 0x3f)
+#define av1_skip_seg3 AV1_DEC_REG(17, 14, 0x1)
+#define av1_refpic_seg3 AV1_DEC_REG(17, 15, 0xf)
+#define av1_filt_level_delta0_seg3 AV1_DEC_REG(17, 19, 0x7f)
+#define av1_filt_level3 AV1_DEC_REG(17, 26, 0x3f)
+
+#define av1_quant_seg4 AV1_DEC_REG(18, 0, 0xff)
+#define av1_filt_level_seg4 AV1_DEC_REG(18, 8, 0x3f)
+#define av1_skip_seg4 AV1_DEC_REG(18, 14, 0x1)
+#define av1_refpic_seg4 AV1_DEC_REG(18, 15, 0xf)
+#define av1_filt_level_delta0_seg4 AV1_DEC_REG(18, 19, 0x7f)
+#define av1_lr_type AV1_DEC_REG(18, 26, 0x3f)
+
+#define av1_quant_seg5 AV1_DEC_REG(19, 0, 0xff)
+#define av1_filt_level_seg5 AV1_DEC_REG(19, 8, 0x3f)
+#define av1_skip_seg5 AV1_DEC_REG(19, 14, 0x1)
+#define av1_refpic_seg5 AV1_DEC_REG(19, 15, 0xf)
+#define av1_filt_level_delta0_seg5 AV1_DEC_REG(19, 19, 0x7f)
+#define av1_lr_unit_size AV1_DEC_REG(19, 26, 0x3f)
+
+#define av1_filt_level_delta1_seg0 AV1_DEC_REG(20, 0, 0x7f)
+#define av1_filt_level_delta2_seg0 AV1_DEC_REG(20, 7, 0x7f)
+#define av1_filt_level_delta3_seg0 AV1_DEC_REG(20, 14, 0x7f)
+#define av1_global_mv_seg0 AV1_DEC_REG(20, 21, 0x1)
+#define av1_mf1_last_offset AV1_DEC_REG(20, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg1 AV1_DEC_REG(21, 0, 0x7f)
+#define av1_filt_level_delta2_seg1 AV1_DEC_REG(21, 7, 0x7f)
+#define av1_filt_level_delta3_seg1 AV1_DEC_REG(21, 14, 0x7f)
+#define av1_global_mv_seg1 AV1_DEC_REG(21, 21, 0x1)
+#define av1_mf1_last2_offset AV1_DEC_REG(21, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg2 AV1_DEC_REG(22, 0, 0x7f)
+#define av1_filt_level_delta2_seg2 AV1_DEC_REG(22, 7, 0x7f)
+#define av1_filt_level_delta3_seg2 AV1_DEC_REG(22, 14, 0x7f)
+#define av1_global_mv_seg2 AV1_DEC_REG(22, 21, 0x1)
+#define av1_mf1_last3_offset AV1_DEC_REG(22, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg3 AV1_DEC_REG(23, 0, 0x7f)
+#define av1_filt_level_delta2_seg3 AV1_DEC_REG(23, 7, 0x7f)
+#define av1_filt_level_delta3_seg3 AV1_DEC_REG(23, 14, 0x7f)
+#define av1_global_mv_seg3 AV1_DEC_REG(23, 21, 0x1)
+#define av1_mf1_golden_offset AV1_DEC_REG(23, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg4 AV1_DEC_REG(24, 0, 0x7f)
+#define av1_filt_level_delta2_seg4 AV1_DEC_REG(24, 7, 0x7f)
+#define av1_filt_level_delta3_seg4 AV1_DEC_REG(24, 14, 0x7f)
+#define av1_global_mv_seg4 AV1_DEC_REG(24, 21, 0x1)
+#define av1_mf1_bwdref_offset AV1_DEC_REG(24, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg5 AV1_DEC_REG(25, 0, 0x7f)
+#define av1_filt_level_delta2_seg5 AV1_DEC_REG(25, 7, 0x7f)
+#define av1_filt_level_delta3_seg5 AV1_DEC_REG(25, 14, 0x7f)
+#define av1_global_mv_seg5 AV1_DEC_REG(25, 21, 0x1)
+#define av1_mf1_altref2_offset AV1_DEC_REG(25, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg6 AV1_DEC_REG(26, 0, 0x7f)
+#define av1_filt_level_delta2_seg6 AV1_DEC_REG(26, 7, 0x7f)
+#define av1_filt_level_delta3_seg6 AV1_DEC_REG(26, 14, 0x7f)
+#define av1_global_mv_seg6 AV1_DEC_REG(26, 21, 0x1)
+#define av1_mf1_altref_offset AV1_DEC_REG(26, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg7 AV1_DEC_REG(27, 0, 0x7f)
+#define av1_filt_level_delta2_seg7 AV1_DEC_REG(27, 7, 0x7f)
+#define av1_filt_level_delta3_seg7 AV1_DEC_REG(27, 14, 0x7f)
+#define av1_global_mv_seg7 AV1_DEC_REG(27, 21, 0x1)
+#define av1_mf2_last_offset AV1_DEC_REG(27, 22, 0x1ff)
+
+#define av1_cb_offset AV1_DEC_REG(28, 0, 0x1ff)
+#define av1_cb_luma_mult AV1_DEC_REG(28, 9, 0xff)
+#define av1_cb_mult AV1_DEC_REG(28, 17, 0xff)
+#define av1_quant_delta_v_dc AV1_DEC_REG(28, 25, 0x7f)
+
+#define av1_cr_offset AV1_DEC_REG(29, 0, 0x1ff)
+#define av1_cr_luma_mult AV1_DEC_REG(29, 9, 0xff)
+#define av1_cr_mult AV1_DEC_REG(29, 17, 0xff)
+#define av1_quant_delta_v_ac AV1_DEC_REG(29, 25, 0x7f)
+
+#define av1_filt_ref_adj_5 AV1_DEC_REG(30, 0, 0x7f)
+#define av1_filt_ref_adj_4 AV1_DEC_REG(30, 7, 0x7f)
+#define av1_filt_mb_adj_1 AV1_DEC_REG(30, 14, 0x7f)
+#define av1_filt_mb_adj_0 AV1_DEC_REG(30, 21, 0x7f)
+#define av1_filt_sharpness AV1_DEC_REG(30, 28, 0x7)
+
+#define av1_quant_seg6 AV1_DEC_REG(31, 0, 0xff)
+#define av1_filt_level_seg6 AV1_DEC_REG(31, 8, 0x3f)
+#define av1_skip_seg6 AV1_DEC_REG(31, 14, 0x1)
+#define av1_refpic_seg6 AV1_DEC_REG(31, 15, 0xf)
+#define av1_filt_level_delta0_seg6 AV1_DEC_REG(31, 19, 0x7f)
+#define av1_skip_ref0 AV1_DEC_REG(31, 26, 0xf)
+
+#define av1_quant_seg7 AV1_DEC_REG(32, 0, 0xff)
+#define av1_filt_level_seg7 AV1_DEC_REG(32, 8, 0x3f)
+#define av1_skip_seg7 AV1_DEC_REG(32, 14, 0x1)
+#define av1_refpic_seg7 AV1_DEC_REG(32, 15, 0xf)
+#define av1_filt_level_delta0_seg7 AV1_DEC_REG(32, 19, 0x7f)
+#define av1_skip_ref1 AV1_DEC_REG(32, 26, 0xf)
+
+#define av1_ref0_height AV1_DEC_REG(33, 0, 0xffff)
+#define av1_ref0_width AV1_DEC_REG(33, 16, 0xffff)
+
+#define av1_ref1_height AV1_DEC_REG(34, 0, 0xffff)
+#define av1_ref1_width AV1_DEC_REG(34, 16, 0xffff)
+
+#define av1_ref2_height AV1_DEC_REG(35, 0, 0xffff)
+#define av1_ref2_width AV1_DEC_REG(35, 16, 0xffff)
+
+#define av1_ref0_ver_scale AV1_DEC_REG(36, 0, 0xffff)
+#define av1_ref0_hor_scale AV1_DEC_REG(36, 16, 0xffff)
+
+#define av1_ref1_ver_scale AV1_DEC_REG(37, 0, 0xffff)
+#define av1_ref1_hor_scale AV1_DEC_REG(37, 16, 0xffff)
+
+#define av1_ref2_ver_scale AV1_DEC_REG(38, 0, 0xffff)
+#define av1_ref2_hor_scale AV1_DEC_REG(38, 16, 0xffff)
+
+#define av1_ref3_ver_scale AV1_DEC_REG(39, 0, 0xffff)
+#define av1_ref3_hor_scale AV1_DEC_REG(39, 16, 0xffff)
+
+#define av1_ref4_ver_scale AV1_DEC_REG(40, 0, 0xffff)
+#define av1_ref4_hor_scale AV1_DEC_REG(40, 16, 0xffff)
+
+#define av1_ref5_ver_scale AV1_DEC_REG(41, 0, 0xffff)
+#define av1_ref5_hor_scale AV1_DEC_REG(41, 16, 0xffff)
+
+#define av1_ref6_ver_scale AV1_DEC_REG(42, 0, 0xffff)
+#define av1_ref6_hor_scale AV1_DEC_REG(42, 16, 0xffff)
+
+#define av1_ref3_height AV1_DEC_REG(43, 0, 0xffff)
+#define av1_ref3_width AV1_DEC_REG(43, 16, 0xffff)
+
+#define av1_ref4_height AV1_DEC_REG(44, 0, 0xffff)
+#define av1_ref4_width AV1_DEC_REG(44, 16, 0xffff)
+
+#define av1_ref5_height AV1_DEC_REG(45, 0, 0xffff)
+#define av1_ref5_width AV1_DEC_REG(45, 16, 0xffff)
+
+#define av1_ref6_height AV1_DEC_REG(46, 0, 0xffff)
+#define av1_ref6_width AV1_DEC_REG(46, 16, 0xffff)
+
+#define av1_mf2_last2_offset AV1_DEC_REG(47, 0, 0x1ff)
+#define av1_mf2_last3_offset AV1_DEC_REG(47, 9, 0x1ff)
+#define av1_mf2_golden_offset AV1_DEC_REG(47, 18, 0x1ff)
+#define av1_qmlevel_y AV1_DEC_REG(47, 27, 0xf)
+
+#define av1_mf2_bwdref_offset AV1_DEC_REG(48, 0, 0x1ff)
+#define av1_mf2_altref2_offset AV1_DEC_REG(48, 9, 0x1ff)
+#define av1_mf2_altref_offset AV1_DEC_REG(48, 18, 0x1ff)
+#define av1_qmlevel_u AV1_DEC_REG(48, 27, 0xf)
+
+#define av1_filt_ref_adj_6 AV1_DEC_REG(49, 0, 0x7f)
+#define av1_filt_ref_adj_7 AV1_DEC_REG(49, 7, 0x7f)
+#define av1_qmlevel_v AV1_DEC_REG(49, 14, 0xf)
+
+#define av1_superres_chroma_step AV1_DEC_REG(51, 0, 0x3fff)
+#define av1_superres_luma_step AV1_DEC_REG(51, 14, 0x3fff)
+
+#define av1_superres_init_chroma_subpel_x AV1_DEC_REG(52, 0, 0x3fff)
+#define av1_superres_init_luma_subpel_x AV1_DEC_REG(52, 14, 0x3fff)
+
+#define av1_cdef_chroma_secondary_strength AV1_DEC_REG(53, 0, 0xffff)
+#define av1_cdef_luma_secondary_strength AV1_DEC_REG(53, 16, 0xffff)
+
+#define av1_apf_threshold AV1_DEC_REG(55, 0, 0xffff)
+#define av1_apf_single_pu_mode AV1_DEC_REG(55, 30, 0x1)
+/* NOTE(review): same register, shift and mask as av1_apf_single_pu_mode, so both names address the same bit - confirm the intended bit position against the hardware documentation. */
+#define av1_apf_disable AV1_DEC_REG(55, 30, 0x1)
+
+#define av1_dec_max_burst AV1_DEC_REG(58, 0, 0xff)
+#define av1_dec_buswidth AV1_DEC_REG(58, 8, 0x7)
+#define av1_dec_multicore_mode AV1_DEC_REG(58, 11, 0x3)
+#define av1_dec_axi_wd_id_e AV1_DEC_REG(58, 13, 0x1)
+#define av1_dec_axi_rd_id_e AV1_DEC_REG(58, 14, 0x1)
+#define av1_dec_mc_polltime AV1_DEC_REG(58, 17, 0x3ff)
+#define av1_dec_mc_pollmode AV1_DEC_REG(58, 27, 0x3)
+
+/* NOTE(review): these four fields are spaced 7 bits apart (shifts 0/7/14/21) but use a 6-bit mask (0x3f), whereas av1_filt_ref_adj_4..7 use 0x7f - confirm the field width against the hardware documentation. */
+#define av1_filt_ref_adj_3 AV1_DEC_REG(59, 0, 0x3f)
+#define av1_filt_ref_adj_2 AV1_DEC_REG(59, 7, 0x3f)
+#define av1_filt_ref_adj_1 AV1_DEC_REG(59, 14, 0x3f)
+#define av1_filt_ref_adj_0 AV1_DEC_REG(59, 21, 0x3f)
+#define av1_ref0_sign_bias AV1_DEC_REG(59, 28, 0x1)
+#define av1_ref1_sign_bias AV1_DEC_REG(59, 29, 0x1)
+#define av1_ref2_sign_bias AV1_DEC_REG(59, 30, 0x1)
+#define av1_ref3_sign_bias AV1_DEC_REG(59, 31, 0x1)
+
+#define av1_cur_last_roffset AV1_DEC_REG(184, 0, 0x1ff)
+#define av1_cur_last_offset AV1_DEC_REG(184, 9, 0x1ff)
+#define av1_mf3_last_offset AV1_DEC_REG(184, 18, 0x1ff)
+#define av1_ref0_gm_mode AV1_DEC_REG(184, 27, 0x3)
+
+#define av1_cur_last2_roffset AV1_DEC_REG(185, 0, 0x1ff)
+#define av1_cur_last2_offset AV1_DEC_REG(185, 9, 0x1ff)
+#define av1_mf3_last2_offset AV1_DEC_REG(185, 18, 0x1ff)
+#define av1_ref1_gm_mode AV1_DEC_REG(185, 27, 0x3)
+
+#define av1_cur_last3_roffset AV1_DEC_REG(186, 0, 0x1ff)
+#define av1_cur_last3_offset AV1_DEC_REG(186, 9, 0x1ff)
+#define av1_mf3_last3_offset AV1_DEC_REG(186, 18, 0x1ff)
+#define av1_ref2_gm_mode AV1_DEC_REG(186, 27, 0x3)
+
+#define av1_cur_golden_roffset AV1_DEC_REG(187, 0, 0x1ff)
+#define av1_cur_golden_offset AV1_DEC_REG(187, 9, 0x1ff)
+#define av1_mf3_golden_offset AV1_DEC_REG(187, 18, 0x1ff)
+#define av1_ref3_gm_mode AV1_DEC_REG(187, 27, 0x3)
+
+#define av1_cur_bwdref_roffset AV1_DEC_REG(188, 0, 0x1ff)
+#define av1_cur_bwdref_offset AV1_DEC_REG(188, 9, 0x1ff)
+#define av1_mf3_bwdref_offset AV1_DEC_REG(188, 18, 0x1ff)
+#define av1_ref4_gm_mode AV1_DEC_REG(188, 27, 0x3)
+
+#define av1_cur_altref2_roffset AV1_DEC_REG(257, 0, 0x1ff)
+#define av1_cur_altref2_offset AV1_DEC_REG(257, 9, 0x1ff)
+#define av1_mf3_altref2_offset AV1_DEC_REG(257, 18, 0x1ff)
+#define av1_ref5_gm_mode AV1_DEC_REG(257, 27, 0x3)
+
+#define av1_strm_buffer_len AV1_DEC_REG(258, 0, 0xffffffff)
+
+#define av1_strm_start_offset AV1_DEC_REG(259, 0, 0xffffffff)
+
+#define av1_ppd_blend_exist AV1_DEC_REG(260, 21, 0x1)
+#define av1_ppd_dith_exist AV1_DEC_REG(260, 23, 0x1)
+#define av1_ablend_crop_e AV1_DEC_REG(260, 24, 0x1)
+#define av1_pp_format_p010_e AV1_DEC_REG(260, 25, 0x1)
+#define av1_pp_format_customer1_e AV1_DEC_REG(260, 26, 0x1)
+#define av1_pp_crop_exist AV1_DEC_REG(260, 27, 0x1)
+#define av1_pp_up_level AV1_DEC_REG(260, 28, 0x1)
+#define av1_pp_down_level AV1_DEC_REG(260, 29, 0x3)
+#define av1_pp_exist AV1_DEC_REG(260, 31, 0x1)
+
+#define av1_cur_altref_roffset AV1_DEC_REG(262, 0, 0x1ff)
+#define av1_cur_altref_offset AV1_DEC_REG(262, 9, 0x1ff)
+#define av1_mf3_altref_offset AV1_DEC_REG(262, 18, 0x1ff)
+#define av1_ref6_gm_mode AV1_DEC_REG(262, 27, 0x3)
+
+#define av1_cdef_luma_primary_strength AV1_DEC_REG(263, 0, 0xffffffff)
+
+#define av1_cdef_chroma_primary_strength AV1_DEC_REG(264, 0, 0xffffffff)
+
+#define av1_axi_arqos AV1_DEC_REG(265, 0, 0xf)
+#define av1_axi_awqos AV1_DEC_REG(265, 4, 0xf)
+#define av1_axi_wr_ostd_threshold AV1_DEC_REG(265, 8, 0x3ff)
+#define av1_axi_rd_ostd_threshold AV1_DEC_REG(265, 18, 0x3ff)
+#define av1_axi_wr_4k_dis AV1_DEC_REG(265, 31, 0x1)
+
+#define av1_128bit_mode AV1_DEC_REG(266, 5, 0x1)
+#define av1_wr_shaper_bypass AV1_DEC_REG(266, 10, 0x1)
+#define av1_error_conceal_e AV1_DEC_REG(266, 30, 0x1)
+
+#define av1_superres_chroma_step_invra AV1_DEC_REG(298, 0, 0xffff)
+#define av1_superres_luma_step_invra AV1_DEC_REG(298, 16, 0xffff)
+
+#define av1_dec_alignment AV1_DEC_REG(314, 0, 0xffff)
+
+#define av1_ext_timeout_cycles AV1_DEC_REG(318, 0, 0x7fffffff)
+#define av1_ext_timeout_override_e AV1_DEC_REG(318, 31, 0x1)
+
+#define av1_timeout_cycles AV1_DEC_REG(319, 0, 0x7fffffff)
+#define av1_timeout_override_e AV1_DEC_REG(319, 31, 0x1)
+
+#define av1_pp_out_e AV1_DEC_REG(320, 0, 0x1)
+#define av1_pp_cr_first AV1_DEC_REG(320, 1, 0x1)
+#define av1_pp_out_mode AV1_DEC_REG(320, 2, 0x1)
+#define av1_pp_out_tile_e AV1_DEC_REG(320, 3, 0x1)
+#define av1_pp_status AV1_DEC_REG(320, 4, 0xf)
+#define av1_pp_in_blk_size AV1_DEC_REG(320, 8, 0x7)
+#define av1_pp_out_p010_fmt AV1_DEC_REG(320, 11, 0x3)
+#define av1_pp_out_rgb_fmt AV1_DEC_REG(320, 13, 0x1f)
+#define av1_rgb_range_max AV1_DEC_REG(320, 18, 0xfff)
+#define av1_pp_rgb_planar AV1_DEC_REG(320, 30, 0x1)
+
+#define av1_scale_hratio AV1_DEC_REG(322, 0, 0x3ffff)
+#define av1_pp_out_format AV1_DEC_REG(322, 18, 0x1f)
+#define av1_ver_scale_mode AV1_DEC_REG(322, 23, 0x3)
+#define av1_hor_scale_mode AV1_DEC_REG(322, 25, 0x3)
+#define av1_pp_in_format AV1_DEC_REG(322, 27, 0x1f)
+
+#define av1_pp_out_c_stride AV1_DEC_REG(329, 0, 0xffff)
+#define av1_pp_out_y_stride AV1_DEC_REG(329, 16, 0xffff)
+
+#define av1_pp_in_height AV1_DEC_REG(331, 0, 0xffff)
+#define av1_pp_in_width AV1_DEC_REG(331, 16, 0xffff)
+
+#define av1_pp_out_height AV1_DEC_REG(332, 0, 0xffff)
+#define av1_pp_out_width AV1_DEC_REG(332, 16, 0xffff)
+
+#define av1_pp1_dup_ver AV1_DEC_REG(394, 0, 0xff)
+#define av1_pp1_dup_hor AV1_DEC_REG(394, 8, 0xff)
+#define av1_pp0_dup_ver AV1_DEC_REG(394, 16, 0xff)
+#define av1_pp0_dup_hor AV1_DEC_REG(394, 24, 0xff)
+
+#define AV1_TILE_OUT_LU (AV1_SWREG(65))
+#define AV1_REFERENCE_Y(i) (AV1_SWREG(67) + ((i) * 0x8))
+#define AV1_SEGMENTATION (AV1_SWREG(81))
+#define AV1_GLOBAL_MODEL (AV1_SWREG(83))
+#define AV1_CDEF_COL (AV1_SWREG(85))
+#define AV1_SR_COL (AV1_SWREG(89))
+#define AV1_LR_COL (AV1_SWREG(91))
+#define AV1_FILM_GRAIN (AV1_SWREG(95))
+#define AV1_TILE_OUT_CH (AV1_SWREG(99))
+#define AV1_REFERENCE_CB(i) (AV1_SWREG(101) + ((i) * 0x8))
+#define AV1_TILE_OUT_MV (AV1_SWREG(133))
+#define AV1_REFERENCE_MV(i) (AV1_SWREG(135) + ((i) * 0x8))
+#define AV1_TILE_BASE (AV1_SWREG(167))
+#define AV1_INPUT_STREAM (AV1_SWREG(169))
+#define AV1_PROP_TABLE_OUT (AV1_SWREG(171))
+#define AV1_PROP_TABLE (AV1_SWREG(173))
+#define AV1_MC_SYNC_CURR (AV1_SWREG(175))
+#define AV1_MC_SYNC_LEFT (AV1_SWREG(177))
+#define AV1_DB_DATA_COL (AV1_SWREG(179))
+#define AV1_DB_CTRL_COL (AV1_SWREG(183))
+#define AV1_PP_OUT_LU (AV1_SWREG(326))
+#define AV1_PP_OUT_CH (AV1_SWREG(328))
+
+#endif /* _ROCKCHIP_VPU981_REGS_H_ */
--
2.34.1
Add a compatible string for the RK3588 AV1 VPU decoder.
Signed-off-by: Benjamin Gaignard <[email protected]>
Acked-by: Krzysztof Kozlowski <[email protected]>
---
Documentation/devicetree/bindings/media/rockchip-vpu.yaml | 1 +
1 file changed, 1 insertion(+)
diff --git a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
index 6cc4d3e5a61d..8454df53f5cb 100644
--- a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
+++ b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
@@ -24,6 +24,7 @@ properties:
- rockchip,rk3399-vpu
- rockchip,px30-vpu
- rockchip,rk3568-vpu
+ - rockchip,rk3588-av1-vpu
- items:
- const: rockchip,rk3188-vpu
- const: rockchip,rk3066-vpu
--
2.34.1
The film grain feature adds "old style" grain noise to decoded streams.
The grain noise is applied after decoding by the postprocessor.
The level of grain is based on a Gaussian sequence.
Signed-off-by: Benjamin Gaignard <[email protected]>
---
drivers/media/platform/verisilicon/Makefile | 1 +
.../media/platform/verisilicon/hantro_hw.h | 3 +
.../verisilicon/rockchip_av1_filmgrain.c | 401 ++++++++++++++++++
.../verisilicon/rockchip_av1_filmgrain.h | 36 ++
.../verisilicon/rockchip_vpu981_hw_av1_dec.c | 213 ++++++++++
5 files changed, 654 insertions(+)
create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c
create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h
diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
index c9a9806ab8c5..6ad2ef885920 100644
--- a/drivers/media/platform/verisilicon/Makefile
+++ b/drivers/media/platform/verisilicon/Makefile
@@ -19,6 +19,7 @@ hantro-vpu-y += \
rockchip_vpu2_hw_mpeg2_dec.o \
rockchip_vpu2_hw_vp8_dec.o \
rockchip_vpu981_hw_av1_dec.o \
+ rockchip_av1_filmgrain.o \
rockchip_av1_entropymode.o \
hantro_jpeg.o \
hantro_h264.o \
diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
index 1741ef939bf8..bcd00dd71060 100644
--- a/drivers/media/platform/verisilicon/hantro_hw.h
+++ b/drivers/media/platform/verisilicon/hantro_hw.h
@@ -16,6 +16,7 @@
#include <media/videobuf2-core.h>
#include "rockchip_av1_entropymode.h"
+#include "rockchip_av1_filmgrain.h"
#define DEC_8190_ALIGN_MASK 0x07U
@@ -288,6 +289,7 @@ struct hantro_av1_frame_ref {
* @global_model: global model buffer
* @tile_info: tile info buffer
* @segment: segmentation info buffer
+ * @film_grain: film grain buffer
* @prob_tbl: probability table
* @prob_tbl_out: probability table output
* @tile_buf: tile buffer
@@ -312,6 +314,7 @@ struct hantro_av1_dec_hw_ctx {
struct hantro_aux_buf global_model;
struct hantro_aux_buf tile_info;
struct hantro_aux_buf segment;
+ struct hantro_aux_buf film_grain;
struct hantro_aux_buf prob_tbl;
struct hantro_aux_buf prob_tbl_out;
struct hantro_aux_buf tile_buf;
diff --git a/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c b/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c
new file mode 100644
index 000000000000..008ba5782eb0
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0
+
+#include "rockchip_av1_filmgrain.h"
+
+static const int32_t gaussian_sequence[2048] = {
+ 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820,
+ 224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800,
+ 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588,
+ -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368,
+ 432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4,
+ 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396,
+ 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740,
+ 248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292,
+ 248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532,
+ 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704,
+ 220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96,
+ -792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244,
+ 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136,
+ 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676,
+ -376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400,
+ -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844,
+ -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96,
+ -1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356,
+ 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280,
+ 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808,
+ 772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228,
+ -1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136,
+ -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264,
+ -528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388,
+ 1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500,
+ 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384,
+ 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220,
+ -100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148,
+ 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572,
+ -332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516,
+ 240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916,
+ -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492,
+ 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560,
+ -1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108,
+ -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516,
+ -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88,
+ -152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196,
+ -524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864,
+ 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920,
+ 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564,
+ -68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876,
+ -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244,
+ 496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184,
+ 56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364,
+ -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72,
+ 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24,
+ 424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4,
+ -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120,
+ 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108,
+ -260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296,
+ 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336,
+ -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164,
+ -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264,
+ 308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536,
+ -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296,
+ -1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696,
+ 284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204,
+ 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212,
+ -212, 52, 12, 200, 268, -488, -404, -880, 824, -672, -40,
+ 908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384,
+ 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8,
+ 1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704,
+ -224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348,
+ -280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592,
+ -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420,
+ 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220,
+ -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208,
+ -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544,
+ -388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288,
+ -16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240,
+ -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132,
+ 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16,
+ -220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044,
+ -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732,
+ 392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460,
+ -816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52,
+ -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104,
+ -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460,
+ 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716,
+ -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960,
+ 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476,
+ 844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692,
+ 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352,
+ -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144,
+ -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44,
+ 472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356,
+ 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452,
+ -436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552,
+ -692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264,
+ -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448,
+ -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588,
+ 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464,
+ 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216,
+ 320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132,
+ 372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412,
+ 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48,
+ 332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196,
+ 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48,
+ -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292,
+ 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32,
+ -44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012,
+ -48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120,
+ -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56,
+ 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416,
+ -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404,
+ -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92,
+ 1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904,
+ 20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728,
+ 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584,
+ 192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48,
+ 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180,
+ 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528,
+ 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364,
+ -376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260,
+ -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324,
+ -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64,
+ 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120,
+ -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168,
+ -68, -196, -868, 460, 1080, 380, -80, 244, 0, 484, -888,
+ 64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588,
+ -184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484,
+ 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580,
+ 112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392,
+ 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80,
+ -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688,
+ 0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4,
+ -364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300,
+ 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444,
+ 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192,
+ 716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160,
+ 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188,
+ -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404,
+ -696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400,
+ 344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92,
+ -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824,
+ 164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620,
+ 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720,
+ 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620,
+ -684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508,
+ -1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736,
+ 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836,
+ 268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180,
+ 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140,
+ -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32,
+ -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916,
+ 244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368,
+ -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380,
+ -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572,
+ -624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864,
+ 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908,
+ -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84,
+ 344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396,
+ -1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360,
+ 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928,
+ -120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288,
+ 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196,
+ 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504,
+ 116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272,
+ 28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344,
+ -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208,
+ -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156,
+ -212, 488, -192, -804, -256, 368, -360, -916, -328, 228, -240,
+ -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432,
+ 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244,
+ 312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584,
+ 732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24,
+ 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300,
+ -212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416,
+ 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380,
+ -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384,
+ 648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88,
+ 680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876,
+ -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320,
+ -672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88,
+ -496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196,
+ -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120,
+ 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664,
+ -232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0,
+ -412, 188, -696, 508, -476, 324, -1096, 656, -312, 560, 264,
+ -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288,
+ -276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56,
+ 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148,
+ 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156,
+ -300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144,
+ -676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148,
+ 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944,
+ 428, -484
+};
+
+/*
+ * Limit @value to the inclusive range [@low, @high].
+ * The lower bound is tested first, so @low wins whenever @value is below it.
+ */
+static inline int32_t clamp(int32_t value, int32_t low, int32_t high)
+{
+	if (value < low)
+		return low;
+	if (value > high)
+		return high;
+
+	return value;
+}
+
+/*
+ * Divide @val by 2^@n, rounding to nearest by adding half of the divisor
+ * before shifting.
+ *
+ * A shift count of zero returns @val unchanged; computing the rounding term
+ * as 1 << (n - 1) in that case would be an undefined negative shift.
+ * @n is expected to be in the range [0, 31].
+ */
+static inline int32_t round_power_of_two(const int32_t val, int32_t n)
+{
+	const int32_t half = n > 0 ? (int32_t)1 << (n - 1) : 0;
+
+	return (val + half) >> n;
+}
+
+/*
+ * Derive the initial 16-bit pseudo-random register from @seed and @luma_num.
+ * Two 8-bit values computed from @luma_num are XORed into the high and low
+ * bytes of @seed and the result is stored in @random_register.
+ */
+static void rockchip_av1_init_random_generator(uint8_t luma_num, uint16_t seed,
+					       uint16_t *random_register)
+{
+	const uint16_t high_byte = (luma_num * 37 + 178) & 255;
+	const uint16_t low_byte = (luma_num * 173 + 105) & 255;
+
+	*random_register = seed ^ (high_byte << 8) ^ low_byte;
+}
+
+/*
+ * Advance the 16-bit shift register by one step: the XOR of bits 0, 1, 3
+ * and 12 becomes the new top bit while the rest shifts right by one.
+ */
+static inline void rockchip_av1_update_random_register(uint16_t *random_register)
+{
+	const uint16_t cur = *random_register;
+	const uint16_t feedback =
+		(cur ^ (cur >> 1) ^ (cur >> 3) ^ (cur >> 12)) & 1;
+
+	*random_register = (cur >> 1) | (feedback << 15);
+}
+
+/* Return the top 11 bits of the 16-bit register as a value in [0, 2047]. */
+static inline int32_t rockchip_av1_get_random_number(uint16_t random_register)
+{
+	const int32_t top_bits = random_register >> 5;
+
+	return top_bits & 0x7ff;
+}
+
+/*
+ * Fill the 73x82 luma grain block.
+ *
+ * Pass 1: when @num_y_points is non-zero, every sample is taken from
+ * gaussian_sequence[], indexed by an 11-bit pseudo-random number, and scaled
+ * down by a rounding shift of (12 - @bitdepth + @grain_scale_shift). The
+ * pseudo-random register starts at @random_seed and is advanced once per
+ * sample. When @num_y_points is zero the block is zero-filled instead.
+ *
+ * Pass 2: for rows 3..72 and columns 3..78, a weighted sum of the samples
+ * that precede the current one (up to @ar_coeff_lag rows above and
+ * @ar_coeff_lag columns either side) is rounded by @ar_coeff_shift, added to
+ * the sample and clamped to [@grain_min, @grain_max]. The block is updated
+ * in place, so already-filtered samples feed later ones.
+ *
+ * @ar_coeff_lag is not validated here: the neighbour indexing and the
+ * 24-entry @ar_coeffs_y array only stay in bounds for 0 <= @ar_coeff_lag <= 3.
+ */
+void rockchip_av1_generate_luma_grain_block(int32_t (*luma_grain_block)[73][82],
+ int32_t bitdepth,
+ uint8_t num_y_points,
+ int32_t grain_scale_shift,
+ int32_t ar_coeff_lag,
+ int32_t (*ar_coeffs_y)[24],
+ int32_t ar_coeff_shift,
+ int32_t grain_min,
+ int32_t grain_max,
+ uint16_t random_seed)
+{
+ int32_t gauss_sec_shift = 12 - bitdepth + grain_scale_shift;
+ uint16_t grain_random_register = random_seed;
+ int32_t i, j;
+
+ /* Pass 1: white noise drawn from the Gaussian table (or zeros). */
+ for (i = 0; i < 73; i++) {
+ for (j = 0; j < 82; j++) {
+ if (num_y_points > 0) {
+ rockchip_av1_update_random_register
+ (&grain_random_register);
+ (*luma_grain_block)[i][j] =
+ round_power_of_two(gaussian_sequence
+ [rockchip_av1_get_random_number
+ (grain_random_register)],
+ gauss_sec_shift);
+ } else {
+ (*luma_grain_block)[i][j] = 0;
+ }
+ }
+ }
+
+ /* Pass 2: in-place filtering; a 3-sample border is left untouched on the top, left and right. */
+ for (i = 3; i < 73; i++)
+ for (j = 3; j < 82 - 3; j++) {
+ int32_t pos = 0;
+ int32_t wsum = 0;
+ int32_t deltaRow, deltaCol;
+
+ for (deltaRow = -ar_coeff_lag; deltaRow <= 0;
+ deltaRow++) {
+ for (deltaCol = -ar_coeff_lag;
+ deltaCol <= ar_coeff_lag; deltaCol++) {
+ /* Stop at the current sample: only samples before it in raster order contribute. */
+ if (deltaRow == 0 && deltaCol == 0)
+ break;
+ wsum = wsum + (*ar_coeffs_y)[pos] *
+ (*luma_grain_block)[i + deltaRow][j + deltaCol];
+ ++pos;
+ }
+ }
+ (*luma_grain_block)[i][j] =
+ clamp((*luma_grain_block)[i][j] +
+ round_power_of_two(wsum, ar_coeff_shift),
+ grain_min, grain_max);
+ }
+}
+
+/*
+ * Calculate chroma grain noise once per frame.
+ *
+ * Fill the 38x44 Cb and Cr grain blocks.
+ *
+ * Pass 1: each plane is filled from gaussian_sequence[] using the same
+ * rounding shift as luma (12 - @bitdepth + @grain_scale_shift). The
+ * pseudo-random register is re-initialised from @random_seed before each
+ * plane, with index 7 for Cb and index 11 for Cr. A plane is zero-filled
+ * when it has no scaling points and @chroma_scaling_from_luma is not set.
+ *
+ * Pass 2: for rows 3..37 and columns 3..40, a weighted sum of preceding
+ * samples of the same plane is built as for luma. When @num_y_points is
+ * non-zero, the rounded average of a 2x2 group of luma grain samples,
+ * weighted by the coefficient that follows the neighbour coefficients, is
+ * added to that sum. The result is rounded by @ar_coeff_shift, added to the
+ * sample and clamped to [@grain_min, @grain_max]; planes that were
+ * zero-filled in pass 1 are left untouched.
+ *
+ * NOTE(review): the 38x44 block size and the "<< 1" luma coordinates look
+ * like they assume chroma subsampled by 2 in both directions - confirm.
+ *
+ * @ar_coeff_lag is not validated here: the neighbour indexing and the
+ * 25-entry coefficient arrays only stay in bounds for 0 <= @ar_coeff_lag <= 3.
+ */
+void rockchip_av1_generate_chroma_grain_block(int32_t (*luma_grain_block)[73][82],
+ int32_t (*cb_grain_block)[38][44],
+ int32_t (*cr_grain_block)[38][44],
+ int32_t bitdepth,
+ uint8_t num_y_points,
+ uint8_t num_cb_points,
+ uint8_t num_cr_points,
+ int32_t grain_scale_shift,
+ int32_t ar_coeff_lag,
+ int32_t (*ar_coeffs_cb)[25],
+ int32_t (*ar_coeffs_cr)[25],
+ int32_t ar_coeff_shift,
+ int32_t grain_min,
+ int32_t grain_max,
+ uint8_t chroma_scaling_from_luma,
+ uint16_t random_seed)
+{
+ int32_t gauss_sec_shift = 12 - bitdepth + grain_scale_shift;
+ uint16_t grain_random_register = 0;
+ int32_t i, j;
+
+ /* Pass 1, Cb plane: reseed the generator, then draw one value per sample. */
+ rockchip_av1_init_random_generator(7, random_seed,
+ &grain_random_register);
+ for (i = 0; i < 38; i++) {
+ for (j = 0; j < 44; j++) {
+ if (num_cb_points || chroma_scaling_from_luma) {
+ rockchip_av1_update_random_register
+ (&grain_random_register);
+ (*cb_grain_block)[i][j] =
+ round_power_of_two(gaussian_sequence
+ [rockchip_av1_get_random_number
+ (grain_random_register)],
+ gauss_sec_shift);
+ } else {
+ (*cb_grain_block)[i][j] = 0;
+ }
+ }
+ }
+
+ /* Pass 1, Cr plane: same procedure with a different generator index. */
+ rockchip_av1_init_random_generator(11, random_seed,
+ &grain_random_register);
+ for (i = 0; i < 38; i++) {
+ for (j = 0; j < 44; j++) {
+ if (num_cr_points || chroma_scaling_from_luma) {
+ rockchip_av1_update_random_register
+ (&grain_random_register);
+ (*cr_grain_block)[i][j] =
+ round_power_of_two(gaussian_sequence
+ [rockchip_av1_get_random_number
+ (grain_random_register)],
+ gauss_sec_shift);
+ } else {
+ (*cr_grain_block)[i][j] = 0;
+ }
+ }
+ }
+
+ /* Pass 2: in-place filtering of both planes; a 3-sample border is left untouched on the top, left and right. */
+ for (i = 3; i < 38; i++) {
+ for (j = 3; j < 44 - 3; j++) {
+ int32_t wsum_cb = 0;
+ int32_t wsum_cr = 0;
+ int32_t pos = 0;
+ int32_t deltaRow, deltaCol;
+
+ for (deltaRow = -ar_coeff_lag; deltaRow <= 0;
+ deltaRow++) {
+ for (deltaCol = -ar_coeff_lag;
+ deltaCol <= ar_coeff_lag; deltaCol++) {
+ /* Stop at the current sample: only samples before it in raster order contribute. */
+ if (deltaRow == 0 && deltaCol == 0)
+ break;
+ wsum_cb = wsum_cb + (*ar_coeffs_cb)[pos] *
+ (*cb_grain_block)[i + deltaRow][j + deltaCol];
+ wsum_cr =
+ wsum_cr +
+ (*ar_coeffs_cr)[pos] *
+ (*cr_grain_block)[i + deltaRow][j + deltaCol];
+ ++pos;
+ }
+ }
+
+ /* Luma contribution: rounded mean of the 2x2 luma grain samples starting at (2*i - 3, 2*j - 3). */
+ if (num_y_points > 0) {
+ int32_t av_luma = 0;
+ int32_t luma_coord_y = (i << 1) - 3;
+ int32_t luma_coord_x = (j << 1) - 3;
+
+ av_luma +=
+ (*luma_grain_block)[luma_coord_y][luma_coord_x];
+ av_luma +=
+ (*luma_grain_block)[luma_coord_y][luma_coord_x + 1];
+ av_luma +=
+ (*luma_grain_block)[luma_coord_y + 1][luma_coord_x];
+ av_luma +=
+ (*luma_grain_block)[(luma_coord_y + 1)][luma_coord_x + 1];
+ av_luma = round_power_of_two(av_luma, 2);
+
+ /* pos now indexes the coefficient that follows the neighbour coefficients. */
+ wsum_cb = wsum_cb + (*ar_coeffs_cb)[pos] * av_luma;
+ wsum_cr = wsum_cr + (*ar_coeffs_cr)[pos] * av_luma;
+ }
+
+ if (num_cb_points || chroma_scaling_from_luma) {
+ (*cb_grain_block)[i][j] =
+ clamp((*cb_grain_block)[i][j] +
+ round_power_of_two(wsum_cb, ar_coeff_shift),
+ grain_min, grain_max);
+ }
+ if (num_cr_points || chroma_scaling_from_luma) {
+ (*cr_grain_block)[i][j] =
+ clamp((*cr_grain_block)[i][j] +
+ round_power_of_two(wsum_cr, ar_coeff_shift),
+ grain_min, grain_max);
+ }
+ }
+ }
+}
diff --git a/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h b/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h
new file mode 100644
index 000000000000..dbef112699b8
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ROCKCHIP_AV1_FILMGRAIN_H_
+#define _ROCKCHIP_AV1_FILMGRAIN_H_
+
+#include <linux/types.h>
+
+void rockchip_av1_generate_luma_grain_block(int32_t (*luma_grain_block)[73][82],
+ int32_t bitdepth,
+ uint8_t num_y_points,
+ int32_t grain_scale_shift,
+ int32_t ar_coeff_lag,
+ int32_t (*ar_coeffs_y)[24],
+ int32_t ar_coeff_shift,
+ int32_t grain_min,
+ int32_t grain_max,
+ uint16_t random_seed);
+
+void rockchip_av1_generate_chroma_grain_block(int32_t (*luma_grain_block)[73][82],
+ int32_t (*cb_grain_block)[38][44],
+ int32_t (*cr_grain_block)[38][44],
+ int32_t bitdepth,
+ uint8_t num_y_points,
+ uint8_t num_cb_points,
+ uint8_t num_cr_points,
+ int32_t grain_scale_shift,
+ int32_t ar_coeff_lag,
+ int32_t (*ar_coeffs_cb)[25],
+ int32_t (*ar_coeffs_cr)[25],
+ int32_t ar_coeff_shift,
+ int32_t grain_min,
+ int32_t grain_max,
+ uint8_t chroma_scaling_from_luma,
+ uint16_t random_seed);
+
+#endif
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
index 81aeb1d6b93f..58c2a64a7a06 100644
--- a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
+++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
@@ -339,6 +339,12 @@ void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
av1_dec->tile_info.dma);
av1_dec->tile_info.cpu = NULL;
+ if (av1_dec->film_grain.cpu)
+ dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
+ av1_dec->film_grain.cpu,
+ av1_dec->film_grain.dma);
+ av1_dec->film_grain.cpu = NULL;
+
if (av1_dec->prob_tbl.cpu)
dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
@@ -379,6 +385,14 @@ int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
return -ENOMEM;
av1_dec->tile_info.size = AV1_MAX_TILES;
+ av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
+ ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
+ &av1_dec->film_grain.dma,
+ GFP_KERNEL);
+ if (!av1_dec->film_grain.cpu)
+ return -ENOMEM;
+ av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
+
av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
ALIGN(sizeof(struct av1cdfs), 2048),
&av1_dec->prob_tbl.dma,
@@ -1176,6 +1190,204 @@ static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
}
+static void /* fill scaling_lut[] by linear interpolation between the (values[], scaling[]) points */
+rockchip_vpu981_av1_dec_init_scaling_function(const uint8_t *values,
+ const uint8_t *scaling,
+ uint8_t num_points,
+ uint8_t *scaling_lut)
+{
+ int i, point;
+
+ if (num_points == 0) { /* no points: the first 256 LUT bytes are zeroed */
+ memset(scaling_lut, 0, 256);
+ return;
+ }
+
+ for (point = 0; point < num_points - 1; point++) { /* one pass per segment between consecutive points */
+ int x;
+ int32_t delta_y = scaling[point + 1] - scaling[point];
+ int32_t delta_x = values[point + 1] - values[point];
+ int64_t delta = /* segment slope with 16 fractional bits; 0 when delta_x is 0 */
+ delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
+ delta_x) : 0;
+
+ for (x = 0; x < delta_x; x++) { /* not entered when delta_x <= 0 */
+ scaling_lut[values[point] + x] =
+ scaling[point] +
+ (int32_t) ((x * delta + 32768) >> 16); /* + 32768 rounds to nearest before the 16-bit shift */
+ }
+ }
+
+ for (i = values[num_points - 1]; i < 256; i++) /* hold the last scaling value up to index 255 */
+ scaling_lut[i] = scaling[num_points - 1];
+} /* NOTE(review): with increasing values[], indices below values[0] are never written here - confirm the buffer's previous content is acceptable */
+
+static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx) /* program the film grain registers and fill the film grain buffer from the film_grain control */
+{
+ struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+ struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+ const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
+ struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu; /* CPU address of the film grain buffer */
+ struct hantro_dev *vpu = ctx->dev;
+ int32_t (*ar_coeffs_y)[24];
+ int32_t (*ar_coeffs_cb)[25];
+ int32_t (*ar_coeffs_cr)[25];
+ int32_t (*luma_grain_block)[73][82];
+ int32_t (*cb_grain_block)[38][44];
+ int32_t (*cr_grain_block)[38][44];
+ int32_t ar_coeff_lag, ar_coeff_shift;
+ int32_t grain_scale_shift, bitdepth;
+ int32_t grain_center, grain_min, grain_max;
+ int i, j;
+
+ hantro_reg_write(vpu, &av1_apply_grain, 0); /* start with grain disabled; set to 1 below once the scratch allocations succeed */
+
+ if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) { /* grain not requested: zero every film grain register and the buffer address */
+ hantro_reg_write(vpu, &av1_num_y_points_b, 0);
+ hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
+ hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
+ hantro_reg_write(vpu, &av1_scaling_shift, 0);
+ hantro_reg_write(vpu, &av1_cb_mult, 0);
+ hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
+ hantro_reg_write(vpu, &av1_cb_offset, 0);
+ hantro_reg_write(vpu, &av1_cr_mult, 0);
+ hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
+ hantro_reg_write(vpu, &av1_cr_offset, 0);
+ hantro_reg_write(vpu, &av1_overlap_flag, 0);
+ hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
+ hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
+ hantro_reg_write(vpu, &av1_random_seed, 0);
+ hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
+ return;
+ }
+
+ ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL); /* scratch buffers, allocated on every call and freed at alloc_fail */
+ ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
+ ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
+ luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
+ cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
+ cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
+
+ if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr
+ || !luma_grain_block || !cb_grain_block || !cr_grain_block) {
+ pr_warn("Fail allocating memory for film grain parameters\n");
+ goto alloc_fail; /* av1_apply_grain stays 0; the other film grain registers are not written on this path */
+ }
+
+ hantro_reg_write(vpu, &av1_apply_grain, 1);
+
+ hantro_reg_write(vpu, &av1_num_y_points_b,
+ film_grain->num_y_points > 0); /* these three registers receive a 0/1 flag, not the point count */
+ hantro_reg_write(vpu, &av1_num_cb_points_b,
+ film_grain->num_cb_points > 0);
+ hantro_reg_write(vpu, &av1_num_cr_points_b,
+ film_grain->num_cr_points > 0);
+ hantro_reg_write(vpu, &av1_scaling_shift,
+ film_grain->grain_scaling_minus_8 + 8);
+
+ if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA)) { /* cb/cr multipliers and offsets are only programmed when chroma is not scaled from luma */
+ hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
+ hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
+ hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
+ hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
+ hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
+ hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
+ } else {
+ hantro_reg_write(vpu, &av1_cb_mult, 0);
+ hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
+ hantro_reg_write(vpu, &av1_cb_offset, 0);
+ hantro_reg_write(vpu, &av1_cr_mult, 0);
+ hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
+ hantro_reg_write(vpu, &av1_cr_offset, 0);
+ }
+
+ hantro_reg_write(vpu, &av1_overlap_flag,
+ !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
+ hantro_reg_write(vpu, &av1_clip_to_restricted_range,
+ !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
+ hantro_reg_write(vpu, &av1_chroma_scaling_from_luma,
+ !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA));
+ hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
+
+ rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
+ film_grain->point_y_scaling,
+ film_grain->num_y_points,
+ fgmem->scaling_lut_y); /* luma LUT is written directly into the film grain buffer */
+
+ if (film_grain->flags &
+ V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) { /* scaling from luma: both chroma LUTs are copies of the luma LUT */
+ memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
+ sizeof(*fgmem->scaling_lut_y) * 256);
+ memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
+ sizeof(*fgmem->scaling_lut_y) * 256);
+ } else {
+ rockchip_vpu981_av1_dec_init_scaling_function
+ (film_grain->point_cb_value, film_grain->point_cb_scaling,
+ film_grain->num_cb_points, fgmem->scaling_lut_cb);
+ rockchip_vpu981_av1_dec_init_scaling_function
+ (film_grain->point_cr_value, film_grain->point_cr_scaling,
+ film_grain->num_cr_points, fgmem->scaling_lut_cr);
+ }
+
+ for (i = 0; i < V4L2_AV1_MAX_NUM_POS_LUMA; i++) { /* NOTE(review): chroma arrays hold 25 entries - confirm V4L2_AV1_MAX_NUM_POS_LUMA <= 25 */
+ if (i < 24) /* the luma coefficient array only has 24 entries */
+ (*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
+ (*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
+ (*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
+ }
+
+ ar_coeff_lag = film_grain->ar_coeff_lag;
+ ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
+ grain_scale_shift = film_grain->grain_scale_shift;
+ bitdepth = ctx->bit_depth;
+ grain_center = 128 << (bitdepth - 8); /* 128 at 8 bits, shifted left by the extra bits of depth */
+ grain_min = 0 - grain_center;
+ grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
+
+ rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
+ film_grain->num_y_points, grain_scale_shift,
+ ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
+ grain_min, grain_max,
+ film_grain->grain_seed);
+
+ rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
+ cr_grain_block, bitdepth,
+ film_grain->num_y_points,
+ film_grain->num_cb_points,
+ film_grain->num_cr_points,
+ grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
+ ar_coeffs_cr, ar_coeff_shift, grain_min,
+ grain_max,
+ !!(film_grain->flags
+ & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA),
+ film_grain->grain_seed);
+
+ for (i = 0; i < 64; i++) { /* copy the 64x64 window starting at row 9, column 9 of the 73x82 luma block */
+ for (j = 0; j < 64; j++)
+ fgmem->cropped_luma_grain_block[i * 64 + j] =
+ (*luma_grain_block)[i + 9][j + 9];
+ }
+
+ for (i = 0; i < 32; i++) { /* 32x32 windows starting at (6, 6); cb and cr are interleaved, 64 entries per row */
+ for (j = 0; j < 32; j++) {
+ fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
+ (*cb_grain_block)[i + 6][j + 6];
+ fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
+ (*cr_grain_block)[i + 6][j + 6];
+ }
+ }
+
+ hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma); /* point AV1_FILM_GRAIN at the buffer filled above */
+
+alloc_fail: /* also reached on success: the kfree() calls below run on both paths */
+ kfree(ar_coeffs_y);
+ kfree(ar_coeffs_cb);
+ kfree(ar_coeffs_cr);
+ kfree(luma_grain_block);
+ kfree(cb_grain_block);
+ kfree(cr_grain_block);
+}
+
static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
{
struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
@@ -1958,6 +2170,7 @@ int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
rockchip_vpu981_av1_dec_set_cdef(ctx);
rockchip_vpu981_av1_dec_set_lr(ctx);
+ rockchip_vpu981_av1_dec_set_fgs(ctx);
rockchip_vpu981_av1_dec_set_prob(ctx);
hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
--
2.34.1
Hi Benjamin,
I love your patch! Perhaps something to improve:
[auto build test WARNING on media-tree/master]
[also build test WARNING on rockchip/for-next linus/master v6.2-rc3 next-20230112]
[cannot apply to pza/reset/next pza/imx-drm/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Benjamin-Gaignard/dt-bindings-media-rockchip-vpu-Add-rk3588-vpu-compatible/20230112-010155
base: git://linuxtv.org/media_tree.git master
patch link: https://lore.kernel.org/r/20230111165931.753763-11-benjamin.gaignard%40collabora.com
patch subject: [PATCH v3 10/13] media: verisilicon: Add Rockchip AV1 decoder
config: hexagon-randconfig-r011-20230110
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 8d9828ef5aa9688500657d36cd2aefbe12bbd162)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/e6afd9e0717775b8e81ee22d5f2dc54fdb1c3a5a
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Benjamin-Gaignard/dt-bindings-media-rockchip-vpu-Add-rk3588-vpu-compatible/20230112-010155
git checkout e6afd9e0717775b8e81ee22d5f2dc54fdb1c3a5a
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash drivers/media/platform/verisilicon/
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
In file included from drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:8:
In file included from include/media/v4l2-mem2mem.h:16:
In file included from include/media/videobuf2-v4l2.h:16:
In file included from include/media/videobuf2-core.h:18:
In file included from include/linux/dma-buf.h:16:
In file included from include/linux/iosys-map.h:10:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __raw_readb(PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
#define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
^
In file included from drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:8:
In file included from include/media/v4l2-mem2mem.h:16:
In file included from include/media/videobuf2-v4l2.h:16:
In file included from include/media/videobuf2-core.h:18:
In file included from include/linux/dma-buf.h:16:
In file included from include/linux/iosys-map.h:10:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
#define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
^
In file included from drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:8:
In file included from include/media/v4l2-mem2mem.h:16:
In file included from include/media/videobuf2-v4l2.h:16:
In file included from include/media/videobuf2-core.h:18:
In file included from include/linux/dma-buf.h:16:
In file included from include/linux/iosys-map.h:10:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writeb(value, PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:227:6: warning: no previous prototype for function 'rockchip_vpu981_av1_dec_tiles_free' [-Wmissing-prototypes]
void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
^
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:227:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
^
static
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:325:6: warning: no previous prototype for function 'rockchip_vpu981_av1_dec_exit' [-Wmissing-prototypes]
void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
^
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:325:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
^
static
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:361:5: warning: no previous prototype for function 'rockchip_vpu981_av1_dec_init' [-Wmissing-prototypes]
int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
^
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:361:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
^
static
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:788:26: warning: variable 'chroma_addr' set but not used [-Wunused-but-set-variable]
dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
^
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:1139:6: warning: no previous prototype for function 'rockchip_vpu981_av1_dec_done' [-Wmissing-prototypes]
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
^
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:1139:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
^
static
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:1931:5: warning: no previous prototype for function 'rockchip_vpu981_av1_dec_run' [-Wmissing-prototypes]
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
^
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:1931:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
^
static
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:1931:5: warning: stack frame size (1176) exceeds limit (1024) in 'rockchip_vpu981_av1_dec_run' [-Wframe-larger-than]
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
^
120/1176 (10.20%) spills, 1056/1176 (89.80%) variables
13 warnings generated.
vim +/rockchip_vpu981_av1_dec_tiles_free +227 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
226
> 227 void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
228 {
229 struct hantro_dev *vpu = ctx->dev;
230 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
231
232 if (av1_dec->db_data_col.cpu)
233 dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
234 av1_dec->db_data_col.cpu,
235 av1_dec->db_data_col.dma);
236 av1_dec->db_data_col.cpu = NULL;
237
238 if (av1_dec->db_ctrl_col.cpu)
239 dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
240 av1_dec->db_ctrl_col.cpu,
241 av1_dec->db_ctrl_col.dma);
242 av1_dec->db_ctrl_col.cpu = NULL;
243
244 if (av1_dec->cdef_col.cpu)
245 dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
246 av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
247 av1_dec->cdef_col.cpu = NULL;
248
249 if (av1_dec->sr_col.cpu)
250 dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
251 av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
252 av1_dec->sr_col.cpu = NULL;
253
254 if (av1_dec->lr_col.cpu)
255 dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
256 av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
257 av1_dec->lr_col.cpu = NULL;
258 }
259
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
On 1/11/23 17:59, Benjamin Gaignard wrote:
> This series implement AV1 stateless decoder for RK3588 SoC.
> The hardware support 8 and 10 bits bitstreams up to 7680x4320.
> AV1 feature like film grain or scaling are done by the postprocessor.
> The driver can produce NV12_4L4, NV12_10LE40_4L4, NV12 and P010 pixels formats.
> Even if Rockchip have named the hardware VPU981 it looks like a VC9000 but
> with a different registers mapping.
>
> It is based on Daniel's "[PATCH v4] media: Add AV1 uAPI" [1] patches.
>
> The full branch can be found here:
> https://gitlab.collabora.com/linux/for-upstream/-/commits/rk3588_av1_decoder_v3
>
> Fluster score is: 200/239 while testing AV1-TEST-VECTORS with GStreamer-AV1-V4L2SL-Gst1.0.
> The failing tests are:
> - the 2 tests with 2 spatial layers: few errors in luma/chroma values
> - tests with resolution < hardware limit (64x64)
> - 10bits film grain test: bad macroblocks while decoding, the same 8bits
> test is working fine.
>
> Changes in v3:
> - Fix arrays loops limites.
> - Remove unused field.
> - Reset raw pixel formats list when bit depth or film grain feature
> values change.
> - Enable post-processor P010 support
>
> Changes in v2:
> - Remove useless +1 in sbs computation.
> - Describe NV12_10LE40_4L4 pixels format.
> - Post-processor could generate P010.
> - Fix comments done on v1.
> - The last patch make sure that only post-processed formats are used when film
> grain feature is enabled.
>
> Benjamin
>
I have a bunch of sparse errors:
SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:342:17: error: typename in expression
SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:342:21: error: Expected ; at end of statement
SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:342:21: error: got bit_depth
SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:342:17: error: undefined identifier 'int'
SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:346:47: error: undefined identifier 'bit_depth'
SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:353:39: error: undefined identifier 'bit_depth'
SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:354:42: error: undefined identifier 'bit_depth'
Also some smatch errors:
SMATCH:drivers/media/platform/verisilicon/hantro_drv.c:342:17: :error: typename in expression
SMATCH:drivers/media/platform/verisilicon/hantro_drv.c:342:21: :error: Expected ; at end of statement
SMATCH:drivers/media/platform/verisilicon/hantro_drv.c:342:21: :error: got bit_depth
drivers/media/platform/verisilicon/hantro_drv.c:342 hantro_av1_s_ctrl() warn: statement has no effect 3
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:227:6: warning: no previous prototype for 'rockchip_vpu981_av1_dec_tiles_free' [-Wmissing-prototypes]
drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:802:47: warning: variable 'chroma_addr' set but not used [-Wunused-but-set-variable]
Regards,
Hans
> [1] https://lore.kernel.org/linux-media/[email protected]/T/#u
>
> Benjamin Gaignard (12):
> dt-bindings: media: rockchip-vpu: Add rk3588 vpu compatible
> media: Add NV12_10LE40_4L4 pixel format
> media: verisilicon: Get bit depth for V4L2_PIX_FMT_NV12_10LE40_4L4
> media: verisilicon: Add AV1 decoder mode and controls
> media: verisilicon: Save bit depth for AV1 decoder
> media: verisilicon: Check AV1 bitstreams bit depth
> media: verisilicon: Compute motion vectors size for AV1 frames
> media: verisilicon: Add AV1 entropy helpers
> media: verisilicon: Add Rockchip AV1 decoder
> media: verisilicon: Add film grain feature to AV1 driver
> media: verisilicon: Enable AV1 decoder on rk3588
> media: verisilicon: Conditionnaly ignore native formats
>
> Nicolas Dufresne (1):
> v4l2-common: Add support for fractional bpp
>
> .../bindings/media/rockchip-vpu.yaml | 1 +
> .../media/v4l/pixfmt-yuv-planar.rst | 4 +
> drivers/media/platform/verisilicon/Makefile | 3 +
> drivers/media/platform/verisilicon/hantro.h | 8 +
> .../media/platform/verisilicon/hantro_drv.c | 69 +
> .../media/platform/verisilicon/hantro_hw.h | 102 +
> .../platform/verisilicon/hantro_postproc.c | 7 +
> .../media/platform/verisilicon/hantro_v4l2.c | 22 +-
> .../media/platform/verisilicon/hantro_v4l2.h | 1 +
> .../verisilicon/rockchip_av1_entropymode.c | 4546 +++++++++++++++++
> .../verisilicon/rockchip_av1_entropymode.h | 272 +
> .../verisilicon/rockchip_av1_filmgrain.c | 401 ++
> .../verisilicon/rockchip_av1_filmgrain.h | 36 +
> .../verisilicon/rockchip_vpu981_hw_av1_dec.c | 2278 +++++++++
> .../verisilicon/rockchip_vpu981_regs.h | 477 ++
> .../platform/verisilicon/rockchip_vpu_hw.c | 134 +
> drivers/media/v4l2-core/v4l2-common.c | 149 +-
> drivers/media/v4l2-core/v4l2-ioctl.c | 1 +
> include/media/v4l2-common.h | 2 +
> include/uapi/linux/videodev2.h | 1 +
> 20 files changed, 8439 insertions(+), 75 deletions(-)
> create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_entropymode.c
> create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_entropymode.h
> create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c
> create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h
> create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>
Le 25/01/2023 à 10:54, Hans Verkuil a écrit :
> On 1/11/23 17:59, Benjamin Gaignard wrote:
>> This series implement AV1 stateless decoder for RK3588 SoC.
>> The hardware support 8 and 10 bits bitstreams up to 7680x4320.
>> AV1 feature like film grain or scaling are done by the postprocessor.
>> The driver can produce NV12_4L4, NV12_10LE40_4L4, NV12 and P010 pixels formats.
>> Even if Rockchip have named the hardware VPU981 it looks like a VC9000 but
>> with a different registers mapping.
>>
>> It is based on Daniel's "[PATCH v4] media: Add AV1 uAPI" [1] patches.
>>
>> The full branch can be found here:
>> https://gitlab.collabora.com/linux/for-upstream/-/commits/rk3588_av1_decoder_v3
>>
>> Fluster score is: 200/239 while testing AV1-TEST-VECTORS with GStreamer-AV1-V4L2SL-Gst1.0.
>> The failing tests are:
>> - the 2 tests with 2 spatial layers: few errors in luma/chroma values
>> - tests with resolution < hardware limit (64x64)
>> - 10bits film grain test: bad macroblocks while decoding, the same 8bits
>> test is working fine.
>>
>> Changes in v3:
>> - Fix arrays loops limites.
>> - Remove unused field.
>> - Reset raw pixel formats list when bit depth or film grain feature
>> values change.
>> - Enable post-processor P010 support
>>
>> Changes in v2:
>> - Remove useless +1 in sbs computation.
>> - Describe NV12_10LE40_4L4 pixels format.
>> - Post-processor could generate P010.
>> - Fix comments done on v1.
>> - The last patch make sure that only post-processed formats are used when film
>> grain feature is enabled.
>>
>> Benjamin
>>
> I have a bunch of sparse errors:
>
> SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:342:17: error: typename in expression
> SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:342:21: error: Expected ; at end of statement
> SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:342:21: error: got bit_depth
> SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:342:17: error: undefined identifier 'int'
> SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:346:47: error: undefined identifier 'bit_depth'
> SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:353:39: error: undefined identifier 'bit_depth'
> SPARSE:drivers/media/platform/verisilicon/hantro_drv.c:354:42: error: undefined identifier 'bit_depth'
>
> Also some smatch errors:
>
> SMATCH:drivers/media/platform/verisilicon/hantro_drv.c:342:17: :error: typename in expression
> SMATCH:drivers/media/platform/verisilicon/hantro_drv.c:342:21: :error: Expected ; at end of statement
> SMATCH:drivers/media/platform/verisilicon/hantro_drv.c:342:21: :error: got bit_depth
> drivers/media/platform/verisilicon/hantro_drv.c:342 hantro_av1_s_ctrl() warn: statement has no effect 3
> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:227:6: warning: no previous prototype for 'rockchip_vpu981_av1_dec_tiles_free' [-Wmissing-prototypes]
> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:802:47: warning: variable 'chroma_addr' set but not used [-Wunused-but-set-variable]
I have fixed that in v4.
I will send v4 once the Hantro bit depth problem is fixed for HEVC, since the root cause is the same.
Thanks,
Benjamin
>
> Regards,
>
> Hans
>
>> [1] https://lore.kernel.org/linux-media/[email protected]/T/#u
>>
>> Benjamin Gaignard (12):
>> dt-bindings: media: rockchip-vpu: Add rk3588 vpu compatible
>> media: Add NV12_10LE40_4L4 pixel format
>> media: verisilicon: Get bit depth for V4L2_PIX_FMT_NV12_10LE40_4L4
>> media: verisilicon: Add AV1 decoder mode and controls
>> media: verisilicon: Save bit depth for AV1 decoder
>> media: verisilicon: Check AV1 bitstreams bit depth
>> media: verisilicon: Compute motion vectors size for AV1 frames
>> media: verisilicon: Add AV1 entropy helpers
>> media: verisilicon: Add Rockchip AV1 decoder
>> media: verisilicon: Add film grain feature to AV1 driver
>> media: verisilicon: Enable AV1 decoder on rk3588
>> media: verisilicon: Conditionnaly ignore native formats
>>
>> Nicolas Dufresne (1):
>> v4l2-common: Add support for fractional bpp
>>
>> .../bindings/media/rockchip-vpu.yaml | 1 +
>> .../media/v4l/pixfmt-yuv-planar.rst | 4 +
>> drivers/media/platform/verisilicon/Makefile | 3 +
>> drivers/media/platform/verisilicon/hantro.h | 8 +
>> .../media/platform/verisilicon/hantro_drv.c | 69 +
>> .../media/platform/verisilicon/hantro_hw.h | 102 +
>> .../platform/verisilicon/hantro_postproc.c | 7 +
>> .../media/platform/verisilicon/hantro_v4l2.c | 22 +-
>> .../media/platform/verisilicon/hantro_v4l2.h | 1 +
>> .../verisilicon/rockchip_av1_entropymode.c | 4546 +++++++++++++++++
>> .../verisilicon/rockchip_av1_entropymode.h | 272 +
>> .../verisilicon/rockchip_av1_filmgrain.c | 401 ++
>> .../verisilicon/rockchip_av1_filmgrain.h | 36 +
>> .../verisilicon/rockchip_vpu981_hw_av1_dec.c | 2278 +++++++++
>> .../verisilicon/rockchip_vpu981_regs.h | 477 ++
>> .../platform/verisilicon/rockchip_vpu_hw.c | 134 +
>> drivers/media/v4l2-core/v4l2-common.c | 149 +-
>> drivers/media/v4l2-core/v4l2-ioctl.c | 1 +
>> include/media/v4l2-common.h | 2 +
>> include/uapi/linux/videodev2.h | 1 +
>> 20 files changed, 8439 insertions(+), 75 deletions(-)
>> create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_entropymode.c
>> create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_entropymode.h
>> create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c
>> create mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h
>> create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>> create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>>
Some comments:
On 1/11/23 17:59, Benjamin Gaignard wrote:
> Implement AV1 stateless decoder for rockchip VPU981.
> It decode 8 and 10 bits AV1 bitstreams.
> AV1 scaling feature is done by the postprocessor.
>
> Signed-off-by: Benjamin Gaignard <[email protected]>
> ---
> v3:
> - Fix arrays loops limites.
> - Remove unused field.
>
> drivers/media/platform/verisilicon/Makefile | 1 +
> .../media/platform/verisilicon/hantro_hw.h | 64 +-
> .../verisilicon/rockchip_vpu981_hw_av1_dec.c | 2065 +++++++++++++++++
> .../verisilicon/rockchip_vpu981_regs.h | 477 ++++
> 4 files changed, 2605 insertions(+), 2 deletions(-)
> create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>
> diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
> index d2b2679c00eb..c9a9806ab8c5 100644
> --- a/drivers/media/platform/verisilicon/Makefile
> +++ b/drivers/media/platform/verisilicon/Makefile
> @@ -18,6 +18,7 @@ hantro-vpu-y += \
> rockchip_vpu2_hw_h264_dec.o \
> rockchip_vpu2_hw_mpeg2_dec.o \
> rockchip_vpu2_hw_vp8_dec.o \
> + rockchip_vpu981_hw_av1_dec.o \
> rockchip_av1_entropymode.o \
> hantro_jpeg.o \
> hantro_h264.o \
> diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
> index c7438e197d85..1741ef939bf8 100644
> --- a/drivers/media/platform/verisilicon/hantro_hw.h
> +++ b/drivers/media/platform/verisilicon/hantro_hw.h
> @@ -37,6 +37,8 @@
>
> #define NUM_REF_PICTURES (V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
>
> +#define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
> +
> struct hantro_dev;
> struct hantro_ctx;
> struct hantro_buf;
> @@ -250,23 +252,81 @@ struct hantro_vp9_dec_hw_ctx {
> };
>
> /**
> - * hantro_av1_dec_hw_ctx
> + * struct hantro_av1_dec_ctrls
> + * @sequence: AV1 Sequence
> + * @tile_group_entry: AV1 Tile Group entry
> + * @frame: AV1 Frame Header OBU
> + * @film_grain: AV1 Film Grain
> + */
> +struct hantro_av1_dec_ctrls {
> + const struct v4l2_ctrl_av1_sequence *sequence;
> + const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
> + const struct v4l2_ctrl_av1_frame *frame;
> + const struct v4l2_ctrl_av1_film_grain *film_grain;
> +};
> +
> +struct hantro_av1_frame_ref {
> + int width;
> + int height;
> + int mi_cols;
> + int mi_rows;
> + u64 timestamp;
> + enum v4l2_av1_frame_type frame_type;
> + bool used;
> + u32 order_hint;
> + u32 order_hints[V4L2_AV1_TOTAL_REFS_PER_FRAME];
> + struct vb2_v4l2_buffer *vb2_ref;
> +};
> +
> +/**
> + * struct hantro_av1_dec_hw_ctx
> + * @db_data_col: db tile col data buffer
> + * @db_ctrl_col: db tile col ctrl buffer
> + * @cdef_col: cdef tile col buffer
> + * @sr_col: sr tile col buffer
> + * @lr_col: lr tile col buffer
> + * @global_model: global model buffer
> + * @tile_info: tile info buffer
> + * @segment: segmentation info buffer
> + * @prob_tbl: probability table
> + * @prob_tbl_out: probability table output
> + * @tile_buf: tile buffer
> + * @ctrls: V4L2 controls attached to a run
> + * @frame_refs: reference frames info slots
> + * @ref_frame_sign_bias: array of sign bias
> + * @num_tile_cols_allocated: number of allocated tiles
> * @cdfs: current probabilities structure
> * @cdfs_ndvc: current mv probabilities structure
> * @default_cdfs: default probabilities structure
> * @default_cdfs_ndvc: default mv probabilties structure
> * @cdfs_last: stored probabilities structures
> * @cdfs_last_ndvc: stored mv probabilities structures
> + * @current_frame_index: index of the current in frame_refs array
> */
> struct hantro_av1_dec_hw_ctx {
> + struct hantro_aux_buf db_data_col;
> + struct hantro_aux_buf db_ctrl_col;
> + struct hantro_aux_buf cdef_col;
> + struct hantro_aux_buf sr_col;
> + struct hantro_aux_buf lr_col;
> + struct hantro_aux_buf global_model;
> + struct hantro_aux_buf tile_info;
> + struct hantro_aux_buf segment;
> + struct hantro_aux_buf prob_tbl;
> + struct hantro_aux_buf prob_tbl_out;
> + struct hantro_aux_buf tile_buf;
> + struct hantro_av1_dec_ctrls ctrls;
> + struct hantro_av1_frame_ref frame_refs[AV1_MAX_FRAME_BUF_COUNT];
> + uint32_t ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME];
> + unsigned int num_tile_cols_allocated;
> struct av1cdfs *cdfs;
> struct mvcdfs *cdfs_ndvc;
> struct av1cdfs default_cdfs;
> struct mvcdfs default_cdfs_ndvc;
> struct av1cdfs cdfs_last[NUM_REF_FRAMES];
> struct mvcdfs cdfs_last_ndvc[NUM_REF_FRAMES];
> + int current_frame_index;
> };
> -
> /**
> * struct hantro_postproc_ctx
> *
> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> new file mode 100644
> index 000000000000..81aeb1d6b93f
> --- /dev/null
> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> @@ -0,0 +1,2065 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2021, Collabora
> + *
> + * Author: Benjamin Gaignard <[email protected]>
> + */
> +
> +#include <media/v4l2-mem2mem.h>
> +#include "hantro.h"
> +#include "hantro_v4l2.h"
> +#include "rockchip_vpu981_regs.h"
> +
> +#define AV1_DEC_MODE 17
> +#define GM_GLOBAL_MODELS_PER_FRAME 7
> +#define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
> +#define GLOBAL_MODEL_SIZE ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
> +#define AV1_MAX_TILES 128
> +#define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
> +#define AV1DEC_MAX_PIC_BUFFERS 24
> +#define AV1_REF_SCALE_SHIFT 14
> +#define AV1_INVALID_IDX -1
> +#define MAX_FRAME_DISTANCE 31
> +#define AV1_PRIMARY_REF_NONE 7
> +#define AV1_TILE_SIZE ALIGN(32 * 128, 4096)
> +/*
> + * These 3 values aren't defined in enum v4l2_av1_segment_feature because
> + * they are not part of the specification
> + */
> +#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H 2
> +#define V4L2_AV1_SEG_LVL_ALT_LF_U 3
> +#define V4L2_AV1_SEG_LVL_ALT_LF_V 4
> +
> +#define SUPERRES_SCALE_BITS 3
> +#define SCALE_NUMERATOR 8
> +#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
> +
> +#define RS_SUBPEL_BITS 6
> +#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
> +#define RS_SCALE_SUBPEL_BITS 14
> +#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
> +#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
> +#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
> +
> +#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
> +
> +#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +
> +#define DIV_LUT_PREC_BITS 14
> +#define DIV_LUT_BITS 8
> +#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
> +#define WARP_PARAM_REDUCE_BITS 6
> +#define WARPEDMODEL_PREC_BITS 16
> +
> +#define AV1_DIV_ROUND_UP_POW2(value, n) \
> +({ \
> + typeof(n) _n = n; \
> + typeof(value) _value = value; \
> + (_value + (BIT(_n) >> 1)) >> _n; \
> +})
> +
> +#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \
> +({ \
> + typeof(n) _n_ = n; \
> + typeof(value) _value_ = value; \
> + (((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \
> + : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \
> +})
> +
> +struct rockchip_av1_film_grain {
> + uint8_t scaling_lut_y[256];
> + uint8_t scaling_lut_cb[256];
> + uint8_t scaling_lut_cr[256];
> + int16_t cropped_luma_grain_block[4096];
> + int16_t cropped_chroma_grain_block[1024 * 2];
> +};
> +
> +static const short div_lut[DIV_LUT_NUM + 1] = {
> + 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
> + 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
> + 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
> + 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
> + 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
> + 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
> + 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
> + 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
> + 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
> + 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
> + 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
> + 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
> + 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
> + 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
> + 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
> + 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
> + 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
> + 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
> + 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
> + 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
> + 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
> + 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
> + 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
> + 8240, 8224, 8208, 8192,
> +};
> +
> +static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> + u64 timestamp;
> + int i, idx = frame->ref_frame_idx[ref];
> +
> + if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
> + return AV1_INVALID_IDX;
> +
> + timestamp = frame->reference_frame_ts[idx];
> + for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
> + if (!av1_dec->frame_refs[i].used)
> + continue;
> + if (av1_dec->frame_refs[i].timestamp == timestamp)
> + return i;
> + }
> +
> + return AV1_INVALID_IDX;
> +}
> +
> +static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + int idx = rockchip_vpu981_get_frame_index(ctx, ref);
> +
> + if (idx != AV1_INVALID_IDX)
> + return av1_dec->frame_refs[idx].order_hint;
> +
> + return 0;
> +}
> +
> +static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
> + u64 timestamp)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> + int i;
> +
> + for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
I'd do:
if (av1_dec->frame_refs[i].used)
continue;
> + if (!av1_dec->frame_refs[i].used) {
> + int j;
> +
> + av1_dec->frame_refs[i].width =
> + frame->frame_width_minus_1 + 1;
> + av1_dec->frame_refs[i].height =
> + frame->frame_height_minus_1 + 1;
> + av1_dec->frame_refs[i].mi_cols =
> + DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
> + av1_dec->frame_refs[i].mi_rows =
> + DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
> + av1_dec->frame_refs[i].timestamp = timestamp;
> + av1_dec->frame_refs[i].frame_type = frame->frame_type;
> + av1_dec->frame_refs[i].order_hint = frame->order_hint;
> + if (!av1_dec->frame_refs[i].vb2_ref)
> + av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
> +
> + for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
> + av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
> +
> + av1_dec->frame_refs[i].used = true;
> + av1_dec->current_frame_index = i;
> + return i;
That allows the remainder to be shifted one indent level to the left.
> + }
> + }
> +
> + return AV1_INVALID_IDX;
> +}
> +
> +static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +
> + if (idx < 0)
> + return;
> +
> + av1_dec->frame_refs[idx].used = false;
This seems a bit overkill. Just write:
if (idx >= 0)
ctx->av1_dec.frame_refs[idx].used = false;
> +}
> +
> +static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +
> + int ref, idx;
> +
> + for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
> + u64 timestamp = av1_dec->frame_refs[idx].timestamp;
> + bool used = false;
> +
> + if (!av1_dec->frame_refs[idx].used)
> + continue;
> +
> + for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
> + if (ctrls->frame->reference_frame_ts[ref] == timestamp)
> + used = true;
> + }
> +
> + if (!used)
> + rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
> + }
> +}
> +
> +static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
> +{
> + return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
> +}
> +
> +static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
> +{
> + size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> +
> + return ALIGN((cr_offset * 3) / 2, 64);
> +}
> +
> +void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
> +{
> + struct hantro_dev *vpu = ctx->dev;
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +
> + if (av1_dec->db_data_col.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
> + av1_dec->db_data_col.cpu,
> + av1_dec->db_data_col.dma);
> + av1_dec->db_data_col.cpu = NULL;
> +
> + if (av1_dec->db_ctrl_col.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
> + av1_dec->db_ctrl_col.cpu,
> + av1_dec->db_ctrl_col.dma);
> + av1_dec->db_ctrl_col.cpu = NULL;
> +
> + if (av1_dec->cdef_col.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
> + av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
> + av1_dec->cdef_col.cpu = NULL;
> +
> + if (av1_dec->sr_col.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
> + av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
> + av1_dec->sr_col.cpu = NULL;
> +
> + if (av1_dec->lr_col.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
> + av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
> + av1_dec->lr_col.cpu = NULL;
> +}
> +
> +static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
> +{
> + struct hantro_dev *vpu = ctx->dev;
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
> + unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
> + unsigned int height_in_sb = height / 64;
> + unsigned int stripe_num = ((height + 8) + 63) / 64;
> + size_t size;
> +
> + if (av1_dec->db_data_col.size >=
> + ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
> + return 0;
> +
> + rockchip_vpu981_av1_dec_tiles_free(ctx);
> +
> + size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
> + av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
> + &av1_dec->db_data_col.dma,
> + GFP_KERNEL);
> + if (!av1_dec->db_data_col.cpu)
> + goto buffer_allocation_error;
> + av1_dec->db_data_col.size = size;
> +
> + size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
> + av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
> + &av1_dec->db_ctrl_col.dma,
> + GFP_KERNEL);
> + if (!av1_dec->db_ctrl_col.cpu)
> + goto buffer_allocation_error;
> + av1_dec->db_ctrl_col.size = size;
> +
> + size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
> + av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
> + &av1_dec->cdef_col.dma,
> + GFP_KERNEL);
> + if (!av1_dec->cdef_col.cpu)
> + goto buffer_allocation_error;
> + av1_dec->cdef_col.size = size;
> +
> + size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
> + av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
> + &av1_dec->sr_col.dma,
> + GFP_KERNEL);
> + if (!av1_dec->sr_col.cpu)
> + goto buffer_allocation_error;
> + av1_dec->sr_col.size = size;
> +
> + size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
> + av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
> + &av1_dec->lr_col.dma,
> + GFP_KERNEL);
> + if (!av1_dec->lr_col.cpu)
> + goto buffer_allocation_error;
> + av1_dec->lr_col.size = size;
> +
> + av1_dec->num_tile_cols_allocated = num_tile_cols;
> + return 0;
> +
> +buffer_allocation_error:
> + rockchip_vpu981_av1_dec_tiles_free(ctx);
> + return -ENOMEM;
> +}
> +
> +void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
> +{
> + struct hantro_dev *vpu = ctx->dev;
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +
> + if (av1_dec->global_model.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->global_model.size,
> + av1_dec->global_model.cpu,
> + av1_dec->global_model.dma);
> + av1_dec->global_model.cpu = NULL;
> +
> + if (av1_dec->tile_info.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
> + av1_dec->tile_info.cpu,
> + av1_dec->tile_info.dma);
> + av1_dec->tile_info.cpu = NULL;
> +
> + if (av1_dec->prob_tbl.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
> + av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
> + av1_dec->prob_tbl.cpu = NULL;
> +
> + if (av1_dec->prob_tbl_out.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
> + av1_dec->prob_tbl_out.cpu,
> + av1_dec->prob_tbl_out.dma);
> + av1_dec->prob_tbl_out.cpu = NULL;
> +
> + if (av1_dec->tile_buf.cpu)
> + dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
> + av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
> + av1_dec->tile_buf.cpu = NULL;
> +
> + rockchip_vpu981_av1_dec_tiles_free(ctx);
> +}
> +
> +int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
> +{
> + struct hantro_dev *vpu = ctx->dev;
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +
> + memset(av1_dec, 0, sizeof(*av1_dec));
> +
> + av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
> + &av1_dec->global_model.dma,
> + GFP_KERNEL);
> + if (!av1_dec->global_model.cpu)
> + return -ENOMEM;
> + av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
> +
> + av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
> + &av1_dec->tile_info.dma,
> + GFP_KERNEL);
> + if (!av1_dec->tile_info.cpu)
> + return -ENOMEM;
> + av1_dec->tile_info.size = AV1_MAX_TILES;
> +
> + av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
> + ALIGN(sizeof(struct av1cdfs), 2048),
> + &av1_dec->prob_tbl.dma,
> + GFP_KERNEL);
> + if (!av1_dec->prob_tbl.cpu)
> + return -ENOMEM;
> + av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
> +
> + av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
> + ALIGN(sizeof(struct av1cdfs), 2048),
> + &av1_dec->prob_tbl_out.dma,
> + GFP_KERNEL);
> + if (!av1_dec->prob_tbl_out.cpu)
> + return -ENOMEM;
> + av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
> + av1_dec->cdfs = &av1_dec->default_cdfs;
> + av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
> +
> + rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
> +
> + av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
> + AV1_TILE_SIZE,
> + &av1_dec->tile_buf.dma,
> + GFP_KERNEL);
> + if (!av1_dec->tile_buf.cpu)
> + return -ENOMEM;
> + av1_dec->tile_buf.size = AV1_TILE_SIZE;
> +
> + return 0;
> +}
> +
> +static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +
> + ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
> + if (WARN_ON(!ctrls->sequence))
> + return -EINVAL;
> +
> + ctrls->tile_group_entry =
> + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
> + if (WARN_ON(!ctrls->tile_group_entry))
> + return -EINVAL;
> +
> + ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
> + if (WARN_ON(!ctrls->frame))
> + return -EINVAL;
> +
> + ctrls->film_grain =
> + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
> +
> + return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
> +}
> +
> +static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
> +{
> + if (n == 0)
> + return 0;
> + return 31 ^ __builtin_clz(n);
> +}
> +
> +static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
> +{
> + int f;
> + uint64_t e;
> +
> + *shift = rockchip_vpu981_av1_dec_get_msb(d);
> + /* e is obtained from D after resetting the most significant 1 bit. */
> + e = d - ((u32)1 << *shift);
> + /* Get the most significant DIV_LUT_BITS (8) bits of e into f */
> + if (*shift > DIV_LUT_BITS)
> + f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
> + else
> + f = e << (DIV_LUT_BITS - *shift);
> + if (f > DIV_LUT_NUM)
> + return -1;
> + *shift += DIV_LUT_PREC_BITS;
> + /* Use f as lookup into the precomputed table of multipliers */
> + return div_lut[f];
> +}
> +
> +static void rockchip_vpu981_av1_dec_get_shear_params(const uint32_t *params,
> + int64_t *alpha, int64_t *beta, int64_t *gamma, int64_t *delta)
Use u32 and s64 instead of uint32_t and int64_t. If these types are used elsewhere
as well, please change them.
Run 'checkpatch.pl --strict' for these patches, and fix the issues (like this)
that it reports where it makes sense.
> +{
> + const int *mat = params;
> + short shift;
> + short y;
> + long long gv, dv;
> +
> + if (mat[2] <= 0)
> + return;
> +
> + *alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
> + *beta = clamp_val(mat[3], S16_MIN, S16_MAX);
> +
> + y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
> +
> + gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
> +
> + *gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
> +
> + dv = ((long long)mat[3] * mat[4]) * y;
> + *delta = clamp_val(
> + mat[5] -
> + (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
> + S16_MIN, S16_MAX);
> +
> + *alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
> + * (1 << WARP_PARAM_REDUCE_BITS);
> + *beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
> + * (1 << WARP_PARAM_REDUCE_BITS);
> + *gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
> + * (1 << WARP_PARAM_REDUCE_BITS);
> + *delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
> + * (1 << WARP_PARAM_REDUCE_BITS);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> + const struct v4l2_av1_global_motion *gm = &frame->global_motion;
> + uint8_t *dst = av1_dec->global_model.cpu;
> + struct hantro_dev *vpu = ctx->dev;
> + int ref_frame, i;
> +
> + memset(dst, 0, GLOBAL_MODEL_SIZE);
> + for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
> + int64_t alpha = 0, beta = 0, gamma = 0, delta = 0;
> +
> + for (i = 0; i < 6; ++i) {
> + if (i == 2)
> + *(int32_t *)dst =
> + gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
> + else if (i == 3)
> + *(int32_t *)dst =
> + gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
> + else
> + *(int32_t *)dst =
> + gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
> + dst += 4;
> + }
> +
> + if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
> + rockchip_vpu981_av1_dec_get_shear_params(
> + &gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
> + &alpha, &beta, &gamma, &delta);
> +
> + *(int16_t *)dst = alpha;
> + dst += 2;
> + *(int16_t *)dst = beta;
> + dst += 2;
> + *(int16_t *)dst = gamma;
> + dst += 2;
> + *(int16_t *)dst = delta;
> + dst += 2;
> + }
> +
> + hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
> + const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
> + ctrls->tile_group_entry;
> + int context_update_y =
> + tile_info.context_update_tile_id / tile_info.tile_cols;
> + int context_update_x =
> + tile_info.context_update_tile_id % tile_info.tile_cols;
> + int context_update_tile_id =
> + context_update_x * tile_info.tile_rows + context_update_y;
> + uint8_t *dst = av1_dec->tile_info.cpu;
> + struct hantro_dev *vpu = ctx->dev;
> + int tile0, tile1;
> +
> + memset(dst, 0, av1_dec->tile_info.size);
> +
> + for (tile0 = 0; tile0 < tile_info.tile_cols; tile0++) {
> + for (tile1 = 0; tile1 < tile_info.tile_rows; tile1++) {
> + int tile_id = tile1 * tile_info.tile_cols + tile0;
> + uint32_t start, end;
> + uint32_t y0 =
> + tile_info.height_in_sbs_minus_1[tile1] + 1;
> + uint32_t x0 = tile_info.width_in_sbs_minus_1[tile0] + 1;
> +
> + // tile size in SB units (width,height)
> + *dst++ = x0;
> + *dst++ = 0;
> + *dst++ = 0;
> + *dst++ = 0;
> + *dst++ = y0;
> + *dst++ = 0;
> + *dst++ = 0;
> + *dst++ = 0;
> +
> + // tile start position
> + start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
> + *dst++ = start & 255;
> + *dst++ = (start >> 8) & 255;
> + *dst++ = (start >> 16) & 255;
> + *dst++ = (start >> 24) & 255;
> +
> + // # of bytes in tile data
> + end = start + group_entry[tile_id].tile_size;
> + *dst++ = end & 255;
> + *dst++ = (end >> 8) & 255;
> + *dst++ = (end >> 16) & 255;
> + *dst++ = (end >> 24) & 255;
> + }
> + }
> +
> + hantro_reg_write(vpu, &av1_multicore_expect_context_update,
> + !!(context_update_x == 0));
> + hantro_reg_write(vpu, &av1_tile_enable, !!((tile_info.tile_cols > 1)
> + || (tile_info.tile_rows > 1)));
> + hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info.tile_cols);
> + hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info.tile_rows);
> + hantro_reg_write(vpu, &av1_context_update_tile_id,
> + context_update_tile_id);
> + hantro_reg_write(vpu, &av1_tile_transpose, 1);
> + if (context_update_tile_id) {
> + hantro_reg_write(vpu, &av1_dec_tile_size_mag,
> + tile_info.tile_size_bytes);
> + } else
> + hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
> +
> + hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
> +}
> +
> +static int rockchip_vpu981_av1_dec_get_relative_dist(struct hantro_ctx *ctx,
> + int a, int b)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + int bits = ctrls->sequence->order_hint_bits - 1;
> + int diff, m;
> +
> + if (!ctrls->sequence->order_hint_bits)
> + return 0;
> +
> + diff = a - b;
> + m = 1 << bits;
> + diff = (diff & (m - 1)) - (diff & m);
> +
> + return diff;
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> + const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
> + int i;
> +
> + if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
> + for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
> + av1_dec->ref_frame_sign_bias[i] = 0;
> +
> + return;
> + }
> + // Identify the nearest forward and backward references.
> + for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
> + if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
> + int rel_off =
> + rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> + rockchip_vpu981_get_order_hint
> + (ctx, i),
> + frame->order_hint);
> + av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
> + }
> + }
> +}
> +
> +static bool
> +rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
> + int width, int height)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> + struct hantro_dev *vpu = ctx->dev;
> + struct hantro_decoded_buffer *dst;
> + dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
> + size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> + size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
> + int cur_width = frame->frame_width_minus_1 + 1;
> + int cur_height = frame->frame_height_minus_1 + 1;
> + int scale_width =
> + ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
> + int scale_height =
> + ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
> +
> + switch (ref) {
> + case 0:
> + hantro_reg_write(vpu, &av1_ref0_height, height);
> + hantro_reg_write(vpu, &av1_ref0_width, width);
> + hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
> + hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
> + break;
> + case 1:
> + hantro_reg_write(vpu, &av1_ref1_height, height);
> + hantro_reg_write(vpu, &av1_ref1_width, width);
> + hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
> + hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
> + break;
> + case 2:
> + hantro_reg_write(vpu, &av1_ref2_height, height);
> + hantro_reg_write(vpu, &av1_ref2_width, width);
> + hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
> + hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
> + break;
> + case 3:
> + hantro_reg_write(vpu, &av1_ref3_height, height);
> + hantro_reg_write(vpu, &av1_ref3_width, width);
> + hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
> + hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
> + break;
> + case 4:
> + hantro_reg_write(vpu, &av1_ref4_height, height);
> + hantro_reg_write(vpu, &av1_ref4_width, width);
> + hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
> + hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
> + break;
> + case 5:
> + hantro_reg_write(vpu, &av1_ref5_height, height);
> + hantro_reg_write(vpu, &av1_ref5_width, width);
> + hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
> + hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
> + break;
> + case 6:
> + hantro_reg_write(vpu, &av1_ref6_height, height);
> + hantro_reg_write(vpu, &av1_ref6_width, width);
> + hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
> + hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
> + break;
> + default:
> + pr_warn("AV1 invalid reference frame index\n");
> + }
> +
> + dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
> + luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
> + chroma_addr = luma_addr + cr_offset;
> + mv_addr = luma_addr + mv_offset;
> +
> + hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
> + hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
> + hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
> +
> + return (scale_width != (1 << AV1_REF_SCALE_SHIFT))
> + || (scale_height != (1 << AV1_REF_SCALE_SHIFT));
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
> + int ref, int val)
> +{
> + struct hantro_dev *vpu = ctx->dev;
> +
> + switch (ref) {
> + case 0:
> + hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
> + break;
> + case 1:
> + hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
> + break;
> + case 2:
> + hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
> + break;
> + case 3:
> + hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
> + break;
> + case 4:
> + hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
> + break;
> + case 5:
> + hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
> + break;
> + case 6:
> + hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
> + break;
> + default:
> + pr_warn("AV1 invalid sign bias index\n");
> + break;
> + }
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> + const struct v4l2_av1_segmentation *seg = &frame->segmentation;
> + uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
> + struct hantro_dev *vpu = ctx->dev;
> + uint8_t segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
> +
> + if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
> + && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
> + int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
> +
> + if (idx >= 0) {
> + dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
> + size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> + size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
> +
> + luma_addr =
> + hantro_get_dec_buf_addr(ctx,
> + &av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
> + chroma_addr = luma_addr + cr_offset;
> + mv_addr = luma_addr + mv_offset;
> +
> + hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
> + hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
> + }
> + }
> +
> + hantro_reg_write(vpu, &av1_segment_temp_upd_e,
> + !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
> + hantro_reg_write(vpu, &av1_segment_upd_e,
> + !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
> + hantro_reg_write(vpu, &av1_segment_e,
> + !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
> +
> + hantro_reg_write(vpu, &av1_error_resilient,
> + !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
> +
> + if (IS_INTRA(frame->frame_type)
> + || !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
> + hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
> + }
> +
> + if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)) {
Why '!!'?
> + int s;
> +
> + for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
> + if (seg->feature_enabled[s] &
> + V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
> + segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
> + clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
> + 0, 255);
> + segsign |=
> + (seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
> + }
> +
> + if (seg->feature_enabled[s] &
> + V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
> + segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
> + clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
> + -63, 63);
> +
> + if (seg->feature_enabled[s] &
> + V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
> + segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
> + clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
> + -63, 63);
> +
> + if (seg->feature_enabled[s] &
> + V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
> + segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
> + clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
> + -63, 63);
> +
> + if (seg->feature_enabled[s] &
> + V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
> + segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
> + clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
> + -63, 63);
> +
> + if (frame->frame_type && seg->feature_enabled[s] &
> + V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
> + segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
> +
> + if (seg->feature_enabled[s] &
> + V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
> + segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
> +
> + if (seg->feature_enabled[s] &
> + V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
> + segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
> + }
> + }
> +
> + for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
> + for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
> + if (seg->feature_enabled[i]
> + & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
> + preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
> + last_active_seg = max(i, last_active_seg);
> + }
> + }
> + }
> +
> + hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
> + hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
> +
> + hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
> +
> + /* Write QP, filter level, ref frame and skip for every segment */
> + hantro_reg_write(vpu, &av1_quant_seg0,
> + segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
> + hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
> + segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> + hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
> + segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> + hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
> + segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> + hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
> + segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> + hantro_reg_write(vpu, &av1_refpic_seg0,
> + segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
> + hantro_reg_write(vpu, &av1_skip_seg0,
> + segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
> + hantro_reg_write(vpu, &av1_global_mv_seg0,
> + segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> + hantro_reg_write(vpu, &av1_quant_seg1,
> + segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
> + hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
> + segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> + hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
> + segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> + hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
> + segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> + hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
> + segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> + hantro_reg_write(vpu, &av1_refpic_seg1,
> + segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
> + hantro_reg_write(vpu, &av1_skip_seg1,
> + segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
> + hantro_reg_write(vpu, &av1_global_mv_seg1,
> + segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> + hantro_reg_write(vpu, &av1_quant_seg2,
> + segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
> + hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
> + segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> + hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
> + segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> + hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
> + segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> + hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
> + segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> + hantro_reg_write(vpu, &av1_refpic_seg2,
> + segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
> + hantro_reg_write(vpu, &av1_skip_seg2,
> + segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
> + hantro_reg_write(vpu, &av1_global_mv_seg2,
> + segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> + hantro_reg_write(vpu, &av1_quant_seg3,
> + segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
> + hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
> + segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> + hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
> + segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> + hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
> + segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> + hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
> + segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> + hantro_reg_write(vpu, &av1_refpic_seg3,
> + segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
> + hantro_reg_write(vpu, &av1_skip_seg3,
> + segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
> + hantro_reg_write(vpu, &av1_global_mv_seg3,
> + segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> + hantro_reg_write(vpu, &av1_quant_seg4,
> + segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
> + hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
> + segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> + hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
> + segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> + hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
> + segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> + hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
> + segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> + hantro_reg_write(vpu, &av1_refpic_seg4,
> + segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
> + hantro_reg_write(vpu, &av1_skip_seg4,
> + segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
> + hantro_reg_write(vpu, &av1_global_mv_seg4,
> + segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> + hantro_reg_write(vpu, &av1_quant_seg5,
> + segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
> + hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
> + segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> + hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
> + segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> + hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
> + segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> + hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
> + segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> + hantro_reg_write(vpu, &av1_refpic_seg5,
> + segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
> + hantro_reg_write(vpu, &av1_skip_seg5,
> + segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
> + hantro_reg_write(vpu, &av1_global_mv_seg5,
> + segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> + hantro_reg_write(vpu, &av1_quant_seg6,
> + segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
> + hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
> + segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> + hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
> + segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> + hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
> + segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> + hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
> + segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> + hantro_reg_write(vpu, &av1_refpic_seg6,
> + segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
> + hantro_reg_write(vpu, &av1_skip_seg6,
> + segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
> + hantro_reg_write(vpu, &av1_global_mv_seg6,
> + segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> + hantro_reg_write(vpu, &av1_quant_seg7,
> + segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
> + hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
> + segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> + hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
> + segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> + hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
> + segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> + hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
> + segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> + hantro_reg_write(vpu, &av1_refpic_seg7,
> + segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
> + hantro_reg_write(vpu, &av1_skip_seg7,
> + segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
> + hantro_reg_write(vpu, &av1_global_mv_seg7,
> + segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +}
> +
> +/*
> + * Tell whether the current frame qualifies as lossless.
> + *
> + * For every segment the effective quantizer index is computed as base_q_idx
> + * plus the segment's ALT_Q feature data (when that feature is enabled),
> + * clamped to [0, 255]. The frame is lossless only if that index is zero for
> + * all segments and the delta quantizers tested below are all zero.
> + *
> + * Returns true when the frame is lossless, false otherwise.
> + */
> +static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
> +{
> +	const struct v4l2_ctrl_av1_frame *frame = ctx->av1_dec.ctrls.frame;
> +	const struct v4l2_av1_quantization *quant = &frame->quantization;
> +	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
> +	int s;
> +
> +	for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
> +		int q = quant->base_q_idx;
> +
> +		if (seg->feature_enabled[s] &
> +		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q))
> +			q += seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q];
> +
> +		if (clamp(q, 0, 255) != 0)
> +			return false;
> +
> +		/* These deltas do not depend on the segment index. */
> +		if (quant->delta_q_y_dc != 0 ||
> +		    quant->delta_q_u_dc != 0 ||
> +		    quant->delta_q_u_ac != 0 ||
> +		    quant->delta_q_v_dc != 0 ||
> +		    quant->delta_q_v_ac != 0)
> +			return false;
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * Program the loop filter registers from the frame control.
> + *
> + * Filtering is flagged as disabled when both level[0] and level[1] are zero.
> + * The per-reference and per-mode adjustments are taken from the control only
> + * when delta is enabled, the frame is not lossless and intra block copy is
> + * not allowed; in every other case they are written as zero.
> + * The deblocking column buffers' DMA addresses are written last.
> + */
> +static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
> +	const struct v4l2_av1_loop_filter *lf = &frame->loop_filter;
> +	struct hantro_dev *vpu = ctx->dev;
> +	bool use_deltas;
> +
> +	hantro_reg_write(vpu, &av1_filtering_dis,
> +			 !lf->level[0] && !lf->level[1]);
> +	hantro_reg_write(vpu, &av1_filt_level_base_gt32, lf->level[0] > 32);
> +	hantro_reg_write(vpu, &av1_filt_sharpness, lf->sharpness);
> +
> +	hantro_reg_write(vpu, &av1_filt_level0, lf->level[0]);
> +	hantro_reg_write(vpu, &av1_filt_level1, lf->level[1]);
> +	hantro_reg_write(vpu, &av1_filt_level2, lf->level[2]);
> +	hantro_reg_write(vpu, &av1_filt_level3, lf->level[3]);
> +
> +	use_deltas = (lf->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED) &&
> +		     !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
> +		     !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
> +
> +	/* Reference frame adjustments, zeroed when deltas are not in use */
> +	hantro_reg_write(vpu, &av1_filt_ref_adj_0,
> +			 use_deltas ? lf->ref_deltas[0] : 0);
> +	hantro_reg_write(vpu, &av1_filt_ref_adj_1,
> +			 use_deltas ? lf->ref_deltas[1] : 0);
> +	hantro_reg_write(vpu, &av1_filt_ref_adj_2,
> +			 use_deltas ? lf->ref_deltas[2] : 0);
> +	hantro_reg_write(vpu, &av1_filt_ref_adj_3,
> +			 use_deltas ? lf->ref_deltas[3] : 0);
> +	hantro_reg_write(vpu, &av1_filt_ref_adj_4,
> +			 use_deltas ? lf->ref_deltas[4] : 0);
> +	hantro_reg_write(vpu, &av1_filt_ref_adj_5,
> +			 use_deltas ? lf->ref_deltas[5] : 0);
> +	hantro_reg_write(vpu, &av1_filt_ref_adj_6,
> +			 use_deltas ? lf->ref_deltas[6] : 0);
> +	hantro_reg_write(vpu, &av1_filt_ref_adj_7,
> +			 use_deltas ? lf->ref_deltas[7] : 0);
> +
> +	/* Mode adjustments follow the same rule */
> +	hantro_reg_write(vpu, &av1_filt_mb_adj_0,
> +			 use_deltas ? lf->mode_deltas[0] : 0);
> +	hantro_reg_write(vpu, &av1_filt_mb_adj_1,
> +			 use_deltas ? lf->mode_deltas[1] : 0);
> +
> +	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
> +	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
> +}
> +
> +/*
> + * Fold the CDF tables written to prob_tbl_out back into the driver's stored
> + * tables.
> + *
> + * Nothing is done when the frame disables the end-of-frame CDF update or
> + * when no bit is set in refresh_frame_flags. Otherwise the first refreshed
> + * slot is loaded with rockchip_av1_get_cdfs(), overwritten with the output
> + * tables and stored again with rockchip_av1_store_cdfs().
> + * For intra frames the previously stored MV CDFs are kept and the output MV
> + * CDFs go to cdfs_ndvc instead.
> + */
> +static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
> +	struct av1cdfs *hw_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
> +	bool intra = IS_INTRA(frame->frame_type);
> +	struct mvcdfs saved_mv_cdf;
> +	int slot;
> +
> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
> +		return;
> +
> +	/* Only the lowest-numbered refreshed slot is looked up */
> +	for (slot = 0; slot < NUM_REF_FRAMES; slot++) {
> +		if (frame->refresh_frame_flags & (1 << slot))
> +			break;
> +	}
> +
> +	if (slot == NUM_REF_FRAMES)
> +		return;
> +
> +	rockchip_av1_get_cdfs(ctx, slot);
> +	saved_mv_cdf = av1_dec->cdfs->mv_cdf;
> +	*av1_dec->cdfs = *hw_cdfs;
> +
> +	if (intra) {
> +		av1_dec->cdfs->mv_cdf = saved_mv_cdf;
> +		*av1_dec->cdfs_ndvc = hw_cdfs->mv_cdf;
> +	}
> +
> +	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
> +}
> +
> +/*
> + * Non-static entry point whose only action is to call
> + * rockchip_vpu981_av1_dec_update_prob() on @ctx.
> + * NOTE(review): presumably invoked once a decode run has completed (the
> + * caller is not visible here) - confirm.
> + */
> +void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
> +{
> + rockchip_vpu981_av1_dec_update_prob(ctx);
> +}
> +
> +/*
> + * Pick the CDF tables for the current frame and expose them to the hardware.
> + *
> + * The default tables (with coefficient probabilities derived from
> + * base_q_idx) are used for intra frames, in error resilient mode, or when
> + * there is no primary reference frame; otherwise the tables attached to the
> + * primary reference slot are fetched. The selected tables are then stored
> + * for the slots in refresh_frame_flags and copied into the prob_tbl buffer.
> + * Finally both probability buffers' DMA addresses are programmed.
> + */
> +static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	bool intra = IS_INTRA(frame->frame_type);
> +	bool use_defaults =
> +		(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE) ||
> +		intra ||
> +		frame->primary_ref_frame == AV1_PRIMARY_REF_NONE;
> +
> +	if (!use_defaults) {
> +		rockchip_av1_get_cdfs(ctx,
> +				      frame->ref_frame_idx[frame->primary_ref_frame]);
> +	} else {
> +		av1_dec->cdfs = &av1_dec->default_cdfs;
> +		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
> +		rockchip_av1_default_coeff_probs(frame->quantization.base_q_idx,
> +						 av1_dec->cdfs);
> +	}
> +	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
> +
> +	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
> +
> +	/* Intra frames: replace the MV context area by the intrabc MV context */
> +	if (intra)
> +		memcpy(av1_dec->prob_tbl.cpu + offsetof(struct av1cdfs, mv_cdf),
> +		       av1_dec->cdfs_ndvc, sizeof(struct mvcdfs));
> +
> +	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
> +	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
> +}
> +
> +/*
> + * Program the CDEF registers from the frame control.
> + *
> + * The (1 << cdef->bits) strength entries are packed into single words:
> + * 4 bits per entry for the primary strengths and 2 bits per entry for the
> + * secondary strengths. A secondary strength of 4 is written as 3.
> + * The CDEF column buffer DMA address is written last.
> + */
> +static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_av1_cdef *cdef = &frame->cdef;
> +	struct hantro_dev *vpu = ctx->dev;
> +	uint32_t luma_pri_strength = 0;
> +	uint16_t luma_sec_strength = 0;
> +	uint32_t chroma_pri_strength = 0;
> +	uint16_t chroma_sec_strength = 0;
> +	int i;
> +
> +	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
> +	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
> +
> +	for (i = 0; i < (1 << cdef->bits); i++) {
> +		uint32_t y_sec = cdef->y_sec_strength[i];
> +		uint32_t uv_sec = cdef->uv_sec_strength[i];
> +
> +		if (y_sec == 4)
> +			y_sec = 3;
> +		if (uv_sec == 4)
> +			uv_sec = 3;
> +
> +		/*
> +		 * Shift as unsigned: the last entry is shifted by 28 bits,
> +		 * which would overflow a signed int for strengths >= 8.
> +		 */
> +		luma_pri_strength |=
> +			(uint32_t)cdef->y_pri_strength[i] << (i * 4);
> +		luma_sec_strength |= y_sec << (i * 2);
> +
> +		chroma_pri_strength |=
> +			(uint32_t)cdef->uv_pri_strength[i] << (i * 4);
> +		chroma_sec_strength |= uv_sec << (i * 2);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
> +			 luma_pri_strength);
> +	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
> +			 luma_sec_strength);
> +	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
> +			 chroma_pri_strength);
> +	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
> +			 chroma_sec_strength);
> +
> +	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
> +}
> +
> +/*
> + * Program the loop restoration registers from the frame control.
> + *
> + * Each plane gets a 2-bit field in both the type and the unit size words.
> + * The unit size defaults to 3 for every plane; when loop restoration is in
> + * use it is 1 + lr_unit_shift for the first plane and additionally reduced
> + * by lr_uv_shift for the two others.
> + * The loop restoration column buffer DMA address is written last.
> + */
> +static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
> +	const struct v4l2_av1_loop_restoration *lr = &frame->loop_restoration;
> +	struct hantro_dev *vpu = ctx->dev;
> +	uint8_t unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
> +	uint16_t packed_type = 0;
> +	uint16_t packed_size = 0;
> +	int plane;
> +
> +	if (lr->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
> +		uint8_t first = 1 + lr->lr_unit_shift;
> +		uint8_t others = 1 + lr->lr_unit_shift - lr->lr_uv_shift;
> +
> +		unit_size[0] = first;
> +		unit_size[1] = others;
> +		unit_size[2] = others;
> +	}
> +
> +	for (plane = 0; plane < V4L2_AV1_NUM_PLANES_MAX; plane++) {
> +		int shift = plane * 2;
> +
> +		packed_type |= lr->frame_restoration_type[plane] << shift;
> +		packed_size |= unit_size[plane] << shift;
> +	}
> +
> +	hantro_reg_write(vpu, &av1_lr_type, packed_type);
> +	hantro_reg_write(vpu, &av1_lr_unit_size, packed_size);
> +	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
> +}
> +
> +/*
> + * Compute and program the super-resolution registers.
> + *
> + * The scale denominator is superres_denom when the frame uses superres and
> + * SCALE_NUMERATOR otherwise. Scaling parameters are only computed when the
> + * denominator is greater than SCALE_NUMERATOR and the resulting downscaled
> + * width (never smaller than min(16, upscaled_width)) differs from
> + * upscaled_width; in all other cases the initial values below are written.
> + * The superres column buffer DMA address is written last.
> + */
> +static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	bool use_superres = !!(frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES);
> +	uint8_t denom = use_superres ? frame->superres_denom : SCALE_NUMERATOR;
> +	/* Values written when the frame turns out not to be scaled */
> +	int luma_step = RS_SCALE_SUBPEL_BITS;
> +	int chroma_step = RS_SCALE_SUBPEL_BITS;
> +	int luma_step_invra = RS_SCALE_SUBPEL_BITS;
> +	int chroma_step_invra = RS_SCALE_SUBPEL_BITS;
> +	int init_luma_subpel_x = 0;
> +	int init_chroma_subpel_x = 0;
> +	int is_scaled = 0;
> +	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
> +	int width = 0;
> +
> +	if (denom > SCALE_NUMERATOR) {
> +		width = (frame->upscaled_width * SCALE_NUMERATOR +
> +			 (denom / 2)) / denom;
> +
> +		if (width < min_w)
> +			width = min_w;
> +
> +		if (width != frame->upscaled_width) {
> +			int up_y = frame->upscaled_width;
> +			int down_y = width;
> +			int down_c = (down_y + 1) >> 1;
> +			int up_c = (up_y + 1) >> 1;
> +			int err_y, err_c;
> +
> +			is_scaled = 1;
> +
> +			/* Forward steps, rounded to nearest */
> +			luma_step =
> +				((down_y << RS_SCALE_SUBPEL_BITS) +
> +				 (up_y / 2)) / up_y;
> +			chroma_step =
> +				((down_c << RS_SCALE_SUBPEL_BITS) +
> +				 (up_c / 2)) / up_c;
> +
> +			/* Accumulated rounding error over the whole line */
> +			err_y = (up_y * luma_step)
> +				- (down_y << RS_SCALE_SUBPEL_BITS);
> +			err_c = (up_c * chroma_step)
> +				- (down_c << RS_SCALE_SUBPEL_BITS);
> +
> +			init_luma_subpel_x =
> +				((-((up_y - down_y) << (RS_SCALE_SUBPEL_BITS - 1))
> +				  + up_y / 2)
> +				 / up_y + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_y / 2)
> +				& RS_SCALE_SUBPEL_MASK;
> +			init_chroma_subpel_x =
> +				((-((up_c - down_c) << (RS_SCALE_SUBPEL_BITS - 1))
> +				  + up_c / 2)
> +				 / up_c + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_c / 2)
> +				& RS_SCALE_SUBPEL_MASK;
> +
> +			/* Inverse steps, rounded to nearest */
> +			luma_step_invra =
> +				((up_y << RS_SCALE_SUBPEL_BITS) + (down_y / 2))
> +				/ down_y;
> +			chroma_step_invra =
> +				((up_c << RS_SCALE_SUBPEL_BITS) + (down_c / 2))
> +				/ down_c;
> +		}
> +	}
> +
> +	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
> +
> +	if (use_superres)
> +		hantro_reg_write(vpu, &av1_scale_denom_minus9,
> +				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
> +	else
> +		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
> +
> +	hantro_reg_write(vpu, &av1_superres_luma_step, luma_step);
> +	hantro_reg_write(vpu, &av1_superres_chroma_step, chroma_step);
> +	hantro_reg_write(vpu, &av1_superres_luma_step_invra, luma_step_invra);
> +	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
> +			 chroma_step_invra);
> +	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
> +			 init_luma_subpel_x);
> +	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
> +			 init_chroma_subpel_x);
> +	hantro_reg_write(vpu, &av1_superres_is_scaled, is_scaled);
> +
> +	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
> +}
> +
> +/*
> + * Program the picture size registers.
> + *
> + * Width and height are written as a count of 8-pixel units (rounded up)
> + * together with the number of padding pixels needed to reach the next
> + * multiple of 8. The superres parameters are programmed afterwards.
> + */
> +static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
> +{
> +	const struct v4l2_ctrl_av1_frame *frame = ctx->av1_dec.ctrls.frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	int width_in_cbs, height_in_cbs;
> +	int width_pad, height_pad;
> +
> +	width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
> +	height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
> +
> +	/* Distance from the real size up to the 8-aligned size */
> +	width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8) -
> +		    (frame->frame_width_minus_1 + 1);
> +	height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8) -
> +		     (frame->frame_height_minus_1 + 1);
> +
> +	hantro_reg_write(vpu, &av1_pic_width_in_cbs, width_in_cbs);
> +	hantro_reg_write(vpu, &av1_pic_height_in_cbs, height_in_cbs);
> +	hantro_reg_write(vpu, &av1_pic_width_pad, width_pad);
> +	hantro_reg_write(vpu, &av1_pic_height_pad, height_pad);
> +
> +	rockchip_vpu981_av1_dec_set_superres_params(ctx);
> +}
> +
> +/*
> + * Tell whether the reference stored at @idx may be selected as a source of
> + * temporal motion vectors: @idx must be valid (non-negative), the reference
> + * must have the same @mi_cols x @mi_rows size as the current frame and must
> + * not be an intra frame.
> + */
> +static bool rockchip_vpu981_av1_dec_mf_ref_usable(const struct hantro_av1_dec_hw_ctx *av1_dec,
> +						  int idx, int mi_cols, int mi_rows)
> +{
> +	int ref_mi_cols, ref_mi_rows;
> +
> +	/* Never index frame_refs[] with an invalid slot */
> +	if (idx < 0)
> +		return false;
> +
> +	ref_mi_cols = av1_dec->frame_refs[idx].mi_cols;
> +	ref_mi_rows = av1_dec->frame_refs[idx].mi_rows;
> +
> +	return ref_mi_cols == mi_cols && ref_mi_rows == mi_rows &&
> +	       !IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> +}
> +
> +/*
> + * Program the registers describing the other frames used for temporal
> + * motion vector prediction.
> + *
> + * Up to three references are selected, tried in this order: LAST, BWDREF,
> + * ALTREF2, ALTREF, LAST2. BWDREF, ALTREF2 and ALTREF are only considered
> + * when their order hint is ahead of the current frame; ALTREF and LAST2 are
> + * only considered while ref_stamp has not dropped below zero. A reference
> + * is selected only if it is usable (valid slot, same size, not intra).
> + *
> + * For each selected reference, and only when the frame uses reference frame
> + * MVs and the reference is within MAX_FRAME_DISTANCE of the current frame,
> + * the matching use_temporalN_mvs bit is set and the distances between that
> + * reference and its own references are written to the mfN registers.
> + * Otherwise those registers are left at zero.
> + *
> + * The distances between the current frame and each of its references are
> + * written in both directions (offset and roffset), zero for invalid slots.
> + */
> +static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	bool use_ref_frame_mvs =
> +		!!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
> +	int cur_frame_offset = frame->order_hint;
> +	int alt_frame_offset = 0;
> +	int gld_frame_offset = 0;
> +	int bwd_frame_offset = 0;
> +	int alt2_frame_offset = 0;
> +	int refs_selected[3] = { 0, 0, 0 };
> +	int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
> +	int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
> +	int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
> +	int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
> +	int mf_types[3] = { 0, 0, 0 };
> +	int ref_stamp = 2;
> +	int ref_ind = 0;
> +	int rf, idx;
> +
> +	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
> +	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
> +	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
> +	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
> +	if (idx >= 0) {
> +		int alt_frame_offset_in_lst =
> +			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
> +		bool is_lst_overlay =
> +			(alt_frame_offset_in_lst == gld_frame_offset);
> +
> +		if (!is_lst_overlay &&
> +		    rockchip_vpu981_av1_dec_mf_ref_usable(av1_dec, idx,
> +							  cur_mi_cols, cur_mi_rows)) {
> +			mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
> +			refs_selected[ref_ind++] = LST_BUF_IDX;
> +		}
> +		/* A valid LAST slot consumes a stamp even when not selected */
> +		ref_stamp--;
> +	}
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
> +	if (rockchip_vpu981_av1_dec_get_relative_dist
> +	    (ctx, bwd_frame_offset, cur_frame_offset) > 0 &&
> +	    rockchip_vpu981_av1_dec_mf_ref_usable(av1_dec, idx,
> +						  cur_mi_cols, cur_mi_rows)) {
> +		mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
> +		refs_selected[ref_ind++] = BWD_BUF_IDX;
> +		ref_stamp--;
> +	}
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
> +	if (rockchip_vpu981_av1_dec_get_relative_dist
> +	    (ctx, alt2_frame_offset, cur_frame_offset) > 0 &&
> +	    rockchip_vpu981_av1_dec_mf_ref_usable(av1_dec, idx,
> +						  cur_mi_cols, cur_mi_rows)) {
> +		mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
> +		refs_selected[ref_ind++] = ALT2_BUF_IDX;
> +		ref_stamp--;
> +	}
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
> +	if (rockchip_vpu981_av1_dec_get_relative_dist
> +	    (ctx, alt_frame_offset, cur_frame_offset) > 0 && ref_stamp >= 0 &&
> +	    rockchip_vpu981_av1_dec_mf_ref_usable(av1_dec, idx,
> +						  cur_mi_cols, cur_mi_rows)) {
> +		mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
> +		refs_selected[ref_ind++] = ALT_BUF_IDX;
> +		ref_stamp--;
> +	}
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
> +	if (ref_stamp >= 0 &&
> +	    rockchip_vpu981_av1_dec_mf_ref_usable(av1_dec, idx,
> +						  cur_mi_cols, cur_mi_rows)) {
> +		mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
> +		refs_selected[ref_ind++] = LST2_BUF_IDX;
> +		ref_stamp--;
> +	}
> +
> +	/* Distances between the current frame and each reference, both ways */
> +	for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
> +		idx = rockchip_vpu981_get_frame_index(ctx, rf);
> +		if (idx >= 0) {
> +			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
> +
> +			cur_offset[rf] =
> +				rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +									  cur_frame_offset,
> +									  rf_order_hint);
> +			cur_roffset[rf] =
> +				rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +									  rf_order_hint,
> +									  cur_frame_offset);
> +		} else {
> +			cur_offset[rf] = 0;
> +			cur_roffset[rf] = 0;
> +		}
> +	}
> +
> +	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
> +	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
> +	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
> +	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
> +
> +	/* First selected reference (mf1), zeroed unless it is used below */
> +	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
> +
> +	if (use_ref_frame_mvs && ref_ind > 0 &&
> +	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
> +	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
> +		int ref_idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
> +		int val;
> +
> +		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
> +	}
> +
> +	/* Second selected reference (mf2) */
> +	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
> +
> +	if (use_ref_frame_mvs && ref_ind > 1 &&
> +	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
> +	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
> +		int ref_idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
> +		int val;
> +
> +		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
> +	}
> +
> +	/* Third selected reference (mf3) */
> +	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
> +
> +	if (use_ref_frame_mvs && ref_ind > 2 &&
> +	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
> +	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
> +		int ref_idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
> +		int val;
> +
> +		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[ref_idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
> +	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
> +	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
> +	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
> +	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
> +	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
> +	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
> +
> +	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
> +	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
> +	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
> +	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
> +	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
> +	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
> +	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
> +
> +	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
> +	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
> +	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
> +}
> +
> +/*
> + * Program the reference-frame registers for the current frame.
> + *
> + * Nothing is written for intra frames unless intra block copy is allowed.
> + * Otherwise this writes the number of distinct reference buffers, then for
> + * each reference from LAST to ALTREF its buffer index, dimensions and sign
> + * bias, followed by the scaling enable bit and the global motion types.
> + */
> +static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	int frame_type = frame->frame_type;
> +	bool allow_intrabc = !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
> +	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
> +	struct hantro_dev *vpu = ctx->dev;
> +	int i, ref_frames = 0;
> +	bool scale_enable = false;
> +
> +	if (IS_INTRA(frame_type) && !allow_intrabc)
> +		return;
> +
> +	if (allow_intrabc) {
> +		/* With intra block copy a single reference is reported */
> +		ref_frames = 1;
> +	} else {
> +		/* Count each referenced buffer once, however often it is used */
> +		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
> +			int idx = rockchip_vpu981_get_frame_index(ctx, i);
> +
> +			if (idx >= 0 && ref_count[idx]++ == 0)
> +				ref_frames++;
> +		}
> +	}
> +	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
> +
> +	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
> +
> +	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
> +		u32 ref = i - 1;
> +		int idx = 0;
> +		int width, height;
> +
> +		if (allow_intrabc) {
> +			/* Every reference points at the frame being decoded */
> +			idx = av1_dec->current_frame_index;
> +			width = frame->frame_width_minus_1 + 1;
> +			height = frame->frame_height_minus_1 + 1;
> +		} else {
> +			int ref_idx = rockchip_vpu981_get_frame_index(ctx, ref);
> +
> +			/* Slot 0 is used when the lookup yields 0 or fails */
> +			if (ref_idx > 0)
> +				idx = ref_idx;
> +			width = av1_dec->frame_refs[idx].width;
> +			height = av1_dec->frame_refs[idx].height;
> +		}
> +
> +		scale_enable |=
> +			rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
> +							height);
> +
> +		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
> +						      av1_dec->ref_frame_sign_bias[i]);
> +	}
> +	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
> +
> +	hantro_reg_write(vpu, &av1_ref0_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref1_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref2_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref3_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref4_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref5_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref6_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
> +
> +	rockchip_vpu981_av1_dec_set_other_frames(ctx);
> +}
> +
> +/*
> + * Write the coding tool settings taken from the AV1 frame and sequence
> + * controls, the quantizer parameters, the skip mode references and the
> + * AV1_MC_SYNC_* addresses (both set to the tile buffer).
> + */
> +static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	u32 qm_y = 0xff, qm_u = 0xff, qm_v = 0xff;
> +	u32 skip_ref0, skip_ref1;
> +	bool force_integer_mv;
> +
> +	/* Frame flags and loop filter deltas */
> +	hantro_reg_write(vpu, &av1_skip_mode,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
> +	hantro_reg_write(vpu, &av1_tempor_mvp_e,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
> +	hantro_reg_write(vpu, &av1_delta_lf_res_log,
> +			 frame->loop_filter.delta_lf_res);
> +	hantro_reg_write(vpu, &av1_delta_lf_multi,
> +			 !!(frame->loop_filter.flags &
> +			    V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
> +	hantro_reg_write(vpu, &av1_delta_lf_present,
> +			 !!(frame->loop_filter.flags &
> +			    V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
> +	hantro_reg_write(vpu, &av1_disable_cdf_update,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
> +	hantro_reg_write(vpu, &av1_allow_warp,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
> +	hantro_reg_write(vpu, &av1_show_frame,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
> +	hantro_reg_write(vpu, &av1_switchable_motion_mode,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
> +
> +	/* Sequence level tool enables */
> +	hantro_reg_write(vpu, &av1_enable_cdef,
> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
> +	hantro_reg_write(vpu, &av1_allow_masked_compound,
> +			 !!(ctrls->sequence->flags &
> +			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
> +	hantro_reg_write(vpu, &av1_allow_interintra,
> +			 !!(ctrls->sequence->flags &
> +			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
> +	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
> +			 !!(ctrls->sequence->flags &
> +			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
> +	hantro_reg_write(vpu, &av1_allow_filter_intra,
> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
> +	hantro_reg_write(vpu, &av1_enable_jnt_comp,
> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
> +	hantro_reg_write(vpu, &av1_enable_dual_filter,
> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
> +
> +	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
> +	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
> +	hantro_reg_write(vpu, &av1_allow_intrabc,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
> +
> +	/* Forced integer MV is only honoured with screen content tools */
> +	force_integer_mv =
> +		(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS) &&
> +		(frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV);
> +	hantro_reg_write(vpu, &av1_force_interger_mv, force_integer_mv);
> +
> +	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
> +	hantro_reg_write(vpu, &av1_delta_q_res_log, frame->quantization.delta_q_res);
> +	hantro_reg_write(vpu, &av1_delta_q_present,
> +			 !!(frame->quantization.flags &
> +			    V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
> +
> +	hantro_reg_write(vpu, &av1_idr_pic_e, !frame->frame_type);
> +	hantro_reg_write(vpu, &av1_quant_base_qindex, frame->quantization.base_q_idx);
> +	/* Luma and chroma share the same bit depth */
> +	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
> +	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
> +
> +	hantro_reg_write(vpu, &av1_mcomp_filt_type, frame->interpolation_filter);
> +	hantro_reg_write(vpu, &av1_high_prec_mv_e,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
> +	hantro_reg_write(vpu, &av1_comp_pred_mode,
> +			 (frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
> +	hantro_reg_write(vpu, &av1_transform_mode, (frame->tx_mode == 1) ? 3 : 4);
> +	hantro_reg_write(vpu, &av1_max_cb_size,
> +			 (ctrls->sequence->flags &
> +			  V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
> +	hantro_reg_write(vpu, &av1_min_cb_size, 3);
> +
> +	/* Fields that are always cleared by this driver */
> +	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
> +	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
> +	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
> +
> +	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, frame->quantization.delta_q_y_dc);
> +	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, frame->quantization.delta_q_u_dc);
> +	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, frame->quantization.delta_q_u_ac);
> +
> +	/* Without quantizer matrices the levels keep their 0xff default */
> +	if (frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
> +		qm_y = frame->quantization.qm_y;
> +		qm_u = frame->quantization.qm_u;
> +		qm_v = frame->quantization.qm_v;
> +	}
> +	hantro_reg_write(vpu, &av1_qmlevel_y, qm_y);
> +	hantro_reg_write(vpu, &av1_qmlevel_u, qm_u);
> +	hantro_reg_write(vpu, &av1_qmlevel_v, qm_v);
> +
> +	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
> +	hantro_reg_write(vpu, &av1_quant_delta_v_dc, frame->quantization.delta_q_v_dc);
> +	hantro_reg_write(vpu, &av1_quant_delta_v_ac, frame->quantization.delta_q_v_ac);
> +
> +	/* A skip mode frame of 0 is replaced by 1 */
> +	skip_ref0 = frame->skip_mode_frame[0];
> +	if (!skip_ref0)
> +		skip_ref0 = 1;
> +	hantro_reg_write(vpu, &av1_skip_ref0, skip_ref0);
> +
> +	skip_ref1 = frame->skip_mode_frame[1];
> +	if (!skip_ref1)
> +		skip_ref1 = 1;
> +	hantro_reg_write(vpu, &av1_skip_ref1, skip_ref1);
> +
> +	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
> +	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
> +}
> +
> +/*
> + * Point the decoder at the bitstream held in @vb2_src.
> + *
> + * The stream address is the plane address plus the first tile offset rounded
> + * down to a multiple of 16 bytes; the remaining bytes of that offset are
> + * passed to the hardware as a start position expressed in bits.
> + */
> +static void
> +rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
> +					 struct vb2_v4l2_buffer *vb2_src)
> +{
> +	const struct v4l2_ctrl_av1_tile_group_entry *first_tile =
> +		&ctx->av1_dec.ctrls.tile_group_entry[0];
> +	struct hantro_dev *vpu = ctx->dev;
> +	dma_addr_t stream_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
> +	u32 payload = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
> +	u32 capacity = vb2_plane_size(&vb2_src->vb2_buf, 0);
> +	/* Low four bits of the offset: bytes to skip, converted to bits */
> +	int first_bit = (first_tile->tile_offset & 0xf) * 8;
> +	/* Offset with the low four bits cleared */
> +	int aligned_offset = first_tile->tile_offset & ~0xf;
> +
> +	hantro_reg_write(vpu, &av1_strm_buffer_len, capacity);
> +	hantro_reg_write(vpu, &av1_strm_start_bit, first_bit);
> +	hantro_reg_write(vpu, &av1_stream_len, payload);
> +	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
> +	hantro_write_addr(vpu, AV1_INPUT_STREAM, stream_dma + aligned_offset);
> +}
> +
> +/*
> + * Set the luma, chroma and motion vector output addresses for the frame
> + * currently being decoded. Chroma and motion vectors live in the same buffer
> + * as luma, at the offsets given by the luma_size and chroma_size helpers.
> + */
> +static void
> +rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_dev *vpu = ctx->dev;
> +	size_t chroma_off = rockchip_vpu981_av1_dec_luma_size(ctx);
> +	size_t mv_off = rockchip_vpu981_av1_dec_chroma_size(ctx);
> +	struct vb2_v4l2_buffer *cur_ref;
> +	struct hantro_decoded_buffer *out;
> +	dma_addr_t base;
> +
> +	cur_ref = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
> +	out = vb2_to_hantro_decoded_buf(&cur_ref->vb2_buf);
> +	base = hantro_get_dec_buf_addr(ctx, &out->base.vb.vb2_buf);
> +
> +	hantro_write_addr(vpu, AV1_TILE_OUT_LU, base);
> +	hantro_write_addr(vpu, AV1_TILE_OUT_CH, base + chroma_off);
> +	hantro_write_addr(vpu, AV1_TILE_OUT_MV, base + mv_off);
> +}
> +
> +/* Fixed-value decoder setup: mode, interrupts, bus parameters and timeouts. */
> +static void rockchip_vpu981_av1_dec_set_static_regs(struct hantro_dev *vpu)
> +{
> +	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
> +	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
> +	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
> +	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
> +	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
> +
> +	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
> +	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
> +
> +	hantro_reg_write(vpu, &av1_dec_alignment, 64);
> +	hantro_reg_write(vpu, &av1_apf_disable, 0);
> +	hantro_reg_write(vpu, &av1_apf_threshold, 8);
> +	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
> +	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
> +	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
> +	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
> +	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
> +
> +	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
> +	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
> +	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
> +	hantro_reg_write(vpu, &av1_timeout_override_e, 1);
> +}
> +
> +/*
> + * Program the hardware for one AV1 frame and start the decoder.
> + *
> + * Returns 0 once av1_dec_e has been set. If preparing the run fails or no
> + * source buffer is available, the job is completed with VB2_BUF_STATE_ERROR
> + * and the error from the prepare step, or -EINVAL, is returned.
> + */
> +int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct vb2_v4l2_buffer *src_buf;
> +	int err;
> +
> +	hantro_start_prepare_run(ctx);
> +
> +	err = rockchip_vpu981_av1_dec_prepare_run(ctx);
> +	if (err)
> +		goto err_job_done;
> +
> +	src_buf = hantro_get_src_buf(ctx);
> +	if (!src_buf) {
> +		err = -EINVAL;
> +		goto err_job_done;
> +	}
> +
> +	/* Update the reference bookkeeping before any register is written */
> +	rockchip_vpu981_av1_dec_clean_refs(ctx);
> +	rockchip_vpu981_av1_dec_frame_ref(ctx, src_buf->vb2_buf.timestamp);
> +
> +	/* Per-frame settings derived from the controls */
> +	rockchip_vpu981_av1_dec_set_parameters(ctx);
> +	rockchip_vpu981_av1_dec_set_global_model(ctx);
> +	rockchip_vpu981_av1_dec_set_tile_info(ctx);
> +	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
> +	rockchip_vpu981_av1_dec_set_segmentation(ctx);
> +	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
> +	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
> +	rockchip_vpu981_av1_dec_set_cdef(ctx);
> +	rockchip_vpu981_av1_dec_set_lr(ctx);
> +	rockchip_vpu981_av1_dec_set_prob(ctx);
> +
> +	rockchip_vpu981_av1_dec_set_static_regs(vpu);
> +
> +	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
> +	rockchip_vpu981_av1_dec_set_input_buffer(ctx, src_buf);
> +
> +	hantro_end_prepare_run(ctx);
> +
> +	/* Setting the enable bit is the last register write */
> +	hantro_reg_write(vpu, &av1_dec_e, 1);
> +
> +	return 0;
> +
> +err_job_done:
> +	hantro_end_prepare_run(ctx);
> +	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
> +	return err;
> +}
> +
> +/*
> + * Enable the post-processor and route its output to the destination buffer.
> + *
> + * The output size and strides come from ctx->dst_fmt; the input size is
> + * programmed as half of the output size. The output format code is 1 for
> + * P010, 3 for NV12 and 0 for any other pixel format. Chroma is written right
> + * after the luma plane (bytesperline * height).
> + */
> +static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
> +	dma_addr_t luma_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> +	u32 stride = ctx->dst_fmt.plane_fmt[0].bytesperline;
> +	size_t chroma_offset = stride * ctx->dst_fmt.height;
> +	int out_width = ctx->dst_fmt.width;
> +	int out_height = ctx->dst_fmt.height;
> +	u32 out_format;
> +
> +	/* enable post processor */
> +	hantro_reg_write(vpu, &av1_pp_out_e, 1);
> +	hantro_reg_write(vpu, &av1_pp_in_format, 0);
> +	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
> +	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
> +
> +	hantro_reg_write(vpu, &av1_pp_in_height, out_height / 2);
> +	hantro_reg_write(vpu, &av1_pp_in_width, out_width / 2);
> +	hantro_reg_write(vpu, &av1_pp_out_height, out_height);
> +	hantro_reg_write(vpu, &av1_pp_out_width, out_width);
> +	/* Luma and chroma use the same line stride */
> +	hantro_reg_write(vpu, &av1_pp_out_y_stride, stride);
> +	hantro_reg_write(vpu, &av1_pp_out_c_stride, stride);
> +
> +	if (ctx->dst_fmt.pixelformat == V4L2_PIX_FMT_P010)
> +		out_format = 1;
> +	else if (ctx->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12)
> +		out_format = 3;
> +	else
> +		out_format = 0;
> +	hantro_reg_write(vpu, &av1_pp_out_format, out_format);
> +
> +	/* Optional post-processor features are all left off */
> +	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
> +	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
> +	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
> +	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
> +	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
> +	hantro_reg_write(vpu, &av1_pp_up_level, 0);
> +	hantro_reg_write(vpu, &av1_pp_down_level, 0);
> +	hantro_reg_write(vpu, &av1_pp_exist, 0);
> +
> +	hantro_write_addr(vpu, AV1_PP_OUT_LU, luma_dma);
> +	hantro_write_addr(vpu, AV1_PP_OUT_CH, luma_dma + chroma_offset);
> +}
> +
> +/* Turn the post-processor output off by clearing av1_pp_out_e. */
> +static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
> +{
> +	hantro_reg_write(ctx->dev, &av1_pp_out_e, 0);
> +}
> +
> +const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = { /* VPU981 post-processor callbacks */
> + .enable = rockchip_vpu981_postproc_enable, /* sets av1_pp_out_e and programs the output */
> + .disable = rockchip_vpu981_postproc_disable, /* clears av1_pp_out_e */
> +};
> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> new file mode 100644
> index 000000000000..182e6c830ff6
> --- /dev/null
> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> @@ -0,0 +1,477 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2022, Collabora
> + *
> + * Author: Benjamin Gaignard <[email protected]>
> + */
> +
> +#ifndef _ROCKCHIP_VPU981_REGS_H_
> +#define _ROCKCHIP_VPU981_REGS_H_
> +
> +#include "hantro.h"
> +
> +#define AV1_SWREG(nr) ((nr) * 4)
> +
> +/* Describe a register field: word number @b, bit shift @s and unshifted mask @m. */
> +#define AV1_DEC_REG(b, s, m) \
> +	((const struct hantro_reg) { \
> +		.base = AV1_SWREG(b), \
> +		.shift = (s), \
> +		.mask = (m), \
> +	})
> +
> +#define AV1_REG_INTERRUPT AV1_SWREG(1)
> +#define AV1_REG_INTERRUPT_DEC_RDY_INT BIT(12)
> +
> +#define AV1_REG_CONFIG AV1_SWREG(2)
> +#define AV1_REG_CONFIG_DEC_CLK_GATE_E BIT(10)
> +
> +#define av1_dec_e AV1_DEC_REG(1, 0, 0x1)
> +#define av1_dec_abort_e AV1_DEC_REG(1, 5, 0x1)
> +#define av1_dec_tile_int_e AV1_DEC_REG(1, 7, 0x1)
> +
> +#define av1_dec_clk_gate_e AV1_DEC_REG(2, 10, 0x1)
> +
> +#define av1_dec_out_ec_bypass AV1_DEC_REG(3, 8, 0x1)
> +#define av1_write_mvs_e AV1_DEC_REG(3, 12, 0x1)
> +#define av1_filtering_dis AV1_DEC_REG(3, 14, 0x1)
> +#define av1_dec_out_dis AV1_DEC_REG(3, 15, 0x1)
> +#define av1_dec_out_ec_byte_word AV1_DEC_REG(3, 16, 0x1)
> +#define av1_skip_mode AV1_DEC_REG(3, 26, 0x1)
> +#define av1_dec_mode AV1_DEC_REG(3, 27, 0x1f)
> +
> +#define av1_ref_frames AV1_DEC_REG(4, 0, 0xf)
> +#define av1_pic_height_in_cbs AV1_DEC_REG(4, 6, 0x1fff)
> +#define av1_pic_width_in_cbs AV1_DEC_REG(4, 19, 0x1fff)
> +
> +#define av1_ref_scaling_enable AV1_DEC_REG(5, 0, 0x1)
> +#define av1_filt_level_base_gt32 AV1_DEC_REG(5, 1, 0x1)
> +#define av1_error_resilient AV1_DEC_REG(5, 2, 0x1)
> +#define av1_force_interger_mv AV1_DEC_REG(5, 3, 0x1)
> +#define av1_allow_intrabc AV1_DEC_REG(5, 4, 0x1)
> +#define av1_allow_screen_content_tools AV1_DEC_REG(5, 5, 0x1)
> +#define av1_reduced_tx_set_used AV1_DEC_REG(5, 6, 0x1)
> +#define av1_enable_dual_filter AV1_DEC_REG(5, 7, 0x1)
> +#define av1_enable_jnt_comp AV1_DEC_REG(5, 8, 0x1)
> +#define av1_allow_filter_intra AV1_DEC_REG(5, 9, 0x1)
> +#define av1_enable_intra_edge_filter AV1_DEC_REG(5, 10, 0x1)
> +#define av1_tempor_mvp_e AV1_DEC_REG(5, 11, 0x1)
> +#define av1_allow_interintra AV1_DEC_REG(5, 12, 0x1)
> +#define av1_allow_masked_compound AV1_DEC_REG(5, 13, 0x1)
> +#define av1_enable_cdef AV1_DEC_REG(5, 14, 0x1)
> +#define av1_switchable_motion_mode AV1_DEC_REG(5, 15, 0x1)
> +#define av1_show_frame AV1_DEC_REG(5, 16, 0x1)
> +#define av1_superres_is_scaled AV1_DEC_REG(5, 17, 0x1)
> +#define av1_allow_warp AV1_DEC_REG(5, 18, 0x1)
> +#define av1_disable_cdf_update AV1_DEC_REG(5, 19, 0x1)
> +#define av1_preskip_segid AV1_DEC_REG(5, 20, 0x1)
> +#define av1_delta_lf_present AV1_DEC_REG(5, 21, 0x1)
> +#define av1_delta_lf_multi AV1_DEC_REG(5, 22, 0x1)
> +#define av1_delta_lf_res_log AV1_DEC_REG(5, 23, 0x3)
> +#define av1_strm_start_bit AV1_DEC_REG(5, 25, 0x7f)
> +
> +#define av1_stream_len AV1_DEC_REG(6, 0, 0xffffffff)
> +
> +#define av1_delta_q_present AV1_DEC_REG(7, 0, 0x1)
> +#define av1_delta_q_res_log AV1_DEC_REG(7, 1, 0x3)
> +#define av1_cdef_damping AV1_DEC_REG(7, 3, 0x3)
> +#define av1_cdef_bits AV1_DEC_REG(7, 5, 0x3)
> +#define av1_apply_grain AV1_DEC_REG(7, 7, 0x1)
> +#define av1_num_y_points_b AV1_DEC_REG(7, 8, 0x1)
> +#define av1_num_cb_points_b AV1_DEC_REG(7, 9, 0x1)
> +#define av1_num_cr_points_b AV1_DEC_REG(7, 10, 0x1)
> +#define av1_overlap_flag AV1_DEC_REG(7, 11, 0x1)
> +#define av1_clip_to_restricted_range AV1_DEC_REG(7, 12, 0x1)
> +#define av1_chroma_scaling_from_luma AV1_DEC_REG(7, 13, 0x1)
> +#define av1_random_seed AV1_DEC_REG(7, 14, 0xffff)
> +#define av1_blackwhite_e AV1_DEC_REG(7, 30, 0x1)
> +
> +#define av1_scaling_shift AV1_DEC_REG(8, 0, 0xf)
> +#define av1_bit_depth_c_minus8 AV1_DEC_REG(8, 4, 0x3)
> +#define av1_bit_depth_y_minus8 AV1_DEC_REG(8, 6, 0x3)
> +#define av1_quant_base_qindex AV1_DEC_REG(8, 8, 0xff)
> +#define av1_idr_pic_e AV1_DEC_REG(8, 16, 0x1)
> +#define av1_superres_pic_width AV1_DEC_REG(8, 17, 0x7fff)
> +
> +#define av1_ref4_sign_bias AV1_DEC_REG(9, 2, 0x1)
> +#define av1_ref5_sign_bias AV1_DEC_REG(9, 3, 0x1)
> +#define av1_ref6_sign_bias AV1_DEC_REG(9, 4, 0x1)
> +#define av1_mf1_type AV1_DEC_REG(9, 5, 0x7)
> +#define av1_mf2_type AV1_DEC_REG(9, 8, 0x7)
> +#define av1_mf3_type AV1_DEC_REG(9, 11, 0x7)
> +#define av1_scale_denom_minus9 AV1_DEC_REG(9, 14, 0x7)
> +#define av1_last_active_seg AV1_DEC_REG(9, 17, 0x7)
> +#define av1_context_update_tile_id AV1_DEC_REG(9, 20, 0xfff)
> +
> +#define av1_tile_transpose AV1_DEC_REG(10, 0, 0x1)
> +#define av1_tile_enable AV1_DEC_REG(10, 1, 0x1)
> +#define av1_multicore_full_width AV1_DEC_REG(10, 2, 0xff)
> +#define av1_num_tile_rows_8k AV1_DEC_REG(10, 10, 0x7f)
> +#define av1_num_tile_cols_8k AV1_DEC_REG(10, 17, 0x7f)
> +#define av1_multicore_tile_start_x AV1_DEC_REG(10, 24, 0xff)
> +
> +#define av1_use_temporal3_mvs AV1_DEC_REG(11, 0, 0x1)
> +#define av1_use_temporal2_mvs AV1_DEC_REG(11, 1, 0x1)
> +#define av1_use_temporal1_mvs AV1_DEC_REG(11, 2, 0x1)
> +#define av1_use_temporal0_mvs AV1_DEC_REG(11, 3, 0x1)
> +#define av1_comp_pred_mode AV1_DEC_REG(11, 4, 0x3)
> +#define av1_high_prec_mv_e AV1_DEC_REG(11, 7, 0x1)
> +#define av1_mcomp_filt_type AV1_DEC_REG(11, 8, 0x7)
> +#define av1_multicore_expect_context_update AV1_DEC_REG(11, 11, 0x1)
> +#define av1_multicore_sbx_offset AV1_DEC_REG(11, 12, 0x7f)
> +#define av1_ulticore_tile_col AV1_DEC_REG(11, 19, 0x7f) /* NOTE(review): presumably meant av1_multicore_tile_col - confirm before renaming */
> +#define av1_transform_mode AV1_DEC_REG(11, 27, 0x7)
> +#define av1_dec_tile_size_mag AV1_DEC_REG(11, 30, 0x3)
> +
> +#define av1_seg_quant_sign AV1_DEC_REG(12, 2, 0xff)
> +#define av1_max_cb_size AV1_DEC_REG(12, 10, 0x7)
> +#define av1_min_cb_size AV1_DEC_REG(12, 13, 0x7)
> +#define av1_comp_pred_fixed_ref AV1_DEC_REG(12, 16, 0x7)
> +#define av1_multicore_tile_width AV1_DEC_REG(12, 19, 0x7f)
> +#define av1_pic_height_pad AV1_DEC_REG(12, 26, 0x7)
> +#define av1_pic_width_pad AV1_DEC_REG(12, 29, 0x7)
> +
> +#define av1_segment_e AV1_DEC_REG(13, 0, 0x1)
> +#define av1_segment_upd_e AV1_DEC_REG(13, 1, 0x1)
> +#define av1_segment_temp_upd_e AV1_DEC_REG(13, 2, 0x1)
> +#define av1_comp_pred_var_ref0_av1 AV1_DEC_REG(13, 3, 0x7)
> +#define av1_comp_pred_var_ref1_av1 AV1_DEC_REG(13, 6, 0x7)
> +#define av1_lossless_e AV1_DEC_REG(13, 9, 0x1)
> +#define av1_qp_delta_ch_ac_av1 AV1_DEC_REG(13, 11, 0x7f)
> +#define av1_qp_delta_ch_dc_av1 AV1_DEC_REG(13, 18, 0x7f)
> +#define av1_qp_delta_y_dc_av1 AV1_DEC_REG(13, 25, 0x7f)
> +
> +#define av1_quant_seg0 AV1_DEC_REG(14, 0, 0xff)
> +#define av1_filt_level_seg0 AV1_DEC_REG(14, 8, 0x3f)
> +#define av1_skip_seg0 AV1_DEC_REG(14, 14, 0x1)
> +#define av1_refpic_seg0 AV1_DEC_REG(14, 15, 0xf)
> +#define av1_filt_level_delta0_seg0 AV1_DEC_REG(14, 19, 0x7f)
> +#define av1_filt_level0 AV1_DEC_REG(14, 26, 0x3f)
> +
> +#define av1_quant_seg1 AV1_DEC_REG(15, 0, 0xff)
> +#define av1_filt_level_seg1 AV1_DEC_REG(15, 8, 0x3f)
> +#define av1_skip_seg1 AV1_DEC_REG(15, 14, 0x1)
> +#define av1_refpic_seg1 AV1_DEC_REG(15, 15, 0xf)
> +#define av1_filt_level_delta0_seg1 AV1_DEC_REG(15, 19, 0x7f)
> +#define av1_filt_level1 AV1_DEC_REG(15, 26, 0x3f)
> +
> +#define av1_quant_seg2 AV1_DEC_REG(16, 0, 0xff)
> +#define av1_filt_level_seg2 AV1_DEC_REG(16, 8, 0x3f)
> +#define av1_skip_seg2 AV1_DEC_REG(16, 14, 0x1)
> +#define av1_refpic_seg2 AV1_DEC_REG(16, 15, 0xf)
> +#define av1_filt_level_delta0_seg2 AV1_DEC_REG(16, 19, 0x7f)
> +#define av1_filt_level2 AV1_DEC_REG(16, 26, 0x3f)
> +
> +#define av1_quant_seg3 AV1_DEC_REG(17, 0, 0xff)
> +#define av1_filt_level_seg3 AV1_DEC_REG(17, 8, 0x3f)
> +#define av1_skip_seg3 AV1_DEC_REG(17, 14, 0x1)
> +#define av1_refpic_seg3 AV1_DEC_REG(17, 15, 0xf)
> +#define av1_filt_level_delta0_seg3 AV1_DEC_REG(17, 19, 0x7f)
> +#define av1_filt_level3 AV1_DEC_REG(17, 26, 0x3f)
> +
> +#define av1_quant_seg4 AV1_DEC_REG(18, 0, 0xff)
> +#define av1_filt_level_seg4 AV1_DEC_REG(18, 8, 0x3f)
> +#define av1_skip_seg4 AV1_DEC_REG(18, 14, 0x1)
> +#define av1_refpic_seg4 AV1_DEC_REG(18, 15, 0xf)
> +#define av1_filt_level_delta0_seg4 AV1_DEC_REG(18, 19, 0x7f)
> +#define av1_lr_type AV1_DEC_REG(18, 26, 0x3f)
> +
> +#define av1_quant_seg5 AV1_DEC_REG(19, 0, 0xff)
> +#define av1_filt_level_seg5 AV1_DEC_REG(19, 8, 0x3f)
> +#define av1_skip_seg5 AV1_DEC_REG(19, 14, 0x1)
> +#define av1_refpic_seg5 AV1_DEC_REG(19, 15, 0xf)
> +#define av1_filt_level_delta0_seg5 AV1_DEC_REG(19, 19, 0x7f)
> +#define av1_lr_unit_size AV1_DEC_REG(19, 26, 0x3f)
> +
> +#define av1_filt_level_delta1_seg0 AV1_DEC_REG(20, 0, 0x7f)
> +#define av1_filt_level_delta2_seg0 AV1_DEC_REG(20, 7, 0x7f)
> +#define av1_filt_level_delta3_seg0 AV1_DEC_REG(20, 14, 0x7f)
> +#define av1_global_mv_seg0 AV1_DEC_REG(20, 21, 0x1)
> +#define av1_mf1_last_offset AV1_DEC_REG(20, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg1 AV1_DEC_REG(21, 0, 0x7f)
> +#define av1_filt_level_delta2_seg1 AV1_DEC_REG(21, 7, 0x7f)
> +#define av1_filt_level_delta3_seg1 AV1_DEC_REG(21, 14, 0x7f)
> +#define av1_global_mv_seg1 AV1_DEC_REG(21, 21, 0x1)
> +#define av1_mf1_last2_offset AV1_DEC_REG(21, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg2 AV1_DEC_REG(22, 0, 0x7f)
> +#define av1_filt_level_delta2_seg2 AV1_DEC_REG(22, 7, 0x7f)
> +#define av1_filt_level_delta3_seg2 AV1_DEC_REG(22, 14, 0x7f)
> +#define av1_global_mv_seg2 AV1_DEC_REG(22, 21, 0x1)
> +#define av1_mf1_last3_offset AV1_DEC_REG(22, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg3 AV1_DEC_REG(23, 0, 0x7f)
> +#define av1_filt_level_delta2_seg3 AV1_DEC_REG(23, 7, 0x7f)
> +#define av1_filt_level_delta3_seg3 AV1_DEC_REG(23, 14, 0x7f)
> +#define av1_global_mv_seg3 AV1_DEC_REG(23, 21, 0x1)
> +#define av1_mf1_golden_offset AV1_DEC_REG(23, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg4 AV1_DEC_REG(24, 0, 0x7f)
> +#define av1_filt_level_delta2_seg4 AV1_DEC_REG(24, 7, 0x7f)
> +#define av1_filt_level_delta3_seg4 AV1_DEC_REG(24, 14, 0x7f)
> +#define av1_global_mv_seg4 AV1_DEC_REG(24, 21, 0x1)
> +#define av1_mf1_bwdref_offset AV1_DEC_REG(24, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg5 AV1_DEC_REG(25, 0, 0x7f)
> +#define av1_filt_level_delta2_seg5 AV1_DEC_REG(25, 7, 0x7f)
> +#define av1_filt_level_delta3_seg5 AV1_DEC_REG(25, 14, 0x7f)
> +#define av1_global_mv_seg5 AV1_DEC_REG(25, 21, 0x1)
> +#define av1_mf1_altref2_offset AV1_DEC_REG(25, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg6 AV1_DEC_REG(26, 0, 0x7f)
> +#define av1_filt_level_delta2_seg6 AV1_DEC_REG(26, 7, 0x7f)
> +#define av1_filt_level_delta3_seg6 AV1_DEC_REG(26, 14, 0x7f)
> +#define av1_global_mv_seg6 AV1_DEC_REG(26, 21, 0x1)
> +#define av1_mf1_altref_offset AV1_DEC_REG(26, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg7 AV1_DEC_REG(27, 0, 0x7f)
> +#define av1_filt_level_delta2_seg7 AV1_DEC_REG(27, 7, 0x7f)
> +#define av1_filt_level_delta3_seg7 AV1_DEC_REG(27, 14, 0x7f)
> +#define av1_global_mv_seg7 AV1_DEC_REG(27, 21, 0x1)
> +#define av1_mf2_last_offset AV1_DEC_REG(27, 22, 0x1ff)
> +
> +#define av1_cb_offset AV1_DEC_REG(28, 0, 0x1ff)
> +#define av1_cb_luma_mult AV1_DEC_REG(28, 9, 0xff)
> +#define av1_cb_mult AV1_DEC_REG(28, 17, 0xff)
> +#define av1_quant_delta_v_dc AV1_DEC_REG(28, 25, 0x7f)
> +
> +#define av1_cr_offset AV1_DEC_REG(29, 0, 0x1ff)
> +#define av1_cr_luma_mult AV1_DEC_REG(29, 9, 0xff)
> +#define av1_cr_mult AV1_DEC_REG(29, 17, 0xff)
> +#define av1_quant_delta_v_ac AV1_DEC_REG(29, 25, 0x7f)
> +
> +#define av1_filt_ref_adj_5 AV1_DEC_REG(30, 0, 0x7f)
> +#define av1_filt_ref_adj_4 AV1_DEC_REG(30, 7, 0x7f)
> +#define av1_filt_mb_adj_1 AV1_DEC_REG(30, 14, 0x7f)
> +#define av1_filt_mb_adj_0 AV1_DEC_REG(30, 21, 0x7f)
> +#define av1_filt_sharpness AV1_DEC_REG(30, 28, 0x7)
> +
> +#define av1_quant_seg6 AV1_DEC_REG(31, 0, 0xff)
> +#define av1_filt_level_seg6 AV1_DEC_REG(31, 8, 0x3f)
> +#define av1_skip_seg6 AV1_DEC_REG(31, 14, 0x1)
> +#define av1_refpic_seg6 AV1_DEC_REG(31, 15, 0xf)
> +#define av1_filt_level_delta0_seg6 AV1_DEC_REG(31, 19, 0x7f)
> +#define av1_skip_ref0 AV1_DEC_REG(31, 26, 0xf)
> +
> +#define av1_quant_seg7 AV1_DEC_REG(32, 0, 0xff)
> +#define av1_filt_level_seg7 AV1_DEC_REG(32, 8, 0x3f)
> +#define av1_skip_seg7 AV1_DEC_REG(32, 14, 0x1)
> +#define av1_refpic_seg7 AV1_DEC_REG(32, 15, 0xf)
> +#define av1_filt_level_delta0_seg7 AV1_DEC_REG(32, 19, 0x7f)
> +#define av1_skip_ref1 AV1_DEC_REG(32, 26, 0xf)
> +
> +#define av1_ref0_height AV1_DEC_REG(33, 0, 0xffff)
> +#define av1_ref0_width AV1_DEC_REG(33, 16, 0xffff)
> +
> +#define av1_ref1_height AV1_DEC_REG(34, 0, 0xffff)
> +#define av1_ref1_width AV1_DEC_REG(34, 16, 0xffff)
> +
> +#define av1_ref2_height AV1_DEC_REG(35, 0, 0xffff)
> +#define av1_ref2_width AV1_DEC_REG(35, 16, 0xffff)
> +
> +#define av1_ref0_ver_scale AV1_DEC_REG(36, 0, 0xffff)
> +#define av1_ref0_hor_scale AV1_DEC_REG(36, 16, 0xffff)
> +
> +#define av1_ref1_ver_scale AV1_DEC_REG(37, 0, 0xffff)
> +#define av1_ref1_hor_scale AV1_DEC_REG(37, 16, 0xffff)
> +
> +#define av1_ref2_ver_scale AV1_DEC_REG(38, 0, 0xffff)
> +#define av1_ref2_hor_scale AV1_DEC_REG(38, 16, 0xffff)
> +
> +#define av1_ref3_ver_scale AV1_DEC_REG(39, 0, 0xffff)
> +#define av1_ref3_hor_scale AV1_DEC_REG(39, 16, 0xffff)
> +
> +#define av1_ref4_ver_scale AV1_DEC_REG(40, 0, 0xffff)
> +#define av1_ref4_hor_scale AV1_DEC_REG(40, 16, 0xffff)
> +
> +#define av1_ref5_ver_scale AV1_DEC_REG(41, 0, 0xffff)
> +#define av1_ref5_hor_scale AV1_DEC_REG(41, 16, 0xffff)
> +
> +#define av1_ref6_ver_scale AV1_DEC_REG(42, 0, 0xffff)
> +#define av1_ref6_hor_scale AV1_DEC_REG(42, 16, 0xffff)
> +
> +#define av1_ref3_height AV1_DEC_REG(43, 0, 0xffff)
> +#define av1_ref3_width AV1_DEC_REG(43, 16, 0xffff)
> +
> +#define av1_ref4_height AV1_DEC_REG(44, 0, 0xffff)
> +#define av1_ref4_width AV1_DEC_REG(44, 16, 0xffff)
> +
> +#define av1_ref5_height AV1_DEC_REG(45, 0, 0xffff)
> +#define av1_ref5_width AV1_DEC_REG(45, 16, 0xffff)
> +
> +#define av1_ref6_height AV1_DEC_REG(46, 0, 0xffff)
> +#define av1_ref6_width AV1_DEC_REG(46, 16, 0xffff)
> +
> +#define av1_mf2_last2_offset AV1_DEC_REG(47, 0, 0x1ff)
> +#define av1_mf2_last3_offset AV1_DEC_REG(47, 9, 0x1ff)
> +#define av1_mf2_golden_offset AV1_DEC_REG(47, 18, 0x1ff)
> +#define av1_qmlevel_y AV1_DEC_REG(47, 27, 0xf)
> +
> +#define av1_mf2_bwdref_offset AV1_DEC_REG(48, 0, 0x1ff)
> +#define av1_mf2_altref2_offset AV1_DEC_REG(48, 9, 0x1ff)
> +#define av1_mf2_altref_offset AV1_DEC_REG(48, 18, 0x1ff)
> +#define av1_qmlevel_u AV1_DEC_REG(48, 27, 0xf)
> +
> +#define av1_filt_ref_adj_6 AV1_DEC_REG(49, 0, 0x7f)
> +#define av1_filt_ref_adj_7 AV1_DEC_REG(49, 7, 0x7f)
> +#define av1_qmlevel_v AV1_DEC_REG(49, 14, 0xf)
> +
> +#define av1_superres_chroma_step AV1_DEC_REG(51, 0, 0x3fff)
> +#define av1_superres_luma_step AV1_DEC_REG(51, 14, 0x3fff)
> +
> +#define av1_superres_init_chroma_subpel_x AV1_DEC_REG(52, 0, 0x3fff)
> +#define av1_superres_init_luma_subpel_x AV1_DEC_REG(52, 14, 0x3fff)
> +
> +#define av1_cdef_chroma_secondary_strength AV1_DEC_REG(53, 0, 0xffff)
> +#define av1_cdef_luma_secondary_strength AV1_DEC_REG(53, 16, 0xffff)
> +
> +#define av1_apf_threshold AV1_DEC_REG(55, 0, 0xffff)
> +#define av1_apf_single_pu_mode AV1_DEC_REG(55, 30, 0x1)
> +#define av1_apf_disable AV1_DEC_REG(55, 30, 0x1) /* NOTE(review): same bit as av1_apf_single_pu_mode - confirm against the register map */
> +
> +#define av1_dec_max_burst AV1_DEC_REG(58, 0, 0xff)
> +#define av1_dec_buswidth AV1_DEC_REG(58, 8, 0x7)
> +#define av1_dec_multicore_mode AV1_DEC_REG(58, 11, 0x3)
> +#define av1_dec_axi_wd_id_e AV1_DEC_REG(58, 13, 0x1)
> +#define av1_dec_axi_rd_id_e AV1_DEC_REG(58, 14, 0x1)
> +#define av1_dec_mc_polltime AV1_DEC_REG(58, 17, 0x3ff)
> +#define av1_dec_mc_pollmode AV1_DEC_REG(58, 27, 0x3)
> +
> +#define av1_filt_ref_adj_3 AV1_DEC_REG(59, 0, 0x3f) /* NOTE(review): adj_0..3 are 7 bits apart but masked to 6 bits, while adj_4..7 use 0x7f - confirm width */
> +#define av1_filt_ref_adj_2 AV1_DEC_REG(59, 7, 0x3f)
> +#define av1_filt_ref_adj_1 AV1_DEC_REG(59, 14, 0x3f)
> +#define av1_filt_ref_adj_0 AV1_DEC_REG(59, 21, 0x3f)
> +#define av1_ref0_sign_bias AV1_DEC_REG(59, 28, 0x1)
> +#define av1_ref1_sign_bias AV1_DEC_REG(59, 29, 0x1)
> +#define av1_ref2_sign_bias AV1_DEC_REG(59, 30, 0x1)
> +#define av1_ref3_sign_bias AV1_DEC_REG(59, 31, 0x1)
> +
> +#define av1_cur_last_roffset AV1_DEC_REG(184, 0, 0x1ff) /* reg 184 — assumes AV1_DEC_REG args are (register index, bit shift, mask); TODO confirm against the macro */
> +#define av1_cur_last_offset AV1_DEC_REG(184, 9, 0x1ff)
> +#define av1_mf3_last_offset AV1_DEC_REG(184, 18, 0x1ff)
> +#define av1_ref0_gm_mode AV1_DEC_REG(184, 27, 0x3) /* 2-bit field above the three 9-bit fields */
> +
> +#define av1_cur_last2_roffset AV1_DEC_REG(185, 0, 0x1ff) /* regs 185-188 and 257 repeat the shifts and masks of reg 184 */
> +#define av1_cur_last2_offset AV1_DEC_REG(185, 9, 0x1ff)
> +#define av1_mf3_last2_offset AV1_DEC_REG(185, 18, 0x1ff)
> +#define av1_ref1_gm_mode AV1_DEC_REG(185, 27, 0x3)
> +
> +#define av1_cur_last3_roffset AV1_DEC_REG(186, 0, 0x1ff)
> +#define av1_cur_last3_offset AV1_DEC_REG(186, 9, 0x1ff)
> +#define av1_mf3_last3_offset AV1_DEC_REG(186, 18, 0x1ff)
> +#define av1_ref2_gm_mode AV1_DEC_REG(186, 27, 0x3)
> +
> +#define av1_cur_golden_roffset AV1_DEC_REG(187, 0, 0x1ff)
> +#define av1_cur_golden_offset AV1_DEC_REG(187, 9, 0x1ff)
> +#define av1_mf3_golden_offset AV1_DEC_REG(187, 18, 0x1ff)
> +#define av1_ref3_gm_mode AV1_DEC_REG(187, 27, 0x3)
> +
> +#define av1_cur_bwdref_roffset AV1_DEC_REG(188, 0, 0x1ff)
> +#define av1_cur_bwdref_offset AV1_DEC_REG(188, 9, 0x1ff)
> +#define av1_mf3_bwdref_offset AV1_DEC_REG(188, 18, 0x1ff)
> +#define av1_ref4_gm_mode AV1_DEC_REG(188, 27, 0x3)
> +
> +#define av1_cur_altref2_roffset AV1_DEC_REG(257, 0, 0x1ff) /* note the register index jumps from 188 to 257 here */
> +#define av1_cur_altref2_offset AV1_DEC_REG(257, 9, 0x1ff)
> +#define av1_mf3_altref2_offset AV1_DEC_REG(257, 18, 0x1ff)
> +#define av1_ref5_gm_mode AV1_DEC_REG(257, 27, 0x3)
> +
> +#define av1_strm_buffer_len AV1_DEC_REG(258, 0, 0xffffffff) /* reg 258, full 32-bit mask — assumes AV1_DEC_REG args are (register index, bit shift, mask); TODO confirm against the macro */
> +
> +#define av1_strm_start_offset AV1_DEC_REG(259, 0, 0xffffffff) /* reg 259, full 32-bit mask */
> +
> +#define av1_ppd_blend_exist AV1_DEC_REG(260, 21, 0x1) /* reg 260: fields only at shifts 21 and 23-31 in this list */
> +#define av1_ppd_dith_exist AV1_DEC_REG(260, 23, 0x1)
> +#define av1_ablend_crop_e AV1_DEC_REG(260, 24, 0x1)
> +#define av1_pp_format_p010_e AV1_DEC_REG(260, 25, 0x1)
> +#define av1_pp_format_customer1_e AV1_DEC_REG(260, 26, 0x1)
> +#define av1_pp_crop_exist AV1_DEC_REG(260, 27, 0x1)
> +#define av1_pp_up_level AV1_DEC_REG(260, 28, 0x1)
> +#define av1_pp_down_level AV1_DEC_REG(260, 29, 0x3) /* the only 2-bit field in this register (shifts 29-30) */
> +#define av1_pp_exist AV1_DEC_REG(260, 31, 0x1)
> +
> +#define av1_cur_altref_roffset AV1_DEC_REG(262, 0, 0x1ff) /* reg 262 — assumes AV1_DEC_REG args are (register index, bit shift, mask); TODO confirm against the macro */
> +#define av1_cur_altref_offset AV1_DEC_REG(262, 9, 0x1ff)
> +#define av1_mf3_altref_offset AV1_DEC_REG(262, 18, 0x1ff)
> +#define av1_ref6_gm_mode AV1_DEC_REG(262, 27, 0x3) /* 2-bit field above the three 9-bit fields */
> +
> +#define av1_cdef_luma_primary_strength AV1_DEC_REG(263, 0, 0xffffffff) /* reg 263, full 32-bit mask */
> +
> +#define av1_cdef_chroma_primary_strength AV1_DEC_REG(264, 0, 0xffffffff) /* reg 264, full 32-bit mask */
> +
> +#define av1_axi_arqos AV1_DEC_REG(265, 0, 0xf) /* reg 265: two 4-bit fields, two 10-bit fields, one flag at bit 31 */
> +#define av1_axi_awqos AV1_DEC_REG(265, 4, 0xf)
> +#define av1_axi_wr_ostd_threshold AV1_DEC_REG(265, 8, 0x3ff)
> +#define av1_axi_rd_ostd_threshold AV1_DEC_REG(265, 18, 0x3ff)
> +#define av1_axi_wr_4k_dis AV1_DEC_REG(265, 31, 0x1)
> +
> +#define av1_128bit_mode AV1_DEC_REG(266, 5, 0x1) /* reg 266: three isolated one-bit flags */
> +#define av1_wr_shaper_bypass AV1_DEC_REG(266, 10, 0x1)
> +#define av1_error_conceal_e AV1_DEC_REG(266, 30, 0x1)
> +
> +#define av1_superres_chroma_step_invra AV1_DEC_REG(298, 0, 0xffff) /* reg 298: two 16-bit halves (chroma low, luma high) */
> +#define av1_superres_luma_step_invra AV1_DEC_REG(298, 16, 0xffff)
> +
> +#define av1_dec_alignment AV1_DEC_REG(314, 0, 0xffff) /* reg 314: low 16 bits only */
> +
> +#define av1_ext_timeout_cycles AV1_DEC_REG(318, 0, 0x7fffffff) /* reg 318: 31-bit count plus a flag at bit 31 */
> +#define av1_ext_timeout_override_e AV1_DEC_REG(318, 31, 0x1)
> +
> +#define av1_timeout_cycles AV1_DEC_REG(319, 0, 0x7fffffff) /* reg 319: same 31+1 split as reg 318 */
> +#define av1_timeout_override_e AV1_DEC_REG(319, 31, 0x1)
> +
> +#define av1_pp_out_e AV1_DEC_REG(320, 0, 0x1) /* reg 320 — assumes AV1_DEC_REG args are (register index, bit shift, mask); TODO confirm against the macro */
> +#define av1_pp_cr_first AV1_DEC_REG(320, 1, 0x1)
> +#define av1_pp_out_mode AV1_DEC_REG(320, 2, 0x1)
> +#define av1_pp_out_tile_e AV1_DEC_REG(320, 3, 0x1)
> +#define av1_pp_status AV1_DEC_REG(320, 4, 0xf)
> +#define av1_pp_in_blk_size AV1_DEC_REG(320, 8, 0x7)
> +#define av1_pp_out_p010_fmt AV1_DEC_REG(320, 11, 0x3)
> +#define av1_pp_out_rgb_fmt AV1_DEC_REG(320, 13, 0x1f)
> +#define av1_rgb_range_max AV1_DEC_REG(320, 18, 0xfff) /* 12-bit field covering shifts 18-29 */
> +#define av1_pp_rgb_planar AV1_DEC_REG(320, 30, 0x1)
> +
> +#define av1_scale_hratio AV1_DEC_REG(322, 0, 0x3ffff) /* reg 322: 18-bit field, then 5/2/2/5-bit fields filling all 32 bits */
> +#define av1_pp_out_format AV1_DEC_REG(322, 18, 0x1f)
> +#define av1_ver_scale_mode AV1_DEC_REG(322, 23, 0x3)
> +#define av1_hor_scale_mode AV1_DEC_REG(322, 25, 0x3)
> +#define av1_pp_in_format AV1_DEC_REG(322, 27, 0x1f)
> +
> +#define av1_pp_out_c_stride AV1_DEC_REG(329, 0, 0xffff) /* reg 329: two 16-bit halves (c stride low, y stride high) */
> +#define av1_pp_out_y_stride AV1_DEC_REG(329, 16, 0xffff)
> +
> +#define av1_pp_in_height AV1_DEC_REG(331, 0, 0xffff) /* reg 331: two 16-bit halves (height low, width high) */
> +#define av1_pp_in_width AV1_DEC_REG(331, 16, 0xffff)
> +
> +#define av1_pp_out_height AV1_DEC_REG(332, 0, 0xffff) /* reg 332: same height/width split as reg 331 */
> +#define av1_pp_out_width AV1_DEC_REG(332, 16, 0xffff)
> +
> +#define av1_pp1_dup_ver AV1_DEC_REG(394, 0, 0xff) /* reg 394: four 8-bit fields, pp1 in the low half and pp0 in the high half */
> +#define av1_pp1_dup_hor AV1_DEC_REG(394, 8, 0xff)
> +#define av1_pp0_dup_ver AV1_DEC_REG(394, 16, 0xff)
> +#define av1_pp0_dup_hor AV1_DEC_REG(394, 24, 0xff)
> +
> +#define AV1_TILE_OUT_LU (AV1_SWREG(65)) /* whole-register constants built from AV1_SWREG(n); NOTE(review): presumably buffer base-address registers — confirm against the users of these macros */
> +#define AV1_REFERENCE_Y(i) (AV1_SWREG(67) + ((i) * 0x8)) /* indexed: each step of i adds 0x8 to the AV1_SWREG(67) value */
> +#define AV1_SEGMENTATION (AV1_SWREG(81))
> +#define AV1_GLOBAL_MODEL (AV1_SWREG(83))
> +#define AV1_CDEF_COL (AV1_SWREG(85))
> +#define AV1_SR_COL (AV1_SWREG(89))
> +#define AV1_LR_COL (AV1_SWREG(91))
> +#define AV1_FILM_GRAIN (AV1_SWREG(95))
> +#define AV1_TILE_OUT_CH (AV1_SWREG(99))
> +#define AV1_REFERENCE_CB(i) (AV1_SWREG(101) + ((i) * 0x8)) /* indexed like AV1_REFERENCE_Y, starting at register 101 */
> +#define AV1_TILE_OUT_MV (AV1_SWREG(133))
> +#define AV1_REFERENCE_MV(i) (AV1_SWREG(135) + ((i) * 0x8)) /* indexed like AV1_REFERENCE_Y, starting at register 135 */
> +#define AV1_TILE_BASE (AV1_SWREG(167))
> +#define AV1_INPUT_STREAM (AV1_SWREG(169))
> +#define AV1_PROP_TABLE_OUT (AV1_SWREG(171))
> +#define AV1_PROP_TABLE (AV1_SWREG(173))
> +#define AV1_MC_SYNC_CURR (AV1_SWREG(175))
> +#define AV1_MC_SYNC_LEFT (AV1_SWREG(177))
> +#define AV1_DB_DATA_COL (AV1_SWREG(179))
> +#define AV1_DB_CTRL_COL (AV1_SWREG(183))
> +#define AV1_PP_OUT_LU (AV1_SWREG(326)) /* the last two entries use even register numbers, unlike the odd-numbered ones above */
> +#define AV1_PP_OUT_CH (AV1_SWREG(328))
> +
> +#endif /* _ROCKCHIP_VPU981_REGS_H_ */
Regards,
Hans