2019-08-27 06:21:31

by Aniket Masule

[permalink] [raw]
Subject: [PATCH 0/3] media: venus: Update clock scaling

In this patch series, clock scaling and core selection methods are
updated. Current clock scaling is same for vpu4 and previous versions.
Introducing load calculations using vpp cycles, which indicates the
cycles required by video hardware to process each macroblock. Also
adding vsp cycles, cycles require by stream processor. Clock scaling
is now done more precisely using vpp and vsp cycles.
Removing core selection from this series, there will be separate patch
once issue related to power domain is fixed.

This patch depends on the following patches:
https://lore.kernel.org/patchwork/patch/1114762/ - Venus interconnect support for sdm845
https://lore.kernel.org/patchwork/patch/1114761/ - Venus interconnect support for sdm845

Changes since v6:
- Removed core selection.
- Corrected frequency calculations.
- Removed instance lock used while iterating over buffers.

Changes since v5:
- Corrected load_per_core calculations.

Changes since v4:
- Added call to load_scale_clocks from venus_helper_vb2_buf_queue.
- Modified check to match core_id in core_selection.

Changes since v3:
- vsp_cycles and vpp_cyles are now unsigned long.
- Core number counting aligned with VIDC_CORE_ID_.
- Aligned hardware overload handling of scale_clocks_v4 with scale_clocks.
- Added bitrate based clock scaling patch in this patch series.
- Instance state check is now moved from scale_clocks to load_scale_clocks.

Aniket Masule (3):
media: venus: Add codec data table
media: venus: Update clock scaling
media: venus: Update to bitrate based clock scaling

drivers/media/platform/qcom/venus/core.c | 13 ++
drivers/media/platform/qcom/venus/core.h | 16 +++
drivers/media/platform/qcom/venus/helpers.c | 188 +++++++++++++++++++++++++---
drivers/media/platform/qcom/venus/helpers.h | 3 +-
drivers/media/platform/qcom/venus/vdec.c | 8 +-
drivers/media/platform/qcom/venus/venc.c | 4 +
6 files changed, 209 insertions(+), 23 deletions(-)

--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


2019-08-27 06:21:39

by Aniket Masule

[permalink] [raw]
Subject: [PATCH 2/3] media: venus: Update clock scaling

Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required

Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/helpers.c | 135 +++++++++++++++++++++++-----
drivers/media/platform/qcom/venus/helpers.h | 2 +-
drivers/media/platform/qcom/venus/vdec.c | 4 +-
3 files changed, 118 insertions(+), 23 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index 71af237..4ed630b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -448,12 +448,32 @@ static int load_scale_bw(struct venus_core *core)
return icc_set_bw(core->video_path, total_avg, total_peak);
}

-int venus_helper_load_scale_clocks(struct venus_core *core)
+static int set_clk_freq(struct venus_core *core, unsigned long freq)
{
+ struct clk *clk = core->clks[0];
+ int ret;
+
+ ret = clk_set_rate(clk, freq);
+ if (ret)
+ return ret;
+
+ ret = clk_set_rate(core->core0_clk, freq);
+ if (ret)
+ return ret;
+
+ ret = clk_set_rate(core->core1_clk, freq);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int scale_clocks(struct venus_inst *inst)
+{
+ struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
- struct clk *clk = core->clks[0];
struct device *dev = core->dev;
u32 mbs_per_sec;
unsigned int i;
@@ -479,28 +499,103 @@ int venus_helper_load_scale_clocks(struct venus_core *core)

set_freq:

- ret = clk_set_rate(clk, freq);
- if (ret)
- goto err;
+ ret = set_clk_freq(core, freq);
+ if (ret) {
+ dev_err(dev, "failed to set clock rate %lu (%d)\n",
+ freq, ret);
+ return ret;
+ }

- ret = clk_set_rate(core->core0_clk, freq);
- if (ret)
- goto err;
+ ret = load_scale_bw(core);
+ if (ret) {
+ dev_err(dev, "failed to set bandwidth (%d)\n",
+ ret);
+ return ret;
+ }

- ret = clk_set_rate(core->core1_clk, freq);
- if (ret)
- goto err;
+ return 0;
+}
+
+static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+{
+ unsigned long vpp_freq = 0;
+ u32 mbs_per_sec;
+
+ mbs_per_sec = load_per_instance(inst);
+ vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+ /* 21 / 20 is overhead factor */
+ vpp_freq += vpp_freq / 20;
+
+ return vpp_freq;
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+ struct venus_core *core = inst->core;
+ const struct freq_tbl *table = core->res->freq_tbl;
+ unsigned int num_rows = core->res->freq_tbl_size;
+ struct device *dev = core->dev;
+ unsigned int i;
+ unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+ int ret;
+
+ freq = calculate_vpp_freq(inst);
+ inst->clk_data.freq = freq;
+
+ mutex_lock(&core->lock);
+ list_for_each_entry(inst, &core->instances, list) {
+ if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+ freq_core1 += inst->clk_data.freq;
+ } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+ freq_core2 += inst->clk_data.freq;
+ } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+ freq_core1 += inst->clk_data.freq;
+ freq_core2 += inst->clk_data.freq;
+ }
+ }
+ mutex_unlock(&core->lock);
+
+ freq = max(freq_core1, freq_core2);
+
+ if (freq >= table[0].freq) {
+ freq = table[0].freq;
+ dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
+ freq, table[0].freq);
+ goto set_freq;
+ }
+
+ for (i = num_rows - 1 ; i >= 0; i--) {
+ if (freq <= table[i].freq) {
+ freq = table[i].freq;
+ break;
+ }
+ }
+
+set_freq:
+
+ ret = set_clk_freq(core, freq);
+ if (ret) {
+ dev_err(dev, "failed to set clock rate %lu (%d)\n",
+ freq, ret);
+ return ret;
+ }

ret = load_scale_bw(core);
- if (ret)
- goto err;
+ if (ret) {
+ dev_err(dev, "failed to set bandwidth (%d)\n",
+ ret);
+ return ret;
+ }

return 0;
+}

-err:
- dev_err(dev, "failed to set clock rate %lu or bandwidth (%d)\n",
- freq, ret);
- return ret;
+int venus_helper_load_scale_clocks(struct venus_inst *inst)
+{
+ if (IS_V4(inst->core))
+ return scale_clocks_v4(inst);
+
+ return scale_clocks(inst);
}
EXPORT_SYMBOL_GPL(venus_helper_load_scale_clocks);

@@ -874,6 +969,7 @@ int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
struct hfi_videocores_usage_type cu;

+ inst->clk_data.core_id = usage;
if (!IS_V4(inst->core))
return 0;

@@ -1235,7 +1331,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)

venus_helper_free_dpb_bufs(inst);

- venus_helper_load_scale_clocks(core);
+ venus_helper_load_scale_clocks(inst);
INIT_LIST_HEAD(&inst->registeredbufs);
}

@@ -1288,7 +1384,6 @@ int venus_helper_process_initial_out_bufs(struct venus_inst *inst)

int venus_helper_vb2_start_streaming(struct venus_inst *inst)
{
- struct venus_core *core = inst->core;
int ret;

ret = venus_helper_intbufs_alloc(inst);
@@ -1299,7 +1394,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst *inst)
if (ret)
goto err_bufs_free;

- venus_helper_load_scale_clocks(core);
+ venus_helper_load_scale_clocks(inst);

ret = hfi_session_load_res(inst);
if (ret)
diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h
index ba44a28..934ac0f 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -61,7 +61,7 @@ int venus_helper_power_enable(struct venus_core *core, u32 session_type,
int venus_helper_intbufs_realloc(struct venus_inst *inst);
int venus_helper_queue_dpb_bufs(struct venus_inst *inst);
int venus_helper_unregister_bufs(struct venus_inst *inst);
-int venus_helper_load_scale_clocks(struct venus_core *core);
+int venus_helper_load_scale_clocks(struct venus_inst *inst);
int venus_helper_process_initial_cap_bufs(struct venus_inst *inst);
int venus_helper_process_initial_out_bufs(struct venus_inst *inst);
void venus_helper_get_ts_metadata(struct venus_inst *inst, u64 timestamp_us,
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 3261657..73325ed 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -857,7 +857,7 @@ static int vdec_start_capture(struct venus_inst *inst)
if (ret)
goto free_dpb_bufs;

- venus_helper_load_scale_clocks(inst->core);
+ venus_helper_load_scale_clocks(inst);

ret = hfi_session_continue(inst);
if (ret)
@@ -1065,7 +1065,7 @@ static void vdec_session_release(struct venus_inst *inst)
hfi_session_abort(inst);

venus_helper_free_dpb_bufs(inst);
- venus_helper_load_scale_clocks(core);
+ venus_helper_load_scale_clocks(inst);
INIT_LIST_HEAD(&inst->registeredbufs);

mutex_unlock(&inst->lock);
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

2019-08-27 06:23:09

by Aniket Masule

[permalink] [raw]
Subject: [PATCH 1/3] media: venus: Add codec data table

Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock. Add vsp cycles, cycles
required by stream processor. Initialize the codec
data with core resources.

Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/core.c | 13 +++++++++++++
drivers/media/platform/qcom/venus/core.h | 16 +++++++++++++++
drivers/media/platform/qcom/venus/helpers.c | 30 +++++++++++++++++++++++++++++
drivers/media/platform/qcom/venus/helpers.h | 1 +
drivers/media/platform/qcom/venus/vdec.c | 4 ++++
drivers/media/platform/qcom/venus/venc.c | 4 ++++
6 files changed, 68 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
index 19cbe9d..49d32b2 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -480,6 +480,17 @@ static __maybe_unused int venus_runtime_resume(struct device *dev)
{ 244800, 100000000 }, /* 1920x1080@30 */
};

+static struct codec_freq_data sdm845_codec_freq_data[] = {
+ { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
+ { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
+ { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
+ { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
+ { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
+ { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
+ { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
+ { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
+};
+
static const struct bw_tbl sdm845_bw_table_enc[] = {
{ 1944000, 1612000, 0, 2416000, 0 }, /* 3840x2160@60 */
{ 972000, 951000, 0, 1434000, 0 }, /* 3840x2160@30 */
@@ -501,6 +512,8 @@ static __maybe_unused int venus_runtime_resume(struct device *dev)
.bw_tbl_enc_size = ARRAY_SIZE(sdm845_bw_table_enc),
.bw_tbl_dec = sdm845_bw_table_dec,
.bw_tbl_dec_size = ARRAY_SIZE(sdm845_bw_table_dec),
+ .codec_freq_data = sdm845_codec_freq_data,
+ .codec_freq_data_size = ARRAY_SIZE(sdm845_codec_freq_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 3110400, /* 4096x2160@90 */
diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
index 13e35f3..684a950 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -26,6 +26,13 @@ struct reg_val {
u32 value;
};

+struct codec_freq_data {
+ u32 pixfmt;
+ u32 session_type;
+ unsigned long vpp_freq;
+ unsigned long vsp_freq;
+};
+
struct bw_tbl {
u32 mbs_per_sec;
u32 avg;
@@ -44,6 +51,8 @@ struct venus_resources {
unsigned int bw_tbl_dec_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+ const struct codec_freq_data *codec_freq_data;
+ unsigned int codec_freq_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -221,6 +230,12 @@ struct venus_buffer {
struct list_head ref_list;
};

+struct clock_data {
+ u32 core_id;
+ unsigned long freq;
+ const struct codec_freq_data *codec_freq_data;
+};
+
#define to_venus_buffer(ptr) container_of(ptr, struct venus_buffer, vb)

enum venus_dec_state {
@@ -301,6 +316,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+ struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index 4320ea9..71af237 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -883,6 +883,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
}
EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);

+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
+{
+ const struct codec_freq_data *data;
+ unsigned int i, data_size;
+ u32 pixfmt;
+ int ret = 0;
+
+ if (!IS_V4(inst->core))
+ return 0;
+
+ data = inst->core->res->codec_freq_data;
+ data_size = inst->core->res->codec_freq_data_size;
+ pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+ inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+ for (i = 0; i < data_size; i++) {
+ if (data[i].pixfmt == pixfmt &&
+ data[i].session_type == inst->session_type) {
+ inst->clk_data.codec_freq_data = &data[i];
+ break;
+ }
+ }
+
+ if (!inst->clk_data.codec_freq_data)
+ ret = -EINVAL;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_freq_data);
+
int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
unsigned int output_bufs,
unsigned int output2_bufs)
diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h
index 791c35c..ba44a28 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -34,6 +34,7 @@ int venus_helper_set_output_resolution(struct venus_inst *inst,
unsigned int width, unsigned int height,
u32 buftype);
int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
+int venus_helper_init_codec_freq_data(struct venus_inst *inst);
int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
unsigned int output_bufs,
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 0b7d65d..3261657 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -674,6 +674,10 @@ static int vdec_session_init(struct venus_inst *inst)
if (ret)
goto deinit;

+ ret = venus_helper_init_codec_freq_data(inst);
+ if (ret)
+ goto deinit;
+
return 0;
deinit:
hfi_session_deinit(inst);
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index 1b7fb2d..501fb8c 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -842,6 +842,10 @@ static int venc_init_session(struct venus_inst *inst)
if (ret)
goto deinit;

+ ret = venus_helper_init_codec_freq_data(inst);
+ if (ret)
+ goto deinit;
+
ret = venc_set_properties(inst);
if (ret)
goto deinit;
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

2019-08-27 06:23:23

by Aniket Masule

[permalink] [raw]
Subject: [PATCH 3/3] media: venus: Update to bitrate based clock scaling

Introduced clock scaling using bitrate, preavious
calculations consider only the cycles per mb.
Also, clock scaling is now triggered before every
buffer being queued to the device. This helps in
deciding precise clock cycles required.

Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/helpers.c | 33 ++++++++++++++++++++++++-----
1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index 4ed630b..8fee0ef 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -516,17 +516,26 @@ static int scale_clocks(struct venus_inst *inst)
return 0;
}

-static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+ unsigned long filled_len)
{
- unsigned long vpp_freq = 0;
+ unsigned long vpp_freq = 0, vsp_freq = 0;
+ u64 fps = inst->fps;
u32 mbs_per_sec;

- mbs_per_sec = load_per_instance(inst);
+ mbs_per_sec = load_per_instance(inst) / inst->fps;
vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
/* 21 / 20 is overhead factor */
vpp_freq += vpp_freq / 20;
+ vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;

- return vpp_freq;
+ /* 10 / 7 is overhead factor */
+ if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+ vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
+ else
+ vsp_freq += ((fps * filled_len * 8) * 10) / 7;
+
+ return max(vpp_freq, vsp_freq);
}

static int scale_clocks_v4(struct venus_inst *inst)
@@ -534,12 +543,24 @@ static int scale_clocks_v4(struct venus_inst *inst)
struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
+ struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
struct device *dev = core->dev;
unsigned int i;
unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+ unsigned long filled_len = 0;
+ struct venus_buffer *buf, *n;
+ struct vb2_buffer *vb;
int ret;

- freq = calculate_vpp_freq(inst);
+ v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+ vb = &buf->vb.vb2_buf;
+ filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+ }
+
+ if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+ return 0;
+
+ freq = calculate_inst_freq(inst, filled_len);
inst->clk_data.freq = freq;

mutex_lock(&core->lock);
@@ -701,6 +722,8 @@ void venus_helper_get_ts_metadata(struct venus_inst *inst, u64 timestamp_us,

if (inst->session_type == VIDC_SESSION_TYPE_DEC)
put_ts_metadata(inst, vbuf);
+
+ venus_helper_load_scale_clocks(inst);
} else if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
if (inst->session_type == VIDC_SESSION_TYPE_ENC)
fdata.buffer_type = HFI_BUFFER_OUTPUT;
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project