In this patch series, clock scaling and core selection methods are
updated. Current clock scaling and core selection methods are same
for vpu4 and previous versions. Introducing load calculations using
vpp cycles, which indicates the cycles required by video hardware to
process each macroblock. Clock scaling is now done more precisely using
vpp cycles. Instance is assigned to core with minimum load, instead of
of static assignment
Changes since v1:
- Corrected VPP cycles entries in codec data table.
- Removed filled_len from arguments to calculate_inst_freq,
filled_len won't be used in this patch series.
filled_len will be used in clock scaling based on bitrate.
Aniket Masule (5):
media: venus: Add codec data table
media: venus: Initialize codec data
media: venus: Update clock scaling
media: venus: Add interface for load per core
media: venus: Update core selection
drivers/media/platform/qcom/venus/core.c | 13 ++
drivers/media/platform/qcom/venus/core.h | 15 ++
drivers/media/platform/qcom/venus/helpers.c | 186 +++++++++++++++++++++++--
drivers/media/platform/qcom/venus/helpers.h | 3 +-
drivers/media/platform/qcom/venus/hfi_helper.h | 1 +
drivers/media/platform/qcom/venus/hfi_parser.h | 5 +
drivers/media/platform/qcom/venus/vdec.c | 9 +-
drivers/media/platform/qcom/venus/venc.c | 8 +-
8 files changed, 226 insertions(+), 14 deletions(-)
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Initialize the codec data with core resources.
Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/helpers.c | 30 +++++++++++++++++++++++++++++
drivers/media/platform/qcom/venus/helpers.h | 1 +
drivers/media/platform/qcom/venus/vdec.c | 4 ++++
drivers/media/platform/qcom/venus/venc.c | 4 ++++
4 files changed, 39 insertions(+)
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index 5cad601..f7f724b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
}
EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
+int venus_helper_init_codec_data(struct venus_inst *inst)
+{
+ const struct codec_data *codec_data;
+ unsigned int i, codec_data_size;
+ u32 pixfmt;
+ int ret = 0;
+
+ if (!IS_V4(inst->core))
+ return 0;
+
+ codec_data = inst->core->res->codec_data;
+ codec_data_size = inst->core->res->codec_data_size;
+ pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+ inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+ for (i = 0; i < codec_data_size; i++) {
+ if (codec_data[i].pixfmt == pixfmt &&
+ codec_data[i].session_type == inst->session_type) {
+ inst->clk_data.codec_data = &codec_data[i];
+ break;
+ }
+ }
+
+ if (!inst->clk_data.codec_data)
+ ret = -EINVAL;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
+
int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
unsigned int output_bufs,
unsigned int output2_bufs)
diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h
index 2475f284..f9360a8 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -41,6 +41,7 @@ int venus_helper_set_output_resolution(struct venus_inst *inst,
unsigned int width, unsigned int height,
u32 buftype);
int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
+int venus_helper_init_codec_data(struct venus_inst *inst);
int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
unsigned int output_bufs,
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 282de21..51795fd 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -660,6 +660,10 @@ static int vdec_init_session(struct venus_inst *inst)
if (ret)
goto deinit;
+ ret = venus_helper_init_codec_data(inst);
+ if (ret)
+ goto deinit;
+
return 0;
deinit:
hfi_session_deinit(inst);
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index 32cff29..792cdce 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -847,6 +847,10 @@ static int venc_init_session(struct venus_inst *inst)
if (ret)
goto deinit;
+ ret = venus_helper_init_codec_data(inst);
+ if (ret)
+ goto deinit;
+
ret = venc_set_properties(inst);
if (ret)
goto deinit;
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Add vpp cycles for for different types of codec
It indicates the cycles required by video hardware
to process each macroblock.
Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/core.c | 13 +++++++++++++
drivers/media/platform/qcom/venus/core.h | 15 +++++++++++++++
2 files changed, 28 insertions(+)
diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
index 7393667..43eb446 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -473,9 +473,22 @@ static __maybe_unused int venus_runtime_resume(struct device *dev)
{ 244800, 100000000 }, /* 1920x1080@30 */
};
+static struct codec_data sdm845_codec_data[] = {
+ { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675 },
+ { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675 },
+ { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675 },
+ { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200 },
+ { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200 },
+ { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200 },
+ { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200 },
+ { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200 },
+};
+
static const struct venus_resources sdm845_res = {
.freq_tbl = sdm845_freq_table,
.freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
+ .codec_data = sdm845_codec_data,
+ .codec_data_size = ARRAY_SIZE(sdm845_codec_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 2563200,
diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
index 7a3feb5..b1a9b43 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -35,12 +35,20 @@ struct reg_val {
u32 value;
};
+struct codec_data {
+u32 pixfmt;
+u32 session_type;
+int vpp_cycles;
+};
+
struct venus_resources {
u64 dma_mask;
const struct freq_tbl *freq_tbl;
unsigned int freq_tbl_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+ const struct codec_data *codec_data;
+ unsigned int codec_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -216,6 +224,12 @@ struct venus_buffer {
struct list_head ref_list;
};
+struct clock_data {
+ u32 core_id;
+ unsigned long freq;
+ struct codec_data *codec_data;
+};
+
#define to_venus_buffer(ptr) container_of(ptr, struct venus_buffer, vb)
/**
@@ -275,6 +289,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+ struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required.
Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/helpers.c | 88 +++++++++++++++++++++++++++--
1 file changed, 84 insertions(+), 4 deletions(-)
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index f7f724b..7bcc1e6 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 session_type)
return mbs_per_sec;
}
-static int load_scale_clocks(struct venus_core *core)
+static int scale_clocks(struct venus_inst *inst)
{
+ struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
@@ -398,6 +399,86 @@ static int load_scale_clocks(struct venus_core *core)
return ret;
}
+static unsigned long calculate_inst_freq(struct venus_inst *inst)
+{
+ unsigned long vpp_cycles = 0;
+ u32 mbs_per_sec;
+
+ mbs_per_sec = load_per_instance(inst);
+ vpp_cycles = mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
+ /* 21 / 20 is overhead factor */
+ vpp_cycles += vpp_cycles / 20;
+
+ return vpp_cycles;
+}
+
+static int scale_clocks_vpu4(struct venus_inst *inst)
+{
+ struct venus_core *core = inst->core;
+ const struct freq_tbl *table = core->res->freq_tbl;
+ unsigned int num_rows = core->res->freq_tbl_size;
+
+ struct clk *clk = core->clks[0];
+ struct device *dev = core->dev;
+ unsigned int i;
+ unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
+ int ret;
+
+ freq = calculate_inst_freq(inst);
+
+ if (freq > table[0].freq)
+ goto err;
+
+ for (i = 0; i < num_rows; i++) {
+ if (freq > table[i].freq)
+ break;
+ freq = table[i].freq;
+ }
+
+ inst->clk_data.freq = freq;
+
+ mutex_lock(&core->lock);
+ list_for_each_entry(inst, &core->instances, list) {
+ if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+ freq_core0 += inst->clk_data.freq;
+ } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+ freq_core1 += inst->clk_data.freq;
+ } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+ freq_core0 += inst->clk_data.freq;
+ freq_core1 += inst->clk_data.freq;
+ }
+ }
+ mutex_unlock(&core->lock);
+
+ freq = max(freq_core0, freq_core1);
+
+ ret = clk_set_rate(clk, freq);
+ if (ret)
+ goto err;
+
+ ret = clk_set_rate(core->core0_clk, freq);
+ if (ret)
+ goto err;
+
+ ret = clk_set_rate(core->core1_clk, freq);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
+ return ret;
+}
+
+static int load_scale_clocks(struct venus_inst *inst)
+{
+ if (IS_V3(inst->core) || IS_V1(inst->core))
+ return scale_clocks(inst);
+ else
+ return scale_clocks_vpu4(inst);
+}
+
static void fill_buffer_desc(const struct venus_buffer *buf,
struct hfi_buffer_desc *bd, bool response)
{
@@ -1053,7 +1134,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
venus_helper_free_dpb_bufs(inst);
- load_scale_clocks(core);
+ load_scale_clocks(inst);
INIT_LIST_HEAD(&inst->registeredbufs);
}
@@ -1070,7 +1151,6 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
int venus_helper_vb2_start_streaming(struct venus_inst *inst)
{
- struct venus_core *core = inst->core;
int ret;
ret = intbufs_alloc(inst);
@@ -1081,7 +1161,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst *inst)
if (ret)
goto err_bufs_free;
- load_scale_clocks(core);
+ load_scale_clocks(inst);
ret = hfi_session_load_res(inst);
if (ret)
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Present core assignment is static. Introduced load balancing
across the cores. Load on earch core is calculated and core
with minimum load is assigned to given instance.
Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/helpers.c | 50 +++++++++++++++++++++++++----
drivers/media/platform/qcom/venus/helpers.h | 2 +-
drivers/media/platform/qcom/venus/vdec.c | 5 +--
drivers/media/platform/qcom/venus/venc.c | 4 ++-
4 files changed, 51 insertions(+), 10 deletions(-)
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index edb653e..38d617b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -497,6 +497,16 @@ static int load_scale_clocks(struct venus_inst *inst)
return scale_clocks_vpu4(inst);
}
+int set_core_usage(struct venus_inst *inst, u32 usage)
+{
+ const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
+ struct hfi_videocores_usage_type cu;
+
+ cu.video_core_enable_mask = usage;
+
+ return hfi_session_set_property(inst, ptype, &cu);
+}
+
static void fill_buffer_desc(const struct venus_buffer *buf,
struct hfi_buffer_desc *bd, bool response)
{
@@ -800,19 +810,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode)
}
EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
+int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max)
{
- const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
- struct hfi_videocores_usage_type cu;
+ struct venus_core *core = inst->core;
+ u32 min_core_id = 0, core0_load = 0, core1_load = 0;
+ unsigned long min_load, max_freq, cur_inst_load;
+ int ret;
if (!IS_V4(inst->core))
return 0;
- cu.video_core_enable_mask = usage;
+ core0_load = load_per_core(core, VIDC_CORE_ID_1);
+ core1_load = load_per_core(core, VIDC_CORE_ID_2);
- return hfi_session_set_property(inst, ptype, &cu);
+ min_core_id = core0_load < core1_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
+ min_load = min(core0_load, core1_load);
+
+ if (cores_max < VIDC_CORE_ID_1) {
+ min_core_id = VIDC_CORE_ID_1;
+ min_load = core0_load;
+ }
+
+ cur_inst_load = load_per_instance(inst) *
+ inst->clk_data.codec_data->vpp_cycles;
+ max_freq = core->res->freq_tbl[0].freq;
+
+ if ((cur_inst_load + min_load) > max_freq) {
+ dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
+ cur_inst_load, max_freq);
+ return -EINVAL;
+ }
+
+ ret = set_core_usage(inst, min_core_id);
+
+ if (ret)
+ return ret;
+
+ inst->clk_data.core_id = min_core_id;
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
+EXPORT_SYMBOL_GPL(venus_helper_decide_core);
int venus_helper_init_codec_data(struct venus_inst *inst)
{
diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h
index f9360a8..c41ceb3 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst *inst,
u32 buftype);
int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
int venus_helper_init_codec_data(struct venus_inst *inst);
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
+int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max);
int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
unsigned int output_bufs,
unsigned int output2_bufs);
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 51795fd..9f988ba 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -544,14 +544,15 @@ static int vdec_output_conf(struct venus_inst *inst)
u32 height = inst->out_height;
u32 out_fmt, out2_fmt;
bool ubwc = false;
- u32 ptype;
+ u32 ptype, cores_max;
int ret;
ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
if (ret)
return ret;
- ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_1);
+ cores_max = core_num_max(inst);
+ ret = venus_helper_decide_core(inst, cores_max);
if (ret)
return ret;
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index 792cdce..ed39efd 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -654,13 +654,15 @@ static int venc_set_properties(struct venus_inst *inst)
struct hfi_quantization quant;
struct hfi_quantization_range quant_range;
u32 ptype, rate_control, bitrate, profile = 0, level = 0;
+ u32 cores_max;
int ret;
ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
if (ret)
return ret;
- ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_2);
+ cores_max = core_num_max(inst);
+ ret = venus_helper_decide_core(inst, cores_max);
if (ret)
return ret;
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Add and interface to calculate load per core. Also,
add an interface to get maximum cores available with
video. This interface is preparation for updating core
selection.
Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/helpers.c | 18 ++++++++++++++++++
drivers/media/platform/qcom/venus/hfi_helper.h | 1 +
drivers/media/platform/qcom/venus/hfi_parser.h | 5 +++++
3 files changed, 24 insertions(+)
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index 7bcc1e6..edb653e 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -331,6 +331,24 @@ static u32 load_per_instance(struct venus_inst *inst)
return mbs * inst->fps;
}
+static u32 load_per_core(struct venus_core *core, u32 core_id)
+{
+ struct venus_inst *inst = NULL;
+ u32 mbs_per_sec = 0, load = 0;
+
+ mutex_lock(&core->lock);
+ list_for_each_entry(inst, &core->instances, list) {
+ if (!(inst->clk_data.core_id == core_id))
+ continue;
+
+ mbs_per_sec += load_per_instance(inst);
+ load += mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
+ }
+ mutex_unlock(&core->lock);
+
+ return load;
+}
+
static u32 load_per_type(struct venus_core *core, u32 session_type)
{
struct venus_inst *inst = NULL;
diff --git a/drivers/media/platform/qcom/venus/hfi_helper.h b/drivers/media/platform/qcom/venus/hfi_helper.h
index 34ea503..3677e2e 100644
--- a/drivers/media/platform/qcom/venus/hfi_helper.h
+++ b/drivers/media/platform/qcom/venus/hfi_helper.h
@@ -559,6 +559,7 @@ struct hfi_bitrate {
#define HFI_CAPABILITY_LCU_SIZE 0x14
#define HFI_CAPABILITY_HIER_P_HYBRID_NUM_ENH_LAYERS 0x15
#define HFI_CAPABILITY_MBS_PER_SECOND_POWERSAVE 0x16
+#define HFI_CAPABILITY_MAX_VIDEOCORES 0x2B
struct hfi_capability {
u32 capability_type;
diff --git a/drivers/media/platform/qcom/venus/hfi_parser.h b/drivers/media/platform/qcom/venus/hfi_parser.h
index 3e931c7..264e6dd 100644
--- a/drivers/media/platform/qcom/venus/hfi_parser.h
+++ b/drivers/media/platform/qcom/venus/hfi_parser.h
@@ -107,4 +107,9 @@ static inline u32 frate_step(struct venus_inst *inst)
return cap_step(inst, HFI_CAPABILITY_FRAMERATE);
}
+static inline u32 core_num_max(struct venus_inst *inst)
+{
+ return cap_max(inst, HFI_CAPABILITY_MAX_VIDEOCORES);
+}
+
#endif
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Hi Aniket,
On 6/11/19 9:05 AM, Aniket Masule wrote:
> Add vpp cycles for for different types of codec
> It indicates the cycles required by video hardware
> to process each macroblock.
>
> Signed-off-by: Aniket Masule <[email protected]>
> ---
> drivers/media/platform/qcom/venus/core.c | 13 +++++++++++++
> drivers/media/platform/qcom/venus/core.h | 15 +++++++++++++++
> 2 files changed, 28 insertions(+)
>
> diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
> index 7393667..43eb446 100644
> --- a/drivers/media/platform/qcom/venus/core.c
> +++ b/drivers/media/platform/qcom/venus/core.c
> @@ -473,9 +473,22 @@ static __maybe_unused int venus_runtime_resume(struct device *dev)
> { 244800, 100000000 }, /* 1920x1080@30 */
> };
>
> +static struct codec_data sdm845_codec_data[] = {
> + { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675 },
> + { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675 },
> + { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675 },
> + { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200 },
> + { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200 },
> + { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200 },
> + { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200 },
> + { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200 },
> +};
> +
> static const struct venus_resources sdm845_res = {
> .freq_tbl = sdm845_freq_table,
> .freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
> + .codec_data = sdm845_codec_data,
> + .codec_data_size = ARRAY_SIZE(sdm845_codec_data),
> .clks = {"core", "iface", "bus" },
> .clks_num = 3,
> .max_load = 2563200,
> diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
> index 7a3feb5..b1a9b43 100644
> --- a/drivers/media/platform/qcom/venus/core.h
> +++ b/drivers/media/platform/qcom/venus/core.h
> @@ -35,12 +35,20 @@ struct reg_val {
> u32 value;
> };
>
> +struct codec_data {
The name is very generic, could you rename the structure to something
like vpp_cycles_data?
> +u32 pixfmt;
> +u32 session_type;
> +int vpp_cycles;
please check your editor, those fields should have a tab to the right.
> +};
> +
> struct venus_resources {
> u64 dma_mask;
> const struct freq_tbl *freq_tbl;
> unsigned int freq_tbl_size;
> const struct reg_val *reg_tbl;
> unsigned int reg_tbl_size;
> + const struct codec_data *codec_data;
> + unsigned int codec_data_size;
> const char * const clks[VIDC_CLKS_NUM_MAX];
> unsigned int clks_num;
> enum hfi_version hfi_version;
> @@ -216,6 +224,12 @@ struct venus_buffer {
> struct list_head ref_list;
> };
>
> +struct clock_data {
> + u32 core_id;
> + unsigned long freq;
I cannot see how this 'freq' structure field is used? I can see that you
fill it in 3/5 patch but you don't used nowhere.
> + struct codec_data *codec_data;
> +};
Having the fact that freq field seems not needed can we just merge the
fields in venus_inst structure?
> +
> #define to_venus_buffer(ptr) container_of(ptr, struct venus_buffer, vb)
>
> /**
> @@ -275,6 +289,7 @@ struct venus_inst {
> struct list_head list;
> struct mutex lock;
> struct venus_core *core;
> + struct clock_data clk_data;
> struct list_head dpbbufs;
> struct list_head internalbufs;
> struct list_head registeredbufs;
>
--
regards,
Stan
Hi Aniket,
On 6/11/19 9:05 AM, Aniket Masule wrote:
> Initialize the codec data with core resources.
Please squash this patch in 1/5 patch.
>
> Signed-off-by: Aniket Masule <[email protected]>
> ---
> drivers/media/platform/qcom/venus/helpers.c | 30 +++++++++++++++++++++++++++++
> drivers/media/platform/qcom/venus/helpers.h | 1 +
> drivers/media/platform/qcom/venus/vdec.c | 4 ++++
> drivers/media/platform/qcom/venus/venc.c | 4 ++++
> 4 files changed, 39 insertions(+)
>
> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
> index 5cad601..f7f724b 100644
> --- a/drivers/media/platform/qcom/venus/helpers.c
> +++ b/drivers/media/platform/qcom/venus/helpers.c
> @@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
> }
> EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
>
> +int venus_helper_init_codec_data(struct venus_inst *inst)
> +{
> + const struct codec_data *codec_data;
> + unsigned int i, codec_data_size;
> + u32 pixfmt;
> + int ret = 0;
> +
> + if (!IS_V4(inst->core))
> + return 0;
> +
> + codec_data = inst->core->res->codec_data;
> + codec_data_size = inst->core->res->codec_data_size;
> + pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
> + inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
> +
> + for (i = 0; i < codec_data_size; i++) {
> + if (codec_data[i].pixfmt == pixfmt &&
> + codec_data[i].session_type == inst->session_type) {
> + inst->clk_data.codec_data = &codec_data[i];
> + break;
> + }
> + }
> +
> + if (!inst->clk_data.codec_data)
> + ret = -EINVAL;
just return -EINVAL
> +
> + return ret;
return 0 is enough, and that will avoid ret variable.
> +}
> +EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
> +
> int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
> unsigned int output_bufs,
> unsigned int output2_bufs)
> diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h
> index 2475f284..f9360a8 100644
> --- a/drivers/media/platform/qcom/venus/helpers.h
> +++ b/drivers/media/platform/qcom/venus/helpers.h
> @@ -41,6 +41,7 @@ int venus_helper_set_output_resolution(struct venus_inst *inst,
> unsigned int width, unsigned int height,
> u32 buftype);
> int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
> +int venus_helper_init_codec_data(struct venus_inst *inst);
> int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
> int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
> unsigned int output_bufs,
> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
> index 282de21..51795fd 100644
> --- a/drivers/media/platform/qcom/venus/vdec.c
> +++ b/drivers/media/platform/qcom/venus/vdec.c
> @@ -660,6 +660,10 @@ static int vdec_init_session(struct venus_inst *inst)
> if (ret)
> goto deinit;
>
> + ret = venus_helper_init_codec_data(inst);
> + if (ret)
> + goto deinit;
> +
> return 0;
> deinit:
> hfi_session_deinit(inst);
> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
> index 32cff29..792cdce 100644
> --- a/drivers/media/platform/qcom/venus/venc.c
> +++ b/drivers/media/platform/qcom/venus/venc.c
> @@ -847,6 +847,10 @@ static int venc_init_session(struct venus_inst *inst)
> if (ret)
> goto deinit;
>
> + ret = venus_helper_init_codec_data(inst);
> + if (ret)
> + goto deinit;
> +
> ret = venc_set_properties(inst);
> if (ret)
> goto deinit;
>
--
regards,
Stan
Hi Aniket,
On 6/11/19 9:05 AM, Aniket Masule wrote:
> Current clock scaling calculations are same for vpu4 and
> previous versions. For vpu4, Clock scaling calculations
> are updated with cycles/mb. This helps in getting precise
> clock required.
>
> Signed-off-by: Aniket Masule <[email protected]>
> ---
> drivers/media/platform/qcom/venus/helpers.c | 88 +++++++++++++++++++++++++++--
> 1 file changed, 84 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
> index f7f724b..7bcc1e6 100644
> --- a/drivers/media/platform/qcom/venus/helpers.c
> +++ b/drivers/media/platform/qcom/venus/helpers.c
> @@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 session_type)
> return mbs_per_sec;
> }
>
> -static int load_scale_clocks(struct venus_core *core)
> +static int scale_clocks(struct venus_inst *inst)
> {
> + struct venus_core *core = inst->core;
> const struct freq_tbl *table = core->res->freq_tbl;
> unsigned int num_rows = core->res->freq_tbl_size;
> unsigned long freq = table[0].freq;
> @@ -398,6 +399,86 @@ static int load_scale_clocks(struct venus_core *core)
> return ret;
> }
>
> +static unsigned long calculate_inst_freq(struct venus_inst *inst)
> +{
> + unsigned long vpp_cycles = 0;
> + u32 mbs_per_sec;
> +
> + mbs_per_sec = load_per_instance(inst);
> + vpp_cycles = mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
> + /* 21 / 20 is overhead factor */
> + vpp_cycles += vpp_cycles / 20;
shouldn't you multiply by 21?
> +
> + return vpp_cycles;
It is not clear to me is that vpp_cycles or frequency (rate)? I just
lost in dimensions used here.
If you return vpp_cycles could you rename the function name?
> +}
> +
> +static int scale_clocks_vpu4(struct venus_inst *inst)
does vpu4 equivalent to HFI_VERSION_4XX? If so could you rename function
to scale_clocks_v4.
> +{
> + struct venus_core *core = inst->core;
> + const struct freq_tbl *table = core->res->freq_tbl;
> + unsigned int num_rows = core->res->freq_tbl_size;
> +
> + struct clk *clk = core->clks[0];
> + struct device *dev = core->dev;
> + unsigned int i;
> + unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
> + int ret;
> +
> + freq = calculate_inst_freq(inst);
> +
> + if (freq > table[0].freq)
> + goto err;
> +
> + for (i = 0; i < num_rows; i++) {
> + if (freq > table[i].freq)
> + break;
> + freq = table[i].freq;
> + }
> +
> + inst->clk_data.freq = freq;
> +
> + mutex_lock(&core->lock);
> + list_for_each_entry(inst, &core->instances, list) {
> + if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
> + freq_core0 += inst->clk_data.freq;
> + } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
> + freq_core1 += inst->clk_data.freq;
> + } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
> + freq_core0 += inst->clk_data.freq;
> + freq_core1 += inst->clk_data.freq;
> + }
> + }
> + mutex_unlock(&core->lock);
> +
> + freq = max(freq_core0, freq_core1);
hmm, this doesn't look right. core0 and core1 frequencies can be
different why you get the bigger and set it on both?
> +
> + ret = clk_set_rate(clk, freq);
> + if (ret)
> + goto err;
> +
> + ret = clk_set_rate(core->core0_clk, freq);
IMO this should set freq_core0
> + if (ret)
> + goto err;
> +
> + ret = clk_set_rate(core->core1_clk, freq);
set freq_core1
> + if (ret)
> + goto err;
> +
> + return 0;
> +
> +err:
> + dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
> + return ret;
> +}
> +
> +static int load_scale_clocks(struct venus_inst *inst)
> +{
> + if (IS_V3(inst->core) || IS_V1(inst->core))
> + return scale_clocks(inst);
> + else
> + return scale_clocks_vpu4(inst);
could you reorder this to:
if (IS_V4())
return scale_clocks_v4(inst);
return scale_clocks(inst);
> +}
> +
> static void fill_buffer_desc(const struct venus_buffer *buf,
> struct hfi_buffer_desc *bd, bool response)
> {
> @@ -1053,7 +1134,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
>
> venus_helper_free_dpb_bufs(inst);
>
> - load_scale_clocks(core);
> + load_scale_clocks(inst);
> INIT_LIST_HEAD(&inst->registeredbufs);
> }
>
> @@ -1070,7 +1151,6 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
>
> int venus_helper_vb2_start_streaming(struct venus_inst *inst)
> {
> - struct venus_core *core = inst->core;
> int ret;
>
> ret = intbufs_alloc(inst);
> @@ -1081,7 +1161,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst *inst)
> if (ret)
> goto err_bufs_free;
>
> - load_scale_clocks(core);
> + load_scale_clocks(inst);
>
> ret = hfi_session_load_res(inst);
> if (ret)
>
--
regards,
Stan
Hi Aniket,
On 6/11/19 9:05 AM, Aniket Masule wrote:
> Add and interface to calculate load per core. Also,
> add an interface to get maximum cores available with
> video. This interface is preparation for updating core
> selection.
>
> Signed-off-by: Aniket Masule <[email protected]>
> ---
> drivers/media/platform/qcom/venus/helpers.c | 18 ++++++++++++++++++
> drivers/media/platform/qcom/venus/hfi_helper.h | 1 +
> drivers/media/platform/qcom/venus/hfi_parser.h | 5 +++++
> 3 files changed, 24 insertions(+)
>
> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
> index 7bcc1e6..edb653e 100644
> --- a/drivers/media/platform/qcom/venus/helpers.c
> +++ b/drivers/media/platform/qcom/venus/helpers.c
> @@ -331,6 +331,24 @@ static u32 load_per_instance(struct venus_inst *inst)
> return mbs * inst->fps;
> }
>
> +static u32 load_per_core(struct venus_core *core, u32 core_id)
> +{
> + struct venus_inst *inst = NULL;
> + u32 mbs_per_sec = 0, load = 0;
> +
> + mutex_lock(&core->lock);
> + list_for_each_entry(inst, &core->instances, list) {
> + if (!(inst->clk_data.core_id == core_id))
> + continue;
> +
> + mbs_per_sec += load_per_instance(inst);
> + load += mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
> + }
> + mutex_unlock(&core->lock);
> +
> + return load;
> +}
> +
> static u32 load_per_type(struct venus_core *core, u32 session_type)
> {
> struct venus_inst *inst = NULL;
> diff --git a/drivers/media/platform/qcom/venus/hfi_helper.h b/drivers/media/platform/qcom/venus/hfi_helper.h
> index 34ea503..3677e2e 100644
> --- a/drivers/media/platform/qcom/venus/hfi_helper.h
> +++ b/drivers/media/platform/qcom/venus/hfi_helper.h
> @@ -559,6 +559,7 @@ struct hfi_bitrate {
> #define HFI_CAPABILITY_LCU_SIZE 0x14
> #define HFI_CAPABILITY_HIER_P_HYBRID_NUM_ENH_LAYERS 0x15
> #define HFI_CAPABILITY_MBS_PER_SECOND_POWERSAVE 0x16
> +#define HFI_CAPABILITY_MAX_VIDEOCORES 0x2B
please use tabs instead of spaces.
>
> struct hfi_capability {
> u32 capability_type;
> diff --git a/drivers/media/platform/qcom/venus/hfi_parser.h b/drivers/media/platform/qcom/venus/hfi_parser.h
> index 3e931c7..264e6dd 100644
> --- a/drivers/media/platform/qcom/venus/hfi_parser.h
> +++ b/drivers/media/platform/qcom/venus/hfi_parser.h
> @@ -107,4 +107,9 @@ static inline u32 frate_step(struct venus_inst *inst)
> return cap_step(inst, HFI_CAPABILITY_FRAMERATE);
> }
>
> +static inline u32 core_num_max(struct venus_inst *inst)
> +{
> + return cap_max(inst, HFI_CAPABILITY_MAX_VIDEOCORES);
> +}
> +
> #endif
>
--
regards,
Stan
Hi Aniket,
On 6/11/19 9:05 AM, Aniket Masule wrote:
> Present core assignment is static. Introduced load balancing
> across the cores. Load on earch core is calculated and core
> with minimum load is assigned to given instance.
>
> Signed-off-by: Aniket Masule <[email protected]>
> ---
> drivers/media/platform/qcom/venus/helpers.c | 50 +++++++++++++++++++++++++----
> drivers/media/platform/qcom/venus/helpers.h | 2 +-
> drivers/media/platform/qcom/venus/vdec.c | 5 +--
> drivers/media/platform/qcom/venus/venc.c | 4 ++-
> 4 files changed, 51 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
> index edb653e..38d617b 100644
> --- a/drivers/media/platform/qcom/venus/helpers.c
> +++ b/drivers/media/platform/qcom/venus/helpers.c
> @@ -497,6 +497,16 @@ static int load_scale_clocks(struct venus_inst *inst)
> return scale_clocks_vpu4(inst);
> }
>
> +int set_core_usage(struct venus_inst *inst, u32 usage)
> +{
> + const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
> + struct hfi_videocores_usage_type cu;
> +
> + cu.video_core_enable_mask = usage;
> +
> + return hfi_session_set_property(inst, ptype, &cu);
> +}
> +
> static void fill_buffer_desc(const struct venus_buffer *buf,
> struct hfi_buffer_desc *bd, bool response)
> {
> @@ -800,19 +810,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode)
> }
> EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
>
> -int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
> +int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max)
I think venus_helper_set_core is better?
> {
> - const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
> - struct hfi_videocores_usage_type cu;
> + struct venus_core *core = inst->core;
> + u32 min_core_id = 0, core0_load = 0, core1_load = 0;
> + unsigned long min_load, max_freq, cur_inst_load;
> + int ret;
>
> if (!IS_V4(inst->core))
> return 0;
>
> - cu.video_core_enable_mask = usage;
> + core0_load = load_per_core(core, VIDC_CORE_ID_1);
> + core1_load = load_per_core(core, VIDC_CORE_ID_2);
>
> - return hfi_session_set_property(inst, ptype, &cu);
> + min_core_id = core0_load < core1_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
> + min_load = min(core0_load, core1_load);
> +
> + if (cores_max < VIDC_CORE_ID_1) {
> + min_core_id = VIDC_CORE_ID_1;
> + min_load = core0_load;
> + }
could you please move that fragment just after IS_V4 check and return an
error if cores_max < VIDC_CORE_ID_1.
> +
> + cur_inst_load = load_per_instance(inst) *
> + inst->clk_data.codec_data->vpp_cycles;
> + max_freq = core->res->freq_tbl[0].freq;
> +
> + if ((cur_inst_load + min_load) > max_freq) {
> + dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
> + cur_inst_load, max_freq);
> + return -EINVAL;
> + }
> +
> + ret = set_core_usage(inst, min_core_id);
> +
> + if (ret)
> + return ret;
> +
> + inst->clk_data.core_id = min_core_id;
> +
> + return 0;
> }
> -EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
> +EXPORT_SYMBOL_GPL(venus_helper_decide_core);
>
> int venus_helper_init_codec_data(struct venus_inst *inst)
> {
> diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h
> index f9360a8..c41ceb3 100644
> --- a/drivers/media/platform/qcom/venus/helpers.h
> +++ b/drivers/media/platform/qcom/venus/helpers.h
> @@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst *inst,
> u32 buftype);
> int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
> int venus_helper_init_codec_data(struct venus_inst *inst);
> -int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
> +int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max);
> int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
> unsigned int output_bufs,
> unsigned int output2_bufs);
> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
> index 51795fd..9f988ba 100644
> --- a/drivers/media/platform/qcom/venus/vdec.c
> +++ b/drivers/media/platform/qcom/venus/vdec.c
> @@ -544,14 +544,15 @@ static int vdec_output_conf(struct venus_inst *inst)
> u32 height = inst->out_height;
> u32 out_fmt, out2_fmt;
> bool ubwc = false;
> - u32 ptype;
> + u32 ptype, cores_max;
> int ret;
>
> ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
> if (ret)
> return ret;
>
> - ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_1);
> + cores_max = core_num_max(inst);
please move core_max calculation in the venus_helper_decide_core() here
and below.
> + ret = venus_helper_decide_core(inst, cores_max);
> if (ret)
> return ret;
>
> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
> index 792cdce..ed39efd 100644
> --- a/drivers/media/platform/qcom/venus/venc.c
> +++ b/drivers/media/platform/qcom/venus/venc.c
> @@ -654,13 +654,15 @@ static int venc_set_properties(struct venus_inst *inst)
> struct hfi_quantization quant;
> struct hfi_quantization_range quant_range;
> u32 ptype, rate_control, bitrate, profile = 0, level = 0;
> + u32 cores_max;
> int ret;
>
> ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
> if (ret)
> return ret;
>
> - ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_2);
> + cores_max = core_num_max(inst);
> + ret = venus_helper_decide_core(inst, cores_max);
> if (ret)
> return ret;
>
>
--
regards,
Stan
Hi Stan,
On 2019-06-17 14:07, Stanimir Varbanov wrote:
> Hi Aniket,
>
> On 6/11/19 9:05 AM, Aniket Masule wrote:
>> Add vpp cycles for for different types of codec
>> It indicates the cycles required by video hardware
>> to process each macroblock.
>>
>> Signed-off-by: Aniket Masule <[email protected]>
>> ---
>> drivers/media/platform/qcom/venus/core.c | 13 +++++++++++++
>> drivers/media/platform/qcom/venus/core.h | 15 +++++++++++++++
>> 2 files changed, 28 insertions(+)
>>
>> diff --git a/drivers/media/platform/qcom/venus/core.c
>> b/drivers/media/platform/qcom/venus/core.c
>> index 7393667..43eb446 100644
>> --- a/drivers/media/platform/qcom/venus/core.c
>> +++ b/drivers/media/platform/qcom/venus/core.c
>> @@ -473,9 +473,22 @@ static __maybe_unused int
>> venus_runtime_resume(struct device *dev)
>> { 244800, 100000000 }, /* 1920x1080@30 */
>> };
>>
>> +static struct codec_data sdm845_codec_data[] = {
>> + { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675 },
>> + { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675 },
>> + { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675 },
>> + { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200 },
>> + { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200 },
>> + { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200 },
>> + { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200 },
>> + { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200 },
>> +};
>> +
>> static const struct venus_resources sdm845_res = {
>> .freq_tbl = sdm845_freq_table,
>> .freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
>> + .codec_data = sdm845_codec_data,
>> + .codec_data_size = ARRAY_SIZE(sdm845_codec_data),
>> .clks = {"core", "iface", "bus" },
>> .clks_num = 3,
>> .max_load = 2563200,
>> diff --git a/drivers/media/platform/qcom/venus/core.h
>> b/drivers/media/platform/qcom/venus/core.h
>> index 7a3feb5..b1a9b43 100644
>> --- a/drivers/media/platform/qcom/venus/core.h
>> +++ b/drivers/media/platform/qcom/venus/core.h
>> @@ -35,12 +35,20 @@ struct reg_val {
>> u32 value;
>> };
>>
>> +struct codec_data {
>
> The name is very generic, could you rename the structure to something
> like vpp_cycles_data?
>
I will be adding vsp_cycles with next patch for bitrate based clock
scaling.
So, I could rename it to codec_cycles_data.
>> +u32 pixfmt;
>> +u32 session_type;
>> +int vpp_cycles;
>
> please check your editor, those fields should have a tab to the right.
>
>> +};
>> +
>> struct venus_resources {
>> u64 dma_mask;
>> const struct freq_tbl *freq_tbl;
>> unsigned int freq_tbl_size;
>> const struct reg_val *reg_tbl;
>> unsigned int reg_tbl_size;
>> + const struct codec_data *codec_data;
>> + unsigned int codec_data_size;
>> const char * const clks[VIDC_CLKS_NUM_MAX];
>> unsigned int clks_num;
>> enum hfi_version hfi_version;
>> @@ -216,6 +224,12 @@ struct venus_buffer {
>> struct list_head ref_list;
>> };
>>
>> +struct clock_data {
>> + u32 core_id;
>> + unsigned long freq;
>
> I cannot see how this 'freq' structure field is used? I can see that
> you
> fill it in 3/5 patch but you don't used nowhere.
>
Yes Stan, I will remove 'freq' from clock data structure.
>> + struct codec_data *codec_data;
>> +};
>
> Having the fact that freq field seems not needed can we just merge the
> fields in venus_inst structure?
>
I will be adding 'freq' with next patch for bitrate based clock scaling.
So, it would be easier if we maintain separate structure from this
patch.
>> +
>> #define to_venus_buffer(ptr) container_of(ptr, struct venus_buffer,
>> vb)
>>
>> /**
>> @@ -275,6 +289,7 @@ struct venus_inst {
>> struct list_head list;
>> struct mutex lock;
>> struct venus_core *core;
>> + struct clock_data clk_data;
>> struct list_head dpbbufs;
>> struct list_head internalbufs;
>> struct list_head registeredbufs;
>>
Hi Stan,
On 2019-06-17 14:07, Stanimir Varbanov wrote:
> Hi Aniket,
>
> On 6/11/19 9:05 AM, Aniket Masule wrote:
>> Initialize the codec data with core resources.
>
> Please squash this patch in 1/5 patch.
Yes Stan.
>
>>
>> Signed-off-by: Aniket Masule <[email protected]>
>> ---
>> drivers/media/platform/qcom/venus/helpers.c | 30
>> +++++++++++++++++++++++++++++
>> drivers/media/platform/qcom/venus/helpers.h | 1 +
>> drivers/media/platform/qcom/venus/vdec.c | 4 ++++
>> drivers/media/platform/qcom/venus/venc.c | 4 ++++
>> 4 files changed, 39 insertions(+)
>>
>> diff --git a/drivers/media/platform/qcom/venus/helpers.c
>> b/drivers/media/platform/qcom/venus/helpers.c
>> index 5cad601..f7f724b 100644
>> --- a/drivers/media/platform/qcom/venus/helpers.c
>> +++ b/drivers/media/platform/qcom/venus/helpers.c
>> @@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst
>> *inst, u32 usage)
>> }
>> EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
>>
>> +int venus_helper_init_codec_data(struct venus_inst *inst)
>> +{
>> + const struct codec_data *codec_data;
>> + unsigned int i, codec_data_size;
>> + u32 pixfmt;
>> + int ret = 0;
>> +
>> + if (!IS_V4(inst->core))
>> + return 0;
>> +
>> + codec_data = inst->core->res->codec_data;
>> + codec_data_size = inst->core->res->codec_data_size;
>> + pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
>> + inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
>> +
>> + for (i = 0; i < codec_data_size; i++) {
>> + if (codec_data[i].pixfmt == pixfmt &&
>> + codec_data[i].session_type == inst->session_type) {
>> + inst->clk_data.codec_data = &codec_data[i];
>> + break;
>> + }
>> + }
>> +
>> + if (!inst->clk_data.codec_data)
>> + ret = -EINVAL;
>
> just return -EINVAL
>
>> +
>> + return ret;
>
> return 0 is enough, and that will avoid ret variable.
Sure Stan.
>
>> +}
>> +EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
>> +
>> int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int
>> input_bufs,
>> unsigned int output_bufs,
>> unsigned int output2_bufs)
>> diff --git a/drivers/media/platform/qcom/venus/helpers.h
>> b/drivers/media/platform/qcom/venus/helpers.h
>> index 2475f284..f9360a8 100644
>> --- a/drivers/media/platform/qcom/venus/helpers.h
>> +++ b/drivers/media/platform/qcom/venus/helpers.h
>> @@ -41,6 +41,7 @@ int venus_helper_set_output_resolution(struct
>> venus_inst *inst,
>> unsigned int width, unsigned int height,
>> u32 buftype);
>> int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
>> +int venus_helper_init_codec_data(struct venus_inst *inst);
>> int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
>> int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int
>> input_bufs,
>> unsigned int output_bufs,
>> diff --git a/drivers/media/platform/qcom/venus/vdec.c
>> b/drivers/media/platform/qcom/venus/vdec.c
>> index 282de21..51795fd 100644
>> --- a/drivers/media/platform/qcom/venus/vdec.c
>> +++ b/drivers/media/platform/qcom/venus/vdec.c
>> @@ -660,6 +660,10 @@ static int vdec_init_session(struct venus_inst
>> *inst)
>> if (ret)
>> goto deinit;
>>
>> + ret = venus_helper_init_codec_data(inst);
>> + if (ret)
>> + goto deinit;
>> +
>> return 0;
>> deinit:
>> hfi_session_deinit(inst);
>> diff --git a/drivers/media/platform/qcom/venus/venc.c
>> b/drivers/media/platform/qcom/venus/venc.c
>> index 32cff29..792cdce 100644
>> --- a/drivers/media/platform/qcom/venus/venc.c
>> +++ b/drivers/media/platform/qcom/venus/venc.c
>> @@ -847,6 +847,10 @@ static int venc_init_session(struct venus_inst
>> *inst)
>> if (ret)
>> goto deinit;
>>
>> + ret = venus_helper_init_codec_data(inst);
>> + if (ret)
>> + goto deinit;
>> +
>> ret = venc_set_properties(inst);
>> if (ret)
>> goto deinit;
>>
Hi Stan,
On 2019-06-17 14:28, Stanimir Varbanov wrote:
> Hi Aniket,
>
> On 6/11/19 9:05 AM, Aniket Masule wrote:
>> Current clock scaling calculations are same for vpu4 and
>> previous versions. For vpu4, Clock scaling calculations
>> are updated with cycles/mb. This helps in getting precise
>> clock required.
>>
>> Signed-off-by: Aniket Masule <[email protected]>
>> ---
>> drivers/media/platform/qcom/venus/helpers.c | 88
>> +++++++++++++++++++++++++++--
>> 1 file changed, 84 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/media/platform/qcom/venus/helpers.c
>> b/drivers/media/platform/qcom/venus/helpers.c
>> index f7f724b..7bcc1e6 100644
>> --- a/drivers/media/platform/qcom/venus/helpers.c
>> +++ b/drivers/media/platform/qcom/venus/helpers.c
>> @@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core,
>> u32 session_type)
>> return mbs_per_sec;
>> }
>>
>> -static int load_scale_clocks(struct venus_core *core)
>> +static int scale_clocks(struct venus_inst *inst)
>> {
>> + struct venus_core *core = inst->core;
>> const struct freq_tbl *table = core->res->freq_tbl;
>> unsigned int num_rows = core->res->freq_tbl_size;
>> unsigned long freq = table[0].freq;
>> @@ -398,6 +399,86 @@ static int load_scale_clocks(struct venus_core
>> *core)
>> return ret;
>> }
>>
>> +static unsigned long calculate_inst_freq(struct venus_inst *inst)
>> +{
>> + unsigned long vpp_cycles = 0;
>> + u32 mbs_per_sec;
>> +
>> + mbs_per_sec = load_per_instance(inst);
>> + vpp_cycles = mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
>> + /* 21 / 20 is overhead factor */
>> + vpp_cycles += vpp_cycles / 20;
>
> shouldn't you multiply by 21?
>
Expansion of given expression results to the same.
>> +
>> + return vpp_cycles;
>
> It is not clear to me is that vpp_cycles or frequency (rate)? I just
> lost in dimensions used here.
>
> If you return vpp_cycles could you rename the function name?
>
Initial calculations included frequency (for bitrate based scaling),
which I removed.
I will rename it calculate_inst_vpp_cycles for this patch.
>> +}
>> +
>> +static int scale_clocks_vpu4(struct venus_inst *inst)
>
> does vpu4 equivalent to HFI_VERSION_4XX? If so could you rename
> function
> to scale_clocks_v4.
>
Sure Stan, I will rename it to scale_clocks_v4.
>> +{
>> + struct venus_core *core = inst->core;
>> + const struct freq_tbl *table = core->res->freq_tbl;
>> + unsigned int num_rows = core->res->freq_tbl_size;
>> +
>> + struct clk *clk = core->clks[0];
>> + struct device *dev = core->dev;
>> + unsigned int i;
>> + unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
>> + int ret;
>> +
>> + freq = calculate_inst_freq(inst);
>> +
>> + if (freq > table[0].freq)
>> + goto err;
>> +
>> + for (i = 0; i < num_rows; i++) {
>> + if (freq > table[i].freq)
>> + break;
>> + freq = table[i].freq;
>> + }
>> +
>> + inst->clk_data.freq = freq;
>> +
>> + mutex_lock(&core->lock);
>> + list_for_each_entry(inst, &core->instances, list) {
>> + if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
>> + freq_core0 += inst->clk_data.freq;
>> + } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
>> + freq_core1 += inst->clk_data.freq;
>> + } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
>> + freq_core0 += inst->clk_data.freq;
>> + freq_core1 += inst->clk_data.freq;
>> + }
>> + }
>> + mutex_unlock(&core->lock);
>> +
>> + freq = max(freq_core0, freq_core1);
>
> hmm, this doesn't look right. core0 and core1 frequencies can be
> different why you get the bigger and set it on both?
>
We can't set separate clocks to core0 and core1.
As per the design, we can set clocks to the branch only not the
individual cores.
>> +
>> + ret = clk_set_rate(clk, freq);
>> + if (ret)
>> + goto err;
>> +
>> + ret = clk_set_rate(core->core0_clk, freq);
>
> IMO this should set freq_core0
We need set max required frequency, due to the reason mentioned above.
>
>> + if (ret)
>> + goto err;
>> +
>> + ret = clk_set_rate(core->core1_clk, freq);
>
> set freq_core1
>
We need set max required frequency, due to the reason mentioned above.
>> + if (ret)
>> + goto err;
>> +
>> + return 0;
>> +
>> +err:
>> + dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
>> + return ret;
>> +}
>> +
>> +static int load_scale_clocks(struct venus_inst *inst)
>> +{
>> + if (IS_V3(inst->core) || IS_V1(inst->core))
>> + return scale_clocks(inst);
>> + else
>> + return scale_clocks_vpu4(inst);
>
> could you reorder this to:
>
> if (IS_V4())
> return scale_clocks_v4(inst);
>
> return scale_clocks(inst);
>
Yes Stan.
>> +}
>> +
>> static void fill_buffer_desc(const struct venus_buffer *buf,
>> struct hfi_buffer_desc *bd, bool response)
>> {
>> @@ -1053,7 +1134,7 @@ void venus_helper_vb2_stop_streaming(struct
>> vb2_queue *q)
>>
>> venus_helper_free_dpb_bufs(inst);
>>
>> - load_scale_clocks(core);
>> + load_scale_clocks(inst);
>> INIT_LIST_HEAD(&inst->registeredbufs);
>> }
>>
>> @@ -1070,7 +1151,6 @@ void venus_helper_vb2_stop_streaming(struct
>> vb2_queue *q)
>>
>> int venus_helper_vb2_start_streaming(struct venus_inst *inst)
>> {
>> - struct venus_core *core = inst->core;
>> int ret;
>>
>> ret = intbufs_alloc(inst);
>> @@ -1081,7 +1161,7 @@ int venus_helper_vb2_start_streaming(struct
>> venus_inst *inst)
>> if (ret)
>> goto err_bufs_free;
>>
>> - load_scale_clocks(core);
>> + load_scale_clocks(inst);
>>
>> ret = hfi_session_load_res(inst);
>> if (ret)
>>
On 2019-06-17 14:37, Stanimir Varbanov wrote:
> Hi Aniket,
>
> On 6/11/19 9:05 AM, Aniket Masule wrote:
>> Present core assignment is static. Introduced load balancing
>> across the cores. Load on earch core is calculated and core
>> with minimum load is assigned to given instance.
>>
>> Signed-off-by: Aniket Masule <[email protected]>
>> ---
>> drivers/media/platform/qcom/venus/helpers.c | 50
>> +++++++++++++++++++++++++----
>> drivers/media/platform/qcom/venus/helpers.h | 2 +-
>> drivers/media/platform/qcom/venus/vdec.c | 5 +--
>> drivers/media/platform/qcom/venus/venc.c | 4 ++-
>> 4 files changed, 51 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/media/platform/qcom/venus/helpers.c
>> b/drivers/media/platform/qcom/venus/helpers.c
>> index edb653e..38d617b 100644
>> --- a/drivers/media/platform/qcom/venus/helpers.c
>> +++ b/drivers/media/platform/qcom/venus/helpers.c
>> @@ -497,6 +497,16 @@ static int load_scale_clocks(struct venus_inst
>> *inst)
>> return scale_clocks_vpu4(inst);
>> }
>>
>> +int set_core_usage(struct venus_inst *inst, u32 usage)
>> +{
>> + const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
>> + struct hfi_videocores_usage_type cu;
>> +
>> + cu.video_core_enable_mask = usage;
>> +
>> + return hfi_session_set_property(inst, ptype, &cu);
>> +}
>> +
>> static void fill_buffer_desc(const struct venus_buffer *buf,
>> struct hfi_buffer_desc *bd, bool response)
>> {
>> @@ -800,19 +810,47 @@ int venus_helper_set_work_mode(struct venus_inst
>> *inst, u32 mode)
>> }
>> EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
>>
>> -int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
>> +int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max)
>
> I think venus_helper_set_core is better?
>
Sure Stan.
>> {
>> - const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
>> - struct hfi_videocores_usage_type cu;
>> + struct venus_core *core = inst->core;
>> + u32 min_core_id = 0, core0_load = 0, core1_load = 0;
>> + unsigned long min_load, max_freq, cur_inst_load;
>> + int ret;
>>
>> if (!IS_V4(inst->core))
>> return 0;
>>
>> - cu.video_core_enable_mask = usage;
>> + core0_load = load_per_core(core, VIDC_CORE_ID_1);
>> + core1_load = load_per_core(core, VIDC_CORE_ID_2);
>>
>> - return hfi_session_set_property(inst, ptype, &cu);
>> + min_core_id = core0_load < core1_load ? VIDC_CORE_ID_1 :
>> VIDC_CORE_ID_2;
>> + min_load = min(core0_load, core1_load);
>> +
>> + if (cores_max < VIDC_CORE_ID_1) {
>> + min_core_id = VIDC_CORE_ID_1;
>> + min_load = core0_load;
>> + }
>
> could you please move that fragment just after IS_V4 check and return
> an
> error if cores_max < VIDC_CORE_ID_1.
>
Instead of "if cores_max < VIDC_CORE_ID_1", we need to check if
cores_max < VIDC_CORE_ID_2
and set core the single core as minimum load core. I can't return after
this check immidiately
as it needs to be checked whether load can be accommodated or not.
>> +
>> + cur_inst_load = load_per_instance(inst) *
>> + inst->clk_data.codec_data->vpp_cycles;
>> + max_freq = core->res->freq_tbl[0].freq;
>> +
>> + if ((cur_inst_load + min_load) > max_freq) {
>> + dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
>> + cur_inst_load, max_freq);
>> + return -EINVAL;
>> + }
>> +
>> + ret = set_core_usage(inst, min_core_id);
>> +
>> + if (ret)
>> + return ret;
>> +
>> + inst->clk_data.core_id = min_core_id;
>> +
>> + return 0;
>> }
>> -EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
>> +EXPORT_SYMBOL_GPL(venus_helper_decide_core);
>>
>> int venus_helper_init_codec_data(struct venus_inst *inst)
>> {
>> diff --git a/drivers/media/platform/qcom/venus/helpers.h
>> b/drivers/media/platform/qcom/venus/helpers.h
>> index f9360a8..c41ceb3 100644
>> --- a/drivers/media/platform/qcom/venus/helpers.h
>> +++ b/drivers/media/platform/qcom/venus/helpers.h
>> @@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct
>> venus_inst *inst,
>> u32 buftype);
>> int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
>> int venus_helper_init_codec_data(struct venus_inst *inst);
>> -int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
>> +int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max);
>> int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int
>> input_bufs,
>> unsigned int output_bufs,
>> unsigned int output2_bufs);
>> diff --git a/drivers/media/platform/qcom/venus/vdec.c
>> b/drivers/media/platform/qcom/venus/vdec.c
>> index 51795fd..9f988ba 100644
>> --- a/drivers/media/platform/qcom/venus/vdec.c
>> +++ b/drivers/media/platform/qcom/venus/vdec.c
>> @@ -544,14 +544,15 @@ static int vdec_output_conf(struct venus_inst
>> *inst)
>> u32 height = inst->out_height;
>> u32 out_fmt, out2_fmt;
>> bool ubwc = false;
>> - u32 ptype;
>> + u32 ptype, cores_max;
>> int ret;
>>
>> ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
>> if (ret)
>> return ret;
>>
>> - ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_1);
>> + cores_max = core_num_max(inst);
>
> please move core_max calculation in the venus_helper_decide_core() here
> and below.
>
Yes Stan.
>> + ret = venus_helper_decide_core(inst, cores_max);
>> if (ret)
>> return ret;
>>
>> diff --git a/drivers/media/platform/qcom/venus/venc.c
>> b/drivers/media/platform/qcom/venus/venc.c
>> index 792cdce..ed39efd 100644
>> --- a/drivers/media/platform/qcom/venus/venc.c
>> +++ b/drivers/media/platform/qcom/venus/venc.c
>> @@ -654,13 +654,15 @@ static int venc_set_properties(struct venus_inst
>> *inst)
>> struct hfi_quantization quant;
>> struct hfi_quantization_range quant_range;
>> u32 ptype, rate_control, bitrate, profile = 0, level = 0;
>> + u32 cores_max;
>> int ret;
>>
>> ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
>> if (ret)
>> return ret;
>>
>> - ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_2);
>> + cores_max = core_num_max(inst);
>> + ret = venus_helper_decide_core(inst, cores_max);
>> if (ret)
>> return ret;
>>
>>