2019-06-25 19:24:35

by Aniket Masule

[permalink] [raw]
Subject: [PATCH v3 2/4] media: venus: Update clock scaling

The current clock scaling calculations are the same for vpu4
and previous versions. For vpu4, the clock scaling calculations
are updated to use cycles/mb. This helps in getting the precise
clock required.

Signed-off-by: Aniket Masule <[email protected]>
---
drivers/media/platform/qcom/venus/helpers.c | 111 ++++++++++++++++++++++++----
drivers/media/platform/qcom/venus/helpers.h | 2 +-
drivers/media/platform/qcom/venus/vdec.c | 2 +-
drivers/media/platform/qcom/venus/venc.c | 2 +-
4 files changed, 99 insertions(+), 18 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index f7f724b..e1a0247 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 session_type)
return mbs_per_sec;
}

-static int load_scale_clocks(struct venus_core *core)
+static int scale_clocks(struct venus_inst *inst)
{
+ struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
@@ -398,6 +399,86 @@ static int load_scale_clocks(struct venus_core *core)
return ret;
}

+static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+{
+ unsigned long vpp_freq = 0;
+ u32 mbs_per_sec;
+
+ mbs_per_sec = load_per_instance(inst);
+ vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+ /* 21 / 20 is overhead factor */
+ vpp_freq += vpp_freq / 20;
+
+ return vpp_freq;
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+ struct venus_core *core = inst->core;
+ const struct freq_tbl *table = core->res->freq_tbl;
+ unsigned int num_rows = core->res->freq_tbl_size;
+
+ struct clk *clk = core->clks[0];
+ struct device *dev = core->dev;
+ unsigned int i;
+ unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
+ int ret;
+
+ freq = calculate_vpp_freq(inst);
+
+ if (freq > table[0].freq)
+ goto err;
+
+ for (i = 0; i < num_rows; i++) {
+ if (freq > table[i].freq)
+ break;
+ freq = table[i].freq;
+ }
+
+ inst->clk_data.freq = freq;
+
+ mutex_lock(&core->lock);
+ list_for_each_entry(inst, &core->instances, list) {
+ if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+ freq_core0 += inst->clk_data.freq;
+ } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+ freq_core1 += inst->clk_data.freq;
+ } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+ freq_core0 += inst->clk_data.freq;
+ freq_core1 += inst->clk_data.freq;
+ }
+ }
+ mutex_unlock(&core->lock);
+
+ freq = max(freq_core0, freq_core1);
+
+ ret = clk_set_rate(clk, freq);
+ if (ret)
+ goto err;
+
+ ret = clk_set_rate(core->core0_clk, freq);
+ if (ret)
+ goto err;
+
+ ret = clk_set_rate(core->core1_clk, freq);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
+ return ret;
+}
+
+static int load_scale_clocks(struct venus_inst *inst)
+{
+ if (IS_V4(inst->core))
+ return scale_clocks_v4(inst);
+
+ return scale_clocks(inst);
+}
+
static void fill_buffer_desc(const struct venus_buffer *buf,
struct hfi_buffer_desc *bd, bool response)
{
@@ -715,35 +796,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
}
EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);

-int venus_helper_init_codec_data(struct venus_inst *inst)
+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
{
- const struct codec_data *codec_data;
- unsigned int i, codec_data_size;
+ const struct codec_freq_data *codec_freq_data;
+ unsigned int i, codec_freq_data_size;
u32 pixfmt;
int ret = 0;

if (!IS_V4(inst->core))
return 0;

- codec_data = inst->core->res->codec_data;
- codec_data_size = inst->core->res->codec_data_size;
+ codec_freq_data = inst->core->res->codec_freq_data;
+ codec_freq_data_size = inst->core->res->codec_freq_data_size;
pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;

- for (i = 0; i < codec_data_size; i++) {
- if (codec_data[i].pixfmt == pixfmt &&
- codec_data[i].session_type == inst->session_type) {
- inst->clk_data.codec_data = &codec_data[i];
+ for (i = 0; i < codec_freq_data_size; i++) {
+ if (codec_freq_data[i].pixfmt == pixfmt &&
+ codec_freq_data[i].session_type == inst->session_type) {
+ inst->clk_data.codec_freq_data =
+ &codec_freq_data[i];
break;
}
}

- if (!inst->clk_data.codec_data)
+ if (!inst->clk_data.codec_freq_data)
ret = -EINVAL;

return ret;
}
-EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_freq_data);

int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
unsigned int output_bufs,
@@ -1053,7 +1135,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)

venus_helper_free_dpb_bufs(inst);

- load_scale_clocks(core);
+ load_scale_clocks(inst);
INIT_LIST_HEAD(&inst->registeredbufs);
}

@@ -1070,7 +1152,6 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)

int venus_helper_vb2_start_streaming(struct venus_inst *inst)
{
- struct venus_core *core = inst->core;
int ret;

ret = intbufs_alloc(inst);
@@ -1081,7 +1162,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst *inst)
if (ret)
goto err_bufs_free;

- load_scale_clocks(core);
+ load_scale_clocks(inst);

ret = hfi_session_load_res(inst);
if (ret)
diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h
index f9360a8..2c13245 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -41,7 +41,7 @@ int venus_helper_set_output_resolution(struct venus_inst *inst,
unsigned int width, unsigned int height,
u32 buftype);
int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
-int venus_helper_init_codec_data(struct venus_inst *inst);
+int venus_helper_init_codec_freq_data(struct venus_inst *inst);
int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
unsigned int output_bufs,
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 51795fd..d037f80 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -660,7 +660,7 @@ static int vdec_init_session(struct venus_inst *inst)
if (ret)
goto deinit;

- ret = venus_helper_init_codec_data(inst);
+ ret = venus_helper_init_codec_freq_data(inst);
if (ret)
goto deinit;

diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index 792cdce..cdddc82 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -847,7 +847,7 @@ static int venc_init_session(struct venus_inst *inst)
if (ret)
goto deinit;

- ret = venus_helper_init_codec_data(inst);
+ ret = venus_helper_init_codec_freq_data(inst);
if (ret)
goto deinit;

--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


2019-07-01 13:12:37

by Stanimir Varbanov

[permalink] [raw]
Subject: Re: [PATCH v3 2/4] media: venus: Update clock scaling



On 6/25/19 7:27 PM, Aniket Masule wrote:
> Current clock scaling calculations are same for vpu4 and
> previous versions. For vpu4, Clock scaling calculations
> are updated with cycles/mb. This helps in getting precise
> clock required.
>
> Signed-off-by: Aniket Masule <[email protected]>
> ---
> drivers/media/platform/qcom/venus/helpers.c | 111 ++++++++++++++++++++++++----
> drivers/media/platform/qcom/venus/helpers.h | 2 +-
> drivers/media/platform/qcom/venus/vdec.c | 2 +-
> drivers/media/platform/qcom/venus/venc.c | 2 +-
> 4 files changed, 99 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
> index f7f724b..e1a0247 100644
> --- a/drivers/media/platform/qcom/venus/helpers.c
> +++ b/drivers/media/platform/qcom/venus/helpers.c
> @@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 session_type)
> return mbs_per_sec;
> }
>
> -static int load_scale_clocks(struct venus_core *core)
> +static int scale_clocks(struct venus_inst *inst)
> {
> + struct venus_core *core = inst->core;
> const struct freq_tbl *table = core->res->freq_tbl;
> unsigned int num_rows = core->res->freq_tbl_size;
> unsigned long freq = table[0].freq;
> @@ -398,6 +399,86 @@ static int load_scale_clocks(struct venus_core *core)
> return ret;
> }
>
> +static unsigned long calculate_vpp_freq(struct venus_inst *inst)
> +{
> + unsigned long vpp_freq = 0;
> + u32 mbs_per_sec;
> +
> + mbs_per_sec = load_per_instance(inst);
> + vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
> + /* 21 / 20 is overhead factor */
> + vpp_freq += vpp_freq / 20;
> +
> + return vpp_freq;
> +}
> +
> +static int scale_clocks_v4(struct venus_inst *inst)
> +{
> + struct venus_core *core = inst->core;
> + const struct freq_tbl *table = core->res->freq_tbl;
> + unsigned int num_rows = core->res->freq_tbl_size;
> +

please remove this blank line.

> + struct clk *clk = core->clks[0];
> + struct device *dev = core->dev;
> + unsigned int i;
> + unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;

Could you number the cores the same way as is done for VIDC_CORE_ID_,
i.e. start counting from one?

> + int ret;
> +
> + freq = calculate_vpp_freq(inst);
> +
> + if (freq > table[0].freq)
> + goto err;

If the goto is triggered, the error message will be wrong. In fact, the
dev_err message is targeted at a clk_set_rate failure.

> +
> + for (i = 0; i < num_rows; i++) {
> + if (freq > table[i].freq)
> + break;
> + freq = table[i].freq;
> + }
> +
> + inst->clk_data.freq = freq;
> +
> + mutex_lock(&core->lock);
> + list_for_each_entry(inst, &core->instances, list) {
> + if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
> + freq_core0 += inst->clk_data.freq;
> + } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
> + freq_core1 += inst->clk_data.freq;
> + } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
> + freq_core0 += inst->clk_data.freq;
> + freq_core1 += inst->clk_data.freq;
> + }
> + }
> + mutex_unlock(&core->lock);
> +
> + freq = max(freq_core0, freq_core1);
> +
> + ret = clk_set_rate(clk, freq);
> + if (ret)
> + goto err;
> +
> + ret = clk_set_rate(core->core0_clk, freq);
> + if (ret)
> + goto err;
> +
> + ret = clk_set_rate(core->core1_clk, freq);
> + if (ret)
> + goto err;
> +
> + return 0;
> +
> +err:
> + dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
> + return ret;
> +}
> +
> +static int load_scale_clocks(struct venus_inst *inst)
> +{
> + if (IS_V4(inst->core))
> + return scale_clocks_v4(inst);
> +
> + return scale_clocks(inst);
> +}
> +
> static void fill_buffer_desc(const struct venus_buffer *buf,
> struct hfi_buffer_desc *bd, bool response)
> {
> @@ -715,35 +796,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
> }
> EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
>
> -int venus_helper_init_codec_data(struct venus_inst *inst)
> +int venus_helper_init_codec_freq_data(struct venus_inst *inst)
> {
> - const struct codec_data *codec_data;
> - unsigned int i, codec_data_size;

those deletions shouldn't exist once you fix the git rebase issue.

> + const struct codec_freq_data *codec_freq_data;
> + unsigned int i, codec_freq_data_size;

Could you rename the variables to something shorter?

> u32 pixfmt;
> int ret = 0;
>
> if (!IS_V4(inst->core))
> return 0;
>
> - codec_data = inst->core->res->codec_data;
> - codec_data_size = inst->core->res->codec_data_size;
> + codec_freq_data = inst->core->res->codec_freq_data;
> + codec_freq_data_size = inst->core->res->codec_freq_data_size;
> pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
> inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
>
> - for (i = 0; i < codec_data_size; i++) {
> - if (codec_data[i].pixfmt == pixfmt &&
> - codec_data[i].session_type == inst->session_type) {
> - inst->clk_data.codec_data = &codec_data[i];
> + for (i = 0; i < codec_freq_data_size; i++) {
> + if (codec_freq_data[i].pixfmt == pixfmt &&
> + codec_freq_data[i].session_type == inst->session_type) {
> + inst->clk_data.codec_freq_data =
> + &codec_freq_data[i];
> break;
> }
> }
>
> - if (!inst->clk_data.codec_data)
> + if (!inst->clk_data.codec_freq_data)
> ret = -EINVAL;
>
> return ret;
> }
> -EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
> +EXPORT_SYMBOL_GPL(venus_helper_init_codec_freq_data);
>

--
regards,
Stan

2019-07-02 04:59:46

by Aniket Masule

[permalink] [raw]
Subject: Re: [PATCH v3 2/4] media: venus: Update clock scaling

Hi Stan,

On 2019-07-01 18:41, Stanimir Varbanov wrote:
> On 6/25/19 7:27 PM, Aniket Masule wrote:
>> Current clock scaling calculations are same for vpu4 and
>> previous versions. For vpu4, Clock scaling calculations
>> are updated with cycles/mb. This helps in getting precise
>> clock required.
>>
>> Signed-off-by: Aniket Masule <[email protected]>
>> ---
>> drivers/media/platform/qcom/venus/helpers.c | 111
>> ++++++++++++++++++++++++----
>> drivers/media/platform/qcom/venus/helpers.h | 2 +-
>> drivers/media/platform/qcom/venus/vdec.c | 2 +-
>> drivers/media/platform/qcom/venus/venc.c | 2 +-
>> 4 files changed, 99 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/media/platform/qcom/venus/helpers.c
>> b/drivers/media/platform/qcom/venus/helpers.c
>> index f7f724b..e1a0247 100644
>> --- a/drivers/media/platform/qcom/venus/helpers.c
>> +++ b/drivers/media/platform/qcom/venus/helpers.c
>> @@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core,
>> u32 session_type)
>> return mbs_per_sec;
>> }
>>
>> -static int load_scale_clocks(struct venus_core *core)
>> +static int scale_clocks(struct venus_inst *inst)
>> {
>> + struct venus_core *core = inst->core;
>> const struct freq_tbl *table = core->res->freq_tbl;
>> unsigned int num_rows = core->res->freq_tbl_size;
>> unsigned long freq = table[0].freq;
>> @@ -398,6 +399,86 @@ static int load_scale_clocks(struct venus_core
>> *core)
>> return ret;
>> }
>>
>> +static unsigned long calculate_vpp_freq(struct venus_inst *inst)
>> +{
>> + unsigned long vpp_freq = 0;
>> + u32 mbs_per_sec;
>> +
>> + mbs_per_sec = load_per_instance(inst);
>> + vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
>> + /* 21 / 20 is overhead factor */
>> + vpp_freq += vpp_freq / 20;
>> +
>> + return vpp_freq;
>> +}
>> +
>> +static int scale_clocks_v4(struct venus_inst *inst)
>> +{
>> + struct venus_core *core = inst->core;
>> + const struct freq_tbl *table = core->res->freq_tbl;
>> + unsigned int num_rows = core->res->freq_tbl_size;
>> +
>
> please remove this blank line.
>
>> + struct clk *clk = core->clks[0];
>> + struct device *dev = core->dev;
>> + unsigned int i;
>> + unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
>
> could you count the cores as it is done for VIDC_CORE_ID_ ?
> i.e. start counting from one.
>
Sure. I was aligning it with the clock handle names in core, but
aligning it with VIDC_CORE_ID_ would give better readability.
>> + int ret;
>> +
>> + freq = calculate_vpp_freq(inst);
>> +
>> + if (freq > table[0].freq)
>> + goto err;
>
> If the goto is triggered, the error message will be wrong. In fact, the
> dev_err message is targeted at a clk_set_rate failure.
>
I will add a separate exit for this.
>> +
>> + for (i = 0; i < num_rows; i++) {
>> + if (freq > table[i].freq)
>> + break;
>> + freq = table[i].freq;
>> + }
>> +
>> + inst->clk_data.freq = freq;
>> +
>> + mutex_lock(&core->lock);
>> + list_for_each_entry(inst, &core->instances, list) {
>> + if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
>> + freq_core0 += inst->clk_data.freq;
>> + } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
>> + freq_core1 += inst->clk_data.freq;
>> + } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
>> + freq_core0 += inst->clk_data.freq;
>> + freq_core1 += inst->clk_data.freq;
>> + }
>> + }
>> + mutex_unlock(&core->lock);
>> +
>> + freq = max(freq_core0, freq_core1);
>> +
>> + ret = clk_set_rate(clk, freq);
>> + if (ret)
>> + goto err;
>> +
>> + ret = clk_set_rate(core->core0_clk, freq);
>> + if (ret)
>> + goto err;
>> +
>> + ret = clk_set_rate(core->core1_clk, freq);
>> + if (ret)
>> + goto err;
>> +
>> + return 0;
>> +
>> +err:
>> + dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
>> + return ret;
>> +}
>> +
>> +static int load_scale_clocks(struct venus_inst *inst)
>> +{
>> + if (IS_V4(inst->core))
>> + return scale_clocks_v4(inst);
>> +
>> + return scale_clocks(inst);
>> +}
>> +
>> static void fill_buffer_desc(const struct venus_buffer *buf,
>> struct hfi_buffer_desc *bd, bool response)
>> {
>> @@ -715,35 +796,36 @@ int venus_helper_set_core_usage(struct
>> venus_inst *inst, u32 usage)
>> }
>> EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
>>
>> -int venus_helper_init_codec_data(struct venus_inst *inst)
>> +int venus_helper_init_codec_freq_data(struct venus_inst *inst)
>> {
>> - const struct codec_data *codec_data;
>> - unsigned int i, codec_data_size;
>
> those deletions shouldn't exist once you fix the git rebase issue.
>
>> + const struct codec_freq_data *codec_freq_data;
>> + unsigned int i, codec_freq_data_size;
>
> could you rename the variables to shorter?
>
Yes
>> u32 pixfmt;
>> int ret = 0;
>>
>> if (!IS_V4(inst->core))
>> return 0;
>>
>> - codec_data = inst->core->res->codec_data;
>> - codec_data_size = inst->core->res->codec_data_size;
>> + codec_freq_data = inst->core->res->codec_freq_data;
>> + codec_freq_data_size = inst->core->res->codec_freq_data_size;
>> pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
>> inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
>>
>> - for (i = 0; i < codec_data_size; i++) {
>> - if (codec_data[i].pixfmt == pixfmt &&
>> - codec_data[i].session_type == inst->session_type) {
>> - inst->clk_data.codec_data = &codec_data[i];
>> + for (i = 0; i < codec_freq_data_size; i++) {
>> + if (codec_freq_data[i].pixfmt == pixfmt &&
>> + codec_freq_data[i].session_type == inst->session_type) {
>> + inst->clk_data.codec_freq_data =
>> + &codec_freq_data[i];
>> break;
>> }
>> }
>>
>> - if (!inst->clk_data.codec_data)
>> + if (!inst->clk_data.codec_freq_data)
>> ret = -EINVAL;
>>
>> return ret;
>> }
>> -EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
>> +EXPORT_SYMBOL_GPL(venus_helper_init_codec_freq_data);
>>

Thanks,
Aniket