2019-07-24 03:22:06

by Huazhong Tan

[permalink] [raw]
Subject: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for misc interrupt

From: Yunsheng Lin <[email protected]>

The misc interrupt is used to schedule the reset and mailbox
subtask, and a 1 sec timer is used to schedule the service
subtask, which does periodic work.

This patch sets the above three subtask's affinity using the
misc interrupt' affinity.

Also this patch setups a affinity notify for misc interrupt to
allow user to change the above three subtask's affinity.

Signed-off-by: Yunsheng Lin <[email protected]>
Signed-off-by: Peng Li <[email protected]>
Signed-off-by: Huazhong Tan <[email protected]>
---
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 59 ++++++++++++++++++++--
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 4 ++
2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f345095..fe45986 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1270,6 +1270,12 @@ static int hclge_configure(struct hclge_dev *hdev)

hclge_init_kdump_kernel_config(hdev);

+ /* Set the init affinity based on pci func number */
+ i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev->dev)));
+ i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0;
+ cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev->pdev->dev)),
+ &hdev->affinity_mask);
+
return ret;
}

@@ -2502,14 +2508,16 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
{
if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
!test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
- schedule_work(&hdev->mbx_service_task);
+ queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
+ &hdev->mbx_service_task);
}

static void hclge_reset_task_schedule(struct hclge_dev *hdev)
{
if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
!test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
- schedule_work(&hdev->rst_service_task);
+ queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
+ &hdev->rst_service_task);
}

static void hclge_task_schedule(struct hclge_dev *hdev)
@@ -2517,7 +2525,8 @@ static void hclge_task_schedule(struct hclge_dev *hdev)
if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
!test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
- (void)schedule_work(&hdev->service_task);
+ queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
+ &hdev->service_task);
}

static int hclge_get_mac_link_status(struct hclge_dev *hdev)
@@ -2921,6 +2930,39 @@ static void hclge_get_misc_vector(struct hclge_dev *hdev)
hdev->num_msi_used += 1;
}

+static void hclge_irq_affinity_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct hclge_dev *hdev = container_of(notify, struct hclge_dev,
+ affinity_notify);
+
+ cpumask_copy(&hdev->affinity_mask, mask);
+ del_timer_sync(&hdev->service_timer);
+ hdev->service_timer.expires = jiffies + HZ;
+ add_timer_on(&hdev->service_timer, cpumask_first(&hdev->affinity_mask));
+}
+
+static void hclge_irq_affinity_release(struct kref *ref)
+{
+}
+
+static void hclge_misc_affinity_setup(struct hclge_dev *hdev)
+{
+ irq_set_affinity_hint(hdev->misc_vector.vector_irq,
+ &hdev->affinity_mask);
+
+ hdev->affinity_notify.notify = hclge_irq_affinity_notify;
+ hdev->affinity_notify.release = hclge_irq_affinity_release;
+ irq_set_affinity_notifier(hdev->misc_vector.vector_irq,
+ &hdev->affinity_notify);
+}
+
+static void hclge_misc_affinity_teardown(struct hclge_dev *hdev)
+{
+ irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL);
+ irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL);
+}
+
static int hclge_misc_irq_init(struct hclge_dev *hdev)
{
int ret;
@@ -6151,7 +6193,10 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable)
struct hclge_dev *hdev = vport->back;

if (enable) {
- mod_timer(&hdev->service_timer, jiffies + HZ);
+ del_timer_sync(&hdev->service_timer);
+ hdev->service_timer.expires = jiffies + HZ;
+ add_timer_on(&hdev->service_timer,
+ cpumask_first(&hdev->affinity_mask));
} else {
del_timer_sync(&hdev->service_timer);
cancel_work_sync(&hdev->service_task);
@@ -8809,6 +8854,11 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task);
INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task);

+ /* Setup affinity after service timer setup because add_timer_on
+ * is called in affinity notify.
+ */
+ hclge_misc_affinity_setup(hdev);
+
hclge_clear_all_event_cause(hdev);
hclge_clear_resetting_state(hdev);

@@ -8970,6 +9020,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
struct hclge_dev *hdev = ae_dev->priv;
struct hclge_mac *mac = &hdev->hw.mac;

+ hclge_misc_affinity_teardown(hdev);
hclge_state_uninit(hdev);

if (mac->phydev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 6a12285..14df23c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -864,6 +864,10 @@ struct hclge_dev {

DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats,
HCLGE_MAC_TNL_LOG_SIZE);
+
+ /* affinity mask and notify for misc interrupt */
+ cpumask_t affinity_mask;
+ struct irq_affinity_notify affinity_notify;
};

/* VPort level vlan tag configuration for TX direction */
--
2.7.4


2019-07-24 20:37:36

by Saeed Mahameed

[permalink] [raw]
Subject: Re: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for misc interrupt

On Wed, 2019-07-24 at 11:18 +0800, Huazhong Tan wrote:
> From: Yunsheng Lin <[email protected]>
>
> The misc interrupt is used to schedule the reset and mailbox
> subtask, and a 1 sec timer is used to schedule the service
> subtask, which does periodic work.
>
> This patch sets the above three subtask's affinity using the
> misc interrupt' affinity.
>
> Also this patch setups a affinity notify for misc interrupt to
> allow user to change the above three subtask's affinity.
>
> Signed-off-by: Yunsheng Lin <[email protected]>
> Signed-off-by: Peng Li <[email protected]>
> Signed-off-by: Huazhong Tan <[email protected]>
> ---
> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 59
> ++++++++++++++++++++--
> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 4 ++
> 2 files changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> index f345095..fe45986 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> @@ -1270,6 +1270,12 @@ static int hclge_configure(struct hclge_dev
> *hdev)
>
> hclge_init_kdump_kernel_config(hdev);
>
> + /* Set the init affinity based on pci func number */
> + i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev-
> >dev)));
> + i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0;
> + cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev-
> >pdev->dev)),
> + &hdev->affinity_mask);
> +
> return ret;
> }
>
> @@ -2502,14 +2508,16 @@ static void hclge_mbx_task_schedule(struct
> hclge_dev *hdev)
> {
> if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev-
> >state))
> - schedule_work(&hdev->mbx_service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask),
> system_wq,
> + &hdev->mbx_service_task);
> }
>
> static void hclge_reset_task_schedule(struct hclge_dev *hdev)
> {
> if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev-
> >state))
> - schedule_work(&hdev->rst_service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask),
> system_wq,
> + &hdev->rst_service_task);
> }
>
> static void hclge_task_schedule(struct hclge_dev *hdev)
> @@ -2517,7 +2525,8 @@ static void hclge_task_schedule(struct
> hclge_dev *hdev)
> if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
> !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
> - (void)schedule_work(&hdev->service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask),
> system_wq,
> + &hdev->service_task);
> }
>
> static int hclge_get_mac_link_status(struct hclge_dev *hdev)
> @@ -2921,6 +2930,39 @@ static void hclge_get_misc_vector(struct
> hclge_dev *hdev)
> hdev->num_msi_used += 1;
> }
>
> +static void hclge_irq_affinity_notify(struct irq_affinity_notify
> *notify,
> + const cpumask_t *mask)
> +{
> + struct hclge_dev *hdev = container_of(notify, struct hclge_dev,
> + affinity_notify);
> +
> + cpumask_copy(&hdev->affinity_mask, mask);
> + del_timer_sync(&hdev->service_timer);
> + hdev->service_timer.expires = jiffies + HZ;
> + add_timer_on(&hdev->service_timer, cpumask_first(&hdev-
> >affinity_mask));
> +}

I don't see any relation between your misc irq vector and &hdev-
>service_timer, to me this looks like an abuse of the irq affinity API
to allow the user to move the service timer affinity.

> +
> +static void hclge_irq_affinity_release(struct kref *ref)
> +{
> +}
> +
> +static void hclge_misc_affinity_setup(struct hclge_dev *hdev)
> +{
> + irq_set_affinity_hint(hdev->misc_vector.vector_irq,
> + &hdev->affinity_mask);
> +
> + hdev->affinity_notify.notify = hclge_irq_affinity_notify;
> + hdev->affinity_notify.release = hclge_irq_affinity_release;
> + irq_set_affinity_notifier(hdev->misc_vector.vector_irq,
> + &hdev->affinity_notify);
> +}
> +
> +static void hclge_misc_affinity_teardown(struct hclge_dev *hdev)
> +{
> + irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL);
> + irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL);
> +}
> +
> static int hclge_misc_irq_init(struct hclge_dev *hdev)
> {
> int ret;
> @@ -6151,7 +6193,10 @@ static void hclge_set_timer_task(struct
> hnae3_handle *handle, bool enable)
> struct hclge_dev *hdev = vport->back;
>
> if (enable) {
> - mod_timer(&hdev->service_timer, jiffies + HZ);
> + del_timer_sync(&hdev->service_timer);
> + hdev->service_timer.expires = jiffies + HZ;
> + add_timer_on(&hdev->service_timer,
> + cpumask_first(&hdev->affinity_mask));
> } else {
> del_timer_sync(&hdev->service_timer);
> cancel_work_sync(&hdev->service_task);
> @@ -8809,6 +8854,11 @@ static int hclge_init_ae_dev(struct
> hnae3_ae_dev *ae_dev)
> INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task);
> INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task);
>
> + /* Setup affinity after service timer setup because
> add_timer_on
> + * is called in affinity notify.
> + */
> + hclge_misc_affinity_setup(hdev);
> +
> hclge_clear_all_event_cause(hdev);
> hclge_clear_resetting_state(hdev);
>
> @@ -8970,6 +9020,7 @@ static void hclge_uninit_ae_dev(struct
> hnae3_ae_dev *ae_dev)
> struct hclge_dev *hdev = ae_dev->priv;
> struct hclge_mac *mac = &hdev->hw.mac;
>
> + hclge_misc_affinity_teardown(hdev);
> hclge_state_uninit(hdev);
>
> if (mac->phydev)
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> index 6a12285..14df23c 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> @@ -864,6 +864,10 @@ struct hclge_dev {
>
> DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats,
> HCLGE_MAC_TNL_LOG_SIZE);
> +
> + /* affinity mask and notify for misc interrupt */
> + cpumask_t affinity_mask;
> + struct irq_affinity_notify affinity_notify;
> };
>
> /* VPort level vlan tag configuration for TX direction */

2019-07-25 05:51:49

by Yunsheng Lin

[permalink] [raw]
Subject: Re: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for misc interrupt

On 2019/7/25 3:24, Saeed Mahameed wrote:
> On Wed, 2019-07-24 at 11:18 +0800, Huazhong Tan wrote:
>> From: Yunsheng Lin <[email protected]>
>>
>> The misc interrupt is used to schedule the reset and mailbox
>> subtask, and a 1 sec timer is used to schedule the service
>> subtask, which does periodic work.
>>
>> This patch sets the above three subtask's affinity using the
>> misc interrupt' affinity.
>>
>> Also this patch setups a affinity notify for misc interrupt to
>> allow user to change the above three subtask's affinity.
>>
>> Signed-off-by: Yunsheng Lin <[email protected]>
>> Signed-off-by: Peng Li <[email protected]>
>> Signed-off-by: Huazhong Tan <[email protected]>
>> ---
>> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 59
>> ++++++++++++++++++++--
>> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 4 ++
>> 2 files changed, 59 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
>> b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
>> index f345095..fe45986 100644
>> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
>> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
>> @@ -1270,6 +1270,12 @@ static int hclge_configure(struct hclge_dev
>> *hdev)
>>
>> hclge_init_kdump_kernel_config(hdev);
>>
>> + /* Set the init affinity based on pci func number */
>> + i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev-
>>> dev)));
>> + i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0;
>> + cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev-
>>> pdev->dev)),
>> + &hdev->affinity_mask);
>> +
>> return ret;
>> }
>>
>> @@ -2502,14 +2508,16 @@ static void hclge_mbx_task_schedule(struct
>> hclge_dev *hdev)
>> {
>> if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
>> !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev-
>>> state))
>> - schedule_work(&hdev->mbx_service_task);
>> + queue_work_on(cpumask_first(&hdev->affinity_mask),
>> system_wq,
>> + &hdev->mbx_service_task);
>> }
>>
>> static void hclge_reset_task_schedule(struct hclge_dev *hdev)
>> {
>> if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
>> !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev-
>>> state))
>> - schedule_work(&hdev->rst_service_task);
>> + queue_work_on(cpumask_first(&hdev->affinity_mask),
>> system_wq,
>> + &hdev->rst_service_task);
>> }
>>
>> static void hclge_task_schedule(struct hclge_dev *hdev)
>> @@ -2517,7 +2525,8 @@ static void hclge_task_schedule(struct
>> hclge_dev *hdev)
>> if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
>> !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
>> !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
>> - (void)schedule_work(&hdev->service_task);
>> + queue_work_on(cpumask_first(&hdev->affinity_mask),
>> system_wq,
>> + &hdev->service_task);
>> }
>>
>> static int hclge_get_mac_link_status(struct hclge_dev *hdev)
>> @@ -2921,6 +2930,39 @@ static void hclge_get_misc_vector(struct
>> hclge_dev *hdev)
>> hdev->num_msi_used += 1;
>> }
>>
>> +static void hclge_irq_affinity_notify(struct irq_affinity_notify
>> *notify,
>> + const cpumask_t *mask)
>> +{
>> + struct hclge_dev *hdev = container_of(notify, struct hclge_dev,
>> + affinity_notify);
>> +
>> + cpumask_copy(&hdev->affinity_mask, mask);
>> + del_timer_sync(&hdev->service_timer);
>> + hdev->service_timer.expires = jiffies + HZ;
>> + add_timer_on(&hdev->service_timer, cpumask_first(&hdev-
>>> affinity_mask));
>> +}
>
> I don't see any relation between your misc irq vector and &hdev-
>> service_timer, to me this looks like an abuse of the irq affinity API
> to allow the user to move the service timer affinity.

Hi, thanks for reviewing.

hdev->service_timer is used to schedule the periodic work
queue hdev->service_task, we want all the management work
queue including hdev->service_task to bind to the same cpu
to improve cache and power efficiency, it is better to move
service timer affinity according to that.

The hdev->service_task is changed to delay work queue in
next patch " net: hns3: make hclge_service use delayed workqueue",
So the affinity in the timer of the delay work queue is automatically
set to the affinity of the delay work queue, we will move the
"make hclge_service use delayed workqueue" patch before the
"add interrupt affinity support for misc interrupt" patch, so
we do not have to set service timer affinity explicitly.

Also, There is there work queues(mbx_service_task, service_task,
rst_service_task) in the hns3 driver, we plan to combine them
to one or two workqueue to improve efficiency and readability.

>
>> +
>> +static void hclge_irq_affinity_release(struct kref *ref)
>> +{
>> +}
>> +
>> +static void hclge_misc_affinity_setup(struct hclge_dev *hdev)
>> +{
>> + irq_set_affinity_hint(hdev->misc_vector.vector_irq,
>> + &hdev->affinity_mask);
>> +
>> + hdev->affinity_notify.notify = hclge_irq_affinity_notify;
>> + hdev->affinity_notify.release = hclge_irq_affinity_release;
>> + irq_set_affinity_notifier(hdev->misc_vector.vector_irq,
>> + &hdev->affinity_notify);
>> +}
>> +
>> +static void hclge_misc_affinity_teardown(struct hclge_dev *hdev)
>> +{
>> + irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL);
>> + irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL);
>> +}
>> +
>> static int hclge_misc_irq_init(struct hclge_dev *hdev)
>> {
>> int ret;
>> @@ -6151,7 +6193,10 @@ static void hclge_set_timer_task(struct
>> hnae3_handle *handle, bool enable)
>> struct hclge_dev *hdev = vport->back;
>>
>> if (enable) {
>> - mod_timer(&hdev->service_timer, jiffies + HZ);
>> + del_timer_sync(&hdev->service_timer);
>> + hdev->service_timer.expires = jiffies + HZ;
>> + add_timer_on(&hdev->service_timer,
>> + cpumask_first(&hdev->affinity_mask));
>> } else {
>> del_timer_sync(&hdev->service_timer);
>> cancel_work_sync(&hdev->service_task);
>> @@ -8809,6 +8854,11 @@ static int hclge_init_ae_dev(struct
>> hnae3_ae_dev *ae_dev)
>> INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task);
>> INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task);
>>
>> + /* Setup affinity after service timer setup because
>> add_timer_on
>> + * is called in affinity notify.
>> + */
>> + hclge_misc_affinity_setup(hdev);
>> +
>> hclge_clear_all_event_cause(hdev);
>> hclge_clear_resetting_state(hdev);
>>
>> @@ -8970,6 +9020,7 @@ static void hclge_uninit_ae_dev(struct
>> hnae3_ae_dev *ae_dev)
>> struct hclge_dev *hdev = ae_dev->priv;
>> struct hclge_mac *mac = &hdev->hw.mac;
>>
>> + hclge_misc_affinity_teardown(hdev);
>> hclge_state_uninit(hdev);
>>
>> if (mac->phydev)
>> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
>> b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
>> index 6a12285..14df23c 100644
>> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
>> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
>> @@ -864,6 +864,10 @@ struct hclge_dev {
>>
>> DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats,
>> HCLGE_MAC_TNL_LOG_SIZE);
>> +
>> + /* affinity mask and notify for misc interrupt */
>> + cpumask_t affinity_mask;
>> + struct irq_affinity_notify affinity_notify;
>> };
>>
>> /* VPort level vlan tag configuration for TX direction */

2019-07-26 23:26:42

by Saeed Mahameed

[permalink] [raw]
Subject: Re: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for misc interrupt

On Thu, 2019-07-25 at 10:05 +0800, Yunsheng Lin wrote:
> On 2019/7/25 3:24, Saeed Mahameed wrote:
> > On Wed, 2019-07-24 at 11:18 +0800, Huazhong Tan wrote:
> > > From: Yunsheng Lin <[email protected]>
> > >

[...]
> > >
> > > +static void hclge_irq_affinity_notify(struct irq_affinity_notify
> > > *notify,
> > > + const cpumask_t *mask)
> > > +{
> > > + struct hclge_dev *hdev = container_of(notify, struct hclge_dev,
> > > + affinity_notify);
> > > +
> > > + cpumask_copy(&hdev->affinity_mask, mask);
> > > + del_timer_sync(&hdev->service_timer);
> > > + hdev->service_timer.expires = jiffies + HZ;
> > > + add_timer_on(&hdev->service_timer, cpumask_first(&hdev-
> > > > affinity_mask));
> > > +}
> >
> > I don't see any relation between your misc irq vector and &hdev-
> > > service_timer, to me this looks like an abuse of the irq affinity
> > > API
> > to allow the user to move the service timer affinity.
>
> Hi, thanks for reviewing.
>
> hdev->service_timer is used to schedule the periodic work
> queue hdev->service_task, we want all the management work
> queue including hdev->service_task to bind to the same cpu
> to improve cache and power efficiency, it is better to move
> service timer affinity according to that.
>
> The hdev->service_task is changed to delay work queue in
> next patch " net: hns3: make hclge_service use delayed workqueue",
> So the affinity in the timer of the delay work queue is automatically
> set to the affinity of the delay work queue, we will move the
> "make hclge_service use delayed workqueue" patch before the
> "add interrupt affinity support for misc interrupt" patch, so
> we do not have to set service timer affinity explicitly.
>
> Also, There is there work queues(mbx_service_task, service_task,
> rst_service_task) in the hns3 driver, we plan to combine them
> to one or two workqueue to improve efficiency and readability.
>

So just to make it clear, you have 3 deferred works, 2 are triggered by
the misc irq and 1 is periodic, you want them all on the same core and
you want to control their affinity via the irq affinity ? for works #1
and #2 you get that for free since the irq is triggering them, but for
work #3 (the periodic one) you need to manually move it when the irq
affinity changes.

I guess i am ok with this, since moving the irq affinity isn't only
required by the periodic work but also for the other works that the irq
is actually triggering (so it is not an actual abuse, sort of .. )



2019-07-29 09:22:20

by Salil Mehta

[permalink] [raw]
Subject: RE: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for misc interrupt

> From: tanhuazhong
> Sent: Wednesday, July 24, 2019 4:19 AM
> To: [email protected]
> Cc: [email protected]; [email protected]; Salil Mehta
> <[email protected]>; Zhuangyuzeng (Yisen) <[email protected]>;
> Linuxarm <[email protected]>; linyunsheng <[email protected]>; lipeng
> (Y) <[email protected]>; tanhuazhong <[email protected]>
> Subject: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for
> misc interrupt
>
> From: Yunsheng Lin <[email protected]>
>
> The misc interrupt is used to schedule the reset and mailbox
> subtask, and a 1 sec timer is used to schedule the service
> subtask, which does periodic work.
>
> This patch sets the above three subtask's affinity using the
> misc interrupt' affinity.
>
> Also this patch setups a affinity notify for misc interrupt to
> allow user to change the above three subtask's affinity.
>
> Signed-off-by: Yunsheng Lin <[email protected]>
> Signed-off-by: Peng Li <[email protected]>
> Signed-off-by: Huazhong Tan <[email protected]>
> ---
> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 59
> ++++++++++++++++++++--
> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 4 ++
> 2 files changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> index f345095..fe45986 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> @@ -1270,6 +1270,12 @@ static int hclge_configure(struct hclge_dev *hdev)
>
> hclge_init_kdump_kernel_config(hdev);
>
> + /* Set the init affinity based on pci func number */
> + i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev->dev)));
> + i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0;
> + cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev->pdev->dev)),
> + &hdev->affinity_mask);
> +
> return ret;
> }
>
> @@ -2502,14 +2508,16 @@ static void hclge_mbx_task_schedule(struct hclge_dev
> *hdev)
> {
> if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
> - schedule_work(&hdev->mbx_service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,


Why we have to use system Work Queue here? This could adversely affect
other work scheduled not related to the HNS3 driver. Mailbox is internal
to the driver and depending upon utilization of the mbx channel(which could
be heavy if many VMs are running), this might stress other system tasks
as well. Have we thought of this?



> + &hdev->mbx_service_task);
> }
>
> static void hclge_reset_task_schedule(struct hclge_dev *hdev)
> {
> if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
> - schedule_work(&hdev->rst_service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
> + &hdev->rst_service_task);
> }
>
> static void hclge_task_schedule(struct hclge_dev *hdev)
> @@ -2517,7 +2525,8 @@ static void hclge_task_schedule(struct hclge_dev *hdev)
> if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
> !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
> - (void)schedule_work(&hdev->service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,

Same here.


Salil.

2019-07-29 10:19:14

by Yunsheng Lin

[permalink] [raw]
Subject: Re: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for misc interrupt

On 2019/7/27 7:18, Saeed Mahameed wrote:
> On Thu, 2019-07-25 at 10:05 +0800, Yunsheng Lin wrote:
>> On 2019/7/25 3:24, Saeed Mahameed wrote:
>>> On Wed, 2019-07-24 at 11:18 +0800, Huazhong Tan wrote:
>>>> From: Yunsheng Lin <[email protected]>
>>>>
>
> [...]
>>>>
>>>> +static void hclge_irq_affinity_notify(struct irq_affinity_notify
>>>> *notify,
>>>> + const cpumask_t *mask)
>>>> +{
>>>> + struct hclge_dev *hdev = container_of(notify, struct hclge_dev,
>>>> + affinity_notify);
>>>> +
>>>> + cpumask_copy(&hdev->affinity_mask, mask);
>>>> + del_timer_sync(&hdev->service_timer);
>>>> + hdev->service_timer.expires = jiffies + HZ;
>>>> + add_timer_on(&hdev->service_timer, cpumask_first(&hdev-
>>>>> affinity_mask));
>>>> +}
>>>
>>> I don't see any relation between your misc irq vector and &hdev-
>>>> service_timer, to me this looks like an abuse of the irq affinity
>>>> API
>>> to allow the user to move the service timer affinity.
>>
>> Hi, thanks for reviewing.
>>
>> hdev->service_timer is used to schedule the periodic work
>> queue hdev->service_task, we want all the management work
>> queue including hdev->service_task to bind to the same cpu
>> to improve cache and power efficiency, it is better to move
>> service timer affinity according to that.
>>
>> The hdev->service_task is changed to delay work queue in
>> next patch " net: hns3: make hclge_service use delayed workqueue",
>> So the affinity in the timer of the delay work queue is automatically
>> set to the affinity of the delay work queue, we will move the
>> "make hclge_service use delayed workqueue" patch before the
>> "add interrupt affinity support for misc interrupt" patch, so
>> we do not have to set service timer affinity explicitly.
>>
>> Also, There is there work queues(mbx_service_task, service_task,
>> rst_service_task) in the hns3 driver, we plan to combine them
>> to one or two workqueue to improve efficiency and readability.
>>
>
> So just to make it clear, you have 3 deferred works, 2 are triggered by
> the misc irq and 1 is periodic, you want them all on the same core and
> you want to control their affinity via the irq affinity ? for works #1
> and #2 you get that for free since the irq is triggering them, but for
> work #3 (the periodic one) you need to manually move it when the irq
> affinity changes.

Yes, You are right.

>
> I guess i am ok with this, since moving the irq affinity isn't only
> required by the periodic work but also for the other works that the irq
> is actually triggering (so it is not an actual abuse, sort of .. )
>
>
>

2019-07-29 13:18:47

by Yunsheng Lin

[permalink] [raw]
Subject: Re: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for misc interrupt

On 2019/7/29 17:18, Salil Mehta wrote:
>> From: tanhuazhong
>> Sent: Wednesday, July 24, 2019 4:19 AM
>> To: [email protected]
>> Cc: [email protected]; [email protected]; Salil Mehta
>> <[email protected]>; Zhuangyuzeng (Yisen) <[email protected]>;
>> Linuxarm <[email protected]>; linyunsheng <[email protected]>; lipeng
>> (Y) <[email protected]>; tanhuazhong <[email protected]>
>> Subject: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for
>> misc interrupt
>>
>> From: Yunsheng Lin <[email protected]>
>>
>> The misc interrupt is used to schedule the reset and mailbox
>> subtask, and a 1 sec timer is used to schedule the service
>> subtask, which does periodic work.
>>
>> This patch sets the above three subtask's affinity using the
>> misc interrupt' affinity.
>>
>> Also this patch setups a affinity notify for misc interrupt to
>> allow user to change the above three subtask's affinity.
>>
>> Signed-off-by: Yunsheng Lin <[email protected]>
>> Signed-off-by: Peng Li <[email protected]>
>> Signed-off-by: Huazhong Tan <[email protected]>
>> ---
>> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 59
>> ++++++++++++++++++++--
>> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 4 ++
>> 2 files changed, 59 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
>> b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
>> index f345095..fe45986 100644
>> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
>> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
>> @@ -1270,6 +1270,12 @@ static int hclge_configure(struct hclge_dev *hdev)
>>
>> hclge_init_kdump_kernel_config(hdev);
>>
>> + /* Set the init affinity based on pci func number */
>> + i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev->dev)));
>> + i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0;
>> + cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev->pdev->dev)),
>> + &hdev->affinity_mask);
>> +
>> return ret;
>> }
>>
>> @@ -2502,14 +2508,16 @@ static void hclge_mbx_task_schedule(struct hclge_dev
>> *hdev)
>> {
>> if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
>> !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
>> - schedule_work(&hdev->mbx_service_task);
>> + queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
>
>
> Why we have to use system Work Queue here? This could adversely affect
> other work scheduled not related to the HNS3 driver. Mailbox is internal
> to the driver and depending upon utilization of the mbx channel(which could
> be heavy if many VMs are running), this might stress other system tasks
> as well. Have we thought of this?

If I understand the CMWQ correctly, it is better to reuse the system Work
Queue when the work queue needed by HNS3 driver shares the same property of
system Work Queue, because Concurrency Managed Workqueue mechnism has ensured
they have the same worker pool if they share the same property.

Some driver(i40e) allocates a work queue with WQ_MEM_RECLAIM, I am not sure
what is the usecase which requires the WQ_MEM_RECLAIM.

Anyway, If the HNS3 need to allocate a new work queue, we need to
figure out the usecase first.

>
>
>
>> + &hdev->mbx_service_task);
>> }
>>
>> static void hclge_reset_task_schedule(struct hclge_dev *hdev)
>> {
>> if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
>> !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
>> - schedule_work(&hdev->rst_service_task);
>> + queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
>> + &hdev->rst_service_task);
>> }
>>
>> static void hclge_task_schedule(struct hclge_dev *hdev)
>> @@ -2517,7 +2525,8 @@ static void hclge_task_schedule(struct hclge_dev *hdev)
>> if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
>> !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
>> !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
>> - (void)schedule_work(&hdev->service_task);
>> + queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
>
> Same here.
>
>
> Salil.
>
> .
>