MIME-Version: 1.0
References: <20230228094110.37-1-xieyongji@bytedance.com> <20230228094110.37-6-xieyongji@bytedance.com>
 <59bdd215-6465-a240-338c-04a3a67a061d@redhat.com>
In-Reply-To: <59bdd215-6465-a240-338c-04a3a67a061d@redhat.com>
From:   Yongji Xie <xieyongji@bytedance.com>
Date:   Fri, 17 Mar 2023 15:04:32 +0800
Message-ID: <CACycT3tv-GZRK0OOec7e40fT=uDMN2cef-PcN-J54RL3ZF6LTQ@mail.gmail.com>
Subject: Re: [PATCH v3 05/11] vduse: Support automatic irq callback affinity
To:     Jason Wang <jasowang@redhat.com>
Cc:     "Michael S. Tsirkin" <mst@redhat.com>,
        Thomas Gleixner <tglx@linutronix.de>,
        Christoph Hellwig <hch@lst.de>,
        virtualization <virtualization@lists.linux-foundation.org>,
        linux-kernel <linux-kernel@vger.kernel.org>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
Precedence: bulk

On Thu, Mar 16, 2023 at 5:03 PM Jason Wang <jasowang@redhat.com> wrote:
>
>
> =E5=9C=A8 2023/2/28 17:41, Xie Yongji =E5=86=99=E9=81=93:
> > This brings current interrupt affinity spreading mechanism
> > to vduse device. We will make use of group_cpus_evenly()
> > to create an irq callback affinity mask for each virtqueue of
> > vduse device. Then we will spread IRQs between CPUs in the affinity
> > mask, in a round-robin manner, to run the irq callback.
> >
> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > ---
> >   drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++-=
-
> >   1 file changed, 123 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_use=
r/vduse_dev.c
> > index 98359d87a06f..bde28a8692d5 100644
> > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > @@ -23,6 +23,8 @@
> >   #include <linux/nospec.h>
> >   #include <linux/vmalloc.h>
> >   #include <linux/sched/mm.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/group_cpus.h>
> >   #include <uapi/linux/vduse.h>
> >   #include <uapi/linux/vdpa.h>
> >   #include <uapi/linux/virtio_config.h>
> > @@ -41,6 +43,8 @@
> >   #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
> >   #define VDUSE_MSG_DEFAULT_TIMEOUT 30
> >
> > +#define IRQ_UNBOUND -1
> > +
> >   struct vduse_virtqueue {
> >       u16 index;
> >       u16 num_max;
> > @@ -57,6 +61,8 @@ struct vduse_virtqueue {
> >       struct vdpa_callback cb;
> >       struct work_struct inject;
> >       struct work_struct kick;
> > +     int irq_effective_cpu;
> > +     struct cpumask irq_affinity;
> >   };
> >
> >   struct vduse_dev;
> > @@ -128,6 +134,7 @@ static struct class *vduse_class;
> >   static struct cdev vduse_ctrl_cdev;
> >   static struct cdev vduse_cdev;
> >   static struct workqueue_struct *vduse_irq_wq;
> > +static struct workqueue_struct *vduse_irq_bound_wq;
> >
> >   static u32 allowed_device_id[] =3D {
> >       VIRTIO_ID_BLOCK,
> > @@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_d=
evice *vdpa)
> >       return dev->generation;
> >   }
> >
> > +static void default_calc_sets(struct irq_affinity *affd, unsigned int =
affvecs)
> > +{
> > +     affd->nr_sets =3D 1;
> > +     affd->set_size[0] =3D affvecs;
> > +}
> > +
> > +struct cpumask *
> > +create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
> > +{
> > +     unsigned int affvecs =3D 0, curvec, usedvecs, i;
> > +     struct cpumask *masks =3D NULL;
> > +
> > +     if (nvecs > affd->pre_vectors + affd->post_vectors)
> > +             affvecs =3D nvecs - affd->pre_vectors - affd->post_vector=
s;
> > +
> > +     if (!affd->calc_sets)
> > +             affd->calc_sets =3D default_calc_sets;
> > +
> > +     affd->calc_sets(affd, affvecs);
> > +
> > +     if (!affvecs)
> > +             return NULL;
> > +
> > +     masks =3D kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
> > +     if (!masks)
> > +             return NULL;
> > +
> > +     /* Fill out vectors at the beginning that don't need affinity */
> > +     for (curvec =3D 0; curvec < affd->pre_vectors; curvec++)
> > +             cpumask_setall(&masks[curvec]);
> > +
> > +     for (i =3D 0, usedvecs =3D 0; i < affd->nr_sets; i++) {
> > +             unsigned int this_vecs =3D affd->set_size[i];
> > +             int j;
> > +             struct cpumask *result =3D group_cpus_evenly(this_vecs);
> > +
> > +             if (!result) {
> > +                     kfree(masks);
> > +                     return NULL;
> > +             }
> > +
> > +             for (j =3D 0; j < this_vecs; j++)
> > +                     cpumask_copy(&masks[curvec + j], &result[j]);
> > +             kfree(result);
> > +
> > +             curvec +=3D this_vecs;
> > +             usedvecs +=3D this_vecs;
> > +     }
> > +
> > +     /* Fill out vectors at the end that don't need affinity */
> > +     if (usedvecs >=3D affvecs)
> > +             curvec =3D affd->pre_vectors + affvecs;
> > +     else
> > +             curvec =3D affd->pre_vectors + usedvecs;
> > +     for (; curvec < nvecs; curvec++)
> > +             cpumask_setall(&masks[curvec]);
> > +
> > +     return masks;
> > +}
> > +
> > +static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
> > +                                     struct irq_affinity *desc)
> > +{
> > +     struct vduse_dev *dev =3D vdpa_to_vduse(vdpa);
> > +     struct cpumask *masks;
> > +     int i;
> > +
> > +     masks =3D create_affinity_masks(dev->vq_num, desc);
> > +     if (!masks)
> > +             return;
> > +
> > +     for (i =3D 0; i < dev->vq_num; i++)
> > +             cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
> > +     kfree(masks);
> > +}
> > +
> >   static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
> >                               unsigned int asid,
> >                               struct vhost_iotlb *iotlb)
> > @@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_conf=
ig_ops =3D {
> >       .get_config             =3D vduse_vdpa_get_config,
> >       .set_config             =3D vduse_vdpa_set_config,
> >       .get_generation         =3D vduse_vdpa_get_generation,
> > +     .set_irq_affinity       =3D vduse_vdpa_set_irq_affinity,
> >       .reset                  =3D vduse_vdpa_reset,
> >       .set_map                =3D vduse_vdpa_set_map,
> >       .free                   =3D vduse_vdpa_free,
> > @@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct=
 *work)
> >   }
> >
> >   static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > -                                 struct work_struct *irq_work)
> > +                                 struct work_struct *irq_work,
> > +                                 int irq_effective_cpu)
> >   {
> >       int ret =3D -EINVAL;
> >
> > @@ -926,7 +1011,11 @@ static int vduse_dev_queue_irq_work(struct vduse_=
dev *dev,
> >               goto unlock;
> >
> >       ret =3D 0;
> > -     queue_work(vduse_irq_wq, irq_work);
> > +     if (irq_effective_cpu =3D=3D IRQ_UNBOUND)
> > +             queue_work(vduse_irq_wq, irq_work);
> > +     else
> > +             queue_work_on(irq_effective_cpu,
> > +                           vduse_irq_bound_wq, irq_work);
> >   unlock:
> >       up_read(&dev->rwsem);
> >
> > @@ -1029,6 +1118,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *=
dev,
> >       return ret;
> >   }
> >
> > +static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
> > +{
> > +     int curr_cpu =3D vq->irq_effective_cpu;
> > +
> > +     while (true) {
> > +             curr_cpu =3D cpumask_next(curr_cpu, &vq->irq_affinity);
> > +             if (cpu_online(curr_cpu))
> > +                     break;
> > +
> > +             if (curr_cpu >=3D nr_cpu_ids)
> > +                     curr_cpu =3D -1;
>
>
> IRQ_UNBOUND?
>

Will fix it.

>
> > +     }
> > +
> > +     vq->irq_effective_cpu =3D curr_cpu;
> > +}
> > +
> >   static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> >                           unsigned long arg)
> >   {
> > @@ -1111,7 +1216,7 @@ static long vduse_dev_ioctl(struct file *file, un=
signed int cmd,
> >               break;
> >       }
> >       case VDUSE_DEV_INJECT_CONFIG_IRQ:
> > -             ret =3D vduse_dev_queue_irq_work(dev, &dev->inject);
> > +             ret =3D vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_U=
NBOUND);
> >               break;
> >       case VDUSE_VQ_SETUP: {
> >               struct vduse_vq_config config;
> > @@ -1198,7 +1303,10 @@ static long vduse_dev_ioctl(struct file *file, u=
nsigned int cmd,
> >                       break;
> >
> >               index =3D array_index_nospec(index, dev->vq_num);
> > -             ret =3D vduse_dev_queue_irq_work(dev, &dev->vqs[index]->i=
nject);
> > +
> > +             vduse_vq_update_effective_cpu(dev->vqs[index]);
> > +             ret =3D vduse_dev_queue_irq_work(dev, &dev->vqs[index]->i=
nject,
> > +                                     dev->vqs[index]->irq_effective_cp=
u);
> >               break;
> >       }
> >       case VDUSE_IOTLB_REG_UMEM: {
> > @@ -1367,10 +1475,12 @@ static int vduse_dev_init_vqs(struct vduse_dev =
*dev, u32 vq_align, u32 vq_num)
> >                       goto err;
> >
> >               dev->vqs[i]->index =3D i;
> > +             dev->vqs[i]->irq_effective_cpu =3D -1;
>
>
> IRQ_UNBOUND?
>

Will fix it.

Thanks,
Yongji