LinuxLists.cc - [PATCH] nvme: Add support for Apple 2018+ models

2019-07-15 03:50:42

Subject: [PATCH] nvme: Add support for Apple 2018+ models

Based on reverse engineering and original patch by

Paul Pawlowski <[email protected]>

This adds support for Apple's weird implementation of NVMe in their
2018 or later machines. It accounts for the twice-as-big SQ entries
for the IO queues, and the fact that only interrupt vector 0 appears
to function properly.

Signed-off-by: Benjamin Herrenschmidt <[email protected]>
---

I reworked Paul's patch to be less invasive in nvme_submit_cmd()
hot path, effectively only adding a shift with a value hopefully
coming from the same cache line as existing stuff.

It could probably be made even less invasive by replacing
sq_extra_shift with an "sq_shift" that contains the complete shift
between entries, i.e. 6 or 7, and then replacing the memcpy to

&nvmeq->sq_cmds[nvmeq->sq_tail]

With something like

((char *)nvmeq->sq_cmds) + ((size_t)nvmeq->sq_tail << nvmeq->sq_shift)

But I doubt the difference will be measurable anywhere and it makes
the code grosser imho.

Note: I'm not subscribed to linux-nvme, please CC me on replies.

drivers/nvme/host/nvme.h | 5 ++++
drivers/nvme/host/pci.c | 59 ++++++++++++++++++++++++++++------------
2 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 55553d293a98..9ae53cbfb320 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -92,6 +92,11 @@ enum nvme_quirks {
* Broken Write Zeroes.
*/
NVME_QUIRK_DISABLE_WRITE_ZEROES = (1 << 9),
+
+ /*
+ * Apple 2018 and latter variant has a few issues
+ */
+ NVME_QUIRK_APPLE_2018 = (1 << 10),
};

/*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 524d6bd6d095..1a41412fc48b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -27,8 +27,8 @@
#include "trace.h"
#include "nvme.h"

-#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
-#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
+#define SQ_SIZE(q) ((((size_t)(q)->q_depth) << (q)->sq_extra_shift) * sizeof(struct nvme_command))
+#define CQ_SIZE(q) (((size_t)(q)->q_depth) * sizeof(struct nvme_completion))

#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))

@@ -195,6 +195,7 @@ struct nvme_queue {
u16 last_cq_head;
u16 qid;
u8 cq_phase;
+ u8 sq_extra_shift;
unsigned long flags;
#define NVMEQ_ENABLED 0
#define NVMEQ_SQ_CMB 1
@@ -504,8 +505,11 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
bool write_sq)
{
+ u16 sq_actual_pos;
+
spin_lock(&nvmeq->sq_lock);
- memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+ sq_actual_pos = nvmeq->sq_tail << nvmeq->sq_extra_shift;
+ memcpy(&nvmeq->sq_cmds[sq_actual_pos], cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
nvme_write_sq_db(nvmeq, write_sq);
@@ -1361,16 +1365,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
- dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth),
+ dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
if (!nvmeq->sq_cmds)
return;

if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev),
- nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
+ nvmeq->sq_cmds, SQ_SIZE(nvmeq));
} else {
- dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth),
+ dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
}
}
@@ -1450,12 +1454,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
}

static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
- int qid, int depth)
+ int qid)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);

if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
+ nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq));
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
nvmeq->sq_cmds);
if (nvmeq->sq_dma_addr) {
@@ -1464,7 +1468,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
}
}

- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq),
&nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
@@ -1478,12 +1482,24 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
if (dev->ctrl.queue_count > qid)
return 0;

- nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth),
+ /*
+ * On Apple 2018 or later implementations, the IO submission
+ * queue(s) have twice as large entries. Current implementations
+ * seem to only have qid 1, let's assume this is true of all IO
+ * queues until we know better.
+ */
+ if (qid && (dev->ctrl.quirks & NVME_QUIRK_APPLE_2018))
+ nvmeq->sq_extra_shift = 1;
+ else
+ nvmeq->sq_extra_shift = 0;
+
+ nvmeq->q_depth = depth;
+ nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq),
&nvmeq->cq_dma_addr, GFP_KERNEL);
if (!nvmeq->cqes)
goto free_nvmeq;

- if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
+ if (nvme_alloc_sq_cmds(dev, nvmeq, qid))
goto free_cqdma;

nvmeq->dev = dev;
@@ -1492,15 +1508,14 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
- nvmeq->q_depth = depth;
nvmeq->qid = qid;
dev->ctrl.queue_count++;

return 0;

free_cqdma:
- dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
- nvmeq->cq_dma_addr);
+ dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes,
+ nvmeq->cq_dma_addr);
free_nvmeq:
return -ENOMEM;
}
@@ -1527,8 +1542,9 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
nvmeq->last_sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
+
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
- memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+ memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq));
nvme_dbbuf_init(dev, nvmeq, qid);
dev->online_queues++;
wmb(); /* ensure the first interrupt sees the initialization */
@@ -1546,9 +1562,16 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
*/
- if (!polled)
+ if (!polled) {
vector = dev->num_vecs == 1 ? 0 : qid;
- else
+
+ /*
+ * On Apple 2018 or later implementations, only vector 0 is accepted
+ * by the drive firmware
+ */
+ if (dev->ctrl.quirks & NVME_QUIRK_APPLE_2018)
+ vector = 0;
+ } else
set_bit(NVMEQ_POLLED, &nvmeq->flags);

result = adapter_alloc_cq(dev, qid, nvmeq, vector);
@@ -2949,6 +2972,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
+ .driver_data = NVME_QUIRK_APPLE_2018},
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);

2019-07-15 08:11:20

by [email protected]

[permalink] [raw]

Subject: Re: [PATCH] nvme: Add support for Apple 2018+ models

> + /*
> + * Apple 2018 and latter variant has a few issues
> + */
> + NVME_QUIRK_APPLE_2018 = (1 << 10),

We try to have quirks for the actual issue, so this should be one quirk
for the irq vectors issues, and another for the sq entry size. Note that
NVMe actually has the concept of an I/O queue entry size (IOSQES in the
CC register, based on values reported in the SQES field in Identify
Controller). Do these controllers report anything interesting there?

At the very least I'd make all the terminology based on that and then
just treat the Apple controllers as a buggy implementation of that model.

Btw, is there an open source Darwin NVMe driver that could explain this
mess a little better?

> @@ -504,8 +505,11 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
> static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
> bool write_sq)
> {
> + u16 sq_actual_pos;
> +
> spin_lock(&nvmeq->sq_lock);
> - memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
> + sq_actual_pos = nvmeq->sq_tail << nvmeq->sq_extra_shift;
> + memcpy(&nvmeq->sq_cmds[sq_actual_pos], cmd, sizeof(*cmd));

This is a little too magic. I think we'd be better off making sq_cmds
a void array and using manual indexing; at least that makes it very
obvious what is going on.

> - nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
> + nvmeq->sq_cmds, SQ_SIZE(nvmeq));

Btw, changing SQ_SIZE to take the queue seems like something that should
be split into a prep patch, making the main change a lot smaller.

> - if (!polled)
> + if (!polled) {
> +
> + /*
> + * On Apple 2018 or later implementations, only vector 0 is accepted

Please fix the > 80 char line.

2019-07-15 08:44:17

by Benjamin Herrenschmidt

[permalink] [raw]

Subject: Re: [PATCH] nvme: Add support for Apple 2018+ models

On Mon, 2019-07-15 at 10:10 +0200, Christoph Hellwig wrote:
> > + /*
> > + * Apple 2018 and latter variant has a few issues
> > + */
> > + NVME_QUIRK_APPLE_2018 = (1 << 10),
>
> We try to have quirks for the actual issue, so this should be one quirk
> for the irq vectors issues, and another for the sq entry size. Note that
> NVMe actually has the concept of an I/O queue entry size (IOSQES in the
> Cc register based on values reported in the SQES field in Identify
> Controller. Do these controllers report anything interesting there?

Ah good to know, I'll dig.

> At the very least I'd make all the terminology based on that and then
> just treat the Apple controllers as a buggy implementation of that model.

Yup, sounds good. I'll poke around tomorrow.

> Btw, are there open source darwin NVMe driver that could explain this
> mess a little better?

You wish...

> > @@ -504,8 +505,11 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
> > static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
> > bool write_sq)
> > {
> > + u16 sq_actual_pos;
> > +
> > spin_lock(&nvmeq->sq_lock);
> > - memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
> > + sq_actual_pos = nvmeq->sq_tail << nvmeq->sq_extra_shift;
> > + memcpy(&nvmeq->sq_cmds[sq_actual_pos], cmd, sizeof(*cmd));
>
> This is a little too magic. I think we'd better off making sq_cmds
> a void array and use manual indexing, at least that makes it very
> obvious what is going on.

Ok. That's plan B as I described in the message. There's an advantage
to doing that: it merges the indexing shift and the quirk shift into one.

I'll look into it & respin

> > - nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
> > + nvmeq->sq_cmds, SQ_SIZE(nvmeq));
>
> Btw, chaning SQ_SIZE to take the queue seems like something that should
> be split into a prep patch, making the main change a lot smaller.

Sure. Will do.

> > - if (!polled)
> > + if (!polled) {
> > +
> > + /*
> > + * On Apple 2018 or later implementations, only vector 0 is accepted
>
> Please fix the > 80 char line.

Ok.

Thanks for the review.

Cheers,
Ben.

2019-07-15 09:04:52

by Benjamin Herrenschmidt

[permalink] [raw]

Subject: Re: [PATCH] nvme: Add support for Apple 2018+ models

On Mon, 2019-07-15 at 18:43 +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2019-07-15 at 10:10 +0200, Christoph Hellwig wrote:
> > > + /*
> > > + * Apple 2018 and latter variant has a few issues
> > > + */
> > > + NVME_QUIRK_APPLE_2018 = (1 << 10),
> >
> > We try to have quirks for the actual issue, so this should be one quirk
> > for the irq vectors issues, and another for the sq entry size. Note that
> > NVMe actually has the concept of an I/O queue entry size (IOSQES in the
> > Cc register based on values reported in the SQES field in Identify
> > Controller. Do these controllers report anything interesting there?
>
> Ah good to know, I'll dig.

Interesting... so SQES is 0x76, indicating that it supports the larger
entry size but not that it mandates it.

However, we configure CC:IOSQES with 6 and the HW fails unless we have
the 128 bytes entry size.

So the HW is bogus, but we can probably sort that by doing a better job
at fixing up SQES in the identify on the Apple HW, and then actually
using it for the SQ.

I checked and CC is 0x00460001 so it takes our write of "6" fine. I
think they just ignore the value.

How do you want to proceed here? Should I go all the way and attempt
to honor the SQES "mandatory" size field (and quirk *that*), or should I
just go the simpler way and stick to shift 6 unless it's Apple?

If I go the complicated path, should I do the same with cq size
(knowing that no known HW has a non-4 mandatory size there and we don't
know of a HW bug... yet).

Cheers,
Ben.

2019-07-15 09:29:09

by Maxim Levitsky

[permalink] [raw]

Subject: Re: [PATCH] nvme: Add support for Apple 2018+ models

On Mon, 2019-07-15 at 19:03 +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2019-07-15 at 18:43 +1000, Benjamin Herrenschmidt wrote:
> > On Mon, 2019-07-15 at 10:10 +0200, Christoph Hellwig wrote:
> > > > + /*
> > > > + * Apple 2018 and latter variant has a few issues
> > > > + */
> > > > + NVME_QUIRK_APPLE_2018 = (1 << 10),
> > >
> > > We try to have quirks for the actual issue, so this should be one quirk
> > > for the irq vectors issues, and another for the sq entry size. Note that
> > > NVMe actually has the concept of an I/O queue entry size (IOSQES in the
> > > Cc register based on values reported in the SQES field in Identify
> > > Controller. Do these controllers report anything interesting there?
> >
> > Ah good to know, I'll dig.
>
> Interesting... so SQES is 0x76, indicating that it supports the larger
> entry size but not that it mandates it.
>
> However, we configure CC:IOSQES with 6 and the HW fails unless we have
> the 128 bytes entry size.
>
> So the HW is bogus, but we can probably sort that by doing a better job
> at fixing up SQES in the identify on the Apple HW, and then actually
> using it for the SQ.
>
> I checked and CC is 0x00460001 so it takes our write of "6" fine. I
> think they just ignore the value.
>
> How do you want to proceed here ? Should I go all the way at attempting
> to honor sqes "mandatory" size field (and quirk *that*) or just I go
> the simpler way and stick to shift 6 unless Apple ?
>
> If I go the complicated path, should I do the same with cq size
> (knowing that no known HW has a non-4 mandatory size there and we don't
> know of a HW bug... yet).
>
> Cheers,
> Ben.
>

To be honest, the spec explicitly states that the minimum submission queue entry size is 64 bytes
and the minimum completion queue entry size is 16 bytes for the NVM command set:

"Bits 3:0 define the required (i.e., minimum) Submission Queue Entry size when
using the NVM Command Set. This is the minimum entry size that may be used.
The value is in bytes and is reported as a power of two (2^n). The required value
shall be 6, corresponding to 64."

"Bits 3:0 define the required (i.e., minimum) Completion Queue entry size when using
the NVM Command Set. This is the minimum entry size that may be used. The value
is in bytes and is reported as a power of two (2^n). The required value shall be 4,
corresponding to 16."

Pages 136/137, NVME 1.3d.

In theory the spec allows for a non-NVM I/O command set, for which the sq/cq entry sizes can be of any size,
as indicated in SQES/CQES and set in CC.IOCQES/CC.IOSQES, but then most of the spec won't apply to it.

Also FYI, the values in CC (IOCQES/IOSQES) are for I/O queues, which kind of implies that the admin queue
should always use the 64/16 byte entries, although I haven't found any explicit mention of that.

Best regards,
Maxim Levitsky

2019-07-15 10:04:01

by Benjamin Herrenschmidt

[permalink] [raw]

Subject: Re: [PATCH] nvme: Add support for Apple 2018+ models

On Mon, 2019-07-15 at 12:28 +0300, Maxim Levitsky wrote:
>
> To be honest, the spec explicitly states that minimum submission queue entry size is 64
> and minimum completion entry size should be is 16 bytes for NVM command set:
>
> "Bits 3:0 define the required (i.e., minimum) Submission Queue Entry size when
> using the NVM Command Set. This is the minimum entry size that may be used.
> The value is in bytes and is reported as a power of two (2^n). The required value
> shall be 6, corresponding to 64."

Yes, I saw that :-) Apple seems to ignore this and CC:IOSQES and
effectively hard wire a size of 7 (128 bytes) for the IO queue.

> "Bits 3:0 define the required (i.e., minimum) Completion Queue entry size when using
> the NVM Command Set. This is the minimum entry size that may be used. The value
> is in bytes and is reported as a power of two (2^n). The required value shall be 4,
> corresponding to 16."
>
> Pages 136/137, NVME 1.3d.
>
> In theory the spec allows for non NVM IO command set, and for which the sq/cq entry sizes can be of any size,
> as indicated in SQES/CQES and set in CC.IOCQES/CC.IOSQES, but than most of the spec won't apply to it.
>
>
> Also FYI, values in CC (IOCQES/IOSQES) are for I/O queues, which kind of implies that admin queue,
> should always use the 64/16 bytes entries, although I haven't found any explicit mention of that.

Right, and it does on the Apple HW as well.

Cheers,
Ben.

2019-07-15 12:31:44

by [email protected]

[permalink] [raw]

Subject: Re: [PATCH] nvme: Add support for Apple 2018+ models

On Mon, Jul 15, 2019 at 12:28:05PM +0300, Maxim Levitsky wrote:
>
> To be honest, the spec explicitly states that minimum submission queue entry size is 64
> and minimum completion entry size should be is 16 bytes for NVM command set:

That doesn't keep Apple from implementing whatever they want and soldering
that down on their MacBook mainboards unfortunately :(