2020-02-21 03:30:07

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

From: Alastair D'Silva <[email protected]>

This series adds support for OpenCAPI Persistent Memory devices, exposing
them as nvdimms so that we can make use of the existing infrastructure.

Alastair D'Silva (27):
powerpc: Add OPAL calls for LPC memory alloc/release
mm/memory_hotplug: Allow check_hotplug_memory_addressable to be called
from drivers
powerpc: Map & release OpenCAPI LPC memory
ocxl: Remove unnecessary externs
ocxl: Address kernel doc errors & warnings
ocxl: Tally up the LPC memory on a link & allow it to be mapped
ocxl: Add functions to map/unmap LPC memory
ocxl: Emit a log message showing how much LPC memory was detected
ocxl: Save the device serial number in ocxl_fn
powerpc: Add driver for OpenCAPI Persistent Memory
powerpc: Enable the OpenCAPI Persistent Memory driver for
powernv_defconfig
powerpc/powernv/pmem: Add register addresses & status values to the
header
powerpc/powernv/pmem: Read the capability registers & wait for device
ready
powerpc/powernv/pmem: Add support for Admin commands
powerpc/powernv/pmem: Add support for near storage commands
powerpc/powernv/pmem: Register a character device for userspace to
interact with
powerpc/powernv/pmem: Implement the Read Error Log command
powerpc/powernv/pmem: Add controller dump IOCTLs
powerpc/powernv/pmem: Add an IOCTL to report controller statistics
powerpc/powernv/pmem: Forward events to userspace
powerpc/powernv/pmem: Add an IOCTL to request controller health & perf
data
powerpc/powernv/pmem: Implement the heartbeat command
powerpc/powernv/pmem: Add debug IOCTLs
powerpc/powernv/pmem: Expose SMART data via ndctl
powerpc/powernv/pmem: Expose the serial number in sysfs
powerpc/powernv/pmem: Expose the firmware version in sysfs
MAINTAINERS: Add myself & nvdimm/ocxl to ocxl

MAINTAINERS | 3 +
arch/powerpc/configs/powernv_defconfig | 5 +
arch/powerpc/include/asm/opal-api.h | 2 +
arch/powerpc/include/asm/opal.h | 3 +
arch/powerpc/include/asm/pnv-ocxl.h | 40 +-
arch/powerpc/platforms/powernv/Kconfig | 3 +
arch/powerpc/platforms/powernv/Makefile | 1 +
arch/powerpc/platforms/powernv/ocxl.c | 43 +
arch/powerpc/platforms/powernv/opal-call.c | 2 +
arch/powerpc/platforms/powernv/pmem/Kconfig | 21 +
arch/powerpc/platforms/powernv/pmem/Makefile | 7 +
arch/powerpc/platforms/powernv/pmem/ocxl.c | 1991 +++++++++++++++++
.../platforms/powernv/pmem/ocxl_internal.c | 213 ++
.../platforms/powernv/pmem/ocxl_internal.h | 254 +++
.../platforms/powernv/pmem/ocxl_sysfs.c | 46 +
drivers/misc/ocxl/config.c | 74 +-
drivers/misc/ocxl/core.c | 61 +
drivers/misc/ocxl/link.c | 53 +
drivers/misc/ocxl/ocxl_internal.h | 45 +-
include/linux/memory_hotplug.h | 5 +
include/misc/ocxl.h | 122 +-
include/uapi/linux/ndctl.h | 1 +
include/uapi/nvdimm/ocxl-pmem.h | 127 ++
mm/memory_hotplug.c | 4 +-
24 files changed, 3029 insertions(+), 97 deletions(-)
create mode 100644 arch/powerpc/platforms/powernv/pmem/Kconfig
create mode 100644 arch/powerpc/platforms/powernv/pmem/Makefile
create mode 100644 arch/powerpc/platforms/powernv/pmem/ocxl.c
create mode 100644 arch/powerpc/platforms/powernv/pmem/ocxl_internal.c
create mode 100644 arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
create mode 100644 arch/powerpc/platforms/powernv/pmem/ocxl_sysfs.c
create mode 100644 include/uapi/nvdimm/ocxl-pmem.h

--
2.24.1


2020-02-21 03:30:07

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 07/27] ocxl: Add functions to map/unmap LPC memory

From: Alastair D'Silva <[email protected]>

Add functions to map/unmap LPC memory

Signed-off-by: Alastair D'Silva <[email protected]>
---
drivers/misc/ocxl/core.c | 51 +++++++++++++++++++++++++++++++
drivers/misc/ocxl/ocxl_internal.h | 3 ++
include/misc/ocxl.h | 21 +++++++++++++
3 files changed, 75 insertions(+)

diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c
index 2531c6cf19a0..75ff14e3882a 100644
--- a/drivers/misc/ocxl/core.c
+++ b/drivers/misc/ocxl/core.c
@@ -210,6 +210,56 @@ static void unmap_mmio_areas(struct ocxl_afu *afu)
release_fn_bar(afu->fn, afu->config.global_mmio_bar);
}

+/*
+ * Map the link's LPC memory and record where this AFU's LPC and special
+ * purpose regions sit inside it. Returns 0 when there is nothing to map or
+ * the mapping succeeded, -EINVAL if the link did not provide a base address.
+ */
+int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu)
+{
+	struct pci_dev *pdev = to_pci_dev(afu->fn->dev.parent);
+	u64 base;
+
+	/* An AFU with no LPC and no special purpose memory has nothing to map */
+	if (!(afu->config.lpc_mem_size + afu->config.special_purpose_mem_size))
+		return 0;
+
+	base = ocxl_link_lpc_map(afu->fn->link, pdev);
+	afu->lpc_base_addr = base;
+	if (!base)
+		return -EINVAL;
+
+	/* Both ranges are inclusive, hence the "- 1" on each end address */
+	if (afu->config.lpc_mem_size > 0) {
+		afu->lpc_res.start = base + afu->config.lpc_mem_offset;
+		afu->lpc_res.end = afu->lpc_res.start +
+				   afu->config.lpc_mem_size - 1;
+	}
+
+	if (afu->config.special_purpose_mem_size > 0) {
+		afu->special_purpose_res.start =
+			base + afu->config.special_purpose_mem_offset;
+		afu->special_purpose_res.end =
+			afu->special_purpose_res.start +
+			afu->config.special_purpose_mem_size - 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_afu_map_lpc_mem);
+
+/*
+ * Accessor for the AFU's LPC memory resource. The range it describes is
+ * filled in by ocxl_afu_map_lpc_mem(); nothing is modified here.
+ */
+struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu)
+{
+ return &afu->lpc_res;
+}
+EXPORT_SYMBOL_GPL(ocxl_afu_lpc_mem);
+
+/*
+ * Release this AFU's use of the link's LPC mapping and clear both resource
+ * ranges. Does nothing if neither range has a non-zero start address.
+ */
+static void unmap_lpc_mem(struct ocxl_afu *afu)
+{
+	struct pci_dev *pdev = to_pci_dev(afu->fn->dev.parent);
+
+	if (!afu->lpc_res.start && !afu->special_purpose_res.start)
+		return;
+
+	/* The link is only really released when its last consumer calls release */
+	ocxl_link_lpc_release(afu->fn->link, pdev);
+
+	afu->lpc_res.start = 0;
+	afu->lpc_res.end = 0;
+	afu->special_purpose_res.start = 0;
+	afu->special_purpose_res.end = 0;
+}
+
static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
{
int rc;
@@ -251,6 +301,7 @@ static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)

static void deconfigure_afu(struct ocxl_afu *afu)
{
+ unmap_lpc_mem(afu);
unmap_mmio_areas(afu);
reclaim_afu_pasid(afu);
reclaim_afu_actag(afu);
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
index d0c8c4838f42..ce0cac1da416 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -52,6 +52,9 @@ struct ocxl_afu {
void __iomem *global_mmio_ptr;
u64 pp_mmio_start;
void *private;
+ u64 lpc_base_addr; /* Covers both LPC & special purpose memory */
+ struct resource lpc_res;
+ struct resource special_purpose_res;
};

enum ocxl_context_status {
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 357ef1aadbc0..d8b0b4d46bfb 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -203,6 +203,27 @@ int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id,

// AFU Metadata

+/**
+ * ocxl_afu_map_lpc_mem() - Map the LPC system & special purpose memory for an AFU
+ * Do not call this during device discovery, as there may be multiple
+ * devices on a link, and the memory is mapped for the whole link, not
+ * just one device. It should only be called after all devices have
+ * registered their memory on the link.
+ *
+ * @afu: The AFU that has the LPC memory to map
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu);
+
+/**
+ * ocxl_afu_lpc_mem() - Get the physical address range of LPC memory for an AFU
+ * @afu: The AFU associated with the LPC memory
+ *
+ * Returns a pointer to the resource struct for the physical address range
+ */
+struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu);
+
/**
* ocxl_afu_config() - Get a pointer to the config for an AFU
* @afu: a pointer to the AFU to get the config for
--
2.24.1

2020-02-21 03:30:09

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 01/27] powerpc: Add OPAL calls for LPC memory alloc/release

From: Alastair D'Silva <[email protected]>

Add OPAL calls for LPC memory alloc/release

Signed-off-by: Alastair D'Silva <[email protected]>
Acked-by: Andrew Donnellan <[email protected]>
Acked-by: Frederic Barrat <[email protected]>
---
arch/powerpc/include/asm/opal-api.h | 2 ++
arch/powerpc/include/asm/opal.h | 3 +++
arch/powerpc/platforms/powernv/opal-call.c | 2 ++
3 files changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index c1f25a760eb1..9298e603001b 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -208,6 +208,8 @@
#define OPAL_HANDLE_HMI2 166
#define OPAL_NX_COPROC_INIT 167
#define OPAL_XIVE_GET_VP_STATE 170
+#define OPAL_NPU_MEM_ALLOC 171
+#define OPAL_NPU_MEM_RELEASE 172
#define OPAL_MPIPL_UPDATE 173
#define OPAL_MPIPL_REGISTER_TAG 174
#define OPAL_MPIPL_QUERY_TAG 175
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9986ac34b8e2..8f7727e0f9ce 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -39,6 +39,9 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
uint64_t PE_handle);
int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
uint64_t rate_phys, uint32_t size);
+int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t bdfn,
+ uint64_t size, uint64_t *bar);
+int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t bdfn);

int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 5cd0f52d258f..f26e58b72c04 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -287,6 +287,8 @@ OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
+OPAL_CALL(opal_npu_mem_alloc, OPAL_NPU_MEM_ALLOC);
+OPAL_CALL(opal_npu_mem_release, OPAL_NPU_MEM_RELEASE);
OPAL_CALL(opal_mpipl_update, OPAL_MPIPL_UPDATE);
OPAL_CALL(opal_mpipl_register_tag, OPAL_MPIPL_REGISTER_TAG);
OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG);
--
2.24.1

2020-02-21 03:30:22

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 16/27] powerpc/powernv/pmem: Register a character device for userspace to interact with

From: Alastair D'Silva <[email protected]>

This patch introduces a character device (/dev/ocxl-scmX) which further
patches will use to interact with userspace.

Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/platforms/powernv/pmem/ocxl.c | 116 +++++++++++++++++-
.../platforms/powernv/pmem/ocxl_internal.h | 2 +
2 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index b8bd7e703b19..63109a870d2c 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -10,6 +10,7 @@
#include <misc/ocxl.h>
#include <linux/delay.h>
#include <linux/ndctl.h>
+#include <linux/fs.h>
#include <linux/mm_types.h>
#include <linux/memory_hotplug.h>
#include "ocxl_internal.h"
@@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)

free_minor(ocxlpmem);

+ if (ocxlpmem->cdev.owner)
+ cdev_del(&ocxlpmem->cdev);
+
if (ocxlpmem->metadata_addr)
devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);

@@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem *ocxlpmem)
return device_register(&ocxlpmem->dev);
}

+/* Drop a reference on the embedded struct device (counterpart of ocxlpmem_get()) */
+static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
+{
+ put_device(&ocxlpmem->dev);
+}
+
+/*
+ * Take a reference on the embedded struct device.
+ * Returns @ocxlpmem, or NULL if get_device() handed back NULL.
+ */
+static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
+{
+	if (!get_device(&ocxlpmem->dev))
+		return NULL;
+
+	return ocxlpmem;
+}
+
+/*
+ * Look up the device registered for @devno's minor number and take a
+ * reference on it. Returns NULL if nothing is registered for that minor.
+ */
+static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
+{
+	struct ocxlpmem *found;
+
+	/*
+	 * No RCU critical section is declared around the lookup: the device is
+	 * protected by its reference count, and its minor is only removed from
+	 * the idr once that count has already dropped to 0. By then no user API
+	 * can still be accessing the device and this lookup cannot return it.
+	 */
+	found = idr_find(&minors_idr, MINOR(devno));
+	if (!found)
+		return NULL;
+
+	ocxlpmem_get(found);
+	return found;
+}
+
+/* open() handler: bind the file to the device registered for the inode's minor */
+static int file_open(struct inode *inode, struct file *file)
+{
+	struct ocxlpmem *target = find_and_get_ocxlpmem(inode->i_rdev);
+
+	if (target == NULL)
+		return -ENODEV;
+
+	/* The reference taken by the lookup is dropped again in file_release() */
+	file->private_data = target;
+	return 0;
+}
+
+/* release() handler: drop the device reference stored by file_open() */
+static int file_release(struct inode *inode, struct file *file)
+{
+	ocxlpmem_put(file->private_data);
+	return 0;
+}
+
+/* File operations for the character device; only open and release are wired up here */
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = file_open,
+ .release = file_release,
+};
+
+/**
+ * create_cdev() - Create the chardev in /dev for the device
+ * @ocxlpmem: the SCM metadata
+ *
+ * Return: 0 on success, negative on failure
+ */
+static int create_cdev(struct ocxlpmem *ocxlpmem)
+{
+	int rc;
+
+	cdev_init(&ocxlpmem->cdev, &fops);
+
+	/*
+	 * cdev_init() zeroes the cdev, so the owner has to be set by hand.
+	 * free_ocxlpmem() only calls cdev_del() when cdev.owner is non-NULL;
+	 * without this assignment the character device is never deleted.
+	 * NOTE(review): THIS_MODULE is NULL for built-in code, so the owner
+	 * test in free_ocxlpmem() still skips cdev_del() in that configuration.
+	 */
+	ocxlpmem->cdev.owner = THIS_MODULE;
+
+	rc = cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
+	if (rc)
+		ocxlpmem->cdev.owner = NULL; /* nothing was added, so nothing to delete */
+
+	return rc;
+}
+
/**
* ocxlpmem_remove() - Free an OpenCAPI persistent memory device
* @pdev: the PCI device information struct
@@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err;
}

+ if (create_cdev(ocxlpmem)) {
+ dev_err(&pdev->dev, "Could not create character device\n");
+ goto err;
+ }
+
elapsed = 0;
timeout = ocxlpmem->readiness_timeout + ocxlpmem->memory_available_timeout;
while (!is_usable(ocxlpmem, false)) {
@@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
.shutdown = ocxlpmem_remove,
};

+/*
+ * Set up the module-wide character device state: the minors idr and its
+ * lock, the chrdev region and the device class.
+ * Returns 0 on success or a negative errno, with everything unwound.
+ */
+static int file_init(void)
+{
+	int rc;
+
+	mutex_init(&minors_idr_lock);
+	idr_init(&minors_idr);
+
+	rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-pmem");
+	if (rc) {
+		pr_err("Unable to allocate OpenCAPI persistent memory major number: %d\n", rc);
+		goto err_idr;
+	}
+
+	ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
+	if (IS_ERR(ocxlpmem_class)) {
+		pr_err("Unable to create ocxl-pmem class\n");
+		rc = PTR_ERR(ocxlpmem_class);
+		goto err_region;
+	}
+
+	return 0;
+
+err_region:
+	unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
+err_idr:
+	idr_destroy(&minors_idr);
+	return rc;
+}
+
+/* Undo file_init(): destroy the class, release the chrdev region and the minors idr */
+static void file_exit(void)
+{
+ class_destroy(ocxlpmem_class);
+ unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
+ idr_destroy(&minors_idr);
+}
+
static int __init ocxlpmem_init(void)
{
- int rc = 0;
+ int rc;

- rc = pci_register_driver(&pci_driver);
+ rc = file_init();
if (rc)
return rc;

+ rc = pci_register_driver(&pci_driver);
+ if (rc) {
+ file_exit();
+ return rc;
+ }
+
return 0;
}

static void ocxlpmem_exit(void)
{
pci_unregister_driver(&pci_driver);
+ file_exit();
}

module_init(ocxlpmem_init);
diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
index 28e2020f6355..d2d81fec7bb1 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
@@ -2,6 +2,7 @@
// Copyright 2019 IBM Corp.

#include <linux/pci.h>
+#include <linux/cdev.h>
#include <misc/ocxl.h>
#include <linux/libnvdimm.h>
#include <linux/mm.h>
@@ -99,6 +100,7 @@ struct ocxlpmem_function0 {
struct ocxlpmem {
struct device dev;
struct pci_dev *pdev;
+ struct cdev cdev;
struct ocxl_fn *ocxl_fn;
struct nd_interleave_set nd_set;
struct nvdimm_bus_descriptor bus_desc;
--
2.24.1

2020-02-21 03:30:27

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 12/27] powerpc/powernv/pmem: Add register addresses & status values to the header

From: Alastair D'Silva <[email protected]>

These values have been taken from the device specifications.

Signed-off-by: Alastair D'Silva <[email protected]>
---
.../platforms/powernv/pmem/ocxl_internal.h | 72 +++++++++++++++++++
1 file changed, 72 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
index 0faf3740e9b8..9cf3e42750e7 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
@@ -8,6 +8,78 @@

#define LABEL_AREA_SIZE (1UL << PA_SECTION_SHIFT)

+/* Register offsets within the AFU's global MMIO area (values from the device specification) */
+#define GLOBAL_MMIO_CHI 0x000
+#define GLOBAL_MMIO_CHIC 0x008
+#define GLOBAL_MMIO_CHIE 0x010
+#define GLOBAL_MMIO_CHIEC 0x018
+#define GLOBAL_MMIO_HCI 0x020
+#define GLOBAL_MMIO_HCIC 0x028
+#define GLOBAL_MMIO_IMA0_OHP 0x040
+#define GLOBAL_MMIO_IMA0_CFP 0x048
+#define GLOBAL_MMIO_IMA1_OHP 0x050
+#define GLOBAL_MMIO_IMA1_CFP 0x058
+#define GLOBAL_MMIO_ACMA_CREQO 0x100
+#define GLOBAL_MMIO_ACMA_CRSPO 0x104
+#define GLOBAL_MMIO_ACMA_CDBO 0x108
+#define GLOBAL_MMIO_ACMA_CDBS 0x10c
+#define GLOBAL_MMIO_NSCMA_CREQO 0x120
+#define GLOBAL_MMIO_NSCMA_CRSPO 0x124
+#define GLOBAL_MMIO_NSCMA_CDBO 0x128
+#define GLOBAL_MMIO_NSCMA_CDBS 0x12c
+#define GLOBAL_MMIO_CSTS 0x140
+#define GLOBAL_MMIO_FWVER 0x148
+#define GLOBAL_MMIO_CCAP0 0x160
+#define GLOBAL_MMIO_CCAP1 0x168
+
+/* Bit masks for the CHI register family (presumably CHI/CHIC/CHIE/CHIEC - TODO confirm) */
+#define GLOBAL_MMIO_CHI_ACRA BIT_ULL(0)
+#define GLOBAL_MMIO_CHI_NSCRA BIT_ULL(1)
+#define GLOBAL_MMIO_CHI_CRDY BIT_ULL(4)
+#define GLOBAL_MMIO_CHI_CFFS BIT_ULL(5)
+#define GLOBAL_MMIO_CHI_MA BIT_ULL(6)
+#define GLOBAL_MMIO_CHI_ELA BIT_ULL(7)
+#define GLOBAL_MMIO_CHI_CDA BIT_ULL(8)
+#define GLOBAL_MMIO_CHI_CHFS BIT_ULL(9)
+
+/* Union of every CHI bit defined above */
+#define GLOBAL_MMIO_CHI_ALL (GLOBAL_MMIO_CHI_ACRA | \
+ GLOBAL_MMIO_CHI_NSCRA | \
+ GLOBAL_MMIO_CHI_CRDY | \
+ GLOBAL_MMIO_CHI_CFFS | \
+ GLOBAL_MMIO_CHI_MA | \
+ GLOBAL_MMIO_CHI_ELA | \
+ GLOBAL_MMIO_CHI_CDA | \
+ GLOBAL_MMIO_CHI_CHFS)
+
+/* Bit masks for the HCI register (presumably also HCIC - TODO confirm) */
+#define GLOBAL_MMIO_HCI_ACRW BIT_ULL(0)
+#define GLOBAL_MMIO_HCI_NSCRW BIT_ULL(1)
+#define GLOBAL_MMIO_HCI_AFU_RESET BIT_ULL(2)
+#define GLOBAL_MMIO_HCI_FW_DEBUG BIT_ULL(3)
+#define GLOBAL_MMIO_HCI_CONTROLLER_DUMP BIT_ULL(4)
+#define GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED BIT_ULL(5)
+#define GLOBAL_MMIO_HCI_REQ_HEALTH_PERF BIT_ULL(6)
+
+/* Admin command opcodes; ADMIN_COMMAND_MAX equals the highest opcode defined */
+#define ADMIN_COMMAND_HEARTBEAT 0x00u
+#define ADMIN_COMMAND_SHUTDOWN 0x01u
+#define ADMIN_COMMAND_FW_UPDATE 0x02u
+#define ADMIN_COMMAND_FW_DEBUG 0x03u
+#define ADMIN_COMMAND_ERRLOG 0x04u
+#define ADMIN_COMMAND_SMART 0x05u
+#define ADMIN_COMMAND_CONTROLLER_STATS 0x06u
+#define ADMIN_COMMAND_CONTROLLER_DUMP 0x07u
+#define ADMIN_COMMAND_CMD_CAPS 0x08u
+#define ADMIN_COMMAND_MAX 0x08u
+
+/* Command status values */
+#define STATUS_SUCCESS 0x00
+#define STATUS_MEM_UNAVAILABLE 0x20
+#define STATUS_BAD_OPCODE 0x50
+#define STATUS_BAD_REQUEST_PARM 0x51
+#define STATUS_BAD_DATA_PARM 0x52
+#define STATUS_DEBUG_BLOCKED 0x70
+#define STATUS_FAIL 0xFF
+
+/*
+ * Status values named for firmware handling. Note that 0x51 and 0x52 are the
+ * same values as STATUS_BAD_REQUEST_PARM and STATUS_BAD_DATA_PARM above;
+ * presumably these names apply only to firmware update commands - TODO confirm.
+ */
+#define STATUS_FW_UPDATE_BLOCKED 0x21
+#define STATUS_FW_ARG_INVALID 0x51
+#define STATUS_FW_INVALID 0x52
+
struct ocxlpmem_function0 {
struct pci_dev *pdev;
struct ocxl_fn *ocxl_fn;
--
2.24.1

2020-02-21 03:30:28

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 03/27] powerpc: Map & release OpenCAPI LPC memory

From: Alastair D'Silva <[email protected]>

This patch adds platform support to map & release LPC memory.

Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/include/asm/pnv-ocxl.h | 4 +++
arch/powerpc/platforms/powernv/ocxl.c | 43 +++++++++++++++++++++++++++
2 files changed, 47 insertions(+)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
index 7de82647e761..0b2a6707e555 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -32,5 +32,9 @@ extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)

extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
extern void pnv_ocxl_free_xive_irq(u32 irq);
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size);
+void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev);
+#endif

#endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index 8c65aacda9c8..f2edbcc67361 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -475,6 +475,49 @@ void pnv_ocxl_spa_release(void *platform_data)
}
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);

+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+/**
+ * pnv_ocxl_platform_lpc_setup() - Ask OPAL to allocate LPC memory for a device
+ * @pdev: the PCI device the memory is allocated for
+ * @size: the number of bytes requested
+ *
+ * The range returned by OPAL is checked with
+ * check_hotplug_memory_addressable(); if that check fails the allocation is
+ * handed back to OPAL so that it is not leaked.
+ *
+ * Return: the base address of the allocated range, or 0 on failure
+ */
+u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	u32 bdfn = pci_dev_id(pdev);
+	__be64 base_addr_be64;
+	u64 base_addr;
+	int rc;
+
+	rc = opal_npu_mem_alloc(phb->opal_id, bdfn, size, &base_addr_be64);
+	if (rc) {
+		dev_warn(&pdev->dev,
+			 "OPAL could not allocate LPC memory, rc=%d\n", rc);
+		return 0;
+	}
+
+	/* OPAL hands the address back big-endian */
+	base_addr = be64_to_cpu(base_addr_be64);
+
+	rc = check_hotplug_memory_addressable(base_addr >> PAGE_SHIFT,
+					      size >> PAGE_SHIFT);
+	if (rc) {
+		dev_warn(&pdev->dev,
+			 "LPC memory is not addressable, rc=%d\n", rc);
+
+		/* Give the range back to OPAL instead of leaking it */
+		rc = opal_npu_mem_release(phb->opal_id, bdfn);
+		if (rc)
+			dev_warn(&pdev->dev,
+				 "OPAL reported rc=%d when releasing LPC memory\n",
+				 rc);
+		return 0;
+	}
+
+	return base_addr;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_setup);
+
+/**
+ * pnv_ocxl_platform_lpc_release() - Ask OPAL to release a device's LPC memory
+ * @pdev: the PCI device whose LPC memory should be released
+ *
+ * A failure reported by OPAL is logged but not propagated to the caller.
+ */
+void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev)
+{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ u32 bdfn = pci_dev_id(pdev);
+ int rc;
+
+ rc = opal_npu_mem_release(phb->opal_id, bdfn);
+ if (rc)
+ dev_warn(&pdev->dev,
+ "OPAL reported rc=%d when releasing LPC memory\n", rc);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_release);
+#endif
+
int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
{
struct spa_data *data = (struct spa_data *) platform_data;
--
2.24.1

2020-02-21 03:30:30

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 11/27] powerpc: Enable the OpenCAPI Persistent Memory driver for powernv_defconfig

From: Alastair D'Silva <[email protected]>

This patch enables the OpenCAPI Persistent Memory driver, as well
as DAX support, for the 'powernv' platform.

DAX is not a strict requirement for the functioning of the driver, but it
is likely that a user will want to create a DAX device on top of their
persistent memory device.

Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/configs/powernv_defconfig | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 71749377d164..921d77bbd3d2 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -348,3 +348,8 @@ CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
CONFIG_VHOST_NET=m
CONFIG_PRINTK_TIME=y
+CONFIG_ZONE_DEVICE=y
+CONFIG_OCXL_PMEM=m
+CONFIG_DEV_DAX=m
+CONFIG_DEV_DAX_PMEM=m
+CONFIG_FS_DAX=y
--
2.24.1

2020-02-21 03:30:43

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs

From: Alastair D'Silva <[email protected]>

This patch adds IOCTLs to allow userspace to request & fetch dumps
of the internal controller state.

This is useful during debugging or when a fatal error on the controller
has occurred.

Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/platforms/powernv/pmem/ocxl.c | 132 +++++++++++++++++++++
include/uapi/nvdimm/ocxl-pmem.h | 15 +++
2 files changed, 147 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index 2b64504f9129..2cabafe1fc58 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
return 0;
}

+/**
+ * ioctl_controller_dump_data() - Copy a chunk of controller dump data to userspace
+ * @ocxlpmem: the device metadata
+ * @uarg: userspace argument giving the destination buffer, its size (a
+ *        multiple of 8, no larger than the admin command data area) and the
+ *        offset within the dump to fetch
+ *
+ * Issues the controller dump admin command under the admin command lock and
+ * copies the response data to the user buffer 8 bytes at a time.
+ *
+ * Return: 0 on success, negative on failure.
+ * NOTE(review): a non-success status from the controller is returned as-is,
+ * i.e. as a positive value - confirm that this is what userspace expects.
+ */
+static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
+	struct ioctl_ocxl_pmem_controller_dump_data __user *uarg)
+{
+	struct ioctl_ocxl_pmem_controller_dump_data args;
+	u16 i;
+	u64 val;
+	int rc;
+
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	/* Data is transferred in 64-bit units */
+	if (args.buf_size % 8)
+		return -EINVAL;
+
+	if (args.buf_size > ocxlpmem->admin_command.data_size)
+		return -EINVAL;
+
+	mutex_lock(&ocxlpmem->admin_command.lock);
+
+	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
+	if (rc)
+		goto out;
+
+	/* Request word: dump offset in the upper 32 bits, length in the lower */
+	val = ((u64)args.offset) << 32;
+	val |= args.buf_size;
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
+				      ocxlpmem->admin_command.request_offset + 0x08,
+				      OCXL_LITTLE_ENDIAN, val);
+	if (rc)
+		goto out;
+
+	rc = admin_command_execute(ocxlpmem);
+	if (rc)
+		goto out;
+
+	rc = admin_command_complete_timeout(ocxlpmem,
+					    ADMIN_COMMAND_CONTROLLER_DUMP);
+	if (rc < 0) {
+		dev_warn(&ocxlpmem->dev, "Controller dump timed out\n");
+		goto out;
+	}
+
+	rc = admin_response(ocxlpmem);
+	if (rc < 0)
+		goto out;
+	if (rc != STATUS_SUCCESS) {
+		warn_status(ocxlpmem,
+			    "Unexpected status from controller dump",
+			    rc);
+		goto out;
+	}
+
+	for (i = 0; i < args.buf_size; i += 8) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     ocxlpmem->admin_command.data_offset + i,
+					     OCXL_HOST_ENDIAN, &val);
+		if (rc)
+			goto out;
+
+		if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
+			rc = -EFAULT;
+			goto out;
+		}
+	}
+
+	if (copy_to_user(uarg, &args, sizeof(args))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = admin_response_handled(ocxlpmem);
+
+out:
+	mutex_unlock(&ocxlpmem->admin_command.lock);
+	return rc;
+}
+
+/**
+ * request_controller_dump() - Ask the controller to produce a dump
+ * @ocxlpmem: the device metadata
+ *
+ * Writes the CDA bit to the CHIC register, sets the CONTROLLER_DUMP bit in
+ * the HCI register and then polls HCI until that bit reads back as clear.
+ *
+ * Return: 0 on success, otherwise the error from the failing MMIO access
+ */
+int request_controller_dump(struct ocxlpmem *ocxlpmem)
+{
+	int rc;
+	u64 busy = 1;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_CHI_CDA);
+	if (rc)
+		return rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
+	if (rc)
+		return rc;
+
+	/*
+	 * NOTE(review): this wait has no timeout; it only ends when the bit
+	 * clears or an MMIO read fails.
+	 */
+	while (busy) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     GLOBAL_MMIO_HCI,
+					     OCXL_LITTLE_ENDIAN, &busy);
+		if (rc)
+			return rc;
+
+		busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
+		cond_resched();
+	}
+
+	return 0;
+}
+
+/*
+ * Set the CONTROLLER_DUMP_COLLECTED bit in the HCI register.
+ * Returns whatever the MMIO accessor returns.
+ */
+static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
+{
+ return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+ OCXL_LITTLE_ENDIAN,
+ GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
+}
+
static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
{
struct ocxlpmem *ocxlpmem = file->private_data;
@@ -650,7 +768,21 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
rc = ioctl_error_log(ocxlpmem,
(struct ioctl_ocxl_pmem_error_log __user *)args);
break;
+
+ case IOCTL_OCXL_PMEM_CONTROLLER_DUMP:
+ rc = request_controller_dump(ocxlpmem);
+ break;
+
+ case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA:
+ rc = ioctl_controller_dump_data(ocxlpmem,
+ (struct ioctl_ocxl_pmem_controller_dump_data __user *)args);
+ break;
+
+ case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
+ rc = ioctl_controller_dump_complete(ocxlpmem);
+ break;
}
+
return rc;
}

diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
index b10f8ac0c20f..d4d8512d03f7 100644
--- a/include/uapi/nvdimm/ocxl-pmem.h
+++ b/include/uapi/nvdimm/ocxl-pmem.h
@@ -38,9 +38,24 @@ struct ioctl_ocxl_pmem_error_log {
__u8 *buf; /* pointer to output buffer */
};

+/* Argument for IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA */
+struct ioctl_ocxl_pmem_controller_dump_data {
+ __u8 *buf; /* pointer to output buffer */
+ __u16 buf_size; /* in/out, buffer size provided/required.
+ * If required is greater than provided, the buffer
+ * will be truncated to the amount provided. If it's
+ * less, then only the required bytes will be populated.
+ * If it is 0, then there is no more dump data available.
+ */
+ __u32 offset; /* in, Offset within the dump */
+ __u64 reserved[8];
+};
+
/* ioctl numbers */
#define OCXL_PMEM_MAGIC 0x5C
/* SCM devices */
#define IOCTL_OCXL_PMEM_ERROR_LOG _IOWR(OCXL_PMEM_MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
+#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCXL_PMEM_MAGIC, 0x02)
+#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
+#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_MAGIC, 0x04)

#endif /* _UAPI_OCXL_SCM_H */
--
2.24.1

2020-02-21 03:30:55

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 08/27] ocxl: Emit a log message showing how much LPC memory was detected

From: Alastair D'Silva <[email protected]>

This patch emits a message showing how much LPC memory & special purpose
memory was detected on an OCXL device.

Signed-off-by: Alastair D'Silva <[email protected]>
---
drivers/misc/ocxl/config.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index a62e3d7db2bf..701ae6216abf 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -568,6 +568,10 @@ static int read_afu_lpc_memory_info(struct pci_dev *dev,
afu->special_purpose_mem_size =
total_mem_size - lpc_mem_size;
}
+
+ dev_info(&dev->dev, "Probed LPC memory of %#llx bytes and special purpose memory of %#llx bytes\n",
+ afu->lpc_mem_size, afu->special_purpose_mem_size);
+
return 0;
}

--
2.24.1

2020-02-21 03:31:12

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 02/27] mm/memory_hotplug: Allow check_hotplug_memory_addressable to be called from drivers

From: Alastair D'Silva <[email protected]>

When setting up OpenCAPI connected persistent memory, the range check may
not be performed until quite late (or perhaps not at all, if the user does
not establish a DAX device).

This patch makes the range check callable so we can perform the check while
probing the OpenCAPI SCM device.

Signed-off-by: Alastair D'Silva <[email protected]>
---
include/linux/memory_hotplug.h | 5 +++++
mm/memory_hotplug.c | 4 ++--
2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index f4d59155f3d4..34a69aecc45e 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -337,6 +337,11 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
extern void set_zone_contiguous(struct zone *zone);
extern void clear_zone_contiguous(struct zone *zone);

+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+int check_hotplug_memory_addressable(unsigned long pfn,
+ unsigned long nr_pages);
+#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
+
extern void __ref free_area_init_core_hotplug(int nid);
extern int __add_memory(int nid, u64 start, u64 size);
extern int add_memory(int nid, u64 start, u64 size);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0a54ffac8c68..14945f033594 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -276,8 +276,8 @@ static int check_pfn_span(unsigned long pfn, unsigned long nr_pages,
return 0;
}

-static int check_hotplug_memory_addressable(unsigned long pfn,
- unsigned long nr_pages)
+int check_hotplug_memory_addressable(unsigned long pfn,
+ unsigned long nr_pages)
{
const u64 max_addr = PFN_PHYS(pfn + nr_pages) - 1;

--
2.24.1

2020-02-21 03:31:30

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 21/27] powerpc/powernv/pmem: Add an IOCTL to request controller health & perf data

From: Alastair D'Silva <[email protected]>

When health & performance data is requested from the controller,
it responds with an error log containing the requested information.

This patch allows the request to be issued via an IOCTL.

Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/platforms/powernv/pmem/ocxl.c | 16 ++++++++++++++++
include/uapi/nvdimm/ocxl-pmem.h | 1 +
2 files changed, 17 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index e46696d3cc36..081883a8247a 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -1000,6 +1000,18 @@ static int ioctl_event_check(struct ocxlpmem *ocxlpmem, u64 __user *uarg)
return rc;
}

+/**
+ * req_controller_health_perf() - Request controller health & performance data
+ * @ocxlpmem: the device metadata
+ *
+ * Only raises the REQ_HEALTH_PERF bit in the HCI register; no data is read
+ * back here.
+ *
+ * Return: 0 on success, negative on failure
+ */
+int req_controller_health_perf(struct ocxlpmem *ocxlpmem)
+{
+ return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+ OCXL_LITTLE_ENDIAN,
+ GLOBAL_MMIO_HCI_REQ_HEALTH_PERF);
+}
+
static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
{
struct ocxlpmem *ocxlpmem = file->private_data;
@@ -1037,6 +1049,10 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
case IOCTL_OCXL_PMEM_EVENT_CHECK:
rc = ioctl_event_check(ocxlpmem, (u64 __user *)args);
break;
+
+ case IOCTL_OCXL_PMEM_REQUEST_HEALTH:
+ rc = req_controller_health_perf(ocxlpmem);
+ break;
}

return rc;
diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
index 988eb0bc413d..0d03abb44001 100644
--- a/include/uapi/nvdimm/ocxl-pmem.h
+++ b/include/uapi/nvdimm/ocxl-pmem.h
@@ -90,5 +90,6 @@ struct ioctl_ocxl_pmem_eventfd {
#define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_MAGIC, 0x05)
#define IOCTL_OCXL_PMEM_EVENTFD _IOW(OCXL_PMEM_MAGIC, 0x06, struct ioctl_ocxl_pmem_eventfd)
#define IOCTL_OCXL_PMEM_EVENT_CHECK _IOR(OCXL_PMEM_MAGIC, 0x07, __u64)
+#define IOCTL_OCXL_PMEM_REQUEST_HEALTH _IO(OCXL_PMEM_MAGIC, 0x08)

#endif /* _UAPI_OCXL_SCM_H */
--
2.24.1

2020-02-21 03:34:26

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 20/27] powerpc/powernv/pmem: Forward events to userspace

From: Alastair D'Silva <[email protected]>

Some of the interrupts that the card generates are better handled
by the userspace daemon, in particular:
Controller Hardware/Firmware Fatal
Controller Dump Available
Error Log available

This patch allows a userspace application to register an eventfd with
the driver via IOCTL_OCXL_PMEM_EVENTFD to receive notifications of these
interrupts.

Userspace can then identify what events have occurred by calling
IOCTL_OCXL_PMEM_EVENT_CHECK and checking the result against the
IOCTL_OCXL_PMEM_EVENT_FOO masks.

Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/platforms/powernv/pmem/ocxl.c | 216 ++++++++++++++++++
.../platforms/powernv/pmem/ocxl_internal.h | 5 +
include/uapi/nvdimm/ocxl-pmem.h | 16 ++
3 files changed, 237 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index 009d4fd29e7d..e46696d3cc36 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -10,6 +10,7 @@
#include <misc/ocxl.h>
#include <linux/delay.h>
#include <linux/ndctl.h>
+#include <linux/eventfd.h>
#include <linux/fs.h>
#include <linux/mm_types.h>
#include <linux/memory_hotplug.h>
@@ -335,11 +336,22 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
{
int rc;

+ // Disable doorbells
+ (void)ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIEC,
+ OCXL_LITTLE_ENDIAN,
+ GLOBAL_MMIO_CHI_ALL);
+
if (ocxlpmem->nvdimm_bus)
nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);

free_minor(ocxlpmem);

+ if (ocxlpmem->irq_addr[1])
+ iounmap(ocxlpmem->irq_addr[1]);
+
+ if (ocxlpmem->irq_addr[0])
+ iounmap(ocxlpmem->irq_addr[0]);
+
if (ocxlpmem->cdev.owner)
cdev_del(&ocxlpmem->cdev);

@@ -443,6 +455,11 @@ static int file_release(struct inode *inode, struct file *file)
{
struct ocxlpmem *ocxlpmem = file->private_data;

+ if (ocxlpmem->ev_ctx) {
+ eventfd_ctx_put(ocxlpmem->ev_ctx);
+ ocxlpmem->ev_ctx = NULL;
+ }
+
ocxlpmem_put(ocxlpmem);
return 0;
}
@@ -938,6 +955,51 @@ static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
return rc;
}

+/**
+ * ioctl_eventfd() - Register an eventfd to be signalled on controller events
+ * @ocxlpmem: the device metadata
+ * @uarg: userspace pointer to a struct ioctl_ocxl_pmem_eventfd
+ *
+ * Return: 0 on success, -EFAULT if @uarg cannot be read, -EINVAL if an
+ * eventfd is already registered, or the error reported by eventfd_ctx_fdget()
+ */
+static int ioctl_eventfd(struct ocxlpmem *ocxlpmem,
+			 struct ioctl_ocxl_pmem_eventfd __user *uarg)
+{
+	struct ioctl_ocxl_pmem_eventfd args;
+	struct eventfd_ctx *ev_ctx;
+
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	if (ocxlpmem->ev_ctx)
+		return -EINVAL;
+
+	/*
+	 * eventfd_ctx_fdget() reports failure with an ERR_PTR(), never NULL.
+	 * Only publish the context once it is known to be valid, so that the
+	 * interrupt handlers and file_release() never see an error pointer.
+	 */
+	ev_ctx = eventfd_ctx_fdget(args.eventfd);
+	if (IS_ERR(ev_ctx))
+		return PTR_ERR(ev_ctx);
+
+	ocxlpmem->ev_ctx = ev_ctx;
+
+	return 0;
+}
+
+/**
+ * ioctl_event_check() - Translate the controller's CHI bits into event flags
+ * @ocxlpmem: the device metadata
+ * @uarg: userspace pointer that receives a mask of IOCTL_OCXL_PMEM_EVENT_* bits
+ *
+ * Return: 0 on success, -EFAULT if @uarg cannot be written, or the negative
+ * error from ocxlpmem_chi()
+ */
+static int ioctl_event_check(struct ocxlpmem *ocxlpmem, u64 __user *uarg)
+{
+	u64 val = 0;
+	u64 chi = 0;
+	int rc;
+
+	rc = ocxlpmem_chi(ocxlpmem, &chi);
+	if (rc < 0)
+		return rc;
+
+	if (chi & GLOBAL_MMIO_CHI_ELA)
+		val |= IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE;
+
+	if (chi & GLOBAL_MMIO_CHI_CDA)
+		val |= IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE;
+
+	if (chi & GLOBAL_MMIO_CHI_CFFS)
+		val |= IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL;
+
+	if (chi & GLOBAL_MMIO_CHI_CHFS)
+		val |= IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL;
+
+	// copy_to_user() returns the number of bytes left uncopied, not an errno
+	if (copy_to_user(uarg, &val, sizeof(val)))
+		return -EFAULT;
+
+	return 0;
+}
+
static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
{
struct ocxlpmem *ocxlpmem = file->private_data;
@@ -966,6 +1028,15 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
rc = ioctl_controller_stats(ocxlpmem,
(struct ioctl_ocxl_pmem_controller_stats __user *)args);
break;
+
+ case IOCTL_OCXL_PMEM_EVENTFD:
+ rc = ioctl_eventfd(ocxlpmem,
+ (struct ioctl_ocxl_pmem_eventfd __user *)args);
+ break;
+
+ case IOCTL_OCXL_PMEM_EVENT_CHECK:
+ rc = ioctl_event_check(ocxlpmem, (u64 __user *)args);
+ break;
}

return rc;
@@ -1107,6 +1178,146 @@ static void dump_error_log(struct ocxlpmem *ocxlpmem)
kfree(buf);
}

+/*
+ * Handler for the first AFU interrupt: reads the CHI status and, for each of
+ * "error log available" and "controller dump available", logs a warning and
+ * signals the registered eventfd (if there is one).
+ */
+static irqreturn_t imn0_handler(void *private)
+{
+	struct ocxlpmem *ocxlpmem = private;
+	u64 status = 0;
+	bool error_log;
+	bool controller_dump;
+
+	// A failed read is deliberately ignored; status was zeroed beforehand
+	(void)ocxlpmem_chi(ocxlpmem, &status);
+
+	error_log = !!(status & GLOBAL_MMIO_CHI_ELA);
+	controller_dump = !!(status & GLOBAL_MMIO_CHI_CDA);
+
+	if (error_log) {
+		dev_warn(&ocxlpmem->dev, "Error log is available\n");
+
+		if (ocxlpmem->ev_ctx)
+			eventfd_signal(ocxlpmem->ev_ctx, 1);
+	}
+
+	if (controller_dump) {
+		dev_warn(&ocxlpmem->dev, "Controller dump is available\n");
+
+		if (ocxlpmem->ev_ctx)
+			eventfd_signal(ocxlpmem->ev_ctx, 1);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Handler for the second AFU interrupt: when the CHI status reports a fatal
+ * firmware or hardware condition, logs it, unregisters the nvdimm bus and
+ * signals the registered eventfd (if there is one).
+ *
+ * NOTE(review): nvdimm_bus_unregister() is called directly from this handler;
+ * confirm the context this callback runs in allows that.
+ */
+static irqreturn_t imn1_handler(void *private)
+{
+	struct ocxlpmem *ocxlpmem = private;
+	const u64 fatal_mask = GLOBAL_MMIO_CHI_CFFS | GLOBAL_MMIO_CHI_CHFS;
+	u64 status = 0;
+
+	// A failed read is deliberately ignored; status was zeroed beforehand
+	(void)ocxlpmem_chi(ocxlpmem, &status);
+
+	if (!(status & fatal_mask))
+		return IRQ_HANDLED;
+
+	dev_err(&ocxlpmem->dev,
+		"Controller status is fatal, chi=0x%llx, going offline\n", status);
+
+	if (ocxlpmem->nvdimm_bus) {
+		nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);
+		ocxlpmem->nvdimm_bus = NULL;
+	}
+
+	if (ocxlpmem->ev_ctx)
+		eventfd_signal(ocxlpmem->ev_ctx, 1);
+
+	return IRQ_HANDLED;
+}
+
+
+/**
+ * ocxlpmem_setup_irq() - Set up the IRQs for the OpenCAPI Persistent Memory device
+ * @ocxlpmem: the device metadata
+ *
+ * Allocates two AFU interrupts, attaches imn0_handler()/imn1_handler() to
+ * them, maps the address returned by ocxl_afu_irq_get_addr() for each and
+ * programs it into the GLOBAL_MMIO_IMA0/GLOBAL_MMIO_IMA1 registers, then
+ * enables the doorbells in GLOBAL_MMIO_CHIE.
+ *
+ * Return: 0 on success, negative on failure
+ */
+static int ocxlpmem_setup_irq(struct ocxlpmem *ocxlpmem)
+{
+	int rc;
+	u64 irq_addr;
+
+	rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem->irq_id[0]);
+	if (rc)
+		return rc;
+
+	rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem->irq_id[0],
+				  imn0_handler, NULL, ocxlpmem);
+	// Nothing has been mapped yet, so there is nothing to unwind here
+	if (rc)
+		return rc;
+
+	irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context, ocxlpmem->irq_id[0]);
+	if (!irq_addr)
+		return -EINVAL;
+
+	ocxlpmem->irq_addr[0] = ioremap(irq_addr, PAGE_SIZE);
+	if (!ocxlpmem->irq_addr[0])
+		return -EINVAL;
+
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA0_OHP,
+				      OCXL_LITTLE_ENDIAN,
+				      (u64)ocxlpmem->irq_addr[0]);
+	if (rc)
+		goto out_irq0;
+
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA0_CFP,
+				      OCXL_LITTLE_ENDIAN, 0);
+	if (rc)
+		goto out_irq0;
+
+	rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem->irq_id[1]);
+	if (rc)
+		goto out_irq0;
+
+	rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem->irq_id[1],
+				  imn1_handler, NULL, ocxlpmem);
+	if (rc)
+		goto out_irq0;
+
+	irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context, ocxlpmem->irq_id[1]);
+	if (!irq_addr) {
+		rc = -EFAULT;
+		goto out_irq0;
+	}
+
+	ocxlpmem->irq_addr[1] = ioremap(irq_addr, PAGE_SIZE);
+	if (!ocxlpmem->irq_addr[1]) {
+		rc = -EINVAL;
+		goto out_irq0;
+	}
+
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA1_OHP,
+				      OCXL_LITTLE_ENDIAN,
+				      (u64)ocxlpmem->irq_addr[1]);
+	if (rc)
+		goto out_irq1;
+
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA1_CFP,
+				      OCXL_LITTLE_ENDIAN, 0);
+	if (rc)
+		goto out_irq1;
+
+	// Enable doorbells
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIE,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_CHI_ELA | GLOBAL_MMIO_CHI_CDA |
+				    GLOBAL_MMIO_CHI_CFFS | GLOBAL_MMIO_CHI_CHFS |
+				    GLOBAL_MMIO_CHI_NSCRA);
+	if (rc)
+		goto out_irq1;
+
+	return 0;
+
+out_irq1:
+	iounmap(ocxlpmem->irq_addr[1]);
+	ocxlpmem->irq_addr[1] = NULL;
+
+out_irq0:
+	// Clear the pointers so free_ocxlpmem() does not unmap them a second time
+	iounmap(ocxlpmem->irq_addr[0]);
+	ocxlpmem->irq_addr[0] = NULL;
+
+	return rc;
+}
+
/**
* probe_function0() - Set up function 0 for an OpenCAPI persistent memory device
* This is important as it enables templates higher than 0 across all other functions,
@@ -1216,6 +1427,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err;
}

+ if (ocxlpmem_setup_irq(ocxlpmem)) {
+ dev_err(&pdev->dev, "Could not set up OCXL IRQs\n");
+ goto err;
+ }
+
if (setup_command_metadata(ocxlpmem)) {
dev_err(&pdev->dev, "Could not read OCXL command matada\n");
goto err;
diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
index b953ee522ed4..927690f4888f 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
@@ -103,6 +103,10 @@ struct ocxlpmem {
struct pci_dev *pdev;
struct cdev cdev;
struct ocxl_fn *ocxl_fn;
+#define SCM_IRQ_COUNT 2
+ int irq_id[SCM_IRQ_COUNT];
+ struct dev_pagemap irq_pgmap[SCM_IRQ_COUNT];
+ void *irq_addr[SCM_IRQ_COUNT];
struct nd_interleave_set nd_set;
struct nvdimm_bus_descriptor bus_desc;
struct nvdimm_bus *nvdimm_bus;
@@ -113,6 +117,7 @@ struct ocxlpmem {
struct command_metadata ns_command;
struct resource pmem_res;
struct nd_region *nd_region;
+ struct eventfd_ctx *ev_ctx;
char fw_version[8+1];
u32 timeouts[ADMIN_COMMAND_MAX+1];

diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
index add223aa2fdb..988eb0bc413d 100644
--- a/include/uapi/nvdimm/ocxl-pmem.h
+++ b/include/uapi/nvdimm/ocxl-pmem.h
@@ -66,6 +66,20 @@ struct ioctl_ocxl_pmem_controller_stats {
__u64 cache_write_latency; /* nanoseconds */
};

+/* Argument for IOCTL_OCXL_PMEM_EVENTFD */
+struct ioctl_ocxl_pmem_eventfd {
+ __s32 eventfd; /* file descriptor of the eventfd the driver should signal */
+ __u32 reserved; /* not read by the eventfd ioctl handler in this patch */
+};
+
+#ifndef BIT_ULL
+#define BIT_ULL(nr) (1ULL << (nr))
+#endif
+
+#define IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE BIT_ULL(0)
+#define IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE BIT_ULL(1)
+#define IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL BIT_ULL(2)
+#define IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL BIT_ULL(3)
+
/* ioctl numbers */
#define OCXL_PMEM_MAGIC 0x5C
/* SCM devices */
@@ -74,5 +88,7 @@ struct ioctl_ocxl_pmem_controller_stats {
#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_MAGIC, 0x04)
#define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_MAGIC, 0x05)
+#define IOCTL_OCXL_PMEM_EVENTFD _IOW(OCXL_PMEM_MAGIC, 0x06, struct ioctl_ocxl_pmem_eventfd)
+#define IOCTL_OCXL_PMEM_EVENT_CHECK _IOR(OCXL_PMEM_MAGIC, 0x07, __u64)

#endif /* _UAPI_OCXL_SCM_H */
--
2.24.1

2020-02-21 03:35:44

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 25/27] powerpc/powernv/pmem: Expose the serial number in sysfs

From: Alastair D'Silva <[email protected]>

This information will be used by ndctl in userspace to help users identify
the device.

Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/platforms/powernv/pmem/Makefile | 2 +-
arch/powerpc/platforms/powernv/pmem/ocxl.c | 5 +++
.../platforms/powernv/pmem/ocxl_internal.h | 6 +++
.../platforms/powernv/pmem/ocxl_sysfs.c | 37 +++++++++++++++++++
4 files changed, 49 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/platforms/powernv/pmem/ocxl_sysfs.c

diff --git a/arch/powerpc/platforms/powernv/pmem/Makefile b/arch/powerpc/platforms/powernv/pmem/Makefile
index 4ceda25907d4..d02870806f30 100644
--- a/arch/powerpc/platforms/powernv/pmem/Makefile
+++ b/arch/powerpc/platforms/powernv/pmem/Makefile
@@ -4,4 +4,4 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror

obj-$(CONFIG_OCXL_PMEM) += ocxlpmem.o

-ocxlpmem-y := ocxl.o ocxl_internal.o
+ocxlpmem-y := ocxl.o ocxl_internal.o ocxl_sysfs.o
diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index 5cd1b6d78dd6..ec73713d05ad 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -1878,6 +1878,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err;
}

+ if (ocxlpmem_sysfs_add(ocxlpmem)) {
+ dev_err(&pdev->dev, "Could not create sysfs entries\n");
+ goto err;
+ }
+
elapsed = 0;
timeout = ocxlpmem->readiness_timeout + ocxlpmem->memory_available_timeout;
while (!is_usable(ocxlpmem, false)) {
diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
index 0eb7a35d24ae..12304ceace61 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
@@ -246,3 +246,9 @@ int ns_response_handled(const struct ocxlpmem *ocxlpmem);
*/
void warn_status(const struct ocxlpmem *ocxlpmem, const char *message,
u8 status);
+
+/**
+ * ocxlpmem_sysfs_add() - Create sysfs entries for an OpenCAPI persistent memory device
+ * @ocxlpmem: the device metadata
+ */
+int ocxlpmem_sysfs_add(struct ocxlpmem *ocxlpmem);
diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_sysfs.c b/arch/powerpc/platforms/powernv/pmem/ocxl_sysfs.c
new file mode 100644
index 000000000000..7829e4bc887d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl_sysfs.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2018 IBM Corp.
+
+#include <linux/sysfs.h>
+#include <linux/capability.h>
+#include <linux/limits.h>
+#include <linux/firmware.h>
+#include "ocxl_internal.h"
+
+/* sysfs show callback for "serial": prints the function config's serial number in decimal */
+static ssize_t serial_show(struct device *device, struct device_attribute *attr,
+			   char *buf)
+{
+	const struct ocxl_fn_config *config;
+	struct ocxlpmem *ocxlpmem;
+
+	ocxlpmem = container_of(device, struct ocxlpmem, dev);
+	config = ocxl_function_config(ocxlpmem->ocxl_fn);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", config->serial);
+}
+
+/* Device attributes created by ocxlpmem_sysfs_add() */
+static struct device_attribute attrs[] = {
+ __ATTR_RO(serial),
+};
+
+/*
+ * Create one sysfs file per entry in attrs[]. If any creation fails, the
+ * files created so far are removed again and the error is returned.
+ */
+int ocxlpmem_sysfs_add(struct ocxlpmem *ocxlpmem)
+{
+	struct device *dev = &ocxlpmem->dev;
+	int idx;
+	int rc = 0;
+
+	for (idx = 0; idx < ARRAY_SIZE(attrs); idx++) {
+		rc = device_create_file(dev, &attrs[idx]);
+		if (rc)
+			break;
+	}
+
+	if (!rc)
+		return 0;
+
+	// Roll back the files created before the failure, newest first
+	while (idx-- > 0)
+		device_remove_file(dev, &attrs[idx]);
+
+	return rc;
+}
--
2.24.1

2020-02-21 03:39:07

by Alastair D'Silva

[permalink] [raw]
Subject: [PATCH v3 19/27] powerpc/powernv/pmem: Add an IOCTL to report controller statistics

From: Alastair D'Silva <[email protected]>

The controller can report a number of statistics that are useful
in evaluating the performance and reliability of the card.

This patch exposes this information via an IOCTL.

Signed-off-by: Alastair D'Silva <[email protected]>
---
arch/powerpc/platforms/powernv/pmem/ocxl.c | 185 +++++++++++++++++++++
include/uapi/nvdimm/ocxl-pmem.h | 17 ++
2 files changed, 202 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index 2cabafe1fc58..009d4fd29e7d 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -758,6 +758,186 @@ static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
}

+/**
+ * controller_stats_header_parse() - Parse the first 64 bits of the controller stats admin command response
+ * @ocxlpmem: the device metadata
+ * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
+ *
+ * Return: 0 on success, -EINVAL if the data identifier is not 'CS', or the
+ * error from reading the header
+ */
+static int controller_stats_header_parse(struct ocxlpmem *ocxlpmem,
+					 u32 *length)
+{
+	int rc;
+	u64 val;
+
+	u16 data_identifier;
+	u32 data_length;
+	char got[2];
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		return rc;
+
+	data_identifier = val >> 48;
+	data_length = val & 0xFFFFFFFF;
+
+	if (data_identifier != 0x4353) { // 'CS'
+		/*
+		 * Unpack most significant byte first so the log shows the
+		 * characters in the same order as the expected 'CS',
+		 * independent of the host's byte order.
+		 */
+		got[0] = data_identifier >> 8;
+		got[1] = data_identifier & 0xFF;
+
+		dev_err(&ocxlpmem->dev,
+			"Bad data identifier for controller stats, expected 'CS', got '%-.*s'\n",
+			2, got);
+		return -EINVAL;
+	}
+
+	*length = data_length;
+	return 0;
+}
+
+/**
+ * ioctl_controller_stats() - Fetch controller statistics and copy them to userspace
+ * @ocxlpmem: the device metadata
+ * @uarg: userspace pointer that receives a struct ioctl_ocxl_pmem_controller_stats
+ *
+ * Issues ADMIN_COMMAND_CONTROLLER_STATS while holding the admin command lock
+ * and unpacks the response from the admin command data area.
+ *
+ * Return: 0 on success, negative on failure, or the controller status value
+ * if the command did not complete with STATUS_SUCCESS
+ */
+static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
+				  struct ioctl_ocxl_pmem_controller_stats __user *uarg)
+{
+	struct ioctl_ocxl_pmem_controller_stats args;
+	// 64 bit values read verbatim; offsets follow the header (0x08) + 0x40
+	const struct {
+		u32 offset;
+		u64 *dest;
+	} counters[] = {
+		{ 0x08, &args.host_load_count },
+		{ 0x10, &args.host_store_count },
+		{ 0x18, &args.media_read_count },
+		{ 0x20, &args.media_write_count },
+		{ 0x28, &args.cache_hit_count },
+		{ 0x30, &args.cache_miss_count },
+		{ 0x38, &args.media_read_latency },
+		{ 0x40, &args.media_write_latency },
+		{ 0x48, &args.cache_read_latency },
+		{ 0x50, &args.cache_write_latency },
+	};
+	unsigned int i;
+	u32 length;
+	int rc;
+	u64 val;
+
+	// Zero everything, including padding, before it is copied to userspace
+	memset(&args, '\0', sizeof(args));
+
+	mutex_lock(&ocxlpmem->admin_command.lock);
+
+	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_STATS);
+	if (rc)
+		goto out;
+
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
+				      ocxlpmem->admin_command.request_offset + 0x08,
+				      OCXL_LITTLE_ENDIAN, 0);
+	if (rc)
+		goto out;
+
+	rc = admin_command_execute(ocxlpmem);
+	if (rc)
+		goto out;
+
+	rc = admin_command_complete_timeout(ocxlpmem,
+					    ADMIN_COMMAND_CONTROLLER_STATS);
+	if (rc < 0) {
+		dev_warn(&ocxlpmem->dev, "Controller stats timed out\n");
+		goto out;
+	}
+
+	rc = admin_response(ocxlpmem);
+	if (rc < 0)
+		goto out;
+	if (rc != STATUS_SUCCESS) {
+		warn_status(ocxlpmem,
+			    "Unexpected status from controller stats", rc);
+		goto out;
+	}
+
+	rc = controller_stats_header_parse(ocxlpmem, &length);
+	if (rc)
+		goto out;
+
+	/*
+	 * warn_status() takes a controller status code, not a format string,
+	 * so report the unexpected length with dev_warn() instead.
+	 */
+	if (length != 0x140)
+		dev_warn(&ocxlpmem->dev,
+			 "Unexpected length for controller stats data, expected 0x140, got 0x%x\n",
+			 length);
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset + 0x08 + 0x08,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		goto out;
+
+	args.reset_count = val >> 32;
+	args.reset_uptime = val & 0xFFFFFFFF;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset + 0x08 + 0x10,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		goto out;
+
+	args.power_on_uptime = val >> 32;
+
+	for (i = 0; i < ARRAY_SIZE(counters); i++) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     ocxlpmem->admin_command.data_offset + 0x08 + 0x40 +
+					     counters[i].offset,
+					     OCXL_LITTLE_ENDIAN, counters[i].dest);
+		if (rc)
+			goto out;
+	}
+
+	if (copy_to_user(uarg, &args, sizeof(args))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = admin_response_handled(ocxlpmem);
+
+out:
+	mutex_unlock(&ocxlpmem->admin_command.lock);
+	return rc;
+}
+
static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
{
struct ocxlpmem *ocxlpmem = file->private_data;
@@ -781,6 +961,11 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
rc = ioctl_controller_dump_complete(ocxlpmem);
break;
+
+ case IOCTL_OCXL_PMEM_CONTROLLER_STATS:
+ rc = ioctl_controller_stats(ocxlpmem,
+ (struct ioctl_ocxl_pmem_controller_stats __user *)args);
+ break;
}

return rc;
diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
index d4d8512d03f7..add223aa2fdb 100644
--- a/include/uapi/nvdimm/ocxl-pmem.h
+++ b/include/uapi/nvdimm/ocxl-pmem.h
@@ -50,6 +50,22 @@ struct ioctl_ocxl_pmem_controller_dump_data {
__u64 reserved[8];
};

+/* Result of IOCTL_OCXL_PMEM_CONTROLLER_STATS */
+struct ioctl_ocxl_pmem_controller_stats {
+	__u32 reset_count;
+	__u32 reset_uptime; /* seconds */
+	__u32 power_on_uptime; /* seconds */
+	__u32 reserved; /* explicit padding so the 64 bit fields stay 8 byte aligned */
+	__u64 host_load_count;
+	__u64 host_store_count;
+	__u64 media_read_count;
+	__u64 media_write_count;
+	__u64 cache_hit_count;
+	__u64 cache_miss_count;
+	__u64 media_read_latency; /* nanoseconds */
+	__u64 media_write_latency; /* nanoseconds */
+	__u64 cache_read_latency; /* nanoseconds */
+	__u64 cache_write_latency; /* nanoseconds */
+};
+
/* ioctl numbers */
#define OCXL_PMEM_MAGIC 0x5C
/* SCM devices */
@@ -57,5 +73,6 @@ struct ioctl_ocxl_pmem_controller_dump_data {
#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCXL_PMEM_MAGIC, 0x02)
#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_MAGIC, 0x04)
+#define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_MAGIC, 0x05)

#endif /* _UAPI_OCXL_SCM_H */
--
2.24.1

2020-02-21 07:10:00

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 02/27] mm/memory_hotplug: Allow check_hotplug_memory_addressable to be called from drivers

On 21/2/20 2:26 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> When setting up OpenCAPI connected persistent memory, the range check may
> not be performed until quite late (or perhaps not at all, if the user does
> not establish a DAX device).
>
> This patch makes the range check callable so we can perform the check while
> probing the OpenCAPI SCM device.
>
> Signed-off-by: Alastair D'Silva <[email protected]>

Reviewed-by: Andrew Donnellan <[email protected]>


--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-21 16:26:41

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Thu, Feb 20, 2020 at 7:28 PM Alastair D'Silva <[email protected]> wrote:
>
> From: Alastair D'Silva <[email protected]>
>
> This series adds support for OpenCAPI Persistent Memory devices, exposing
> them as nvdimms so that we can make use of the existing infrastructure.

A single sentence to introduce:

24 files changed, 3029 insertions(+), 97 deletions(-)

...is inadequate. What are OpenCAPI Persistent Memory devices? How do
they compare, in terms relevant to libnvdimm, to other persistent
memory devices? What challenges do they pose to the existing enabling?
What is the overall approach taken with this 27 patch break down? What
are the changes since v2, v1? If you incorporated someone's review
feedback note it in the cover letter changelog, if you didn't
incorporate someone's feedback note that too with an explanation.

In short, provide a bridge document for someone familiar with the
upstream infrastructure, but not necessarily steeped in powernv /
OpenCAPI platform details, to get started with this code.

For now, no need to resend the whole series, just reply to this
message with a fleshed out cover letter and then incorporate it going
forward for v4+.

2020-02-21 16:27:21

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Fri, Feb 21, 2020 at 8:21 AM Dan Williams <[email protected]> wrote:
>
> On Thu, Feb 20, 2020 at 7:28 PM Alastair D'Silva <[email protected]> wrote:
> >
> > From: Alastair D'Silva <[email protected]>
> >
> > This series adds support for OpenCAPI Persistent Memory devices, exposing
> > them as nvdimms so that we can make use of the existing infrastructure.
>
> A single sentence to introduce:
>
> 24 files changed, 3029 insertions(+), 97 deletions(-)
>
> ...is inadequate. What are OpenCAPI Persistent Memory devices? How do
> they compare, in terms relevant to libnvdimm, to other persistent
> memory devices? What challenges do they pose to the existing enabling?
> What is the overall approach taken with this 27 patch break down? What
> are the changes since v2, v1? If you incorporated someone's review
> feedback note it in the cover letter changelog, if you didn't

Assumptions and tradeoffs the implementation considered are also
critical for reviewing the approach.

2020-02-24 02:52:22

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 03/27] powerpc: Map & release OpenCAPI LPC memory

On 21/2/20 2:26 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> This patch adds platform support to map & release LPC memory.
>
> Signed-off-by: Alastair D'Silva <[email protected]>

Nothing seems obviously wrong here.

Reviewed-by: Andrew Donnellan <[email protected]>


--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-24 04:35:11

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Fri, 2020-02-21 at 08:21 -0800, Dan Williams wrote:
> On Thu, Feb 20, 2020 at 7:28 PM Alastair D'Silva <
> [email protected]> wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > This series adds support for OpenCAPI Persistent Memory devices,
> > exposing
> > them as nvdimms so that we can make use of the existing
> > infrastructure.
>
> A single sentence to introduce:
>
> 24 files changed, 3029 insertions(+), 97 deletions(-)
>
> ...is inadequate. What are OpenCAPI Persistent Memory devices? How do
> they compare, in terms relevant to libnvdimm, to other persistent
> memory devices? What challenges do they pose to the existing
> enabling?
> What is the overall approach taken with this 27 patch break down?
> What
> are the changes since v2, v1? If you incorporated someone's review
> feedback note it in the cover letter changelog, if you didn't
> incorporate someone's feedback note that too with an explanation.
>
> In short, provide a bridge document for someone familiar with the
> upstream infrastructure, but not necessarily steeped in powernv /
> OpenCAPI platform details, to get started with this code.
>
> For now, no need to resend the whole series, just reply to this
> message with a fleshed out cover letter and then incorporate it going
> forward for v4+.


Apologies, I was maintaining a changelog, and forgot to include it.
I'll flesh out the cover letter too:

This series adds support for OpenCAPI Persistent Memory devices on bare
metal (arch/powernv), exposing them as nvdimms so that we can make use
of the existing infrastructure. There already exists a driver for the
same devices abstracted through PowerVM (arch/pseries):
arch/powerpc/platforms/pseries/papr_scm.c

These devices are connected via OpenCAPI, and present as LPC (lowest
coherence point) memory to the system, practically, that means that
memory on these cards could be treated as conventional, cache-coherent
memory.

Since the devices are connected via OpenCAPI, they are not enumerated
via ACPI. Instead, OpenCAPI links present as pseudo-PCI bridges, with
devices below them.

This series introduces a driver that exposes the memory on these cards
as nvdimms, with each card getting its own bus. This is somewhat
complicated by the fact that the cards do not have out of band
persistent storage for metadata, so 1 SECTION_SIZE's (see SPARSEMEM)
worth of storage is carved out of the top of the card storage to
implement the ndctl_config_* calls.

The driver is not responsible for configuring the NPU (NVLink
Processing Unit) BARs to map the LPC memory from the card into the
system's physical address space, instead, it requests this to be done
via OPAL calls (typically implemented by Skiboot).

The series is structured as follows:
- Required infrastructure changes & cleanup
- A minimal driver implementation
- Implementing additional features within the driver

V3:
- Rebase against next/next-20200220
- Move driver to arch/powerpc/platforms/powernv, we now expect this
driver to go upstream via the powerpc tree
- "nvdimm/ocxl: Implement the Read Error Log command"
- Fix bad header path
- "nvdimm/ocxl: Read the capability registers & wait for device
ready"
- Fix overlapping masks between readiness_timeout &
memory_available_timeout
- "nvdimm: Add driver for OpenCAPI Storage Class Memory"
- Address minor review comments from Jonathan Cameron
- Remove attributes
- Default to module if building LIBNVDIMM
- Propagate errors up from called functions in probe()
- "nvdimm/ocxl: Expose SMART data via ndctl"
- Pack attributes in struct
- Support different size SMART buffers for compatibility with
newer
ndctls that may want more SMART attribs than we provide
- Rework to use ND_CMD_CALL instead of ND_CMD_SMART
- drop "ocxl: Free detached contexts in ocxl_context_detach_all()"
- "powerpc: Map & release OpenCAPI LPC memory"
- Remove 'extern'
- Only available with CONFIG_MEMORY_HOTPLUG_SPARSE
- "ocxl: Tally up the LPC memory on a link & allow it to be mapped"
- Address minor review comments from Jonathan Cameron
- "ocxl: Add functions to map/unmap LPC memory"
- Split detected memory message into a separate patch
- Address minor review comments from Jonathan Cameron
- Add a comment explaining why unmap_lpc_mem is in
deconfigure_afu
- "nvdimm/ocxl: Add support for Admin commands"
- use sizeof(u64) rather than 0x08 when iterating u64s
- "nvdimm/ocxl: Implement the heartbeat command"
- Fix typo in blurb
- Address kernel doc issues
- Ensure all uapi headers use C89 compatible comments
- Drop patches for firmware update & overwrite, these will be
submitted later once patches are available for ndctl
- Rename SCM to OpenCAPI Persistent Memory

V2:
- "powerpc: Map & release OpenCAPI LPC memory"
- Fix #if -> #ifdef
- use pci_dev_id to get the bdfn
- use __be64 to hold be data
- indent check_hotplug_memory_addressable correctly
- Remove export of check_hotplug_memory_addressable
- "ocxl: Conditionally bind SCM devices to the generic OCXL driver"
- Improve patch description and remove redundant default
- "nvdimm: Add driver for OpenCAPI Storage Class Memory"
- Mark a few funcs as static as identified by the 0day bot
- Add OCXL dependencies to OCXL_SCM
- Use memcpy_mcsafe in scm_ndctl_config_read
- Rename scm_foo_offset_0x00 to scm_foo_header_parse & add docs
- Name DIMM attribs "ocxl" rather than "scm"
- Split out into base + many feature patches
- "powerpc: Enable OpenCAPI Storage Class Memory driver on bare
metal"
- Build DEV_DAX & friends as modules
- "ocxl: Conditionally bind SCM devices to the generic OCXL driver"
- Patch dropped (easy enough to maintain this out of tree for
development)
- "ocxl: Tally up the LPC memory on a link & allow it to be mapped"
- Add a warning if an unmatched lpc_release is called
- "ocxl: Add functions to map/unmap LPC memory"
- Use EXPORT_SYMBOL_GPL

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-24 04:38:46

by Matthew Wilcox (Oracle)

[permalink] [raw]
Subject: Re: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Mon, Feb 24, 2020 at 03:34:07PM +1100, Alastair D'Silva wrote:
> V3:
> - Rebase against next/next-20200220
> - Move driver to arch/powerpc/platforms/powernv, we now expect this
> driver to go upstream via the powerpc tree

That's rather the opposite direction of normal; mostly drivers live under
drivers/ and not in arch/. It's easier for drivers to get overlooked
when doing tree-wide changes if they're hiding.

2020-02-24 04:45:20

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Sun, 2020-02-23 at 20:37 -0800, Matthew Wilcox wrote:
> On Mon, Feb 24, 2020 at 03:34:07PM +1100, Alastair D'Silva wrote:
> > V3:
> > - Rebase against next/next-20200220
> > - Move driver to arch/powerpc/platforms/powernv, we now expect
> > this
> > driver to go upstream via the powerpc tree
>
> That's rather the opposite direction of normal; mostly drivers live
> under
> drivers/ and not in arch/. It's easier for drivers to get overlooked
> when doing tree-wide changes if they're hiding.

This is true, however, given that it was not all that desirable to have
it under drivers/nvdimm, its sister driver (for the same hardware) is
also under arch, and that we don't expect this driver to be used on any
platform other than powernv, we think this was the most reasonable
place to put it.

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-24 05:49:44

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 01/27] powerpc: Add OPAL calls for LPC memory alloc/release

On 21/2/20 2:26 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> Add OPAL calls for LPC memory alloc/release
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> Acked-by: Andrew Donnellan <[email protected]>
> Acked-by: Frederic Barrat <[email protected]>

Summary line should be "powerpc/powernv".


--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-24 05:51:26

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 01/27] powerpc: Add OPAL calls for LPC memory alloc/release

On Mon, 2020-02-24 at 16:49 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:26 pm, Alastair D'Silva wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > Add OPAL calls for LPC memory alloc/release
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > Acked-by: Andrew Donnellan <[email protected]>
> > Acked-by: Frederic Barrat <[email protected]>
>
> Summary line should be "powerpc/powernv".
>
>

Ok

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-24 05:51:37

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 03/27] powerpc: Map & release OpenCAPI LPC memory

On 24/2/20 1:51 pm, Andrew Donnellan wrote:
> On 21/2/20 2:26 pm, Alastair D'Silva wrote:
>> From: Alastair D'Silva <[email protected]>
>>
>> This patch adds platform support to map & release LPC memory.
>>
>> Signed-off-by: Alastair D'Silva <[email protected]>
>
> Nothing seems obviously wrong here.
>
> Reviewed-by: Andrew Donnellan <[email protected]>

Oh, commit message nitpick :)

Summary should be powerpc/powernv. Commit message should explain that
this is for the powernv platform and presents an interface that drivers
can use to make use of the new OPAL calls.

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-24 06:03:18

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 07/27] ocxl: Add functions to map/unmap LPC memory

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> Add functions to map/unmap LPC memory
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> drivers/misc/ocxl/core.c | 51 +++++++++++++++++++++++++++++++
> drivers/misc/ocxl/ocxl_internal.h | 3 ++
> include/misc/ocxl.h | 21 +++++++++++++
> 3 files changed, 75 insertions(+)
>
> diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c
> index 2531c6cf19a0..75ff14e3882a 100644
> --- a/drivers/misc/ocxl/core.c
> +++ b/drivers/misc/ocxl/core.c
> @@ -210,6 +210,56 @@ static void unmap_mmio_areas(struct ocxl_afu *afu)
> release_fn_bar(afu->fn, afu->config.global_mmio_bar);
> }
>
> +int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu)
> +{
> + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
> +
> + if ((afu->config.lpc_mem_size + afu->config.special_purpose_mem_size) == 0)
> + return 0;

I'd prefer the comparison here to be:

afu->config.lpc_mem_size == 0 &&
afu->config.special_purpose_mem_size == 0

so a reader doesn't have to think about what this means.

> +
> + afu->lpc_base_addr = ocxl_link_lpc_map(afu->fn->link, dev);
> + if (afu->lpc_base_addr == 0)
> + return -EINVAL;
> +
> + if (afu->config.lpc_mem_size > 0) {
> + afu->lpc_res.start = afu->lpc_base_addr + afu->config.lpc_mem_offset;

Maybe not for this series - hmm, I wonder if we should print a warning
somewhere (maybe in read_afu_lpc_memory_info()?) if we see the case
where (lpc_mem_offset > 0 && lpc_mem_size == 0). Likewise for special
purpose?

> + afu->lpc_res.end = afu->lpc_res.start + afu->config.lpc_mem_size - 1;
> + }
> +
> + if (afu->config.special_purpose_mem_size > 0) {
> + afu->special_purpose_res.start = afu->lpc_base_addr +
> + afu->config.special_purpose_mem_offset;
> + afu->special_purpose_res.end = afu->special_purpose_res.start +
> + afu->config.special_purpose_mem_size - 1;
> + }
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(ocxl_afu_map_lpc_mem);
> +
> +struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu)
> +{
> + return &afu->lpc_res;
> +}
> +EXPORT_SYMBOL_GPL(ocxl_afu_lpc_mem);

What's the point of this function? A layer of indirection just in case
we need it in future?

> +
> +static void unmap_lpc_mem(struct ocxl_afu *afu)
> +{
> + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
> +
> + if (afu->lpc_res.start || afu->special_purpose_res.start) {
> + void *link = afu->fn->link;
> +
> + // only release the link when the the last consumer calls release
> + ocxl_link_lpc_release(link, dev);
> +
> + afu->lpc_res.start = 0;
> + afu->lpc_res.end = 0;
> + afu->special_purpose_res.start = 0;
> + afu->special_purpose_res.end = 0;
> + }
> +}
> +
> static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
> {
> int rc;
> @@ -251,6 +301,7 @@ static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
>
> static void deconfigure_afu(struct ocxl_afu *afu)
> {
> + unmap_lpc_mem(afu);
> unmap_mmio_areas(afu);
> reclaim_afu_pasid(afu);
> reclaim_afu_actag(afu);
> diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
> index d0c8c4838f42..ce0cac1da416 100644
> --- a/drivers/misc/ocxl/ocxl_internal.h
> +++ b/drivers/misc/ocxl/ocxl_internal.h
> @@ -52,6 +52,9 @@ struct ocxl_afu {
> void __iomem *global_mmio_ptr;
> u64 pp_mmio_start;
> void *private;
> + u64 lpc_base_addr; /* Covers both LPC & special purpose memory */
> + struct resource lpc_res;
> + struct resource special_purpose_res;
> };
>
> enum ocxl_context_status {
> diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
> index 357ef1aadbc0..d8b0b4d46bfb 100644
> --- a/include/misc/ocxl.h
> +++ b/include/misc/ocxl.h
> @@ -203,6 +203,27 @@ int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id,
>
> // AFU Metadata
>
> +/**
> + * ocxl_afu_map_lpc_mem() - Map the LPC system & special purpose memory for an AFU
> + * Do not call this during device discovery, as there may me multiple

be

> + * devices on a link, and the memory is mapped for the whole link, not
> + * just one device. It should only be called after all devices have
> + * registered their memory on the link.
> + *
> + * @afu: The AFU that has the LPC memory to map
> + *
> + * Returns 0 on success, negative on failure
> + */
> +int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu);
> +
> +/**
> + * ocxl_afu_lpc_mem() - Get the physical address range of LPC memory for an AFU
> + * @afu: The AFU associated with the LPC memory
> + *
> + * Returns a pointer to the resource struct for the physical address range
> + */
> +struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu);
> +
> /**
> * ocxl_afu_config() - Get a pointer to the config for an AFU
> * @afu: a pointer to the AFU to get the config for
>

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-24 06:07:36

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 08/27] ocxl: Emit a log message showing how much LPC memory was detected

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> This patch emits a message showing how much LPC memory & special purpose
> memory was detected on an OCXL device.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> drivers/misc/ocxl/config.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> index a62e3d7db2bf..701ae6216abf 100644
> --- a/drivers/misc/ocxl/config.c
> +++ b/drivers/misc/ocxl/config.c
> @@ -568,6 +568,10 @@ static int read_afu_lpc_memory_info(struct pci_dev *dev,
> afu->special_purpose_mem_size =
> total_mem_size - lpc_mem_size;
> }
> +
> + dev_info(&dev->dev, "Probed LPC memory of %#llx bytes and special purpose memory of %#llx bytes\n",
> + afu->lpc_mem_size, afu->special_purpose_mem_size);
> +

Printing this at info level for every single AFU seems a bit noisy.
Perhaps we can print it only if LPC memory is > 0?

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-24 06:09:28

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 07/27] ocxl: Add functions to map/unmap LPC memory

On Mon, 2020-02-24 at 17:02 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > Add functions to map/unmap LPC memory
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > drivers/misc/ocxl/core.c | 51
> > +++++++++++++++++++++++++++++++
> > drivers/misc/ocxl/ocxl_internal.h | 3 ++
> > include/misc/ocxl.h | 21 +++++++++++++
> > 3 files changed, 75 insertions(+)
> >
> > diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c
> > index 2531c6cf19a0..75ff14e3882a 100644
> > --- a/drivers/misc/ocxl/core.c
> > +++ b/drivers/misc/ocxl/core.c
> > @@ -210,6 +210,56 @@ static void unmap_mmio_areas(struct ocxl_afu
> > *afu)
> > release_fn_bar(afu->fn, afu->config.global_mmio_bar);
> > }
> >
> > +int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu)
> > +{
> > + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
> > +
> > + if ((afu->config.lpc_mem_size + afu-
> > >config.special_purpose_mem_size) == 0)
> > + return 0;
>
> I'd prefer the comparison here to be:
>
> afu->config.lpc_mem_size == 0 &&
> afu->config.special_purpose_mem_size == 0
>
> so a reader doesn't have to think about what this means.
>

Ok

> > +
> > + afu->lpc_base_addr = ocxl_link_lpc_map(afu->fn->link, dev);
> > + if (afu->lpc_base_addr == 0)
> > + return -EINVAL;
> > +
> > + if (afu->config.lpc_mem_size > 0) {
> > + afu->lpc_res.start = afu->lpc_base_addr + afu-
> > >config.lpc_mem_offset;
>
> Maybe not for this series - hmm, I wonder if we should print a
> warning
> somewhere (maybe in read_afu_lpc_memory_info()?) if we see the case
> where (lpc_mem_offset > 0 && lpc_mem_size == 0). Likewise for
> special
> purpose?
>

Sounds reasonable, might as well add it here since there are other LPC
changes.

> > + afu->lpc_res.end = afu->lpc_res.start + afu-
> > >config.lpc_mem_size - 1;
> > + }
> > +
> > + if (afu->config.special_purpose_mem_size > 0) {
> > + afu->special_purpose_res.start = afu->lpc_base_addr +
> > + afu-
> > >config.special_purpose_mem_offset;
> > + afu->special_purpose_res.end = afu-
> > >special_purpose_res.start +
> > + afu-
> > >config.special_purpose_mem_size - 1;
> > + }
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(ocxl_afu_map_lpc_mem);
> > +
> > +struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu)
> > +{
> > + return &afu->lpc_res;
> > +}
> > +EXPORT_SYMBOL_GPL(ocxl_afu_lpc_mem);
>
> What's the point of this function? A layer of indirection just in
> case
> we need it in future?
>

struct ocxl_afu is opaque outside the ocxl driver.

> > +
> > +static void unmap_lpc_mem(struct ocxl_afu *afu)
> > +{
> > + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
> > +
> > + if (afu->lpc_res.start || afu->special_purpose_res.start) {
> > + void *link = afu->fn->link;
> > +
> > + // only release the link when the the last consumer
> > calls release
> > + ocxl_link_lpc_release(link, dev);
> > +
> > + afu->lpc_res.start = 0;
> > + afu->lpc_res.end = 0;
> > + afu->special_purpose_res.start = 0;
> > + afu->special_purpose_res.end = 0;
> > + }
> > +}
> > +
> > static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct
> > pci_dev *dev)
> > {
> > int rc;
> > @@ -251,6 +301,7 @@ static int configure_afu(struct ocxl_afu *afu,
> > u8 afu_idx, struct pci_dev *dev)
> >
> > static void deconfigure_afu(struct ocxl_afu *afu)
> > {
> > + unmap_lpc_mem(afu);
> > unmap_mmio_areas(afu);
> > reclaim_afu_pasid(afu);
> > reclaim_afu_actag(afu);
> > diff --git a/drivers/misc/ocxl/ocxl_internal.h
> > b/drivers/misc/ocxl/ocxl_internal.h
> > index d0c8c4838f42..ce0cac1da416 100644
> > --- a/drivers/misc/ocxl/ocxl_internal.h
> > +++ b/drivers/misc/ocxl/ocxl_internal.h
> > @@ -52,6 +52,9 @@ struct ocxl_afu {
> > void __iomem *global_mmio_ptr;
> > u64 pp_mmio_start;
> > void *private;
> > + u64 lpc_base_addr; /* Covers both LPC & special purpose memory
> > */
> > + struct resource lpc_res;
> > + struct resource special_purpose_res;
> > };
> >
> > enum ocxl_context_status {
> > diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
> > index 357ef1aadbc0..d8b0b4d46bfb 100644
> > --- a/include/misc/ocxl.h
> > +++ b/include/misc/ocxl.h
> > @@ -203,6 +203,27 @@ int ocxl_irq_set_handler(struct ocxl_context
> > *ctx, int irq_id,
> >
> > // AFU Metadata
> >
> > +/**
> > + * ocxl_afu_map_lpc_mem() - Map the LPC system & special purpose
> > memory for an AFU
> > + * Do not call this during device discovery, as there may me
> > multiple
>
> be
>
> > + * devices on a link, and the memory is mapped for the whole link,
> > not
> > + * just one device. It should only be called after all devices
> > have
> > + * registered their memory on the link.
> > + *
> > + * @afu: The AFU that has the LPC memory to map
> > + *
> > + * Returns 0 on success, negative on failure
> > + */
> > +int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu);
> > +
> > +/**
> > + * ocxl_afu_lpc_mem() - Get the physical address range of LPC
> > memory for an AFU
> > + * @afu: The AFU associated with the LPC memory
> > + *
> > + * Returns a pointer to the resource struct for the physical
> > address range
> > + */
> > +struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu);
> > +
> > /**
> > * ocxl_afu_config() - Get a pointer to the config for an AFU
> > * @afu: a pointer to the AFU to get the config for
> >
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-24 06:10:39

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 08/27] ocxl: Emit a log message showing how much LPC memory was detected

On Mon, 2020-02-24 at 17:06 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > This patch emits a message showing how much LPC memory & special
> > purpose
> > memory was detected on an OCXL device.
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > drivers/misc/ocxl/config.c | 4 ++++
> > 1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/misc/ocxl/config.c
> > b/drivers/misc/ocxl/config.c
> > index a62e3d7db2bf..701ae6216abf 100644
> > --- a/drivers/misc/ocxl/config.c
> > +++ b/drivers/misc/ocxl/config.c
> > @@ -568,6 +568,10 @@ static int read_afu_lpc_memory_info(struct
> > pci_dev *dev,
> > afu->special_purpose_mem_size =
> > total_mem_size - lpc_mem_size;
> > }
> > +
> > + dev_info(&dev->dev, "Probed LPC memory of %#llx bytes and
> > special purpose memory of %#llx bytes\n",
> > + afu->lpc_mem_size, afu->special_purpose_mem_size);
> > +
>
> Printing this at info level for every single AFU seems a bit noisy.
> Perhaps we can print it only if LPC memory is > 0?
>

There is an early exit before this if there is no LPC memory.

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-24 06:14:37

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 08/27] ocxl: Emit a log message showing how much LPC memory was detected

On 24/2/20 5:10 pm, Alastair D'Silva wrote:
>> Printing this at info level for every single AFU seems a bit noisy.
>> Perhaps we can print it only if LPC memory is > 0?
>>
>
> There is an early exit before this if there is no LPC memory.
>

Noted, I'd missed that amidst all the early returns for errors.

In that case

Acked-by: Andrew Donnellan <[email protected]>

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-24 06:52:53

by Oliver O'Halloran

[permalink] [raw]
Subject: Re: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Mon, Feb 24, 2020 at 3:43 PM Alastair D'Silva <[email protected]> wrote:
>
> On Sun, 2020-02-23 at 20:37 -0800, Matthew Wilcox wrote:
> > On Mon, Feb 24, 2020 at 03:34:07PM +1100, Alastair D'Silva wrote:
> > > V3:
> > > - Rebase against next/next-20200220
> > > - Move driver to arch/powerpc/platforms/powernv, we now expect
> > > this
> > > driver to go upstream via the powerpc tree
> >
> > That's rather the opposite direction of normal; mostly drivers live
> > under
> > drivers/ and not in arch/. It's easier for drivers to get overlooked
> > when doing tree-wide changes if they're hiding.
>
> This is true, however, given that it was not all that desirable to have
> it under drivers/nvdimm, it's sister driver (for the same hardware) is
> also under arch, and that we don't expect this driver to be used on any
> platform other than powernv, we think this was the most reasonable
> place to put it.

Historically powernv specific platform drivers go in their respective
subsystem trees rather than in arch/ and I'd prefer we kept it that
way. When I added the papr_scm driver I put it in the pseries platform
directory because most of the pseries paravirt code lives there for
some reason; I don't know why. Luckily for me that followed the same
model that Dan used when he put the NFIT driver in drivers/acpi/ and
the libnvdimm core in drivers/nvdimm/ so we didn't have anything to
argue about. However, as Matthew pointed out, it is at odds with how
most subsystems operate. Is there any particular reason we're doing
things this way or should we think about moving libnvdimm users to
drivers/nvdimm/?

Oliver

2020-02-25 03:02:04

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 11/27] powerpc: Enable the OpenCAPI Persistent Memory driver for powernv_defconfig

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> This patch enables the OpenCAPI Persistent Memory driver, as well
> as DAX support, for the 'powernv' platform.

defconfig, not platform

>
> DAX is not a strict requirement for the functioning of the driver, but it
> is likely that a user will want to create a DAX device on top of their
> persistent memory device.
>
> Signed-off-by: Alastair D'Silva <[email protected]>

Otherwise

Reviewed-by: Andrew Donnellan <[email protected]>

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-25 10:03:20

by Frederic Barrat

[permalink] [raw]
Subject: Re: [PATCH v3 03/27] powerpc: Map & release OpenCAPI LPC memory



Le 21/02/2020 à 04:26, Alastair D'Silva a écrit :
> From: Alastair D'Silva <[email protected]>
>
> This patch adds platform support to map & release LPC memory.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/include/asm/pnv-ocxl.h | 4 +++
> arch/powerpc/platforms/powernv/ocxl.c | 43 +++++++++++++++++++++++++++
> 2 files changed, 47 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
> index 7de82647e761..0b2a6707e555 100644
> --- a/arch/powerpc/include/asm/pnv-ocxl.h
> +++ b/arch/powerpc/include/asm/pnv-ocxl.h
> @@ -32,5 +32,9 @@ extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
>
> extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
> extern void pnv_ocxl_free_xive_irq(u32 irq);
> +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size);
> +void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev);
> +#endif


This breaks the compilation of the ocxl driver if CONFIG_MEMORY_HOTPLUG=n

Those functions still make sense even without memory hotplug, for
example in the context of the implementation you had to access opencapi
LPC memory through mmap(). The #ifdef is really needed only around the
check_hotplug_memory_addressable() call.

Fred


> #endif /* _ASM_PNV_OCXL_H */
> diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
> index 8c65aacda9c8..f2edbcc67361 100644
> --- a/arch/powerpc/platforms/powernv/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/ocxl.c
> @@ -475,6 +475,49 @@ void pnv_ocxl_spa_release(void *platform_data)
> }
> EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
>
> +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size)
> +{
> + struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> + struct pnv_phb *phb = hose->private_data;
> + u32 bdfn = pci_dev_id(pdev);
> + __be64 base_addr_be64;
> + u64 base_addr;
> + int rc;
> +
> + rc = opal_npu_mem_alloc(phb->opal_id, bdfn, size, &base_addr_be64);
> + if (rc) {
> + dev_warn(&pdev->dev,
> + "OPAL could not allocate LPC memory, rc=%d\n", rc);
> + return 0;
> + }
> +
> + base_addr = be64_to_cpu(base_addr_be64);
> +
> + rc = check_hotplug_memory_addressable(base_addr >> PAGE_SHIFT,
> + size >> PAGE_SHIFT);
> + if (rc)
> + return 0;
> +
> + return base_addr;
> +}
> +EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_setup);
> +
> +void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev)
> +{
> + struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> + struct pnv_phb *phb = hose->private_data;
> + u32 bdfn = pci_dev_id(pdev);
> + int rc;
> +
> + rc = opal_npu_mem_release(phb->opal_id, bdfn);
> + if (rc)
> + dev_warn(&pdev->dev,
> + "OPAL reported rc=%d when releasing LPC memory\n", rc);
> +}
> +EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_release);
> +#endif
> +
> int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
> {
> struct spa_data *data = (struct spa_data *) platform_data;
>

2020-02-25 17:23:55

by Frederic Barrat

[permalink] [raw]
Subject: Re: [PATCH v3 07/27] ocxl: Add functions to map/unmap LPC memory



Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <[email protected]>
>
> Add functions to map/unmap LPC memory
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---


It looks ok to me.
Acked-by: Frederic Barrat <[email protected]>



> drivers/misc/ocxl/core.c | 51 +++++++++++++++++++++++++++++++
> drivers/misc/ocxl/ocxl_internal.h | 3 ++
> include/misc/ocxl.h | 21 +++++++++++++
> 3 files changed, 75 insertions(+)
>
> diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c
> index 2531c6cf19a0..75ff14e3882a 100644
> --- a/drivers/misc/ocxl/core.c
> +++ b/drivers/misc/ocxl/core.c
> @@ -210,6 +210,56 @@ static void unmap_mmio_areas(struct ocxl_afu *afu)
> release_fn_bar(afu->fn, afu->config.global_mmio_bar);
> }
>
> +int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu)
> +{
> + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
> +
> + if ((afu->config.lpc_mem_size + afu->config.special_purpose_mem_size) == 0)
> + return 0;
> +
> + afu->lpc_base_addr = ocxl_link_lpc_map(afu->fn->link, dev);
> + if (afu->lpc_base_addr == 0)
> + return -EINVAL;
> +
> + if (afu->config.lpc_mem_size > 0) {
> + afu->lpc_res.start = afu->lpc_base_addr + afu->config.lpc_mem_offset;
> + afu->lpc_res.end = afu->lpc_res.start + afu->config.lpc_mem_size - 1;
> + }
> +
> + if (afu->config.special_purpose_mem_size > 0) {
> + afu->special_purpose_res.start = afu->lpc_base_addr +
> + afu->config.special_purpose_mem_offset;
> + afu->special_purpose_res.end = afu->special_purpose_res.start +
> + afu->config.special_purpose_mem_size - 1;
> + }
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(ocxl_afu_map_lpc_mem);
> +
> +struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu)
> +{
> + return &afu->lpc_res;
> +}
> +EXPORT_SYMBOL_GPL(ocxl_afu_lpc_mem);
> +
> +static void unmap_lpc_mem(struct ocxl_afu *afu)
> +{
> + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
> +
> + if (afu->lpc_res.start || afu->special_purpose_res.start) {
> + void *link = afu->fn->link;
> +
> + // only release the link when the the last consumer calls release
> + ocxl_link_lpc_release(link, dev);
> +
> + afu->lpc_res.start = 0;
> + afu->lpc_res.end = 0;
> + afu->special_purpose_res.start = 0;
> + afu->special_purpose_res.end = 0;
> + }
> +}
> +
> static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
> {
> int rc;
> @@ -251,6 +301,7 @@ static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
>
> static void deconfigure_afu(struct ocxl_afu *afu)
> {
> + unmap_lpc_mem(afu);
> unmap_mmio_areas(afu);
> reclaim_afu_pasid(afu);
> reclaim_afu_actag(afu);
> diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
> index d0c8c4838f42..ce0cac1da416 100644
> --- a/drivers/misc/ocxl/ocxl_internal.h
> +++ b/drivers/misc/ocxl/ocxl_internal.h
> @@ -52,6 +52,9 @@ struct ocxl_afu {
> void __iomem *global_mmio_ptr;
> u64 pp_mmio_start;
> void *private;
> + u64 lpc_base_addr; /* Covers both LPC & special purpose memory */
> + struct resource lpc_res;
> + struct resource special_purpose_res;
> };
>
> enum ocxl_context_status {
> diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
> index 357ef1aadbc0..d8b0b4d46bfb 100644
> --- a/include/misc/ocxl.h
> +++ b/include/misc/ocxl.h
> @@ -203,6 +203,27 @@ int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id,
>
> // AFU Metadata
>
> +/**
> + * ocxl_afu_map_lpc_mem() - Map the LPC system & special purpose memory for an AFU
> + * Do not call this during device discovery, as there may me multiple
> + * devices on a link, and the memory is mapped for the whole link, not
> + * just one device. It should only be called after all devices have
> + * registered their memory on the link.
> + *
> + * @afu: The AFU that has the LPC memory to map
> + *
> + * Returns 0 on success, negative on failure
> + */
> +int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu);
> +
> +/**
> + * ocxl_afu_lpc_mem() - Get the physical address range of LPC memory for an AFU
> + * @afu: The AFU associated with the LPC memory
> + *
> + * Returns a pointer to the resource struct for the physical address range
> + */
> +struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu);
> +
> /**
> * ocxl_afu_config() - Get a pointer to the config for an AFU
> * @afu: a pointer to the AFU to get the config for
>

2020-02-25 17:24:58

by Frederic Barrat

[permalink] [raw]
Subject: Re: [PATCH v3 08/27] ocxl: Emit a log message showing how much LPC memory was detected



Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <[email protected]>
>
> This patch emits a message showing how much LPC memory & special purpose
> memory was detected on an OCXL device.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---


Acked-by: Frederic Barrat <[email protected]>



> drivers/misc/ocxl/config.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> index a62e3d7db2bf..701ae6216abf 100644
> --- a/drivers/misc/ocxl/config.c
> +++ b/drivers/misc/ocxl/config.c
> @@ -568,6 +568,10 @@ static int read_afu_lpc_memory_info(struct pci_dev *dev,
> afu->special_purpose_mem_size =
> total_mem_size - lpc_mem_size;
> }
> +
> + dev_info(&dev->dev, "Probed LPC memory of %#llx bytes and special purpose memory of %#llx bytes\n",
> + afu->lpc_mem_size, afu->special_purpose_mem_size);
> +
> return 0;
> }
>
>

2020-02-26 00:16:00

by Alastair D'Silva

[permalink] [raw]
Subject: RE: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Mon, 2020-02-24 at 17:51 +1100, Oliver O'Halloran wrote:
> On Mon, Feb 24, 2020 at 3:43 PM Alastair D'Silva <
> [email protected]> wrote:
> > On Sun, 2020-02-23 at 20:37 -0800, Matthew Wilcox wrote:
> > > On Mon, Feb 24, 2020 at 03:34:07PM +1100, Alastair D'Silva wrote:
> > > > V3:
> > > > - Rebase against next/next-20200220
> > > > - Move driver to arch/powerpc/platforms/powernv, we now
> > > > expect
> > > > this
> > > > driver to go upstream via the powerpc tree
> > >
> > > That's rather the opposite direction of normal; mostly drivers
> > > live
> > > under
> > > drivers/ and not in arch/. It's easier for drivers to get
> > > overlooked
> > > when doing tree-wide changes if they're hiding.
> >
> > This is true, however, given that it was not all that desirable to
> > have
> > it under drivers/nvdimm, it's sister driver (for the same hardware)
> > is
> > also under arch, and that we don't expect this driver to be used on
> > any
> > platform other than powernv, we think this was the most reasonable
> > place to put it.
>
> Historically powernv specific platform drivers go in their respective
> subsystem trees rather than in arch/ and I'd prefer we kept it that
> way. When I added the papr_scm driver I put it in the pseries
> platform
> directory because most of the pseries paravirt code lives there for
> some reason; I don't know why. Luckily for me that followed the same
> model that Dan used when he put the NFIT driver in drivers/acpi/ and
> the libnvdimm core in drivers/nvdimm/ so we didn't have anything to
> argue about. However, as Matthew pointed out, it is at odds with how
> most subsystems operate. Is there any particular reason we're doing
> things this way or should we think about moving libnvdimm users to
> drivers/nvdimm/?
>
> Oliver


I'm not too fussed where it ends up, as long as it ends up somewhere :)

From what I can tell, the issue is that we have both "infrastructure"
drivers, and end-device drivers. To me, it feels like drivers/nvdimm
should contain both, and I think this feels like the right approach.

I could move it back to drivers/nvdimm/ocxl, but I felt that it was
only tolerated there, not desired. This could be cleared up with a
response from Dan Williams, and if it is indeed desired, this is my
preferred location.

I think a case could also be made for drivers/ocxl, simply because we
don't expect more than a handful of drivers to ever live there (I
expect most users will drive their devices from userspace via libocxl).

In defence of keeping it in arch/powerpc/powernv, I highly doubt this
driver will end up being used on any platform other than this. Even
though OpenCAPI was engineered as an open standard, there is some
competition from industry giants with a competing standard on a much
more popular platform.

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-26 00:19:45

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 03/27] powerpc: Map & release OpenCAPI LPC memory

On Tue, 2020-02-25 at 11:02 +0100, Frederic Barrat wrote:
>
> Le 21/02/2020 à 04:26, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <[email protected]>
> >
> > This patch adds platform support to map & release LPC memory.
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > arch/powerpc/include/asm/pnv-ocxl.h | 4 +++
> > arch/powerpc/platforms/powernv/ocxl.c | 43
> > +++++++++++++++++++++++++++
> > 2 files changed, 47 insertions(+)
> >
> > diff --git a/arch/powerpc/include/asm/pnv-ocxl.h
> > b/arch/powerpc/include/asm/pnv-ocxl.h
> > index 7de82647e761..0b2a6707e555 100644
> > --- a/arch/powerpc/include/asm/pnv-ocxl.h
> > +++ b/arch/powerpc/include/asm/pnv-ocxl.h
> > @@ -32,5 +32,9 @@ extern int pnv_ocxl_spa_remove_pe_from_cache(void
> > *platform_data, int pe_handle)
> >
> > extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
> > extern void pnv_ocxl_free_xive_irq(u32 irq);
> > +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> > +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size);
> > +void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev);
> > +#endif
>
> This breaks the compilation of the ocxl driver if
> CONFIG_MEMORY_HOTPLUG=n
>
> Those functions still make sense even without memory hotplug, for
> example in the context of the implementation you had to access
> opencapi
> LPC memory through mmap(). The #ifdef is really needed only around
> the
> check_hotplug_memory_addressable() call.
>
> Fred

Hmm, we do still need sparsemem though. Let me think about this some
more.

>
>
> > #endif /* _ASM_PNV_OCXL_H */
> > diff --git a/arch/powerpc/platforms/powernv/ocxl.c
> > b/arch/powerpc/platforms/powernv/ocxl.c
> > index 8c65aacda9c8..f2edbcc67361 100644
> > --- a/arch/powerpc/platforms/powernv/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/ocxl.c
> > @@ -475,6 +475,49 @@ void pnv_ocxl_spa_release(void *platform_data)
> > }
> > EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
> >
> > +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> > +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size)
> > +{
> > + struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> > + struct pnv_phb *phb = hose->private_data;
> > + u32 bdfn = pci_dev_id(pdev);
> > + __be64 base_addr_be64;
> > + u64 base_addr;
> > + int rc;
> > +
> > + rc = opal_npu_mem_alloc(phb->opal_id, bdfn, size,
> > &base_addr_be64);
> > + if (rc) {
> > + dev_warn(&pdev->dev,
> > + "OPAL could not allocate LPC memory, rc=%d\n",
> > rc);
> > + return 0;
> > + }
> > +
> > + base_addr = be64_to_cpu(base_addr_be64);
> > +
> > + rc = check_hotplug_memory_addressable(base_addr >> PAGE_SHIFT,
> > + size >> PAGE_SHIFT);
> > + if (rc)
> > + return 0;
> > +
> > + return base_addr;
> > +}
> > +EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_setup);
> > +
> > +void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev)
> > +{
> > + struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> > + struct pnv_phb *phb = hose->private_data;
> > + u32 bdfn = pci_dev_id(pdev);
> > + int rc;
> > +
> > + rc = opal_npu_mem_release(phb->opal_id, bdfn);
> > + if (rc)
> > + dev_warn(&pdev->dev,
> > + "OPAL reported rc=%d when releasing LPC
> > memory\n", rc);
> > +}
> > +EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_release);
> > +#endif
> > +
> > int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int
> > pe_handle)
> > {
> > struct spa_data *data = (struct spa_data *) platform_data;
> >
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-26 00:34:11

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Tue, Feb 25, 2020 at 4:14 PM Alastair D'Silva <[email protected]> wrote:
>
> On Mon, 2020-02-24 at 17:51 +1100, Oliver O'Halloran wrote:
> > On Mon, Feb 24, 2020 at 3:43 PM Alastair D'Silva <
> > [email protected]> wrote:
> > > On Sun, 2020-02-23 at 20:37 -0800, Matthew Wilcox wrote:
> > > > On Mon, Feb 24, 2020 at 03:34:07PM +1100, Alastair D'Silva wrote:
> > > > > V3:
> > > > > - Rebase against next/next-20200220
> > > > > - Move driver to arch/powerpc/platforms/powernv, we now
> > > > > expect
> > > > > this
> > > > > driver to go upstream via the powerpc tree
> > > >
> > > > That's rather the opposite direction of normal; mostly drivers
> > > > live
> > > > under
> > > > drivers/ and not in arch/. It's easier for drivers to get
> > > > overlooked
> > > > when doing tree-wide changes if they're hiding.
> > >
> > > This is true, however, given that it was not all that desirable to
> > > have
> > > it under drivers/nvdimm, it's sister driver (for the same hardware)
> > > is
> > > also under arch, and that we don't expect this driver to be used on
> > > any
> > > platform other than powernv, we think this was the most reasonable
> > > place to put it.
> >
> > Historically powernv specific platform drivers go in their respective
> > subsystem trees rather than in arch/ and I'd prefer we kept it that
> > way. When I added the papr_scm driver I put it in the pseries
> > platform
> > directory because most of the pseries paravirt code lives there for
> > some reason; I don't know why. Luckily for me that followed the same
> > model that Dan used when he put the NFIT driver in drivers/acpi/ and
> > the libnvdimm core in drivers/nvdimm/ so we didn't have anything to
> > argue about. However, as Matthew pointed out, it is at odds with how
> > most subsystems operate. Is there any particular reason we're doing
> > things this way or should we think about moving libnvdimm users to
> > drivers/nvdimm/?
> >
> > Oliver
>
>
> I'm not too fussed where it ends up, as long as it ends up somewhere :)
>
> From what I can tell, the issue is that we have both "infrastructure"
> drivers, and end-device drivers. To me, it feels like drivers/nvdimm
> should contain both, and I think this feels like the right approach.
>
> I could move it back to drivers/nvdimm/ocxl, but I felt that it was
> only tolerated there, not desired. This could be cleared up with a
> response from Dan Williams, and if it is indeed desired, this is my
> preferred location.

Apologies if I gave the impression it was only tolerated. I'm ok with
drivers/nvdimm/ocxl/, and to the larger point I'd also be ok with a
drivers/{acpi => nvdimm}/nfit and {arch/powerpc/platforms/pseries =>
drivers/nvdimm}/papr_scm.c move as well to keep all the consumers of
the nvdimm related code together with the core.

2020-02-26 00:37:04

by Alastair D'Silva

[permalink] [raw]
Subject: RE: [PATCH v3 00/27] Add support for OpenCAPI Persistent Memory devices

On Tue, 2020-02-25 at 16:32 -0800, Dan Williams wrote:
> On Tue, Feb 25, 2020 at 4:14 PM Alastair D'Silva <
> [email protected]> wrote:
> > On Mon, 2020-02-24 at 17:51 +1100, Oliver O'Halloran wrote:
> > > On Mon, Feb 24, 2020 at 3:43 PM Alastair D'Silva <
> > > [email protected]> wrote:
> > > > On Sun, 2020-02-23 at 20:37 -0800, Matthew Wilcox wrote:
> > > > > On Mon, Feb 24, 2020 at 03:34:07PM +1100, Alastair D'Silva
> > > > > wrote:
> > > > > > V3:
> > > > > > - Rebase against next/next-20200220
> > > > > > - Move driver to arch/powerpc/platforms/powernv, we now
> > > > > > expect
> > > > > > this
> > > > > > driver to go upstream via the powerpc tree
> > > > >
> > > > > That's rather the opposite direction of normal; mostly
> > > > > drivers
> > > > > live
> > > > > under
> > > > > drivers/ and not in arch/. It's easier for drivers to get
> > > > > overlooked
> > > > > when doing tree-wide changes if they're hiding.
> > > >
> > > > This is true, however, given that it was not all that desirable
> > > > to
> > > > have
> > > > it under drivers/nvdimm, it's sister driver (for the same
> > > > hardware)
> > > > is
> > > > also under arch, and that we don't expect this driver to be
> > > > used on
> > > > any
> > > > platform other than powernv, we think this was the most
> > > > reasonable
> > > > place to put it.
> > >
> > > Historically powernv specific platform drivers go in their
> > > respective
> > > subsystem trees rather than in arch/ and I'd prefer we kept it
> > > that
> > > way. When I added the papr_scm driver I put it in the pseries
> > > platform
> > > directory because most of the pseries paravirt code lives there
> > > for
> > > some reason; I don't know why. Luckily for me that followed the
> > > same
> > > model that Dan used when he put the NFIT driver in drivers/acpi/
> > > and
> > > the libnvdimm core in drivers/nvdimm/ so we didn't have anything
> > > to
> > > argue about. However, as Matthew pointed out, it is at odds with
> > > how
> > > most subsystems operate. Is there any particular reason we're
> > > doing
> > > things this way or should we think about moving libnvdimm users
> > > to
> > > drivers/nvdimm/?
> > >
> > > Oliver
> >
> > I'm not too fussed where it ends up, as long as it ends up
> > somewhere :)
> >
> > From what I can tell, the issue is that we have both
> > "infrastructure"
> > drivers, and end-device drivers. To me, it feels like
> > drivers/nvdimm
> > should contain both, and I think this feels like the right
> > approach.
> >
> > I could move it back to drivers/nvdimm/ocxl, but I felt that it was
> > only tolerated there, not desired. This could be cleared up with a
> > response from Dan Williams, and if it is indeed desired, this is
> > my
> > preferred location.
>
> Apologies if I gave the impression it was only tolerated. I'm ok with
> drivers/nvdimm/ocxl/, and to the larger point I'd also be ok with a
> drivers/{acpi => nvdimm}/nfit and {arch/powerpc/platforms/pseries =>
> drivers/nvdimm}/papr_scm.c move as well to keep all the consumers of
> the nvdimm related code together with the core.

Great, thanks for clarifying, text is so imprecise when it comes to
nuance :)

I'll move it back to drivers/nvdimm/ocxl then.

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-27 05:09:35

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 12/27] powerpc/powernv/pmem: Add register addresses & status values to the header

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> These values have been taken from the device specifications.
>
> Signed-off-by: Alastair D'Silva <[email protected]>

I've compared these values against the internal version of the device
specifications that I have access to, and they appear to match.

A few minor comments below, otherwise:

Reviewed-by: Andrew Donnellan <[email protected]>

> +#define GLOBAL_MMIO_HCI_ACRW BIT_ULL(0)
> +#define GLOBAL_MMIO_HCI_NSCRW BIT_ULL(1)
> +#define GLOBAL_MMIO_HCI_AFU_RESET BIT_ULL(2)
> +#define GLOBAL_MMIO_HCI_FW_DEBUG BIT_ULL(3)
> +#define GLOBAL_MMIO_HCI_CONTROLLER_DUMP BIT_ULL(4)
> +#define GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED BIT_ULL(5)
> +#define GLOBAL_MMIO_HCI_REQ_HEALTH_PERF BIT_ULL(6)

The labelling of some of these bits deviates from the standard
abbreviations in the spec, which is fine I guess as these names are more
descriptive, but maybe add a brief comment with the original abbreviation?

> +
> +#define ADMIN_COMMAND_HEARTBEAT 0x00u
> +#define ADMIN_COMMAND_SHUTDOWN 0x01u
> +#define ADMIN_COMMAND_FW_UPDATE 0x02u
> +#define ADMIN_COMMAND_FW_DEBUG 0x03u
> +#define ADMIN_COMMAND_ERRLOG 0x04u
> +#define ADMIN_COMMAND_SMART 0x05u
> +#define ADMIN_COMMAND_CONTROLLER_STATS 0x06u
> +#define ADMIN_COMMAND_CONTROLLER_DUMP 0x07u
> +#define ADMIN_COMMAND_CMD_CAPS 0x08u
> +#define ADMIN_COMMAND_MAX 0x08u
> +
> +#define STATUS_SUCCESS 0x00
> +#define STATUS_MEM_UNAVAILABLE 0x20

There's also a "blocked on account of background task" code, 0x21.

> +#define STATUS_BAD_OPCODE 0x50
> +#define STATUS_BAD_REQUEST_PARM 0x51
> +#define STATUS_BAD_DATA_PARM 0x52
> +#define STATUS_DEBUG_BLOCKED 0x70
> +#define STATUS_FAIL 0xFF
> +
> +#define STATUS_FW_UPDATE_BLOCKED 0x21
> +#define STATUS_FW_ARG_INVALID 0x51
> +#define STATUS_FW_INVALID 0x52

These status codes seem, from the specification, to correspond to the
generic error codes above, so perhaps they're not needed.


--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-27 05:17:21

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 12/27] powerpc/powernv/pmem: Add register addresses & status values to the header

On Thu, 2020-02-27 at 16:08 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > These values have been taken from the device specifications.
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
>
> I've compared these values against the internal version of the
> device
> specifications that I have access to, and they appear to match.
>
> A few minor comments below, otherwise:
>
> Reviewed-by: Andrew Donnellan <[email protected]>
>
> > +#define GLOBAL_MMIO_HCI_ACRW BIT_ULL
> > (0)
> > +#define GLOBAL_MMIO_HCI_NSCRW BIT_ULL
> > (1)
> > +#define GLOBAL_MMIO_HCI_AFU_RESET BIT_ULL(2)
> > +#define GLOBAL_MMIO_HCI_FW_DEBUG BIT_ULL(3)
> > +#define GLOBAL_MMIO_HCI_CONTROLLER_DUMP BIT_ULL
> > (4)
> > +#define GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED BIT_ULL(5)
> > +#define GLOBAL_MMIO_HCI_REQ_HEALTH_PERF BIT_ULL
> > (6)
>
> The labelling of some of these bits deviates from the standard
> abbreviations in the spec, which is fine I guess as these names are
> more
> descriptive, but maybe add a brief comment with the original
> abbreviation?
>

Ok

> > +
> > +#define ADMIN_COMMAND_HEARTBEAT 0x00u
> > +#define ADMIN_COMMAND_SHUTDOWN 0x01u
> > +#define ADMIN_COMMAND_FW_UPDATE 0x02u
> > +#define ADMIN_COMMAND_FW_DEBUG 0x03u
> > +#define ADMIN_COMMAND_ERRLOG 0x04u
> > +#define ADMIN_COMMAND_SMART 0x05u
> > +#define ADMIN_COMMAND_CONTROLLER_STATS 0x06u
> > +#define ADMIN_COMMAND_CONTROLLER_DUMP 0x07u
> > +#define ADMIN_COMMAND_CMD_CAPS 0x08u
> > +#define ADMIN_COMMAND_MAX 0x08u
> > +
> > +#define STATUS_SUCCESS 0x00
> > +#define STATUS_MEM_UNAVAILABLE 0x20
>
> There's also a "blocked on account of background task" code, 0x21.
>

Ok

> > +#define STATUS_BAD_OPCODE 0x50
> > +#define STATUS_BAD_REQUEST_PARM 0x51
> > +#define STATUS_BAD_DATA_PARM 0x52
> > +#define STATUS_DEBUG_BLOCKED 0x70
> > +#define STATUS_FAIL 0xFF
> > +
> > +#define STATUS_FW_UPDATE_BLOCKED 0x21
> > +#define STATUS_FW_ARG_INVALID 0x51
> > +#define STATUS_FW_INVALID 0x52
>
> These status codes seem, from the specification, to correspond to
> the
> generic error codes above, so perhaps they're not needed.
>

These will be used in warn_status_fw_update() later, but I'll alias
them to make it clear that they are shadowing values

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-02-28 06:13:46

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 21/27] powerpc/powernv/pmem: Add an IOCTL to request controller health & perf data

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> When health & performance data is requested from the controller,
> it responds with an error log containing the requested information.
>
> This patch allows the request to be issued via an IOCTL.

A better explanation would be good - this IOCTL triggers a request to
the controller to collect controller health/perf data, and the
controller will later respond with an error log that can be picked up
via the error log IOCTL that you've defined earlier.


--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-28 06:26:13

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 25/27] powerpc/powernv/pmem: Expose the serial number in sysfs

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> +int ocxlpmem_sysfs_add(struct ocxlpmem *ocxlpmem)
> +{
> + int i, rc;
> +
> + for (i = 0; i < ARRAY_SIZE(attrs); i++) {
> + rc = device_create_file(&ocxlpmem->dev, &attrs[i]);
> + if (rc) {
> + for (; --i >= 0;)
> + device_remove_file(&ocxlpmem->dev, &attrs[i]);

I'd rather avoid weird for loop constructs if possible.

Is it actually dangerous to call device_remove_file() on an attr that
hasn't been added? If not then I'd rather define an err: label and loop
over the whole array there.

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-02-28 07:16:39

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH v3 25/27] powerpc/powernv/pmem: Expose the serial number in sysfs

On Fri, Feb 28, 2020 at 05:25:31PM +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > +int ocxlpmem_sysfs_add(struct ocxlpmem *ocxlpmem)
> > +{
> > + int i, rc;
> > +
> > + for (i = 0; i < ARRAY_SIZE(attrs); i++) {
> > + rc = device_create_file(&ocxlpmem->dev, &attrs[i]);
> > + if (rc) {
> > + for (; --i >= 0;)
> > + device_remove_file(&ocxlpmem->dev, &attrs[i]);
>
> I'd rather avoid weird for loop constructs if possible.
>
> Is it actually dangerous to call device_remove_file() on an attr that hasn't
> been added? If not then I'd rather define an err: label and loop over the
> whole array there.

None of this should be used at all, just use attribute groups properly
and the driver core will handle this all for you.

device_create/remove_file should never be called by anyone anymore if at all
possible.

thanks,

greg k-h

2020-03-01 23:42:51

by Alastair D'Silva

[permalink] [raw]
Subject: RE: [PATCH v3 25/27] powerpc/powernv/pmem: Expose the serial number in sysfs

On Fri, 2020-02-28 at 08:15 +0100, Greg Kroah-Hartman wrote:
> On Fri, Feb 28, 2020 at 05:25:31PM +1100, Andrew Donnellan wrote:
> > On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > > +int ocxlpmem_sysfs_add(struct ocxlpmem *ocxlpmem)
> > > +{
> > > + int i, rc;
> > > +
> > > + for (i = 0; i < ARRAY_SIZE(attrs); i++) {
> > > + rc = device_create_file(&ocxlpmem->dev, &attrs[i]);
> > > + if (rc) {
> > > + for (; --i >= 0;)
> > > + device_remove_file(&ocxlpmem->dev,
> > > &attrs[i]);
> >
> > I'd rather avoid weird for loop constructs if possible.
> >
> > Is it actually dangerous to call device_remove_file() on an attr
> > that hasn't
> > been added? If not then I'd rather define an err: label and loop
> > over the
> > whole array there.
>
> None of this should be used at all, just use attribute groups
> properly
> and the driver core will handle this all for you.
>
> device_create/remove_file should never be called by anyone anymore if
> at all
> possible.
>
> thanks,
>
> greg k-h


Thanks, I'll rework it to use the .groups member of struct pci_driver.

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-02 05:35:48

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 16/27] powerpc/powernv/pmem: Register a character device for userspace to interact with

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> This patch introduces a character device (/dev/ocxl-scmX) which further
> patches will use to interact with userspace.

As with the comments on other patches in this series, this commit
message is lacking in explanation. What's the purpose of this device?

>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/platforms/powernv/pmem/ocxl.c | 116 +++++++++++++++++-
> .../platforms/powernv/pmem/ocxl_internal.h | 2 +
> 2 files changed, 116 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index b8bd7e703b19..63109a870d2c 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -10,6 +10,7 @@
> #include <misc/ocxl.h>
> #include <linux/delay.h>
> #include <linux/ndctl.h>
> +#include <linux/fs.h>
> #include <linux/mm_types.h>
> #include <linux/memory_hotplug.h>
> #include "ocxl_internal.h"
> @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
>
> free_minor(ocxlpmem);
>
> + if (ocxlpmem->cdev.owner)
> + cdev_del(&ocxlpmem->cdev);
> +
> if (ocxlpmem->metadata_addr)
> devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
>
> @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem *ocxlpmem)
> return device_register(&ocxlpmem->dev);
> }
>
> +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> +{
> + put_device(&ocxlpmem->dev);
> +}
> +
> +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> +{
> + return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> +}
> +
> +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> +{
> + struct ocxlpmem *ocxlpmem;
> + int minor = MINOR(devno);
> + /*
> + * We don't declare an RCU critical section here, as our AFU
> + * is protected by a reference counter on the device. By the time the
> + * minor number of a device is removed from the idr, the ref count of
> + * the device is already at 0, so no user API will access that AFU and
> + * this function can't return it.
> + */
> + ocxlpmem = idr_find(&minors_idr, minor);
> + if (ocxlpmem)
> + ocxlpmem_get(ocxlpmem);
> + return ocxlpmem;
> +}
> +
> +static int file_open(struct inode *inode, struct file *file)
> +{
> + struct ocxlpmem *ocxlpmem;
> +
> + ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> + if (!ocxlpmem)
> + return -ENODEV;
> +
> + file->private_data = ocxlpmem;
> + return 0;
> +}
> +
> +static int file_release(struct inode *inode, struct file *file)
> +{
> + struct ocxlpmem *ocxlpmem = file->private_data;
> +
> + ocxlpmem_put(ocxlpmem);
> + return 0;
> +}
> +
> +static const struct file_operations fops = {
> + .owner = THIS_MODULE,
> + .open = file_open,
> + .release = file_release,
> +};
> +
> +/**
> + * create_cdev() - Create the chardev in /dev for the device
> + * @ocxlpmem: the SCM metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int create_cdev(struct ocxlpmem *ocxlpmem)
> +{
> + cdev_init(&ocxlpmem->cdev, &fops);
> + return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> +}
> +
> /**
> * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
> * @pdev: the PCI device information struct
> @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> goto err;
> }
>
> + if (create_cdev(ocxlpmem)) {
> + dev_err(&pdev->dev, "Could not create character device\n");
> + goto err;
> + }
> +
> elapsed = 0;
> timeout = ocxlpmem->readiness_timeout + ocxlpmem->memory_available_timeout;
> while (!is_usable(ocxlpmem, false)) {
> @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
> .shutdown = ocxlpmem_remove,
> };
>
> +static int file_init(void)
> +{
> + int rc;
> +
> + mutex_init(&minors_idr_lock);
> + idr_init(&minors_idr);
> +
> + rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-pmem");

If the driver is going to be called "ocxlpmem" can we standardise on
that without the extra hyphen?

> + if (rc) {
> + idr_destroy(&minors_idr);
> + pr_err("Unable to allocate OpenCAPI persistent memory major number: %d\n", rc);
> + return rc;
> + }
> +
> + ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> + if (IS_ERR(ocxlpmem_class)) {
> + idr_destroy(&minors_idr);
> + pr_err("Unable to create ocxl-pmem class\n");
> + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> + return PTR_ERR(ocxlpmem_class);
> + }
> +
> + return 0;
> +}
> +
> +static void file_exit(void)
> +{
> + class_destroy(ocxlpmem_class);
> + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> + idr_destroy(&minors_idr);
> +}
> +
> static int __init ocxlpmem_init(void)
> {
> - int rc = 0;
> + int rc;
>
> - rc = pci_register_driver(&pci_driver);
> + rc = file_init();
> if (rc)
> return rc;
>
> + rc = pci_register_driver(&pci_driver);
> + if (rc) {
> + file_exit();
> + return rc;
> + }
> +
> return 0;
> }
>
> static void ocxlpmem_exit(void)
> {
> pci_unregister_driver(&pci_driver);
> + file_exit();
> }
>
> module_init(ocxlpmem_init);
--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-03-02 05:40:48

by Alastair D'Silva

[permalink] [raw]
Subject: RE: [PATCH v3 25/27] powerpc/powernv/pmem: Expose the serial number in sysfs

On Mon, 2020-03-02 at 10:42 +1100, Alastair D'Silva wrote:
> On Fri, 2020-02-28 at 08:15 +0100, Greg Kroah-Hartman wrote:
> > On Fri, Feb 28, 2020 at 05:25:31PM +1100, Andrew Donnellan wrote:
> > > On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > > > +int ocxlpmem_sysfs_add(struct ocxlpmem *ocxlpmem)
> > > > +{
> > > > + int i, rc;
> > > > +
> > > > + for (i = 0; i < ARRAY_SIZE(attrs); i++) {
> > > > + rc = device_create_file(&ocxlpmem->dev,
> > > > &attrs[i]);
> > > > + if (rc) {
> > > > + for (; --i >= 0;)
> > > > + device_remove_file(&ocxlpmem-
> > > > >dev,
> > > > &attrs[i]);
> > >
> > > I'd rather avoid weird for loop constructs if possible.
> > >
> > > Is it actually dangerous to call device_remove_file() on an attr
> > > that hasn't
> > > been added? If not then I'd rather define an err: label and loop
> > > over the
> > > whole array there.
> >
> > None of this should be used at all, just use attribute groups
> > properly
> > and the driver core will handle this all for you.
> >
> > device_create/remove_file should never be called by anyone anymore
> > if
> > at all
> > possible.
> >
> > thanks,
> >
> > greg k-h
>
> Thanks, I'll rework it to use the .groups member of struct
> pci_driver.
>

I ended up making these available as DIMM attributes instead.

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-02 05:42:46

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 21/27] powerpc/powernv/pmem: Add an IOCTL to request controller health & perf data

On Fri, 2020-02-28 at 17:12 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > When health & performance data is requested from the controller,
> > it responds with an error log containing the requested information.
> >
> > This patch allows the request to be issued via an IOCTL.
>
> A better explanation would be good - this IOCTL triggers a request
> to
> the controller to collect controller health/perf data, and the
> controller will later respond with an error log that can be picked
> up
> via the error log IOCTL that you've defined earlier.
>
>

Ok

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-02 06:06:36

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 16/27] powerpc/powernv/pmem: Register a character device for userspace to interact with

On Mon, 2020-03-02 at 16:34 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > This patch introduces a character device (/dev/ocxl-scmX) which
> > further
> > patches will use to interact with userspace.
>
> As with the comments on other patches in this series, this commit
> message is lacking in explanation. What's the purpose of this device?
>

I'll reword this for v4.

> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > arch/powerpc/platforms/powernv/pmem/ocxl.c | 116
> > +++++++++++++++++-
> > .../platforms/powernv/pmem/ocxl_internal.h | 2 +
> > 2 files changed, 116 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index b8bd7e703b19..63109a870d2c 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -10,6 +10,7 @@
> > #include <misc/ocxl.h>
> > #include <linux/delay.h>
> > #include <linux/ndctl.h>
> > +#include <linux/fs.h>
> > #include <linux/mm_types.h>
> > #include <linux/memory_hotplug.h>
> > #include "ocxl_internal.h"
> > @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem
> > *ocxlpmem)
> >
> > free_minor(ocxlpmem);
> >
> > + if (ocxlpmem->cdev.owner)
> > + cdev_del(&ocxlpmem->cdev);
> > +
> > if (ocxlpmem->metadata_addr)
> > devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
> >
> > @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem
> > *ocxlpmem)
> > return device_register(&ocxlpmem->dev);
> > }
> >
> > +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> > +{
> > + put_device(&ocxlpmem->dev);
> > +}
> > +
> > +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> > +{
> > + return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> > +}
> > +
> > +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> > +{
> > + struct ocxlpmem *ocxlpmem;
> > + int minor = MINOR(devno);
> > + /*
> > + * We don't declare an RCU critical section here, as our AFU
> > + * is protected by a reference counter on the device. By the
> > time the
> > + * minor number of a device is removed from the idr, the ref
> > count of
> > + * the device is already at 0, so no user API will access that
> > AFU and
> > + * this function can't return it.
> > + */
> > + ocxlpmem = idr_find(&minors_idr, minor);
> > + if (ocxlpmem)
> > + ocxlpmem_get(ocxlpmem);
> > + return ocxlpmem;
> > +}
> > +
> > +static int file_open(struct inode *inode, struct file *file)
> > +{
> > + struct ocxlpmem *ocxlpmem;
> > +
> > + ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> > + if (!ocxlpmem)
> > + return -ENODEV;
> > +
> > + file->private_data = ocxlpmem;
> > + return 0;
> > +}
> > +
> > +static int file_release(struct inode *inode, struct file *file)
> > +{
> > + struct ocxlpmem *ocxlpmem = file->private_data;
> > +
> > + ocxlpmem_put(ocxlpmem);
> > + return 0;
> > +}
> > +
> > +static const struct file_operations fops = {
> > + .owner = THIS_MODULE,
> > + .open = file_open,
> > + .release = file_release,
> > +};
> > +
> > +/**
> > + * create_cdev() - Create the chardev in /dev for the device
> > + * @ocxlpmem: the SCM metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int create_cdev(struct ocxlpmem *ocxlpmem)
> > +{
> > + cdev_init(&ocxlpmem->cdev, &fops);
> > + return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> > +}
> > +
> > /**
> > * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
> > * @pdev: the PCI device information struct
> > @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> > goto err;
> > }
> >
> > + if (create_cdev(ocxlpmem)) {
> > + dev_err(&pdev->dev, "Could not create character
> > device\n");
> > + goto err;
> > + }
> > +
> > elapsed = 0;
> > timeout = ocxlpmem->readiness_timeout + ocxlpmem-
> > >memory_available_timeout;
> > while (!is_usable(ocxlpmem, false)) {
> > @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
> > .shutdown = ocxlpmem_remove,
> > };
> >
> > +static int file_init(void)
> > +{
> > + int rc;
> > +
> > + mutex_init(&minors_idr_lock);
> > + idr_init(&minors_idr);
> > +
> > + rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-
> > pmem");
>
> If the driver is going to be called "ocxlpmem" can we standardise on
> that without the extra hyphen?

Ok

> > + if (rc) {
> > + idr_destroy(&minors_idr);
> > + pr_err("Unable to allocate OpenCAPI persistent memory
> > major number: %d\n", rc);
> > + return rc;
> > + }
> > +
> > + ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> > + if (IS_ERR(ocxlpmem_class)) {
> > + idr_destroy(&minors_idr);
> > + pr_err("Unable to create ocxl-pmem class\n");
> > + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > + return PTR_ERR(ocxlpmem_class);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static void file_exit(void)
> > +{
> > + class_destroy(ocxlpmem_class);
> > + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > + idr_destroy(&minors_idr);
> > +}
> > +
> > static int __init ocxlpmem_init(void)
> > {
> > - int rc = 0;
> > + int rc;
> >
> > - rc = pci_register_driver(&pci_driver);
> > + rc = file_init();
> > if (rc)
> > return rc;
> >
> > + rc = pci_register_driver(&pci_driver);
> > + if (rc) {
> > + file_exit();
> > + return rc;
> > + }
> > +
> > return 0;
> > }
> >
> > static void ocxlpmem_exit(void)
> > {
> > pci_unregister_driver(&pci_driver);
> > + file_exit();
> > }
> >
> > module_init(ocxlpmem_init);
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-03 06:11:58

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 03/27] powerpc: Map & release OpenCAPI LPC memory

On 21/2/20 2:26 pm, Alastair D'Silva wrote:
> +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size)
> +{
> + struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> + struct pnv_phb *phb = hose->private_data;
> + u32 bdfn = pci_dev_id(pdev);
> + __be64 base_addr_be64;
> + u64 base_addr;
> + int rc;
> +
> + rc = opal_npu_mem_alloc(phb->opal_id, bdfn, size, &base_addr_be64);

Sparse warning:

https://openpower.xyz/job/snowpatch/job/snowpatch-linux-sparse/15776//artifact/linux/report.txt

I think in patch 1 we need to change a uint64_t to a __be64.

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-03-03 07:03:33

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 20/27] powerpc/powernv/pmem: Forward events to userspace

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> @@ -938,6 +955,51 @@ static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
> return rc;
> }
>
> +static int ioctl_eventfd(struct ocxlpmem *ocxlpmem,
> + struct ioctl_ocxl_pmem_eventfd __user *uarg)
> +{
> + struct ioctl_ocxl_pmem_eventfd args;
> +
> + if (copy_from_user(&args, uarg, sizeof(args)))
> + return -EFAULT;
> +
> + if (ocxlpmem->ev_ctx)
> + return -EINVAL;

I think EBUSY is more appropriate here.

> +
> + ocxlpmem->ev_ctx = eventfd_ctx_fdget(args.eventfd);
> + if (!ocxlpmem->ev_ctx)
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> +static int ioctl_event_check(struct ocxlpmem *ocxlpmem, u64 __user *uarg)
> +{
> + u64 val = 0;
> + int rc;
> + u64 chi = 0;
> +
> + rc = ocxlpmem_chi(ocxlpmem, &chi);
> + if (rc < 0)
> + return rc;
> +
> + if (chi & GLOBAL_MMIO_CHI_ELA)
> + val |= IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE;
> +
> + if (chi & GLOBAL_MMIO_CHI_CDA)
> + val |= IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE;
> +
> + if (chi & GLOBAL_MMIO_CHI_CFFS)
> + val |= IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL;
> +
> + if (chi & GLOBAL_MMIO_CHI_CHFS)
> + val |= IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL;
> +
> + rc = copy_to_user((u64 __user *) uarg, &val, sizeof(val));
> +
> + return rc;
> +}
> +
> static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> {
> struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -966,6 +1028,15 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> rc = ioctl_controller_stats(ocxlpmem,
> (struct ioctl_ocxl_pmem_controller_stats __user *)args);
> break;
> +
> + case IOCTL_OCXL_PMEM_EVENTFD:
> + rc = ioctl_eventfd(ocxlpmem,
> + (struct ioctl_ocxl_pmem_eventfd __user *)args);
> + break;
> +
> + case IOCTL_OCXL_PMEM_EVENT_CHECK:
> + rc = ioctl_event_check(ocxlpmem, (u64 __user *)args);
> + break;
> }
>
> return rc;
> @@ -1107,6 +1178,146 @@ static void dump_error_log(struct ocxlpmem *ocxlpmem)
> kfree(buf);
> }
>
> +static irqreturn_t imn0_handler(void *private)
> +{
> + struct ocxlpmem *ocxlpmem = private;
> + u64 chi = 0;
> +
> + (void)ocxlpmem_chi(ocxlpmem, &chi);
> +
> + if (chi & GLOBAL_MMIO_CHI_ELA) {
> + dev_warn(&ocxlpmem->dev, "Error log is available\n");
> +
> + if (ocxlpmem->ev_ctx)
> + eventfd_signal(ocxlpmem->ev_ctx, 1);
> + }
> +
> + if (chi & GLOBAL_MMIO_CHI_CDA) {
> + dev_warn(&ocxlpmem->dev, "Controller dump is available\n");
> +
> + if (ocxlpmem->ev_ctx)
> + eventfd_signal(ocxlpmem->ev_ctx, 1);
> + }
> +
> +
> + return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t imn1_handler(void *private)
> +{
> + struct ocxlpmem *ocxlpmem = private;
> + u64 chi = 0;
> +
> + (void)ocxlpmem_chi(ocxlpmem, &chi);
> +
> + if (chi & (GLOBAL_MMIO_CHI_CFFS | GLOBAL_MMIO_CHI_CHFS)) {
> + dev_err(&ocxlpmem->dev,
> + "Controller status is fatal, chi=0x%llx, going offline\n", chi);
> +
> + if (ocxlpmem->nvdimm_bus) {
> + nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);
> + ocxlpmem->nvdimm_bus = NULL;
> + }
> +
> + if (ocxlpmem->ev_ctx)
> + eventfd_signal(ocxlpmem->ev_ctx, 1);
> + }
> +
> + return IRQ_HANDLED;
> +}
> +
> +
> +/**
> + * ocxlpmem_setup_irq() - Set up the IRQs for the OpenCAPI Persistent Memory device
> + * @ocxlpmem: the device metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int ocxlpmem_setup_irq(struct ocxlpmem *ocxlpmem)
> +{
> + int rc;
> + u64 irq_addr;
> +
> + rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem->irq_id[0]);
> + if (rc)
> + return rc;
> +
> + rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem->irq_id[0],
> + imn0_handler, NULL, ocxlpmem);
> +
> + irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context, ocxlpmem->irq_id[0]);
> + if (!irq_addr)
> + return -EINVAL;
> +
> + ocxlpmem->irq_addr[0] = ioremap(irq_addr, PAGE_SIZE);
> + if (!ocxlpmem->irq_addr[0])
> + return -EINVAL;

Something other than EINVAL for these two

> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA0_OHP,
> + OCXL_LITTLE_ENDIAN,
> + (u64)ocxlpmem->irq_addr[0]);
> + if (rc)
> + goto out_irq0;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA0_CFP,
> + OCXL_LITTLE_ENDIAN, 0);
> + if (rc)
> + goto out_irq0;
> +
> + rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem->irq_id[1]);
> + if (rc)
> + goto out_irq0;
> +
> +
> + rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem->irq_id[1],
> + imn1_handler, NULL, ocxlpmem);
> + if (rc)
> + goto out_irq0;
> +
> + irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context, ocxlpmem->irq_id[1]);
> + if (!irq_addr) {
> + rc = -EFAULT;
> + goto out_irq0;
> + }
> +
> + ocxlpmem->irq_addr[1] = ioremap(irq_addr, PAGE_SIZE);
> + if (!ocxlpmem->irq_addr[1]) {
> + rc = -EINVAL;
> + goto out_irq0;
> + }
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA1_OHP,
> + OCXL_LITTLE_ENDIAN,
> + (u64)ocxlpmem->irq_addr[1]);
> + if (rc)
> + goto out_irq1;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA1_CFP,
> + OCXL_LITTLE_ENDIAN, 0);
> + if (rc)
> + goto out_irq1;
> +
> + // Enable doorbells
> + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIE,
> + OCXL_LITTLE_ENDIAN,
> + GLOBAL_MMIO_CHI_ELA | GLOBAL_MMIO_CHI_CDA |
> + GLOBAL_MMIO_CHI_CFFS | GLOBAL_MMIO_CHI_CHFS |
> + GLOBAL_MMIO_CHI_NSCRA);

We don't actually do anything in the handlers with NSCRA...

> + if (rc)
> + goto out_irq1;
> +
> + return 0;
> +
> +out_irq1:
> + iounmap(ocxlpmem->irq_addr[1]);
> + ocxlpmem->irq_addr[1] = NULL;
> +
> +out_irq0:
> + iounmap(ocxlpmem->irq_addr[0]);
> + ocxlpmem->irq_addr[0] = NULL;
> +
> + return rc;
> +}
> +
> /**
> * probe_function0() - Set up function 0 for an OpenCAPI persistent memory device
> * This is important as it enables templates higher than 0 across all other functions,
> @@ -1216,6 +1427,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> goto err;
> }
>
> + if (ocxlpmem_setup_irq(ocxlpmem)) {
> + dev_err(&pdev->dev, "Could not set up OCXL IRQs\n");
> + goto err;
> + }
> +
> if (setup_command_metadata(ocxlpmem)) {
> dev_err(&pdev->dev, "Could not read OCXL command matada\n");
> goto err;
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> index b953ee522ed4..927690f4888f 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> @@ -103,6 +103,10 @@ struct ocxlpmem {
> struct pci_dev *pdev;
> struct cdev cdev;
> struct ocxl_fn *ocxl_fn;
> +#define SCM_IRQ_COUNT 2
> + int irq_id[SCM_IRQ_COUNT];
> + struct dev_pagemap irq_pgmap[SCM_IRQ_COUNT];
> + void *irq_addr[SCM_IRQ_COUNT];

I think this should be tagged __iomem

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-03-03 09:30:21

by Frederic Barrat

[permalink] [raw]
Subject: Re: [PATCH v3 16/27] powerpc/powernv/pmem: Register a character device for userspace to interact with



Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <[email protected]>
>
> This patch introduces a character device (/dev/ocxl-scmX) which further
> patches will use to interact with userspace.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/platforms/powernv/pmem/ocxl.c | 116 +++++++++++++++++-
> .../platforms/powernv/pmem/ocxl_internal.h | 2 +
> 2 files changed, 116 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index b8bd7e703b19..63109a870d2c 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -10,6 +10,7 @@
> #include <misc/ocxl.h>
> #include <linux/delay.h>
> #include <linux/ndctl.h>
> +#include <linux/fs.h>
> #include <linux/mm_types.h>
> #include <linux/memory_hotplug.h>
> #include "ocxl_internal.h"
> @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
>
> free_minor(ocxlpmem);
>
> + if (ocxlpmem->cdev.owner)
> + cdev_del(&ocxlpmem->cdev);
> +
> if (ocxlpmem->metadata_addr)
> devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
>
> @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem *ocxlpmem)
> return device_register(&ocxlpmem->dev);
> }
>
> +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> +{
> + put_device(&ocxlpmem->dev);
> +}
> +
> +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> +{
> + return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> +}
> +
> +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> +{
> + struct ocxlpmem *ocxlpmem;
> + int minor = MINOR(devno);
> + /*
> + * We don't declare an RCU critical section here, as our AFU
> + * is protected by a reference counter on the device. By the time the
> + * minor number of a device is removed from the idr, the ref count of
> + * the device is already at 0, so no user API will access that AFU and
> + * this function can't return it.
> + */


I fixed something related in the ocxl driver (which had enough changes
with the introduction of the "info" device to make a similar comment
become wrong). See commit a58d37bce0d21. The issue is handling a
simultaneous open() and removal of the device through sysfs as best we can.

We are on a file open path and it's not like we're going to have a
thousand clients, so performance is not that critical. We can take the
mutex before searching in the IDR and release it after we increment the
reference count on the device.
But that's not enough: we could still find the device in the IDR while
it is being removed in free_ocxlpmem(). I believe the only safe way to
address it is by removing the user-facing APIs (the char device) before
calling device_unregister(). So that it's not possible to find the
device in file_open() if it's in the middle of being removed.

Fred


> + ocxlpmem = idr_find(&minors_idr, minor);
> + if (ocxlpmem)
> + ocxlpmem_get(ocxlpmem);
> + return ocxlpmem;
> +}
> +
> +static int file_open(struct inode *inode, struct file *file)
> +{
> + struct ocxlpmem *ocxlpmem;
> +
> + ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> + if (!ocxlpmem)
> + return -ENODEV;
> +
> + file->private_data = ocxlpmem;
> + return 0;
> +}
> +
> +static int file_release(struct inode *inode, struct file *file)
> +{
> + struct ocxlpmem *ocxlpmem = file->private_data;
> +
> + ocxlpmem_put(ocxlpmem);
> + return 0;
> +}
> +
> +static const struct file_operations fops = {
> + .owner = THIS_MODULE,
> + .open = file_open,
> + .release = file_release,
> +};
> +
> +/**
> + * create_cdev() - Create the chardev in /dev for the device
> + * @ocxlpmem: the SCM metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int create_cdev(struct ocxlpmem *ocxlpmem)
> +{
> + cdev_init(&ocxlpmem->cdev, &fops);
> + return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> +}
> +
> /**
> * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
> * @pdev: the PCI device information struct
> @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> goto err;
> }
>
> + if (create_cdev(ocxlpmem)) {
> + dev_err(&pdev->dev, "Could not create character device\n");
> + goto err;
> + }


As already mentioned in a previous patch, we branch to the err label so
rc needs to be set to a valid error.



> +
> elapsed = 0;
> timeout = ocxlpmem->readiness_timeout + ocxlpmem->memory_available_timeout;
> while (!is_usable(ocxlpmem, false)) {
> @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
> .shutdown = ocxlpmem_remove,
> };
>
> +static int file_init(void)
> +{
> + int rc;
> +
> + mutex_init(&minors_idr_lock);
> + idr_init(&minors_idr);
> +
> + rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-pmem");
> + if (rc) {
> + idr_destroy(&minors_idr);
> + pr_err("Unable to allocate OpenCAPI persistent memory major number: %d\n", rc);
> + return rc;
> + }
> +
> + ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> + if (IS_ERR(ocxlpmem_class)) {
> + idr_destroy(&minors_idr);
> + pr_err("Unable to create ocxl-pmem class\n");
> + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> + return PTR_ERR(ocxlpmem_class);
> + }
> +
> + return 0;
> +}
> +
> +static void file_exit(void)
> +{
> + class_destroy(ocxlpmem_class);
> + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> + idr_destroy(&minors_idr);
> +}
> +
> static int __init ocxlpmem_init(void)
> {
> - int rc = 0;
> + int rc;
>
> - rc = pci_register_driver(&pci_driver);
> + rc = file_init();
> if (rc)
> return rc;
>
> + rc = pci_register_driver(&pci_driver);
> + if (rc) {
> + file_exit();
> + return rc;
> + }
> +
> return 0;
> }
>
> static void ocxlpmem_exit(void)
> {
> pci_unregister_driver(&pci_driver);
> + file_exit();
> }
>
> module_init(ocxlpmem_init);
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> index 28e2020f6355..d2d81fec7bb1 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> @@ -2,6 +2,7 @@
> // Copyright 2019 IBM Corp.
>
> #include <linux/pci.h>
> +#include <linux/cdev.h>
> #include <misc/ocxl.h>
> #include <linux/libnvdimm.h>
> #include <linux/mm.h>
> @@ -99,6 +100,7 @@ struct ocxlpmem_function0 {
> struct ocxlpmem {
> struct device dev;
> struct pci_dev *pdev;
> + struct cdev cdev;
> struct ocxl_fn *ocxl_fn;
> struct nd_interleave_set nd_set;
> struct nvdimm_bus_descriptor bus_desc;
>

2020-03-03 18:07:02

by Frederic Barrat

[permalink] [raw]
Subject: Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs



Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <[email protected]>
>
> This patch adds IOCTLs to allow userspace to request & fetch dumps
> of the internal controller state.
>
> This is useful during debugging or when a fatal error on the controller
> has occurred.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/platforms/powernv/pmem/ocxl.c | 132 +++++++++++++++++++++
> include/uapi/nvdimm/ocxl-pmem.h | 15 +++
> 2 files changed, 147 insertions(+)
>
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index 2b64504f9129..2cabafe1fc58 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
> return 0;
> }
>
> +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> + struct ioctl_ocxl_pmem_controller_dump_data __user *uarg)
> +{
> + struct ioctl_ocxl_pmem_controller_dump_data args;
> + u16 i;
> + u64 val;
> + int rc;
> +
> + if (copy_from_user(&args, uarg, sizeof(args)))
> + return -EFAULT;
> +
> + if (args.buf_size % 8)
> + return -EINVAL;
> +
> + if (args.buf_size > ocxlpmem->admin_command.data_size)
> + return -EINVAL;
> +
> + mutex_lock(&ocxlpmem->admin_command.lock);
> +
> + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
> + if (rc)
> + goto out;
> +
> + val = ((u64)args.offset) << 32;
> + val |= args.buf_size;
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.request_offset + 0x08,
> + OCXL_LITTLE_ENDIAN, val);
> + if (rc)
> + goto out;
> +
> + rc = admin_command_execute(ocxlpmem);
> + if (rc)
> + goto out;
> +
> + rc = admin_command_complete_timeout(ocxlpmem,
> + ADMIN_COMMAND_CONTROLLER_DUMP);
> + if (rc < 0) {
> + dev_warn(&ocxlpmem->dev, "Controller dump timed out\n");
> + goto out;
> + }
> +
> + rc = admin_response(ocxlpmem);
> + if (rc < 0)
> + goto out;
> + if (rc != STATUS_SUCCESS) {
> + warn_status(ocxlpmem,
> + "Unexpected status from retrieve error log",
> + rc);
> + goto out;
> + }



It would help if there was a comment indicating how the 3 ioctls are
used. My understanding is that userland:
- requests that the controller prepare a state dump
- then issues one or more ioctls to fetch the data. The number of calls
required to get the full state depends on the size of the buffer
passed by the user
- issues a last ioctl to tell the controller that we're done, presumably
to let it free some resources.


> +
> + for (i = 0; i < args.buf_size; i += 8) {
> + u64 val;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + i,
> + OCXL_HOST_ENDIAN, &val);
> + if (rc)
> + goto out;
> +
> + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> + rc = -EFAULT;
> + goto out;
> + }
> + }
> +
> + if (copy_to_user(uarg, &args, sizeof(args))) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + rc = admin_response_handled(ocxlpmem);
> + if (rc)
> + goto out;
> +
> +out:
> + mutex_unlock(&ocxlpmem->admin_command.lock);
> + return rc;
> +}
> +
> +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> +{
> + int rc;
> + u64 busy = 1;
> +
> + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
> + OCXL_LITTLE_ENDIAN,
> + GLOBAL_MMIO_CHI_CDA);
> +


rc is not checked here.


> +
> + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
> + OCXL_LITTLE_ENDIAN,
> + GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> + if (rc)
> + return rc;
> +
> + while (busy) {
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + GLOBAL_MMIO_HCI,
> + OCXL_LITTLE_ENDIAN, &busy);
> + if (rc)
> + return rc;
> +
> + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;


Setting 'busy' doesn't hurt, but it's not really useful, is it?

We should add some kind of timeout so that if the controller hits an
issue, we don't spin in kernel space endlessly.



> + cond_resched();
> + }
> +
> + return 0;
> +}
> +
> +static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
> +{
> + return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
> + OCXL_LITTLE_ENDIAN,
> + GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
> +}
> +
> static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> {
> struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -650,7 +768,21 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> rc = ioctl_error_log(ocxlpmem,
> (struct ioctl_ocxl_pmem_error_log __user *)args);
> break;
> +
> + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP:
> + rc = request_controller_dump(ocxlpmem);
> + break;
> +
> + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA:
> + rc = ioctl_controller_dump_data(ocxlpmem,
> + (struct ioctl_ocxl_pmem_controller_dump_data __user *)args);
> + break;
> +
> + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> + rc = ioctl_controller_dump_complete(ocxlpmem);
> + break;
> }
> +
> return rc;
> }
>
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> index b10f8ac0c20f..d4d8512d03f7 100644
> --- a/include/uapi/nvdimm/ocxl-pmem.h
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -38,9 +38,24 @@ struct ioctl_ocxl_pmem_error_log {
> __u8 *buf; /* pointer to output buffer */
> };
>
> +struct ioctl_ocxl_pmem_controller_dump_data {
> + __u8 *buf; /* pointer to output buffer */


We only support 64-bit user apps on powerpc, but using a pointer type in
a kernel ABI is unusual. We should use a known size like __u64.
(This also applies to the buf pointer in struct ioctl_ocxl_pmem_error_log
from the previous patch.)

The rest of the structure will also be padded by the compiler, which we
should avoid.

Fred



> + __u16 buf_size; /* in/out, buffer size provided/required.
> + * If required is greater than provided, the buffer
> + * will be truncated to the amount provided. If its
> + * less, then only the required bytes will be populated.
> + * If it is 0, then there is no more dump data available.
> + */
> + __u32 offset; /* in, Offset within the dump */
> + __u64 reserved[8];
> +};
> +
> /* ioctl numbers */
> #define OCXL_PMEM_MAGIC 0x5C
> /* SCM devices */
> #define IOCTL_OCXL_PMEM_ERROR_LOG _IOWR(OCXL_PMEM_MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCXL_PMEM_MAGIC, 0x02)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_MAGIC, 0x04)
>
> #endif /* _UAPI_OCXL_SCM_H */
>

2020-03-04 05:33:48

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 03/27] powerpc: Map & release OpenCAPI LPC memory

On Tue, 2020-03-03 at 17:10 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:26 pm, Alastair D'Silva wrote:> +#ifdef
> CONFIG_MEMORY_HOTPLUG_SPARSE
> > +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size)
> > +{
> > + struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> > + struct pnv_phb *phb = hose->private_data;
> > + u32 bdfn = pci_dev_id(pdev);
> > + __be64 base_addr_be64;
> > + u64 base_addr;
> > + int rc;
> > +
> > + rc = opal_npu_mem_alloc(phb->opal_id, bdfn, size,
> > &base_addr_be64);
>
> Sparse warning:
>
> https://openpower.xyz/job/snowpatch/job/snowpatch-linux-sparse/15776//artifact/linux/report.txt
>
> I think in patch 1 we need to change a uint64_t to a __be64.
>

Ok, thanks

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-04 05:48:44

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 20/27] powerpc/powernv/pmem: Forward events to userspace

On Tue, 2020-03-03 at 18:02 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:> @@ -938,6 +955,51 @@
> static
> int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
> > return rc;
> > }
> >
> > +static int ioctl_eventfd(struct ocxlpmem *ocxlpmem,
> > + struct ioctl_ocxl_pmem_eventfd __user *uarg)
> > +{
> > + struct ioctl_ocxl_pmem_eventfd args;
> > +
> > + if (copy_from_user(&args, uarg, sizeof(args)))
> > + return -EFAULT;
> > +
> > + if (ocxlpmem->ev_ctx)
> > + return -EINVAL;
>
> I think EBUSY is more appropriate here.
>

Ok

> > +
> > + ocxlpmem->ev_ctx = eventfd_ctx_fdget(args.eventfd);
> > + if (!ocxlpmem->ev_ctx)
> > + return -EFAULT;
> > +
> > + return 0;
> > +}
> > +
> > +static int ioctl_event_check(struct ocxlpmem *ocxlpmem, u64 __user
> > *uarg)
> > +{
> > + u64 val = 0;
> > + int rc;
> > + u64 chi = 0;
> > +
> > + rc = ocxlpmem_chi(ocxlpmem, &chi);
> > + if (rc < 0)
> > + return rc;
> > +
> > + if (chi & GLOBAL_MMIO_CHI_ELA)
> > + val |= IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE;
> > +
> > + if (chi & GLOBAL_MMIO_CHI_CDA)
> > + val |= IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE;
> > +
> > + if (chi & GLOBAL_MMIO_CHI_CFFS)
> > + val |= IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL;
> > +
> > + if (chi & GLOBAL_MMIO_CHI_CHFS)
> > + val |= IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL;
> > +
> > + rc = copy_to_user((u64 __user *) uarg, &val, sizeof(val));
> > +
> > + return rc;
> > +}
> > +
> > static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> > {
> > struct ocxlpmem *ocxlpmem = file->private_data;
> > @@ -966,6 +1028,15 @@ static long file_ioctl(struct file *file,
> > unsigned int cmd, unsigned long args)
> > rc = ioctl_controller_stats(ocxlpmem,
> > (struct
> > ioctl_ocxl_pmem_controller_stats __user *)args);
> > break;
> > +
> > + case IOCTL_OCXL_PMEM_EVENTFD:
> > + rc = ioctl_eventfd(ocxlpmem,
> > + (struct ioctl_ocxl_pmem_eventfd
> > __user *)args);
> > + break;
> > +
> > + case IOCTL_OCXL_PMEM_EVENT_CHECK:
> > + rc = ioctl_event_check(ocxlpmem, (u64 __user *)args);
> > + break;
> > }
> >
> > return rc;
> > @@ -1107,6 +1178,146 @@ static void dump_error_log(struct ocxlpmem
> > *ocxlpmem)
> > kfree(buf);
> > }
> >
> > +static irqreturn_t imn0_handler(void *private)
> > +{
> > + struct ocxlpmem *ocxlpmem = private;
> > + u64 chi = 0;
> > +
> > + (void)ocxlpmem_chi(ocxlpmem, &chi);
> > +
> > + if (chi & GLOBAL_MMIO_CHI_ELA) {
> > + dev_warn(&ocxlpmem->dev, "Error log is available\n");
> > +
> > + if (ocxlpmem->ev_ctx)
> > + eventfd_signal(ocxlpmem->ev_ctx, 1);
> > + }
> > +
> > + if (chi & GLOBAL_MMIO_CHI_CDA) {
> > + dev_warn(&ocxlpmem->dev, "Controller dump is
> > available\n");
> > +
> > + if (ocxlpmem->ev_ctx)
> > + eventfd_signal(ocxlpmem->ev_ctx, 1);
> > + }
> > +
> > +
> > + return IRQ_HANDLED;
> > +}
> > +
> > +static irqreturn_t imn1_handler(void *private)
> > +{
> > + struct ocxlpmem *ocxlpmem = private;
> > + u64 chi = 0;
> > +
> > + (void)ocxlpmem_chi(ocxlpmem, &chi);
> > +
> > + if (chi & (GLOBAL_MMIO_CHI_CFFS | GLOBAL_MMIO_CHI_CHFS)) {
> > + dev_err(&ocxlpmem->dev,
> > + "Controller status is fatal, chi=0x%llx, going
> > offline\n", chi);
> > +
> > + if (ocxlpmem->nvdimm_bus) {
> > + nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);
> > + ocxlpmem->nvdimm_bus = NULL;
> > + }
> > +
> > + if (ocxlpmem->ev_ctx)
> > + eventfd_signal(ocxlpmem->ev_ctx, 1);
> > + }
> > +
> > + return IRQ_HANDLED;
> > +}
> > +
> > +
> > +/**
> > + * ocxlpmem_setup_irq() - Set up the IRQs for the OpenCAPI
> > Persistent Memory device
> > + * @ocxlpmem: the device metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int ocxlpmem_setup_irq(struct ocxlpmem *ocxlpmem)
> > +{
> > + int rc;
> > + u64 irq_addr;
> > +
> > + rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem-
> > >irq_id[0]);
> > + if (rc)
> > + return rc;
> > +
> > + rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem-
> > >irq_id[0],
> > + imn0_handler, NULL, ocxlpmem);
> > +
> > + irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context,
> > ocxlpmem->irq_id[0]);
> > + if (!irq_addr)
> > + return -EINVAL;
> > +
> > + ocxlpmem->irq_addr[0] = ioremap(irq_addr, PAGE_SIZE);
> > + if (!ocxlpmem->irq_addr[0])
> > + return -EINVAL;
>
> Something other than EINVAL for these two

Ok

>
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_IMA0_OHP,
> > + OCXL_LITTLE_ENDIAN,
> > + (u64)ocxlpmem->irq_addr[0]);
> > + if (rc)
> > + goto out_irq0;
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_IMA0_CFP,
> > + OCXL_LITTLE_ENDIAN, 0);
> > + if (rc)
> > + goto out_irq0;
> > +
> > + rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem-
> > >irq_id[1]);
> > + if (rc)
> > + goto out_irq0;
> > +
> > +
> > + rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem-
> > >irq_id[1],
> > + imn1_handler, NULL, ocxlpmem);
> > + if (rc)
> > + goto out_irq0;
> > +
> > + irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context,
> > ocxlpmem->irq_id[1]);
> > + if (!irq_addr) {
> > + rc = -EFAULT;
> > + goto out_irq0;
> > + }
> > +
> > + ocxlpmem->irq_addr[1] = ioremap(irq_addr, PAGE_SIZE);
> > + if (!ocxlpmem->irq_addr[1]) {
> > + rc = -EINVAL;
> > + goto out_irq0;
> > + }
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_IMA1_OHP,
> > + OCXL_LITTLE_ENDIAN,
> > + (u64)ocxlpmem->irq_addr[1]);
> > + if (rc)
> > + goto out_irq1;
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_IMA1_CFP,
> > + OCXL_LITTLE_ENDIAN, 0);
> > + if (rc)
> > + goto out_irq1;
> > +
> > + // Enable doorbells
> > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIE,
> > + OCXL_LITTLE_ENDIAN,
> > + GLOBAL_MMIO_CHI_ELA |
> > GLOBAL_MMIO_CHI_CDA |
> > + GLOBAL_MMIO_CHI_CFFS |
> > GLOBAL_MMIO_CHI_CHFS |
> > + GLOBAL_MMIO_CHI_NSCRA);
>
> We don't actually do anything in the handlers with NSCRA...

Good catch, this belongs in the overwrite patch (which was dropped from
this series).

>
> > + if (rc)
> > + goto out_irq1;
> > +
> > + return 0;
> > +
> > +out_irq1:
> > + iounmap(ocxlpmem->irq_addr[1]);
> > + ocxlpmem->irq_addr[1] = NULL;
> > +
> > +out_irq0:
> > + iounmap(ocxlpmem->irq_addr[0]);
> > + ocxlpmem->irq_addr[0] = NULL;
> > +
> > + return rc;
> > +}
> > +
> > /**
> > * probe_function0() - Set up function 0 for an OpenCAPI
> > persistent memory device
> > * This is important as it enables templates higher than 0 across
> > all other functions,
> > @@ -1216,6 +1427,11 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> > goto err;
> > }
> >
> > + if (ocxlpmem_setup_irq(ocxlpmem)) {
> > + dev_err(&pdev->dev, "Could not set up OCXL IRQs\n");
> > + goto err;
> > + }
> > +
> > if (setup_command_metadata(ocxlpmem)) {
> > dev_err(&pdev->dev, "Could not read OCXL command
> > matada\n");
> > goto err;
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > index b953ee522ed4..927690f4888f 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > @@ -103,6 +103,10 @@ struct ocxlpmem {
> > struct pci_dev *pdev;
> > struct cdev cdev;
> > struct ocxl_fn *ocxl_fn;
> > +#define SCM_IRQ_COUNT 2
> > + int irq_id[SCM_IRQ_COUNT];
> > + struct dev_pagemap irq_pgmap[SCM_IRQ_COUNT];
> > + void *irq_addr[SCM_IRQ_COUNT];
>
> I think this should be tagged __iomem
>

Ok

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-04 06:53:48

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> + struct ioctl_ocxl_pmem_controller_dump_data __user *uarg)
> +{
> + struct ioctl_ocxl_pmem_controller_dump_data args;
> + u16 i;
> + u64 val;
> + int rc;
> +
> + if (copy_from_user(&args, uarg, sizeof(args)))
> + return -EFAULT;
> +
> + if (args.buf_size % 8)
> + return -EINVAL;
> +
> + if (args.buf_size > ocxlpmem->admin_command.data_size)
> + return -EINVAL;
> +
> + mutex_lock(&ocxlpmem->admin_command.lock);
> +
> + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
> + if (rc)
> + goto out;
> +
> + val = ((u64)args.offset) << 32;
> + val |= args.buf_size;
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.request_offset + 0x08,
> + OCXL_LITTLE_ENDIAN, val);
> + if (rc)
> + goto out;
> +
> + rc = admin_command_execute(ocxlpmem);
> + if (rc)
> + goto out;
> +
> + rc = admin_command_complete_timeout(ocxlpmem,
> + ADMIN_COMMAND_CONTROLLER_DUMP);
> + if (rc < 0) {
> + dev_warn(&ocxlpmem->dev, "Controller dump timed out\n");
> + goto out;
> + }
> +
> + rc = admin_response(ocxlpmem);
> + if (rc < 0)
> + goto out;
> + if (rc != STATUS_SUCCESS) {
> + warn_status(ocxlpmem,
> + "Unexpected status from retrieve error log",

Controller dump

> + rc);
> + goto out;
> + }
> +
> + for (i = 0; i < args.buf_size; i += 8) {
> + u64 val;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + i,
> + OCXL_HOST_ENDIAN, &val);

Is a controller dump something where we want to do endian swapping?

Any reason we're not doing the usual check of the data identifier,
additional data length etc?

> + if (rc)
> + goto out;
> +
> + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> + rc = -EFAULT;
> + goto out;
> + }
> + }
> +
> + if (copy_to_user(uarg, &args, sizeof(args))) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + rc = admin_response_handled(ocxlpmem);
> + if (rc)
> + goto out;
> +
> +out:
> + mutex_unlock(&ocxlpmem->admin_command.lock);
> + return rc;
> +}
> +
> +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> +{
> + int rc;
> + u64 busy = 1;
> +
> + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
> + OCXL_LITTLE_ENDIAN,
> + GLOBAL_MMIO_CHI_CDA);

This return code is ignored

> +
> +
> + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
> + OCXL_LITTLE_ENDIAN,
> + GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> + if (rc)
> + return rc;
> +
> + while (busy) {
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + GLOBAL_MMIO_HCI,
> + OCXL_LITTLE_ENDIAN, &busy);
> + if (rc)
> + return rc;
> +
> + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
> + cond_resched();
> + }
> +
> + return 0;
> +}


--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-03-04 09:25:51

by Frederic Barrat

[permalink] [raw]
Subject: Re: [PATCH v3 19/27] powerpc/powernv/pmem: Add an IOCTL to report controller statistics



Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <[email protected]>
>
> The controller can report a number of statistics that are useful
> in evaluating the performance and reliability of the card.
>
> This patch exposes this information via an IOCTL.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/platforms/powernv/pmem/ocxl.c | 185 +++++++++++++++++++++
> include/uapi/nvdimm/ocxl-pmem.h | 17 ++
> 2 files changed, 202 insertions(+)
>
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index 2cabafe1fc58..009d4fd29e7d 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -758,6 +758,186 @@ static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
> GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
> }
>
> +/**
> + * controller_stats_header_parse() - Parse the first 64 bits of the controller stats admin command response
> + * @ocxlpmem: the device metadata
> + * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
> + */
> +static int controller_stats_header_parse(struct ocxlpmem *ocxlpmem,
> + u32 *length)
> +{
> + int rc;
> + u64 val;
> +


unexpected empty line


> + u16 data_identifier;
> + u32 data_length;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + return rc;
> +
> + data_identifier = val >> 48;
> + data_length = val & 0xFFFFFFFF;
> +
> + if (data_identifier != 0x4353) { // 'CS'
> + dev_err(&ocxlpmem->dev,
> + "Bad data identifier for controller stats, expected 'CS', got '%-.*s'\n",
> + 2, (char *)&data_identifier);



Wow, I'm clueless what that string format looks like :-)
2 arguments? Did you check the kernel string formatter does what you want?
You may consider unifying the format though, the error log patch uses a
simpler (better?) format for a similar message.



> + return -EINVAL;
> + }
> +
> + *length = data_length;
> + return 0;
> +}
> +
> +static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
> + struct ioctl_ocxl_pmem_controller_stats __user *uarg)
> +{
> + struct ioctl_ocxl_pmem_controller_stats args;
> + u32 length;
> + int rc;
> + u64 val;
> +
> + memset(&args, '\0', sizeof(args));
> +
> + mutex_lock(&ocxlpmem->admin_command.lock);
> +
> + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_STATS);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.request_offset + 0x08,
> + OCXL_LITTLE_ENDIAN, 0);
> + if (rc)
> + goto out;
> +
> + rc = admin_command_execute(ocxlpmem);
> + if (rc)
> + goto out;
> +
> +
> + rc = admin_command_complete_timeout(ocxlpmem,
> + ADMIN_COMMAND_CONTROLLER_STATS);
> + if (rc < 0) {
> + dev_warn(&ocxlpmem->dev, "Controller stats timed out\n");
> + goto out;
> + }
> +
> + rc = admin_response(ocxlpmem);
> + if (rc < 0)
> + goto out;
> + if (rc != STATUS_SUCCESS) {
> + warn_status(ocxlpmem,
> + "Unexpected status from controller stats", rc);
> + goto out;
> + }


All those ioctls commands follow the same pattern:
1. admin_command_request()
2. optionally, set some mmio registers specific to the command
3. admin_command_execute()
4. admin_command_complete_timeout()
5. admin_response()

By swapping 1 and 2, we could then factorize steps 1, 3, 4 and 5 in a
function and simplify/shorten the code each time a command is called.

Regarding step 2 (and that's true for all similar patches), a comment
about what the mmio tuning does would help and avoid looking up the
spec. Looking up the spec during the review is expected, but it will
ease reading the code 6 months from now.



> +
> + rc = controller_stats_header_parse(ocxlpmem, &length);
> + if (rc)
> + goto out;
> +
> + if (length != 0x140)
> + warn_status(ocxlpmem,
> + "Unexpected length for controller stats data, expected 0x140, got 0x%x",
> + length);
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x08,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + goto out;
> +
> + args.reset_count = val >> 32;
> + args.reset_uptime = val & 0xFFFFFFFF;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x10,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + goto out;
> +
> + args.power_on_uptime = val >> 32;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x08,
> + OCXL_LITTLE_ENDIAN, &args.host_load_count);


Those offsets are hard to understand, even with the spec next to me. And
it seems that we could harden things a bit:
each block has a "statistics parameter ID" and the length of the data for
that block. We should check that and make sure we're reading what we expect.
For example, from the spec I'm looking at (110d), I would expect the host
load count to be at offset 0x10. It's entirely possible I'm misreading
it though.



> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x10,
> + OCXL_LITTLE_ENDIAN, &args.host_store_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x18,
> + OCXL_LITTLE_ENDIAN, &args.media_read_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x20,
> + OCXL_LITTLE_ENDIAN, &args.media_write_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x28,
> + OCXL_LITTLE_ENDIAN, &args.cache_hit_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x30,
> + OCXL_LITTLE_ENDIAN, &args.cache_miss_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x38,
> + OCXL_LITTLE_ENDIAN, &args.media_read_latency);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x40,
> + OCXL_LITTLE_ENDIAN, &args.media_write_latency);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x48,
> + OCXL_LITTLE_ENDIAN, &args.cache_read_latency);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x50,
> + OCXL_LITTLE_ENDIAN, &args.cache_write_latency);
> + if (rc)
> + goto out;
> +
> + if (copy_to_user(uarg, &args, sizeof(args))) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + rc = admin_response_handled(ocxlpmem);
> + if (rc)
> + goto out;
> +
> + rc = 0;
> + goto out;


That may be more of a personal habit, but that final goto disrupts the
"good case" flow. And I think it's pretty unusual within the kernel.


> +
> +out:
> + mutex_unlock(&ocxlpmem->admin_command.lock);
> + return rc;
> +}
> +
> static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> {
> struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -781,6 +961,11 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> rc = ioctl_controller_dump_complete(ocxlpmem);
> break;
> +
> + case IOCTL_OCXL_PMEM_CONTROLLER_STATS:
> + rc = ioctl_controller_stats(ocxlpmem,
> + (struct ioctl_ocxl_pmem_controller_stats __user *)args);
> + break;
> }
>
> return rc;
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> index d4d8512d03f7..add223aa2fdb 100644
> --- a/include/uapi/nvdimm/ocxl-pmem.h
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -50,6 +50,22 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> __u64 reserved[8];
> };
>
> +struct ioctl_ocxl_pmem_controller_stats {
> + __u32 reset_count;
> + __u32 reset_uptime; /* seconds */
> + __u32 power_on_uptime; /* seconds */


Same as before, we're going to have some padding here.

Fred


> + __u64 host_load_count;
> + __u64 host_store_count;
> + __u64 media_read_count;
> + __u64 media_write_count;
> + __u64 cache_hit_count;
> + __u64 cache_miss_count;
> + __u64 media_read_latency; /* nanoseconds */
> + __u64 media_write_latency; /* nanoseconds */
> + __u64 cache_read_latency; /* nanoseconds */
> + __u64 cache_write_latency; /* nanoseconds */
> +};
> +
> /* ioctl numbers */
> #define OCXL_PMEM_MAGIC 0x5C
> /* SCM devices */
> @@ -57,5 +73,6 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCXL_PMEM_MAGIC, 0x02)
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_MAGIC, 0x04)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_MAGIC, 0x05)
>
> #endif /* _UAPI_OCXL_SCM_H */
>

2020-03-04 11:02:11

by Frederic Barrat

[permalink] [raw]
Subject: Re: [PATCH v3 20/27] powerpc/powernv/pmem: Forward events to userspace



Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <[email protected]>
>
> Some of the interrupts that the card generates are better handled
> by the userspace daemon, in particular:
> Controller Hardware/Firmware Fatal
> Controller Dump Available
> Error Log available
>
> This patch allows a userspace application to register an eventfd with
> the driver via SCM_IOCTL_EVENTFD to receive notifications of these
> interrupts.
>
> Userspace can then identify what events have occurred by calling
> SCM_IOCTL_EVENT_CHECK and checking against the SCM_IOCTL_EVENT_FOO
> masks.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/platforms/powernv/pmem/ocxl.c | 216 ++++++++++++++++++
> .../platforms/powernv/pmem/ocxl_internal.h | 5 +
> include/uapi/nvdimm/ocxl-pmem.h | 16 ++
> 3 files changed, 237 insertions(+)
>
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index 009d4fd29e7d..e46696d3cc36 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -10,6 +10,7 @@
> #include <misc/ocxl.h>
> #include <linux/delay.h>
> #include <linux/ndctl.h>
> +#include <linux/eventfd.h>
> #include <linux/fs.h>
> #include <linux/mm_types.h>
> #include <linux/memory_hotplug.h>
> @@ -335,11 +336,22 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
> {
> int rc;
>
> + // Disable doorbells
> + (void)ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIEC,
> + OCXL_LITTLE_ENDIAN,
> + GLOBAL_MMIO_CHI_ALL);
> +
> if (ocxlpmem->nvdimm_bus)
> nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);
>
> free_minor(ocxlpmem);
>
> + if (ocxlpmem->irq_addr[1])
> + iounmap(ocxlpmem->irq_addr[1]);
> +
> + if (ocxlpmem->irq_addr[0])
> + iounmap(ocxlpmem->irq_addr[0]);
> +
> if (ocxlpmem->cdev.owner)
> cdev_del(&ocxlpmem->cdev);
>
> @@ -443,6 +455,11 @@ static int file_release(struct inode *inode, struct file *file)
> {
> struct ocxlpmem *ocxlpmem = file->private_data;
>
> + if (ocxlpmem->ev_ctx) {
> + eventfd_ctx_put(ocxlpmem->ev_ctx);
> + ocxlpmem->ev_ctx = NULL;
> + }
> +
> ocxlpmem_put(ocxlpmem);
> return 0;
> }
> @@ -938,6 +955,51 @@ static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
> return rc;
> }
>
> +static int ioctl_eventfd(struct ocxlpmem *ocxlpmem,
> + struct ioctl_ocxl_pmem_eventfd __user *uarg)
> +{
> + struct ioctl_ocxl_pmem_eventfd args;
> +
> + if (copy_from_user(&args, uarg, sizeof(args)))
> + return -EFAULT;
> +
> + if (ocxlpmem->ev_ctx)
> + return -EINVAL;


EBUSY?


> +
> + ocxlpmem->ev_ctx = eventfd_ctx_fdget(args.eventfd);
> + if (!ocxlpmem->ev_ctx)
> + return -EFAULT;


Why not use what eventfd_ctx_fdget() returned? (through some IS_ERR()
and PTR_ERR() convolution)


> +
> + return 0;
> +}
> +
> +static int ioctl_event_check(struct ocxlpmem *ocxlpmem, u64 __user *uarg)
> +{
> + u64 val = 0;
> + int rc;
> + u64 chi = 0;
> +
> + rc = ocxlpmem_chi(ocxlpmem, &chi);
> + if (rc < 0)
> + return rc;
> +
> + if (chi & GLOBAL_MMIO_CHI_ELA)
> + val |= IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE;
> +
> + if (chi & GLOBAL_MMIO_CHI_CDA)
> + val |= IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE;
> +
> + if (chi & GLOBAL_MMIO_CHI_CFFS)
> + val |= IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL;
> +
> + if (chi & GLOBAL_MMIO_CHI_CHFS)
> + val |= IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL;
> +
> + rc = copy_to_user((u64 __user *) uarg, &val, sizeof(val));
> +


copy_to_user doesn't return an errno. Should be:

if (copy_to_user((u64 __user *) uarg, &val, sizeof(val)))
return -EFAULT;


> + return rc;
> +}
> +
> static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> {
> struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -966,6 +1028,15 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> rc = ioctl_controller_stats(ocxlpmem,
> (struct ioctl_ocxl_pmem_controller_stats __user *)args);
> break;
> +
> + case IOCTL_OCXL_PMEM_EVENTFD:
> + rc = ioctl_eventfd(ocxlpmem,
> + (struct ioctl_ocxl_pmem_eventfd __user *)args);
> + break;
> +
> + case IOCTL_OCXL_PMEM_EVENT_CHECK:
> + rc = ioctl_event_check(ocxlpmem, (u64 __user *)args);
> + break;
> }
>
> return rc;
> @@ -1107,6 +1178,146 @@ static void dump_error_log(struct ocxlpmem *ocxlpmem)
> kfree(buf);
> }
>
> +static irqreturn_t imn0_handler(void *private)
> +{
> + struct ocxlpmem *ocxlpmem = private;
> + u64 chi = 0;
> +
> + (void)ocxlpmem_chi(ocxlpmem, &chi);
> +
> + if (chi & GLOBAL_MMIO_CHI_ELA) {
> + dev_warn(&ocxlpmem->dev, "Error log is available\n");
> +
> + if (ocxlpmem->ev_ctx)
> + eventfd_signal(ocxlpmem->ev_ctx, 1);
> + }
> +
> + if (chi & GLOBAL_MMIO_CHI_CDA) {
> + dev_warn(&ocxlpmem->dev, "Controller dump is available\n");
> +
> + if (ocxlpmem->ev_ctx)
> + eventfd_signal(ocxlpmem->ev_ctx, 1);
> + }
> +
> +


(at least) one empty line too many.


> + return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t imn1_handler(void *private)
> +{
> + struct ocxlpmem *ocxlpmem = private;
> + u64 chi = 0;
> +
> + (void)ocxlpmem_chi(ocxlpmem, &chi);
> +
> + if (chi & (GLOBAL_MMIO_CHI_CFFS | GLOBAL_MMIO_CHI_CHFS)) {
> + dev_err(&ocxlpmem->dev,
> + "Controller status is fatal, chi=0x%llx, going offline\n", chi);
> +
> + if (ocxlpmem->nvdimm_bus) {
> + nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);
> + ocxlpmem->nvdimm_bus = NULL;
> + }
> +
> + if (ocxlpmem->ev_ctx)
> + eventfd_signal(ocxlpmem->ev_ctx, 1);
> + }
> +
> + return IRQ_HANDLED;
> +}
> +
> +
> +/**
> + * ocxlpmem_setup_irq() - Set up the IRQs for the OpenCAPI Persistent Memory device
> + * @ocxlpmem: the device metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int ocxlpmem_setup_irq(struct ocxlpmem *ocxlpmem)
> +{
> + int rc;
> + u64 irq_addr;
> +
> + rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem->irq_id[0]);
> + if (rc)
> + return rc;
> +
> + rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem->irq_id[0],
> + imn0_handler, NULL, ocxlpmem);
> +
> + irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context, ocxlpmem->irq_id[0]);
> + if (!irq_addr)
> + return -EINVAL;
> +
> + ocxlpmem->irq_addr[0] = ioremap(irq_addr, PAGE_SIZE);
> + if (!ocxlpmem->irq_addr[0])
> + return -EINVAL;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA0_OHP,
> + OCXL_LITTLE_ENDIAN,
> + (u64)ocxlpmem->irq_addr[0]);
> + if (rc)
> + goto out_irq0;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA0_CFP,
> + OCXL_LITTLE_ENDIAN, 0);
> + if (rc)
> + goto out_irq0;


That's a few lines of duplicate code. On the other hand, there's enough
varying parameters between the 2 interrupts that factorizing in a
subfunction would be slightly less readable. So duplicating is probably ok.



> + rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem->irq_id[1]);
> + if (rc)
> + goto out_irq0;
> +
> +
> + rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem->irq_id[1],
> + imn1_handler, NULL, ocxlpmem);
> + if (rc)
> + goto out_irq0;
> +
> + irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context, ocxlpmem->irq_id[1]);
> + if (!irq_addr) {
> + rc = -EFAULT;
> + goto out_irq0;
> + }
> +
> + ocxlpmem->irq_addr[1] = ioremap(irq_addr, PAGE_SIZE);
> + if (!ocxlpmem->irq_addr[1]) {
> + rc = -EINVAL;
> + goto out_irq0;
> + }
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA1_OHP,
> + OCXL_LITTLE_ENDIAN,
> + (u64)ocxlpmem->irq_addr[1]);
> + if (rc)
> + goto out_irq1;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_IMA1_CFP,
> + OCXL_LITTLE_ENDIAN, 0);
> + if (rc)
> + goto out_irq1;
> +
> + // Enable doorbells
> + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIE,
> + OCXL_LITTLE_ENDIAN,
> + GLOBAL_MMIO_CHI_ELA | GLOBAL_MMIO_CHI_CDA |
> + GLOBAL_MMIO_CHI_CFFS | GLOBAL_MMIO_CHI_CHFS |
> + GLOBAL_MMIO_CHI_NSCRA);


GLOBAL_MMIO_CHI_NSCRA doesn't seem to be handled in the handlers.



> + if (rc)
> + goto out_irq1;
> +
> + return 0;
> +
> +out_irq1:
> + iounmap(ocxlpmem->irq_addr[1]);
> + ocxlpmem->irq_addr[1] = NULL;
> +
> +out_irq0:
> + iounmap(ocxlpmem->irq_addr[0]);
> + ocxlpmem->irq_addr[0] = NULL;
> +
> + return rc;
> +}
> +
> /**
> * probe_function0() - Set up function 0 for an OpenCAPI persistent memory device
> * This is important as it enables templates higher than 0 across all other functions,
> @@ -1216,6 +1427,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> goto err;
> }
>
> + if (ocxlpmem_setup_irq(ocxlpmem)) {
> + dev_err(&pdev->dev, "Could not set up OCXL IRQs\n");


Like with other patches, rc needs to be set.


> + goto err;
> + }
> +
> if (setup_command_metadata(ocxlpmem)) {
> dev_err(&pdev->dev, "Could not read OCXL command matada\n");
> goto err;
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> index b953ee522ed4..927690f4888f 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> @@ -103,6 +103,10 @@ struct ocxlpmem {
> struct pci_dev *pdev;
> struct cdev cdev;
> struct ocxl_fn *ocxl_fn;
> +#define SCM_IRQ_COUNT 2
> + int irq_id[SCM_IRQ_COUNT];
> + struct dev_pagemap irq_pgmap[SCM_IRQ_COUNT];


irq_pgmap is not used.


> + void *irq_addr[SCM_IRQ_COUNT];
> struct nd_interleave_set nd_set;
> struct nvdimm_bus_descriptor bus_desc;
> struct nvdimm_bus *nvdimm_bus;
> @@ -113,6 +117,7 @@ struct ocxlpmem {
> struct command_metadata ns_command;
> struct resource pmem_res;
> struct nd_region *nd_region;
> + struct eventfd_ctx *ev_ctx;
> char fw_version[8+1];
> u32 timeouts[ADMIN_COMMAND_MAX+1];
>
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> index add223aa2fdb..988eb0bc413d 100644
> --- a/include/uapi/nvdimm/ocxl-pmem.h
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -66,6 +66,20 @@ struct ioctl_ocxl_pmem_controller_stats {
> __u64 cache_write_latency; /* nanoseconds */
> };
>
> +struct ioctl_ocxl_pmem_eventfd {
> + __s32 eventfd;
> + __u32 reserved;
> +};
> +
> +#ifndef BIT_ULL
> +#define BIT_ULL(nr) (1ULL << (nr))
> +#endif
> +
> +#define IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE BIT_ULL(0)
> +#define IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE BIT_ULL(1)
> +#define IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL BIT_ULL(2)
> +#define IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL BIT_ULL(3)
> +


I'm not fond of adding a macro with such a generic name as BIT_ULL() in
a user header file. What's wrong with:

#define IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE 0x1
#define IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE 0x2
#define IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL 0x4
#define IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL 0x8


Fred


> /* ioctl numbers */
> #define OCXL_PMEM_MAGIC 0x5C
> /* SCM devices */
> @@ -74,5 +88,7 @@ struct ioctl_ocxl_pmem_controller_stats {
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_MAGIC, 0x04)
> #define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_MAGIC, 0x05)
> +#define IOCTL_OCXL_PMEM_EVENTFD _IOW(OCXL_PMEM_MAGIC, 0x06, struct ioctl_ocxl_pmem_eventfd)
> +#define IOCTL_OCXL_PMEM_EVENT_CHECK _IOR(OCXL_PMEM_MAGIC, 0x07, __u64)
>
> #endif /* _UAPI_OCXL_SCM_H */
>

2020-03-04 11:07:33

by Frederic Barrat

[permalink] [raw]
Subject: Re: [PATCH v3 21/27] powerpc/powernv/pmem: Add an IOCTL to request controller health & perf data



Le 28/02/2020 à 07:12, Andrew Donnellan a écrit :
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
>> From: Alastair D'Silva <[email protected]>
>>
>> When health & performance data is requested from the controller,
>> it responds with an error log containing the requested information.
>>
>> This patch allows the request to be issued via an IOCTL.
>
> A better explanation would be good - this IOCTL triggers a request to
> the controller to collect controller health/perf data, and the
> controller will later respond with an error log that can be picked up
> via the error log IOCTL that you've defined earlier.

And even more precisely (to also check my understanding):

> this IOCTL triggers a request to
> the controller to collect controller health/perf data, and the
> controller will later respond

by raising an interrupt to let the user app know that

> an error log that can be picked up
> via the error log IOCTL that you've defined earlier.


The rest of the patch looks ok to me.

Fred

2020-03-05 00:47:46

by Andrew Donnellan

[permalink] [raw]
Subject: Re: [PATCH v3 19/27] powerpc/powernv/pmem: Add an IOCTL to report controller statistics

On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> The controller can report a number of statistics that are useful
> in evaluating the performance and reliability of the card.
>
> This patch exposes this information via an IOCTL.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> arch/powerpc/platforms/powernv/pmem/ocxl.c | 185 +++++++++++++++++++++
> include/uapi/nvdimm/ocxl-pmem.h | 17 ++
> 2 files changed, 202 insertions(+)
>
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index 2cabafe1fc58..009d4fd29e7d 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -758,6 +758,186 @@ static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
> GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
> }
>
> +/**
> + * controller_stats_header_parse() - Parse the first 64 bits of the controller stats admin command response
> + * @ocxlpmem: the device metadata
> + * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
> + */
> +static int controller_stats_header_parse(struct ocxlpmem *ocxlpmem,
> + u32 *length)
> +{
> + int rc;
> + u64 val;
> +
> + u16 data_identifier;
> + u32 data_length;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + return rc;
> +
> + data_identifier = val >> 48;
> + data_length = val & 0xFFFFFFFF;
> +
> + if (data_identifier != 0x4353) { // 'CS'
> + dev_err(&ocxlpmem->dev,
> + "Bad data identifier for controller stats, expected 'CS', got '%-.*s'\n",
> + 2, (char *)&data_identifier);
> + return -EINVAL;

Same comment as earlier patches re EINVAL

> + }
> +
> + *length = data_length;
> + return 0;
> +}
> +
> +static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
> + struct ioctl_ocxl_pmem_controller_stats __user *uarg)
> +{
> + struct ioctl_ocxl_pmem_controller_stats args;
> + u32 length;
> + int rc;
> + u64 val;
> +
> + memset(&args, '\0', sizeof(args));
> +
> + mutex_lock(&ocxlpmem->admin_command.lock);
> +
> + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_STATS);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.request_offset + 0x08,
> + OCXL_LITTLE_ENDIAN, 0);
> + if (rc)
> + goto out;
> +
> + rc = admin_command_execute(ocxlpmem);
> + if (rc)
> + goto out;
> +
> +
> + rc = admin_command_complete_timeout(ocxlpmem,
> + ADMIN_COMMAND_CONTROLLER_STATS);
> + if (rc < 0) {
> + dev_warn(&ocxlpmem->dev, "Controller stats timed out\n");
> + goto out;
> + }
> +
> + rc = admin_response(ocxlpmem);
> + if (rc < 0)
> + goto out;
> + if (rc != STATUS_SUCCESS) {
> + warn_status(ocxlpmem,
> + "Unexpected status from controller stats", rc);
> + goto out;
> + }
> +
> + rc = controller_stats_header_parse(ocxlpmem, &length);
> + if (rc)
> + goto out;
> +
> + if (length != 0x140)
> + warn_status(ocxlpmem,
> + "Unexpected length for controller stats data, expected 0x140, got 0x%x",
> + length);

Might be worth a comment to explain where 0x140 comes from (it looks
correct from my reading of the spec)

> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x08,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + goto out;
> +
> + args.reset_count = val >> 32;
> + args.reset_uptime = val & 0xFFFFFFFF;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x10,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + goto out;
> +
> + args.power_on_uptime = val >> 32;

We're not collecting life remaining?

> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x08,
> + OCXL_LITTLE_ENDIAN, &args.host_load_count);

My reading of the spec says HLC is at +0x10

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x10,
> + OCXL_LITTLE_ENDIAN, &args.host_store_count);

HSC at +0x18

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x18,
> + OCXL_LITTLE_ENDIAN, &args.media_read_count);

MRC is at +0x50

And you're missing CRU, HLD, HSD

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x20,
> + OCXL_LITTLE_ENDIAN, &args.media_write_count);

MWC at +0x58

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x28,
> + OCXL_LITTLE_ENDIAN, &args.cache_hit_count);

CRHC at +0x90

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x30,
> + OCXL_LITTLE_ENDIAN, &args.cache_miss_count);

This field doesn't seem to exist at all in my copy of the spec

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x38,
> + OCXL_LITTLE_ENDIAN, &args.media_read_latency);

Nor this one

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x40,
> + OCXL_LITTLE_ENDIAN, &args.media_write_latency);

Nor this one

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x48,
> + OCXL_LITTLE_ENDIAN, &args.cache_read_latency);

Nor this one

> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x50,
> + OCXL_LITTLE_ENDIAN, &args.cache_write_latency);

Nor this one

> + if (rc)
> + goto out;
> +
> + if (copy_to_user(uarg, &args, sizeof(args))) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + rc = admin_response_handled(ocxlpmem);
> + if (rc)
> + goto out;
> +
> + rc = 0;
> + goto out;

Per Fred this pattern isn't common in the kernel, but perhaps this is
just personal taste

> +
> +out:
> + mutex_unlock(&ocxlpmem->admin_command.lock);
> + return rc;
> +}
> +
> static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> {
> struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -781,6 +961,11 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> rc = ioctl_controller_dump_complete(ocxlpmem);
> break;
> +
> + case IOCTL_OCXL_PMEM_CONTROLLER_STATS:
> + rc = ioctl_controller_stats(ocxlpmem,
> + (struct ioctl_ocxl_pmem_controller_stats __user *)args);
> + break;
> }
>
> return rc;
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> index d4d8512d03f7..add223aa2fdb 100644
> --- a/include/uapi/nvdimm/ocxl-pmem.h
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -50,6 +50,22 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> __u64 reserved[8];
> };
>
> +struct ioctl_ocxl_pmem_controller_stats {
> + __u32 reset_count;
> + __u32 reset_uptime; /* seconds */
> + __u32 power_on_uptime; /* seconds */
> + __u64 host_load_count;
> + __u64 host_store_count;
> + __u64 media_read_count;
> + __u64 media_write_count;
> + __u64 cache_hit_count;
> + __u64 cache_miss_count;
> + __u64 media_read_latency; /* nanoseconds */
> + __u64 media_write_latency; /* nanoseconds */
> + __u64 cache_read_latency; /* nanoseconds */
> + __u64 cache_write_latency; /* nanoseconds */
> +};
> +
> /* ioctl numbers */
> #define OCXL_PMEM_MAGIC 0x5C
> /* SCM devices */
> @@ -57,5 +73,6 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCXL_PMEM_MAGIC, 0x02)
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_MAGIC, 0x04)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_MAGIC, 0x05)
>
> #endif /* _UAPI_OCXL_SCM_H */
>

--
Andrew Donnellan OzLabs, ADL Canberra
[email protected] IBM Australia Limited

2020-03-05 03:41:12

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 16/27] powerpc/powernv/pmem: Register a character device for userspace to interact with

On Tue, 2020-03-03 at 10:28 +0100, Frederic Barrat wrote:
>
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <[email protected]>
> >
> > This patch introduces a character device (/dev/ocxl-scmX) which
> > further
> > patches will use to interact with userspace.
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > arch/powerpc/platforms/powernv/pmem/ocxl.c | 116
> > +++++++++++++++++-
> > .../platforms/powernv/pmem/ocxl_internal.h | 2 +
> > 2 files changed, 116 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index b8bd7e703b19..63109a870d2c 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -10,6 +10,7 @@
> > #include <misc/ocxl.h>
> > #include <linux/delay.h>
> > #include <linux/ndctl.h>
> > +#include <linux/fs.h>
> > #include <linux/mm_types.h>
> > #include <linux/memory_hotplug.h>
> > #include "ocxl_internal.h"
> > @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem
> > *ocxlpmem)
> >
> > free_minor(ocxlpmem);
> >
> > + if (ocxlpmem->cdev.owner)
> > + cdev_del(&ocxlpmem->cdev);
> > +
> > if (ocxlpmem->metadata_addr)
> > devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
> >
> > @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem
> > *ocxlpmem)
> > return device_register(&ocxlpmem->dev);
> > }
> >
> > +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> > +{
> > + put_device(&ocxlpmem->dev);
> > +}
> > +
> > +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> > +{
> > + return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> > +}
> > +
> > +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> > +{
> > + struct ocxlpmem *ocxlpmem;
> > + int minor = MINOR(devno);
> > + /*
> > + * We don't declare an RCU critical section here, as our AFU
> > + * is protected by a reference counter on the device. By the
> > time the
> > + * minor number of a device is removed from the idr, the ref
> > count of
> > + * the device is already at 0, so no user API will access that
> > AFU and
> > + * this function can't return it.
> > + */
>
> I fixed something related in the ocxl driver (which had enough
> changes
> with the introduction of the "info" device to make a similar comment
> become wrong). See commit a58d37bce0d21. The issue is handling a
> simultaneous open() and removal of the device through /sysfs as best
> we can.
>
> We are on a file open path and it's not like we're going to have a
> thousand clients, so performance is not that critical. We can take
> the
> mutex before searching in the IDR and release it after we increment
> the
> reference count on the device.
> But that's not enough: we could still find the device in the IDR
> while
> it is being removed in free_ocxlpmem(). I believe the only safe way
> to
> address it is by removing the user-facing APIs (the char device)
> before
> calling device_unregister(). So that it's not possible to find the
> device in file_open() if it's in the middle of being removed.
>
> Fred
>
>

Ok, I'll replicate that patch & follow your advice.

> > + ocxlpmem = idr_find(&minors_idr, minor);
> > + if (ocxlpmem)
> > + ocxlpmem_get(ocxlpmem);
> > + return ocxlpmem;
> > +}
> > +
> > +static int file_open(struct inode *inode, struct file *file)
> > +{
> > + struct ocxlpmem *ocxlpmem;
> > +
> > + ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> > + if (!ocxlpmem)
> > + return -ENODEV;
> > +
> > + file->private_data = ocxlpmem;
> > + return 0;
> > +}
> > +
> > +static int file_release(struct inode *inode, struct file *file)
> > +{
> > + struct ocxlpmem *ocxlpmem = file->private_data;
> > +
> > + ocxlpmem_put(ocxlpmem);
> > + return 0;
> > +}
> > +
> > +static const struct file_operations fops = {
> > + .owner = THIS_MODULE,
> > + .open = file_open,
> > + .release = file_release,
> > +};
> > +
> > +/**
> > + * create_cdev() - Create the chardev in /dev for the device
> > + * @ocxlpmem: the SCM metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int create_cdev(struct ocxlpmem *ocxlpmem)
> > +{
> > + cdev_init(&ocxlpmem->cdev, &fops);
> > + return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> > +}
> > +
> > /**
> > * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
> > * @pdev: the PCI device information struct
> > @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> > goto err;
> > }
> >
> > + if (create_cdev(ocxlpmem)) {
> > + dev_err(&pdev->dev, "Could not create character
> > device\n");
> > + goto err;
> > + }
>
> As already mentioned in a previous patch, we branch to the err label
> so
> rc needs to be set to a valid error.
>

Ok

>
>
> > +
> > elapsed = 0;
> > timeout = ocxlpmem->readiness_timeout + ocxlpmem-
> > >memory_available_timeout;
> > while (!is_usable(ocxlpmem, false)) {
> > @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
> > .shutdown = ocxlpmem_remove,
> > };
> >
> > +static int file_init(void)
> > +{
> > + int rc;
> > +
> > + mutex_init(&minors_idr_lock);
> > + idr_init(&minors_idr);
> > +
> > + rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-
> > pmem");
> > + if (rc) {
> > + idr_destroy(&minors_idr);
> > + pr_err("Unable to allocate OpenCAPI persistent memory
> > major number: %d\n", rc);
> > + return rc;
> > + }
> > +
> > + ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> > + if (IS_ERR(ocxlpmem_class)) {
> > + idr_destroy(&minors_idr);
> > + pr_err("Unable to create ocxl-pmem class\n");
> > + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > + return PTR_ERR(ocxlpmem_class);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static void file_exit(void)
> > +{
> > + class_destroy(ocxlpmem_class);
> > + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > + idr_destroy(&minors_idr);
> > +}
> > +
> > static int __init ocxlpmem_init(void)
> > {
> > - int rc = 0;
> > + int rc;
> >
> > - rc = pci_register_driver(&pci_driver);
> > + rc = file_init();
> > if (rc)
> > return rc;
> >
> > + rc = pci_register_driver(&pci_driver);
> > + if (rc) {
> > + file_exit();
> > + return rc;
> > + }
> > +
> > return 0;
> > }
> >
> > static void ocxlpmem_exit(void)
> > {
> > pci_unregister_driver(&pci_driver);
> > + file_exit();
> > }
> >
> > module_init(ocxlpmem_init);
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > index 28e2020f6355..d2d81fec7bb1 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > @@ -2,6 +2,7 @@
> > // Copyright 2019 IBM Corp.
> >
> > #include <linux/pci.h>
> > +#include <linux/cdev.h>
> > #include <misc/ocxl.h>
> > #include <linux/libnvdimm.h>
> > #include <linux/mm.h>
> > @@ -99,6 +100,7 @@ struct ocxlpmem_function0 {
> > struct ocxlpmem {
> > struct device dev;
> > struct pci_dev *pdev;
> > + struct cdev cdev;
> > struct ocxl_fn *ocxl_fn;
> > struct nd_interleave_set nd_set;
> > struct nvdimm_bus_descriptor bus_desc;
> >
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-05 23:40:39

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs

On Tue, 2020-03-03 at 19:04 +0100, Frederic Barrat wrote:
>
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <[email protected]>
> >
> > This patch adds IOCTLs to allow userspace to request & fetch dumps
> > of the internal controller state.
> >
> > This is useful during debugging or when a fatal error on the
> > controller
> > has occurred.
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > arch/powerpc/platforms/powernv/pmem/ocxl.c | 132
> > +++++++++++++++++++++
> > include/uapi/nvdimm/ocxl-pmem.h | 15 +++
> > 2 files changed, 147 insertions(+)
> >
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index 2b64504f9129..2cabafe1fc58 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem
> > *ocxlpmem,
> > return 0;
> > }
> >
> > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> > + struct ioctl_ocxl_pmem_controller_dump_data __user
> > *uarg)
> > +{
> > + struct ioctl_ocxl_pmem_controller_dump_data args;
> > + u16 i;
> > + u64 val;
> > + int rc;
> > +
> > + if (copy_from_user(&args, uarg, sizeof(args)))
> > + return -EFAULT;
> > +
> > + if (args.buf_size % 8)
> > + return -EINVAL;
> > +
> > + if (args.buf_size > ocxlpmem->admin_command.data_size)
> > + return -EINVAL;
> > +
> > + mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > + rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_DUMP);
> > + if (rc)
> > + goto out;
> > +
> > + val = ((u64)args.offset) << 32;
> > + val |= args.buf_size;
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > + OCXL_LITTLE_ENDIAN, val);
> > + if (rc)
> > + goto out;
> > +
> > + rc = admin_command_execute(ocxlpmem);
> > + if (rc)
> > + goto out;
> > +
> > + rc = admin_command_complete_timeout(ocxlpmem,
> > + ADMIN_COMMAND_CONTROLLER_DU
> > MP);
> > + if (rc < 0) {
> > + dev_warn(&ocxlpmem->dev, "Controller dump timed
> > out\n");
> > + goto out;
> > + }
> > +
> > + rc = admin_response(ocxlpmem);
> > + if (rc < 0)
> > + goto out;
> > + if (rc != STATUS_SUCCESS) {
> > + warn_status(ocxlpmem,
> > + "Unexpected status from retrieve error
> > log",
> > + rc);
> > + goto out;
> > + }
>
>
> It would help if there was a comment indicating how the 3 ioctls are
> used. My understanding is that the userland is:
> - requesting the controller to prepare a state dump
> - then one or more ioctls to fetch the data. The number of calls
> required to get the full state really depends on the size of the
> buffer
> passed by user
> - a last ioctl to tell the controller that we're done, presumably to
> let
> it free some resources.
>

Ok, will add it to the blurb.
>
> > +
> > + for (i = 0; i < args.buf_size; i += 8) {
> > + u64 val;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + i,
> > + OCXL_HOST_ENDIAN, &val);
> > + if (rc)
> > + goto out;
> > +
> > + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > + }
> > +
> > + if (copy_to_user(uarg, &args, sizeof(args))) {
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > +
> > + rc = admin_response_handled(ocxlpmem);
> > + if (rc)
> > + goto out;
> > +
> > +out:
> > + mutex_unlock(&ocxlpmem->admin_command.lock);
> > + return rc;
> > +}
> > +
> > +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> > +{
> > + int rc;
> > + u64 busy = 1;
> > +
> > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIC,
> > + OCXL_LITTLE_ENDIAN,
> > + GLOBAL_MMIO_CHI_CDA);
> > +
>
> rc is not checked here.

Whoops

>
>
> > +
> > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > + OCXL_LITTLE_ENDIAN,
> > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> > + if (rc)
> > + return rc;
> > +
> > + while (busy) {
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + GLOBAL_MMIO_HCI,
> > + OCXL_LITTLE_ENDIAN,
> > &busy);
> > + if (rc)
> > + return rc;
> > +
> > + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
>
> Setting 'busy' doesn't hurt, but it's not really useful, is it?
>
> We should add some kind of timeout so that if the controller hits an
> issue, we don't spin in kernel space endlessly.
>
>

Here we are polling the controller dump bit of the HCI register until
the controller clears it - that line is masking off the bits we don't
care about.

I'll talk to the firmware team about adding a timeout for that to the
spec so we know how long to wait for before giving up.

>
> > + cond_resched();
> > + }
> > +
> > + return 0;
> > +}

> > +
> > +static int ioctl_controller_dump_complete(struct ocxlpmem
> > *ocxlpmem)
> > +{
> > + return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > + OCXL_LITTLE_ENDIAN,
> > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COL
> > LECTED);
> > +}
> > +
> > static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> > {
> > struct ocxlpmem *ocxlpmem = file->private_data;
> > @@ -650,7 +768,21 @@ static long file_ioctl(struct file *file,
> > unsigned int cmd, unsigned long args)
> > rc = ioctl_error_log(ocxlpmem,
> > (struct ioctl_ocxl_pmem_error_log
> > __user *)args);
> > break;
> > +
> > + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP:
> > + rc = request_controller_dump(ocxlpmem);
> > + break;
> > +
> > + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA:
> > + rc = ioctl_controller_dump_data(ocxlpmem,
> > + (struct
> > ioctl_ocxl_pmem_controller_dump_data __user *)args);
> > + break;
> > +
> > + case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> > + rc = ioctl_controller_dump_complete(ocxlpmem);
> > + break;
> > }
> > +
> > return rc;
> > }
> >
> > diff --git a/include/uapi/nvdimm/ocxl-pmem.h
> > b/include/uapi/nvdimm/ocxl-pmem.h
> > index b10f8ac0c20f..d4d8512d03f7 100644
> > --- a/include/uapi/nvdimm/ocxl-pmem.h
> > +++ b/include/uapi/nvdimm/ocxl-pmem.h
> > @@ -38,9 +38,24 @@ struct ioctl_ocxl_pmem_error_log {
> > __u8 *buf; /* pointer to output buffer */
> > };
> >
> > +struct ioctl_ocxl_pmem_controller_dump_data {
> > + __u8 *buf; /* pointer to output buffer */
>
> We only support 64-bit user app on powerpc, but using a pointer type
> in
> a kernel ABI is unusual. We should use a known size like __u64.
> (also applies to buf pointer in struct ioctl_ocxl_pmem_error_log
> from
> previous patch too)
>
> The rest of the structure will also be padded by the compiler, which
> we
> should avoid.
>
> Fred
>

Ok, I'll coerce the pointers into a __u64.

>
>
> > + __u16 buf_size; /* in/out, buffer size provided/required.
> > + * If required is greater than provided, the
> > buffer
> > + * will be truncated to the amount provided. If
> > its
> > + * less, then only the required bytes will be
> > populated.
> > + * If it is 0, then there is no more dump data
> > available.
> > + */
> > + __u32 offset; /* in, Offset within the dump */
> > + __u64 reserved[8];
> > +};
> > +
> > /* ioctl numbers */
> > #define OCXL_PMEM_MAGIC 0x5C
> > /* SCM devices */
> > #define IOCTL_OCXL_PMEM_ERROR_LOG _IOWR(OCXL_PMEM
> > _MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCX
> > L_PMEM_MAGIC, 0x02)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(O
> > CXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_M
> > AGIC, 0x04)
> >
> > #endif /* _UAPI_OCXL_SCM_H */
> >
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-06 03:35:30

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs

On Wed, 2020-03-04 at 17:53 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> > + struct ioctl_ocxl_pmem_controller_dump_data __user
> > *uarg)
> > +{
> > + struct ioctl_ocxl_pmem_controller_dump_data args;
> > + u16 i;
> > + u64 val;
> > + int rc;
> > +
> > + if (copy_from_user(&args, uarg, sizeof(args)))
> > + return -EFAULT;
> > +
> > + if (args.buf_size % 8)
> > + return -EINVAL;
> > +
> > + if (args.buf_size > ocxlpmem->admin_command.data_size)
> > + return -EINVAL;
> > +
> > + mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > + rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_DUMP);
> > + if (rc)
> > + goto out;
> > +
> > + val = ((u64)args.offset) << 32;
> > + val |= args.buf_size;
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > + OCXL_LITTLE_ENDIAN, val);
> > + if (rc)
> > + goto out;
> > +
> > + rc = admin_command_execute(ocxlpmem);
> > + if (rc)
> > + goto out;
> > +
> > + rc = admin_command_complete_timeout(ocxlpmem,
> > + ADMIN_COMMAND_CONTROLLER_DU
> > MP);
> > + if (rc < 0) {
> > + dev_warn(&ocxlpmem->dev, "Controller dump timed
> > out\n");
> > + goto out;
> > + }
> > +
> > + rc = admin_response(ocxlpmem);
> > + if (rc < 0)
> > + goto out;
> > + if (rc != STATUS_SUCCESS) {
> > + warn_status(ocxlpmem,
> > + "Unexpected status from retrieve error
> > log",
>
> Controller dump
>

Ok

> > + rc);
> > + goto out;
> > + }
> > +
> > + for (i = 0; i < args.buf_size; i += 8) {
> > + u64 val;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + i,
> > + OCXL_HOST_ENDIAN, &val);
>
> Is a controller dump something where we want to do endian swapping?
>

No, we just have raw binary data that we want to pass through.
OCXL_HOST_ENDIAN does no swapping.

> Any reason we're not doing the usual check of the data identifier,
> additional data length etc?
>

I'll add that

> > + if (rc)
> > + goto out;
> > +
> > + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > + }
> > +
> > + if (copy_to_user(uarg, &args, sizeof(args))) {
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > +
> > + rc = admin_response_handled(ocxlpmem);
> > + if (rc)
> > + goto out;
> > +
> > +out:
> > + mutex_unlock(&ocxlpmem->admin_command.lock);
> > + return rc;
> > +}
> > +
> > +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> > +{
> > + int rc;
> > + u64 busy = 1;
> > +
> > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIC,
> > + OCXL_LITTLE_ENDIAN,
> > + GLOBAL_MMIO_CHI_CDA);
>
> This return code is ignored
>
> > +
> > +
> > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > + OCXL_LITTLE_ENDIAN,
> > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> > + if (rc)
> > + return rc;
> > +
> > + while (busy) {
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + GLOBAL_MMIO_HCI,
> > + OCXL_LITTLE_ENDIAN,
> > &busy);
> > + if (rc)
> > + return rc;
> > +
> > + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
> > + cond_resched();
> > + }
> > +
> > + return 0;
> > +}
>
>
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-11 03:33:12

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 20/27] powerpc/powernv/pmem: Forward events to userspace

On Wed, 2020-03-04 at 12:00 +0100, Frederic Barrat wrote:
>
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <[email protected]>
> >
> > Some of the interrupts that the card generates are better handled
> > by the userspace daemon, in particular:
> > Controller Hardware/Firmware Fatal
> > Controller Dump Available
> > Error Log available
> >
> > This patch allows a userspace application to register an eventfd
> > with
> > the driver via IOCTL_OCXL_PMEM_EVENTFD to receive notifications of
> > these interrupts.
> >
> > Userspace can then identify what events have occurred by calling
> > IOCTL_OCXL_PMEM_EVENT_CHECK and checking against the
> > IOCTL_OCXL_PMEM_EVENT_* masks.
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > arch/powerpc/platforms/powernv/pmem/ocxl.c | 216
> > ++++++++++++++++++
> > .../platforms/powernv/pmem/ocxl_internal.h | 5 +
> > include/uapi/nvdimm/ocxl-pmem.h | 16 ++
> > 3 files changed, 237 insertions(+)
> >
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index 009d4fd29e7d..e46696d3cc36 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -10,6 +10,7 @@
> > #include <misc/ocxl.h>
> > #include <linux/delay.h>
> > #include <linux/ndctl.h>
> > +#include <linux/eventfd.h>
> > #include <linux/fs.h>
> > #include <linux/mm_types.h>
> > #include <linux/memory_hotplug.h>
> > @@ -335,11 +336,22 @@ static void free_ocxlpmem(struct ocxlpmem
> > *ocxlpmem)
> > {
> > int rc;
> >
> > + // Disable doorbells
> > + (void)ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIEC,
> > + OCXL_LITTLE_ENDIAN,
> > + GLOBAL_MMIO_CHI_ALL);
> > +
> > if (ocxlpmem->nvdimm_bus)
> > nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);
> >
> > free_minor(ocxlpmem);
> >
> > + if (ocxlpmem->irq_addr[1])
> > + iounmap(ocxlpmem->irq_addr[1]);
> > +
> > + if (ocxlpmem->irq_addr[0])
> > + iounmap(ocxlpmem->irq_addr[0]);
> > +
> > if (ocxlpmem->cdev.owner)
> > cdev_del(&ocxlpmem->cdev);
> >
> > @@ -443,6 +455,11 @@ static int file_release(struct inode *inode,
> > struct file *file)
> > {
> > struct ocxlpmem *ocxlpmem = file->private_data;
> >
> > + if (ocxlpmem->ev_ctx) {
> > + eventfd_ctx_put(ocxlpmem->ev_ctx);
> > + ocxlpmem->ev_ctx = NULL;
> > + }
> > +
> > ocxlpmem_put(ocxlpmem);
> > return 0;
> > }
> > @@ -938,6 +955,51 @@ static int ioctl_controller_stats(struct
> > ocxlpmem *ocxlpmem,
> > return rc;
> > }
> >
> > +static int ioctl_eventfd(struct ocxlpmem *ocxlpmem,
> > + struct ioctl_ocxl_pmem_eventfd __user *uarg)
> > +{
> > + struct ioctl_ocxl_pmem_eventfd args;
> > +
> > + if (copy_from_user(&args, uarg, sizeof(args)))
> > + return -EFAULT;
> > +
> > + if (ocxlpmem->ev_ctx)
> > + return -EINVAL;
>
> EBUSY?
>
Ok

>
> > +
> > + ocxlpmem->ev_ctx = eventfd_ctx_fdget(args.eventfd);
> > + if (!ocxlpmem->ev_ctx)
> > + return -EFAULT;
>
> Why not use what eventfd_ctx_fdget() returned? (through some
> IS_ERR()
> and PTR_ERR() convolution)
>

Ok
>
> > +
> > + return 0;
> > +}
> > +
> > +static int ioctl_event_check(struct ocxlpmem *ocxlpmem, u64 __user
> > *uarg)
> > +{
> > + u64 val = 0;
> > + int rc;
> > + u64 chi = 0;
> > +
> > + rc = ocxlpmem_chi(ocxlpmem, &chi);
> > + if (rc < 0)
> > + return rc;
> > +
> > + if (chi & GLOBAL_MMIO_CHI_ELA)
> > + val |= IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE;
> > +
> > + if (chi & GLOBAL_MMIO_CHI_CDA)
> > + val |= IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE;
> > +
> > + if (chi & GLOBAL_MMIO_CHI_CFFS)
> > + val |= IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL;
> > +
> > + if (chi & GLOBAL_MMIO_CHI_CHFS)
> > + val |= IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL;
> > +
> > + rc = copy_to_user((u64 __user *) uarg, &val, sizeof(val));
> > +
>
> copy_to_user doesn't return an errno. Should be:
>
> if (copy_to_user((u64 __user *) uarg, &val, sizeof(val)))
> return -EFAULT;
>
Ok

>
> > + return rc;
> > +}
> > +
> > static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> > {
> > struct ocxlpmem *ocxlpmem = file->private_data;
> > @@ -966,6 +1028,15 @@ static long file_ioctl(struct file *file,
> > unsigned int cmd, unsigned long args)
> > rc = ioctl_controller_stats(ocxlpmem,
> > (struct
> > ioctl_ocxl_pmem_controller_stats __user *)args);
> > break;
> > +
> > + case IOCTL_OCXL_PMEM_EVENTFD:
> > + rc = ioctl_eventfd(ocxlpmem,
> > + (struct ioctl_ocxl_pmem_eventfd
> > __user *)args);
> > + break;
> > +
> > + case IOCTL_OCXL_PMEM_EVENT_CHECK:
> > + rc = ioctl_event_check(ocxlpmem, (u64 __user *)args);
> > + break;
> > }
> >
> > return rc;
> > @@ -1107,6 +1178,146 @@ static void dump_error_log(struct ocxlpmem
> > *ocxlpmem)
> > kfree(buf);
> > }
> >
> > +static irqreturn_t imn0_handler(void *private)
> > +{
> > + struct ocxlpmem *ocxlpmem = private;
> > + u64 chi = 0;
> > +
> > + (void)ocxlpmem_chi(ocxlpmem, &chi);
> > +
> > + if (chi & GLOBAL_MMIO_CHI_ELA) {
> > + dev_warn(&ocxlpmem->dev, "Error log is available\n");
> > +
> > + if (ocxlpmem->ev_ctx)
> > + eventfd_signal(ocxlpmem->ev_ctx, 1);
> > + }
> > +
> > + if (chi & GLOBAL_MMIO_CHI_CDA) {
> > + dev_warn(&ocxlpmem->dev, "Controller dump is
> > available\n");
> > +
> > + if (ocxlpmem->ev_ctx)
> > + eventfd_signal(ocxlpmem->ev_ctx, 1);
> > + }
> > +
> > +
>
> (at least) one empty line too many.
>

Ok

>
> > + return IRQ_HANDLED;
> > +}
> > +
> > +static irqreturn_t imn1_handler(void *private)
> > +{
> > + struct ocxlpmem *ocxlpmem = private;
> > + u64 chi = 0;
> > +
> > + (void)ocxlpmem_chi(ocxlpmem, &chi);
> > +
> > + if (chi & (GLOBAL_MMIO_CHI_CFFS | GLOBAL_MMIO_CHI_CHFS)) {
> > + dev_err(&ocxlpmem->dev,
> > + "Controller status is fatal, chi=0x%llx, going
> > offline\n", chi);
> > +
> > + if (ocxlpmem->nvdimm_bus) {
> > + nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);
> > + ocxlpmem->nvdimm_bus = NULL;
> > + }
> > +
> > + if (ocxlpmem->ev_ctx)
> > + eventfd_signal(ocxlpmem->ev_ctx, 1);
> > + }
> > +
> > + return IRQ_HANDLED;
> > +}
> > +
> > +
> > +/**
> > + * ocxlpmem_setup_irq() - Set up the IRQs for the OpenCAPI
> > Persistent Memory device
> > + * @ocxlpmem: the device metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int ocxlpmem_setup_irq(struct ocxlpmem *ocxlpmem)
> > +{
> > + int rc;
> > + u64 irq_addr;
> > +
> > + rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem-
> > >irq_id[0]);
> > + if (rc)
> > + return rc;
> > +
> > + rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem-
> > >irq_id[0],
> > + imn0_handler, NULL, ocxlpmem);
> > +
> > + irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context,
> > ocxlpmem->irq_id[0]);
> > + if (!irq_addr)
> > + return -EINVAL;
> > +
> > + ocxlpmem->irq_addr[0] = ioremap(irq_addr, PAGE_SIZE);
> > + if (!ocxlpmem->irq_addr[0])
> > + return -EINVAL;
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_IMA0_OHP,
> > + OCXL_LITTLE_ENDIAN,
> > + (u64)ocxlpmem->irq_addr[0]);
> > + if (rc)
> > + goto out_irq0;
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_IMA0_CFP,
> > + OCXL_LITTLE_ENDIAN, 0);
> > + if (rc)
> > + goto out_irq0;
>
> That's a few lines of duplicate code. On the other hand, there's
> enough
> varying parameters between the 2 interrupts that factorizing in a
> subfunction would be slightly less readable. So duplicating is
> probably ok.
>
>
>
> > + rc = ocxl_afu_irq_alloc(ocxlpmem->ocxl_context, &ocxlpmem-
> > >irq_id[1]);
> > + if (rc)
> > + goto out_irq0;
> > +
> > +
> > + rc = ocxl_irq_set_handler(ocxlpmem->ocxl_context, ocxlpmem-
> > >irq_id[1],
> > + imn1_handler, NULL, ocxlpmem);
> > + if (rc)
> > + goto out_irq0;
> > +
> > + irq_addr = ocxl_afu_irq_get_addr(ocxlpmem->ocxl_context,
> > ocxlpmem->irq_id[1]);
> > + if (!irq_addr) {
> > + rc = -EFAULT;
> > + goto out_irq0;
> > + }
> > +
> > + ocxlpmem->irq_addr[1] = ioremap(irq_addr, PAGE_SIZE);
> > + if (!ocxlpmem->irq_addr[1]) {
> > + rc = -EINVAL;
> > + goto out_irq0;
> > + }
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_IMA1_OHP,
> > + OCXL_LITTLE_ENDIAN,
> > + (u64)ocxlpmem->irq_addr[1]);
> > + if (rc)
> > + goto out_irq1;
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_IMA1_CFP,
> > + OCXL_LITTLE_ENDIAN, 0);
> > + if (rc)
> > + goto out_irq1;
> > +
> > + // Enable doorbells
> > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIE,
> > + OCXL_LITTLE_ENDIAN,
> > + GLOBAL_MMIO_CHI_ELA |
> > GLOBAL_MMIO_CHI_CDA |
> > + GLOBAL_MMIO_CHI_CFFS |
> > GLOBAL_MMIO_CHI_CHFS |
> > + GLOBAL_MMIO_CHI_NSCRA);
>
> GLOBAL_MMIO_CHI_NSCRA doesn't seem to be handled in the handlers.
>

This will be moved to the overwrite patch.

>
>
> > + if (rc)
> > + goto out_irq1;
> > +
> > + return 0;
> > +
> > +out_irq1:
> > + iounmap(ocxlpmem->irq_addr[1]);
> > + ocxlpmem->irq_addr[1] = NULL;
> > +
> > +out_irq0:
> > + iounmap(ocxlpmem->irq_addr[0]);
> > + ocxlpmem->irq_addr[0] = NULL;
> > +
> > + return rc;
> > +}
> > +
> > /**
> > * probe_function0() - Set up function 0 for an OpenCAPI
> > persistent memory device
> > * This is important as it enables templates higher than 0 across
> > all other functions,
> > @@ -1216,6 +1427,11 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> > goto err;
> > }
> >
> > + if (ocxlpmem_setup_irq(ocxlpmem)) {
> > + dev_err(&pdev->dev, "Could not set up OCXL IRQs\n");
>
> Like with other patches, rc needs to be set.
>
ok

>
> > + goto err;
> > + }
> > +
> > if (setup_command_metadata(ocxlpmem)) {
> > dev_err(&pdev->dev, "Could not read OCXL command
> > matada\n");
> > goto err;
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > index b953ee522ed4..927690f4888f 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > @@ -103,6 +103,10 @@ struct ocxlpmem {
> > struct pci_dev *pdev;
> > struct cdev cdev;
> > struct ocxl_fn *ocxl_fn;
> > +#define SCM_IRQ_COUNT 2
> > + int irq_id[SCM_IRQ_COUNT];
> > + struct dev_pagemap irq_pgmap[SCM_IRQ_COUNT];
>
> irq_pgmap is not used.

Ok
>
>
> > + void *irq_addr[SCM_IRQ_COUNT];
> > struct nd_interleave_set nd_set;
> > struct nvdimm_bus_descriptor bus_desc;
> > struct nvdimm_bus *nvdimm_bus;
> > @@ -113,6 +117,7 @@ struct ocxlpmem {
> > struct command_metadata ns_command;
> > struct resource pmem_res;
> > struct nd_region *nd_region;
> > + struct eventfd_ctx *ev_ctx;
> > char fw_version[8+1];
> > u32 timeouts[ADMIN_COMMAND_MAX+1];
> >
> > diff --git a/include/uapi/nvdimm/ocxl-pmem.h
> > b/include/uapi/nvdimm/ocxl-pmem.h
> > index add223aa2fdb..988eb0bc413d 100644
> > --- a/include/uapi/nvdimm/ocxl-pmem.h
> > +++ b/include/uapi/nvdimm/ocxl-pmem.h
> > @@ -66,6 +66,20 @@ struct ioctl_ocxl_pmem_controller_stats {
> > __u64 cache_write_latency; /* nanoseconds */
> > };
> >
> > +struct ioctl_ocxl_pmem_eventfd {
> > + __s32 eventfd;
> > + __u32 reserved;
> > +};
> > +
> > +#ifndef BIT_ULL
> > +#define BIT_ULL(nr) (1ULL << (nr))
> > +#endif
> > +
> > +#define IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE BIT_ULL
> > (0)
> > +#define IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE BIT_ULL(1)
> > +#define IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL BIT_ULL
> > (2)
> > +#define IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL BIT_ULL
> > (3)
> > +
>
> I'm not fond of adding a macro with such a generic name as BIT_ULL()
> in
> a user header file. What's wrong with:
>
> #define IOCTL_OCXL_PMEM_EVENT_CONTROLLER_DUMP_AVAILABLE 0x1
> #define IOCTL_OCXL_PMEM_EVENT_ERROR_LOG_AVAILABLE 0x2
> #define IOCTL_OCXL_PMEM_EVENT_HARDWARE_FATAL 0x4
> #define IOCTL_OCXL_PMEM_EVENT_FIRMWARE_FATAL 0x8
>
>

Nothing, I'll change it.

> Fred
>
>
> > /* ioctl numbers */
> > #define OCXL_PMEM_MAGIC 0x5C
> > /* SCM devices */
> > @@ -74,5 +88,7 @@ struct ioctl_ocxl_pmem_controller_stats {
> > #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(O
> > CXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> > #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_M
> > AGIC, 0x04)
> > #define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_M
> > AGIC, 0x05)
> > +#define IOCTL_OCXL_PMEM_EVENTFD _IOW(OC
> > XL_PMEM_MAGIC, 0x06, struct ioctl_ocxl_pmem_eventfd)
> > +#define IOCTL_OCXL_PMEM_EVENT_CHECK _IOR(OC
> > XL_PMEM_MAGIC, 0x07, __u64)
> >
> > #endif /* _UAPI_OCXL_SCM_H */
> >
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-11 03:40:46

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 21/27] powerpc/powernv/pmem: Add an IOCTL to request controller health & perf data

On Wed, 2020-03-04 at 12:06 +0100, Frederic Barrat wrote:
>
> Le 28/02/2020 à 07:12, Andrew Donnellan a écrit :
> > On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > > From: Alastair D'Silva <[email protected]>
> > >
> > > When health & performance data is requested from the controller,
> > > it responds with an error log containing the requested
> > > information.
> > >
> > > This patch allows the request to be issued via an IOCTL.
> >
> > A better explanation would be good - this IOCTL triggers a request
> > to
> > the controller to collect controller health/perf data, and the
> > controller will later respond with an error log that can be picked
> > up
> > via the error log IOCTL that you've defined earlier.
>
> And even more precisely (to also check my understanding):
>
> > this IOCTL triggers a request to
> > the controller to collect controller health/perf data, and the
> > controller will later respond
>
> by raising an interrupt to let the user app know that
>
> > an error log that can be picked up
> > via the error log IOCTL that you've defined earlier.
>
>
> The rest of the patch looks ok to me.
>
> Fred

Ok

--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-12 00:22:37

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 19/27] powerpc/powernv/pmem: Add an IOCTL to report controller statistics

On Wed, 2020-03-04 at 10:25 +0100, Frederic Barrat wrote:
>
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <[email protected]>
> >
> > The controller can report a number of statistics that are useful
> > in evaluating the performance and reliability of the card.
> >
> > This patch exposes this information via an IOCTL.
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > arch/powerpc/platforms/powernv/pmem/ocxl.c | 185
> > +++++++++++++++++++++
> > include/uapi/nvdimm/ocxl-pmem.h | 17 ++
> > 2 files changed, 202 insertions(+)
> >
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index 2cabafe1fc58..009d4fd29e7d 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -758,6 +758,186 @@ static int
> > ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
> > GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COL
> > LECTED);
> > }
> >
> > +/**
> > + * controller_stats_header_parse() - Parse the first 64 bits of
> > the controller stats admin command response
> > + * @ocxlpmem: the device metadata
> > + * @length: out, returns the number of bytes in the response
> > (excluding the 64 bit header)
> > + */
> > +static int controller_stats_header_parse(struct ocxlpmem
> > *ocxlpmem,
> > + u32 *length)
> > +{
> > + int rc;
> > + u64 val;
> > +
>
> unexpected empty line
>

Ok

>
> > + u16 data_identifier;
> > + u32 data_length;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset,
> > + OCXL_LITTLE_ENDIAN, &val);
> > + if (rc)
> > + return rc;
> > +
> > + data_identifier = val >> 48;
> > + data_length = val & 0xFFFFFFFF;
> > +
> > + if (data_identifier != 0x4353) { // 'CS'
> > + dev_err(&ocxlpmem->dev,
> > + "Bad data identifier for controller stats,
> > expected 'CS', got '%-.*s'\n",
> > + 2, (char *)&data_identifier);
>
>
> Wow, I'm clueless what that string format looks like :-)
> 2 arguments? Did you check the kernel string formatter does what you
> want?
> You may consider unifying the format though, the error log patch uses
> a
> simpler (better?) format for a similar message.
>

Sorry, force of habit from my old job where we dealt with a lot of
variable length, non-NULL terminated buffers. FYI - it takes the string
length from the first argument.

I'll change it to a fixed length string like the others :)

>
>
> > + return -EINVAL;
> > + }
> > +
> > + *length = data_length;
> > + return 0;
> > +}
> > +
> > +static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
> > + struct
> > ioctl_ocxl_pmem_controller_stats __user *uarg)
> > +{
> > + struct ioctl_ocxl_pmem_controller_stats args;
> > + u32 length;
> > + int rc;
> > + u64 val;
> > +
> > + memset(&args, '\0', sizeof(args));
> > +
> > + mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > + rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_STATS);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > + OCXL_LITTLE_ENDIAN, 0);
> > + if (rc)
> > + goto out;
> > +
> > + rc = admin_command_execute(ocxlpmem);
> > + if (rc)
> > + goto out;
> > +
> > +
> > + rc = admin_command_complete_timeout(ocxlpmem,
> > + ADMIN_COMMAND_CONTROLLER_ST
> > ATS);
> > + if (rc < 0) {
> > + dev_warn(&ocxlpmem->dev, "Controller stats timed
> > out\n");
> > + goto out;
> > + }
> > +
> > + rc = admin_response(ocxlpmem);
> > + if (rc < 0)
> > + goto out;
> > + if (rc != STATUS_SUCCESS) {
> > + warn_status(ocxlpmem,
> > + "Unexpected status from controller stats",
> > rc);
> > + goto out;
> > + }
>
> All those ioctls commands follow the same pattern:
> 1. admin_command_request()
> 2. optionally, set some mmio registers specific to the command
> 3. admin_command_execute()
> 4. admin_command_complete_timeout()
> 5. admin_response()
>
> By swapping 1 and 2, we could then factorize steps 1, 3, 4 and 5 in
> a
> function and simplify/shorten the code each time a command is called.
>
> Regarding step 2 (and that's true for all similar patches), a comment
> about what the mmio tuning does would help and avoid looking up the
> spec. Looking up the spec during the review is expected, but it will
> ease reading the code 6 months from now.
>
>

I'll rework this and add a wrapper in the Admin Commands patch.

>
> > +
> > + rc = controller_stats_header_parse(ocxlpmem, &length);
> > + if (rc)
> > + goto out;
> > +
> > + if (length != 0x140)
> > + warn_status(ocxlpmem,
> > + "Unexpected length for controller stats
> > data, expected 0x140, got 0x%x",
> > + length);
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x08,
> > + OCXL_LITTLE_ENDIAN, &val);
> > + if (rc)
> > + goto out;
> > +
> > + args.reset_count = val >> 32;
> > + args.reset_uptime = val & 0xFFFFFFFF;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x10,
> > + OCXL_LITTLE_ENDIAN, &val);
> > + if (rc)
> > + goto out;
> > +
> > + args.power_on_uptime = val >> 32;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x08,
> > + OCXL_LITTLE_ENDIAN,
> &args.host_load_count);
>
>
> Those offsets are hard to understand, even with the spec next to me.
> And it seems that we could harden things a bit: each block has a
> "statistics parameter ID" and the length of the data for that block.
> We should check that and make sure we're reading what we expect.
> For example, from the spec I'm looking at (110d), I would expect the
> host load count to be at offset 0x10. It's entirely possible I'm
> misreading it though.
>

I'll rework this too.

>
>
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x10,
> > + OCXL_LITTLE_ENDIAN,
> > &args.host_store_count);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x18,
> > + OCXL_LITTLE_ENDIAN,
> > &args.media_read_count);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x20,
> > + OCXL_LITTLE_ENDIAN,
> > &args.media_write_count);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x28,
> > + OCXL_LITTLE_ENDIAN,
> > &args.cache_hit_count);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x30,
> > + OCXL_LITTLE_ENDIAN,
> > &args.cache_miss_count);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x38,
> > + OCXL_LITTLE_ENDIAN,
> > &args.media_read_latency);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x40,
> > + OCXL_LITTLE_ENDIAN,
> > &args.media_write_latency);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x48,
> > + OCXL_LITTLE_ENDIAN,
> > &args.cache_read_latency);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x50,
> > + OCXL_LITTLE_ENDIAN,
> > &args.cache_write_latency);
> > + if (rc)
> > + goto out;
> > +
> > + if (copy_to_user(uarg, &args, sizeof(args))) {
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > +
> > + rc = admin_response_handled(ocxlpmem);
> > + if (rc)
> > + goto out;
> > +
> > + rc = 0;
> > + goto out;
>
> That may be more of a personal habit, but that final goto disrupts
> the "good case" flow. And I think it's pretty unusual within the kernel.
>

Ok

>
> > +
> > +out:
> > + mutex_unlock(&ocxlpmem->admin_command.lock);
> > + return rc;
> > +}
> > +
> > static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> > {
> > struct ocxlpmem *ocxlpmem = file->private_data;
> > @@ -781,6 +961,11 @@ static long file_ioctl(struct file *file,
> > unsigned int cmd, unsigned long args)
> > case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> > rc = ioctl_controller_dump_complete(ocxlpmem);
> > break;
> > +
> > + case IOCTL_OCXL_PMEM_CONTROLLER_STATS:
> > + rc = ioctl_controller_stats(ocxlpmem,
> > + (struct
> > ioctl_ocxl_pmem_controller_stats __user *)args);
> > + break;
> > }
> >
> > return rc;
> > diff --git a/include/uapi/nvdimm/ocxl-pmem.h
> > b/include/uapi/nvdimm/ocxl-pmem.h
> > index d4d8512d03f7..add223aa2fdb 100644
> > --- a/include/uapi/nvdimm/ocxl-pmem.h
> > +++ b/include/uapi/nvdimm/ocxl-pmem.h
> > @@ -50,6 +50,22 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> > __u64 reserved[8];
> > };
> >
> > +struct ioctl_ocxl_pmem_controller_stats {
> > + __u32 reset_count;
> > + __u32 reset_uptime; /* seconds */
> > + __u32 power_on_uptime; /* seconds */
>
> Same as before, we're going to have some padding here.
>
> Fred
>
Ok

>
> > + __u64 host_load_count;
> > + __u64 host_store_count;
> > + __u64 media_read_count;
> > + __u64 media_write_count;
> > + __u64 cache_hit_count;
> > + __u64 cache_miss_count;
> > + __u64 media_read_latency; /* nanoseconds */
> > + __u64 media_write_latency; /* nanoseconds */
> > + __u64 cache_read_latency; /* nanoseconds */
> > + __u64 cache_write_latency; /* nanoseconds */
> > +};
> > +
> > /* ioctl numbers */
> > #define OCXL_PMEM_MAGIC 0x5C
> > /* SCM devices */
> > @@ -57,5 +73,6 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> > #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCX
> > L_PMEM_MAGIC, 0x02)
> > #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(O
> > CXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> > #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_M
> > AGIC, 0x04)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_M
> > AGIC, 0x05)
> >
> > #endif /* _UAPI_OCXL_SCM_H */
> >
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

2020-03-12 04:48:01

by Alastair D'Silva

[permalink] [raw]
Subject: Re: [PATCH v3 19/27] powerpc/powernv/pmem: Add an IOCTL to report controller statistics

On Thu, 2020-03-05 at 11:46 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > The controller can report a number of statistics that are useful
> > in evaluating the performance and reliability of the card.
> >
> > This patch exposes this information via an IOCTL.
> >
> > Signed-off-by: Alastair D'Silva <[email protected]>
> > ---
> > arch/powerpc/platforms/powernv/pmem/ocxl.c | 185
> > +++++++++++++++++++++
> > include/uapi/nvdimm/ocxl-pmem.h | 17 ++
> > 2 files changed, 202 insertions(+)
> >
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index 2cabafe1fc58..009d4fd29e7d 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -758,6 +758,186 @@ static int
> > ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
> > GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COL
> > LECTED);
> > }
> >
> > +/**
> > + * controller_stats_header_parse() - Parse the first 64 bits of
> > the controller stats admin command response
> > + * @ocxlpmem: the device metadata
> > + * @length: out, returns the number of bytes in the response
> > (excluding the 64 bit header)
> > + */
> > +static int controller_stats_header_parse(struct ocxlpmem
> > *ocxlpmem,
> > + u32 *length)
> > +{
> > + int rc;
> > + u64 val;
> > +
> > + u16 data_identifier;
> > + u32 data_length;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset,
> > + OCXL_LITTLE_ENDIAN, &val);
> > + if (rc)
> > + return rc;
> > +
> > + data_identifier = val >> 48;
> > + data_length = val & 0xFFFFFFFF;
> > +
> > + if (data_identifier != 0x4353) { // 'CS'
> > + dev_err(&ocxlpmem->dev,
> > + "Bad data identifier for controller stats,
> > expected 'CS', got '%-.*s'\n",
> > + 2, (char *)&data_identifier);
> > + return -EINVAL;
>
> Same comment as earlier patches re EINVAL
>

I don't think I've seen a comment yet on these particular blocks. Can
you suggest a better return value?

> > + }
> > +
> > + *length = data_length;
> > + return 0;
> > +}
> > +
> > +static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
> > + struct
> > ioctl_ocxl_pmem_controller_stats __user *uarg)
> > +{
> > + struct ioctl_ocxl_pmem_controller_stats args;
> > + u32 length;
> > + int rc;
> > + u64 val;
> > +
> > + memset(&args, '\0', sizeof(args));
> > +
> > + mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > + rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_STATS);
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > + OCXL_LITTLE_ENDIAN, 0);
> > + if (rc)
> > + goto out;
> > +
> > + rc = admin_command_execute(ocxlpmem);
> > + if (rc)
> > + goto out;
> > +
> > +
> > + rc = admin_command_complete_timeout(ocxlpmem,
> > + ADMIN_COMMAND_CONTROLLER_ST
> > ATS);
> > + if (rc < 0) {
> > + dev_warn(&ocxlpmem->dev, "Controller stats timed
> > out\n");
> > + goto out;
> > + }
> > +
> > + rc = admin_response(ocxlpmem);
> > + if (rc < 0)
> > + goto out;
> > + if (rc != STATUS_SUCCESS) {
> > + warn_status(ocxlpmem,
> > + "Unexpected status from controller stats",
> > rc);
> > + goto out;
> > + }
> > +
> > + rc = controller_stats_header_parse(ocxlpmem, &length);
> > + if (rc)
> > + goto out;
> > +
> > + if (length != 0x140)
> > + warn_status(ocxlpmem,
> > + "Unexpected length for controller stats
> > data, expected 0x140, got 0x%x",
> > + length);
>
> Might be worth a comment to explain where 0x140 comes from (it looks
> correct from my reading of the spec)

Ok

>
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x08,
> > + OCXL_LITTLE_ENDIAN, &val);
> > + if (rc)
> > + goto out;
> > +
> > + args.reset_count = val >> 32;
> > + args.reset_uptime = val & 0xFFFFFFFF;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x10,
> > + OCXL_LITTLE_ENDIAN, &val);
> > + if (rc)
> > + goto out;
> > +
> > + args.power_on_uptime = val >> 32;
>
> We're not collecting life remaining?
>

It looks like my implementation is out of date. I'll bring it in line
with the spec.

> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x08,
> > + OCXL_LITTLE_ENDIAN,
> > &args.host_load_count);
>
> My reading of the spec says HLC is at +0x10
>
Ditto

> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x10,
> > + OCXL_LITTLE_ENDIAN,
> > &args.host_store_count);
>
> HSC at +0x18
>
Ditto

> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x18,
> > + OCXL_LITTLE_ENDIAN,
> > &args.media_read_count);
>
> MRC is at +0x50
>
> And you're missing CRU, HLD, HSD
>
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x20,
> > + OCXL_LITTLE_ENDIAN,
> > &args.media_write_count);
>
> MWC at +0x58
>
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x28,
> > + OCXL_LITTLE_ENDIAN,
> > &args.cache_hit_count);
>
> CRHC at +0x90
>
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x30,
> > + OCXL_LITTLE_ENDIAN,
> > &args.cache_miss_count);
>
> This field doesn't seem to exist at all in my copy of the spec
>
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x38,
> > + OCXL_LITTLE_ENDIAN,
> > &args.media_read_latency);
>
> Nor this one
>
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x40,
> > + OCXL_LITTLE_ENDIAN,
> > &args.media_write_latency);
>
> Nor this one
>
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x48,
> > + OCXL_LITTLE_ENDIAN,
> > &args.cache_read_latency);
>
> Nor this one
>
> > + if (rc)
> > + goto out;
> > +
> > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.data_offset + 0x08 + 0x40 + 0x50,
> > + OCXL_LITTLE_ENDIAN,
> > &args.cache_write_latency);
>
> Nor this one
>
> > + if (rc)
> > + goto out;
> > +
> > + if (copy_to_user(uarg, &args, sizeof(args))) {
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > +
> > + rc = admin_response_handled(ocxlpmem);
> > + if (rc)
> > + goto out;
> > +
> > + rc = 0;
> > + goto out;
>
> Per Fred this pattern isn't common in the kernel, but perhaps this
> is just personal taste
>

Ok

> > +
> > +out:
> > + mutex_unlock(&ocxlpmem->admin_command.lock);
> > + return rc;
> > +}
> > +
> > static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> > {
> > struct ocxlpmem *ocxlpmem = file->private_data;
> > @@ -781,6 +961,11 @@ static long file_ioctl(struct file *file,
> > unsigned int cmd, unsigned long args)
> > case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> > rc = ioctl_controller_dump_complete(ocxlpmem);
> > break;
> > +
> > + case IOCTL_OCXL_PMEM_CONTROLLER_STATS:
> > + rc = ioctl_controller_stats(ocxlpmem,
> > + (struct
> > ioctl_ocxl_pmem_controller_stats __user *)args);
> > + break;
> > }
> >
> > return rc;
> > diff --git a/include/uapi/nvdimm/ocxl-pmem.h
> > b/include/uapi/nvdimm/ocxl-pmem.h
> > index d4d8512d03f7..add223aa2fdb 100644
> > --- a/include/uapi/nvdimm/ocxl-pmem.h
> > +++ b/include/uapi/nvdimm/ocxl-pmem.h
> > @@ -50,6 +50,22 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> > __u64 reserved[8];
> > };
> >
> > +struct ioctl_ocxl_pmem_controller_stats {
> > + __u32 reset_count;
> > + __u32 reset_uptime; /* seconds */
> > + __u32 power_on_uptime; /* seconds */
> > + __u64 host_load_count;
> > + __u64 host_store_count;
> > + __u64 media_read_count;
> > + __u64 media_write_count;
> > + __u64 cache_hit_count;
> > + __u64 cache_miss_count;
> > + __u64 media_read_latency; /* nanoseconds */
> > + __u64 media_write_latency; /* nanoseconds */
> > + __u64 cache_read_latency; /* nanoseconds */
> > + __u64 cache_write_latency; /* nanoseconds */
> > +};
> > +
> > /* ioctl numbers */
> > #define OCXL_PMEM_MAGIC 0x5C
> > /* SCM devices */
> > @@ -57,5 +73,6 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> > #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCX
> > L_PMEM_MAGIC, 0x02)
> > #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(O
> > CXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> > #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_M
> > AGIC, 0x04)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_M
> > AGIC, 0x05)
> >
> > #endif /* _UAPI_OCXL_SCM_H */
> >
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819