2023-10-13 03:42:48

by Alistair Francis

[permalink] [raw]
Subject: [PATCH v9 1/3] PCI/DOE: Rename DOE protocol to feature

DOE r1.1 replaced all occurrences of "protocol" with the term "feature"
or "Data Object Type".

PCIe r6.1 (which was published July 24) incorporated that change.

Rename the existing terms protocol with feature. This patch
also clears up some confusion of type vs protocol/feature, where
previously feature would cover the vid and type pair.

Signed-off-by: Alistair Francis <[email protected]>
Reviewed-by: Jonathan Cameron <[email protected]>
Reviewed-by: Lukas Wunner <[email protected]>
---
v9:
- Rename two more DOE macros
v8:
- Rename prot to feat as well
v7:
- Initial patch


drivers/pci/doe.c | 94 +++++++++++++++++------------------
include/uapi/linux/pci_regs.h | 2 +-
2 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index e3aab5edaf70..db2a86edf2e1 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -22,7 +22,7 @@

#include "pci.h"

-#define PCI_DOE_PROTOCOL_DISCOVERY 0
+#define PCI_DOE_FEATURE_DISCOVERY 0

/* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
#define PCI_DOE_TIMEOUT HZ
@@ -43,7 +43,7 @@
*
* @pdev: PCI device this mailbox belongs to
* @cap_offset: Capability offset
- * @prots: Array of protocols supported (encoded as long values)
+ * @feats: Array of features supported (encoded as long values)
* @wq: Wait queue for work item
* @work_queue: Queue of pci_doe_work items
* @flags: Bit array of PCI_DOE_FLAG_* flags
@@ -51,14 +51,14 @@
struct pci_doe_mb {
struct pci_dev *pdev;
u16 cap_offset;
- struct xarray prots;
+ struct xarray feats;

wait_queue_head_t wq;
struct workqueue_struct *work_queue;
unsigned long flags;
};

-struct pci_doe_protocol {
+struct pci_doe_feature {
u16 vid;
u8 type;
};
@@ -66,7 +66,7 @@ struct pci_doe_protocol {
/**
* struct pci_doe_task - represents a single query/response
*
- * @prot: DOE Protocol
+ * @feat: DOE Feature
* @request_pl: The request payload
* @request_pl_sz: Size of the request payload (bytes)
* @response_pl: The response payload
@@ -78,7 +78,7 @@ struct pci_doe_protocol {
* @doe_mb: Used internally by the mailbox
*/
struct pci_doe_task {
- struct pci_doe_protocol prot;
+ struct pci_doe_feature feat;
const __le32 *request_pl;
size_t request_pl_sz;
__le32 *response_pl;
@@ -171,8 +171,8 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
length = 0;

/* Write DOE Header */
- val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) |
- FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type);
+ val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->feat.vid) |
+ FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->feat.type);
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
@@ -217,12 +217,12 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
int i = 0;
u32 val;

- /* Read the first dword to get the protocol */
+ /* Read the first dword to get the feature */
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
- if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->prot.vid) ||
- (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->prot.type)) {
- dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Protocol] = [%04x, %02x], got [%04x, %02x]\n",
- doe_mb->cap_offset, task->prot.vid, task->prot.type,
+ if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->feat.vid) ||
+ (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->feat.type)) {
+ dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Feature] = [%04x, %02x], got [%04x, %02x]\n",
+ doe_mb->cap_offset, task->feat.vid, task->feat.type,
FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val),
FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val));
return -EIO;
@@ -384,7 +384,7 @@ static void pci_doe_task_complete(struct pci_doe_task *task)
}

static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
- u8 *protocol)
+ u8 *feature)
{
u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
*index);
@@ -393,7 +393,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
u32 response_pl;
int rc;

- rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_PROTOCOL_DISCOVERY,
+ rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_FEATURE_DISCOVERY,
&request_pl_le, sizeof(request_pl_le),
&response_pl_le, sizeof(response_pl_le));
if (rc < 0)
@@ -404,7 +404,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,

response_pl = le32_to_cpu(response_pl_le);
*vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
- *protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
+ *feature = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE,
response_pl);
*index = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX,
response_pl);
@@ -412,12 +412,12 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
return 0;
}

-static void *pci_doe_xa_prot_entry(u16 vid, u8 prot)
+static void *pci_doe_xa_feat_entry(u16 vid, u8 type)
{
- return xa_mk_value((vid << 8) | prot);
+ return xa_mk_value((vid << 8) | type);
}

-static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
+static int pci_doe_cache_features(struct pci_doe_mb *doe_mb)
{
u8 index = 0;
u8 xa_idx = 0;
@@ -425,18 +425,18 @@ static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
do {
int rc;
u16 vid;
- u8 prot;
+ u8 type;

- rc = pci_doe_discovery(doe_mb, &index, &vid, &prot);
+ rc = pci_doe_discovery(doe_mb, &index, &vid, &type);
if (rc)
return rc;

pci_dbg(doe_mb->pdev,
- "[%x] Found protocol %d vid: %x prot: %x\n",
- doe_mb->cap_offset, xa_idx, vid, prot);
+ "[%x] Found feature %d vid: %x type: %x\n",
+ doe_mb->cap_offset, xa_idx, vid, type);

- rc = xa_insert(&doe_mb->prots, xa_idx++,
- pci_doe_xa_prot_entry(vid, prot), GFP_KERNEL);
+ rc = xa_insert(&doe_mb->feats, xa_idx++,
+ pci_doe_xa_feat_entry(vid, type), GFP_KERNEL);
if (rc)
return rc;
} while (index);
@@ -460,7 +460,7 @@ static void pci_doe_cancel_tasks(struct pci_doe_mb *doe_mb)
* @pdev: PCI device to create the DOE mailbox for
* @cap_offset: Offset of the DOE mailbox
*
- * Create a single mailbox object to manage the mailbox protocol at the
+ * Create a single mailbox object to manage the mailbox feature at the
* cap_offset specified.
*
* RETURNS: created mailbox object on success
@@ -479,7 +479,7 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
doe_mb->pdev = pdev;
doe_mb->cap_offset = cap_offset;
init_waitqueue_head(&doe_mb->wq);
- xa_init(&doe_mb->prots);
+ xa_init(&doe_mb->feats);

doe_mb->work_queue = alloc_ordered_workqueue("%s %s DOE [%x]", 0,
dev_bus_name(&pdev->dev),
@@ -502,11 +502,11 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,

/*
* The state machine and the mailbox should be in sync now;
- * Use the mailbox to query protocols.
+ * Use the mailbox to query features.
*/
- rc = pci_doe_cache_protocols(doe_mb);
+ rc = pci_doe_cache_features(doe_mb);
if (rc) {
- pci_err(pdev, "[%x] failed to cache protocols : %d\n",
+ pci_err(pdev, "[%x] failed to cache features : %d\n",
doe_mb->cap_offset, rc);
goto err_cancel;
}
@@ -515,7 +515,7 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,

err_cancel:
pci_doe_cancel_tasks(doe_mb);
- xa_destroy(&doe_mb->prots);
+ xa_destroy(&doe_mb->feats);
err_destroy_wq:
destroy_workqueue(doe_mb->work_queue);
err_free:
@@ -533,31 +533,31 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
static void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb)
{
pci_doe_cancel_tasks(doe_mb);
- xa_destroy(&doe_mb->prots);
+ xa_destroy(&doe_mb->feats);
destroy_workqueue(doe_mb->work_queue);
kfree(doe_mb);
}

/**
- * pci_doe_supports_prot() - Return if the DOE instance supports the given
- * protocol
+ * pci_doe_supports_feat() - Return if the DOE instance supports the given
+ * feature
* @doe_mb: DOE mailbox capability to query
- * @vid: Protocol Vendor ID
- * @type: Protocol type
+ * @vid: Feature Vendor ID
+ * @type: Feature type
*
- * RETURNS: True if the DOE mailbox supports the protocol specified
+ * RETURNS: True if the DOE mailbox supports the feature specified
*/
-static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
+static bool pci_doe_supports_feat(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
{
unsigned long index;
void *entry;

- /* The discovery protocol must always be supported */
- if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_PROTOCOL_DISCOVERY)
+ /* The discovery feature must always be supported */
+ if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_FEATURE_DISCOVERY)
return true;

- xa_for_each(&doe_mb->prots, index, entry)
- if (entry == pci_doe_xa_prot_entry(vid, type))
+ xa_for_each(&doe_mb->feats, index, entry)
+ if (entry == pci_doe_xa_feat_entry(vid, type))
return true;

return false;
@@ -585,7 +585,7 @@ static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
static int pci_doe_submit_task(struct pci_doe_mb *doe_mb,
struct pci_doe_task *task)
{
- if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
+ if (!pci_doe_supports_feat(doe_mb, task->feat.vid, task->feat.type))
return -EINVAL;

if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
@@ -631,8 +631,8 @@ int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
{
DECLARE_COMPLETION_ONSTACK(c);
struct pci_doe_task task = {
- .prot.vid = vendor,
- .prot.type = type,
+ .feat.vid = vendor,
+ .feat.type = type,
.request_pl = request,
.request_pl_sz = request_sz,
.response_pl = response,
@@ -659,7 +659,7 @@ EXPORT_SYMBOL_GPL(pci_doe);
* @vendor: Vendor ID
* @type: Data Object Type
*
- * Find first DOE mailbox of a PCI device which supports the given protocol.
+ * Find first DOE mailbox of a PCI device which supports the given feature.
*
* RETURNS: Pointer to the DOE mailbox or NULL if none was found.
*/
@@ -670,7 +670,7 @@ struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor,
unsigned long index;

xa_for_each(&pdev->doe_mbs, index, doe_mb)
- if (pci_doe_supports_prot(doe_mb, vendor, type))
+ if (pci_doe_supports_feat(doe_mb, vendor, type))
return doe_mb;

return NULL;
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index e5f558d96493..26ca62bd3473 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1131,7 +1131,7 @@

#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff
-#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE 0x00ff0000
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000

#endif /* LINUX_PCI_REGS_H */
--
2.40.1


2023-10-13 03:43:12

by Alistair Francis

[permalink] [raw]
Subject: [PATCH v9 3/3] PCI/DOE: Allow enabling DOE without CXL

PCIe devices (not CXL) can support DOE as well, so allow DOE to be
enabled even if CXL isn't.

Signed-off-by: Alistair Francis <[email protected]>
Reviewed-by: Jonathan Cameron <[email protected]>
---
v9:
- No changes
v8:
- No changes
v7:
- Initial patch

drivers/pci/Kconfig | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index e9ae66cc4189..672a1f5178c6 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -117,7 +117,10 @@ config PCI_ATS
bool

config PCI_DOE
- bool
+ bool "Enable PCI Data Object Exchange (DOE) support"
+ help
+ Say Y here if you want be able to communicate with PCIe DOE
+ mailboxes.

config PCI_ECAM
bool
--
2.40.1

2023-10-13 03:43:20

by Alistair Francis

[permalink] [raw]
Subject: [PATCH v9 2/3] PCI/DOE: Expose the DOE features via sysfs

The PCIe 6 specification added support for the Data Object Exchange (DOE).
When DOE is supported the DOE Discovery Feature must be implemented per
PCIe r6.1 sec 6.30.1.1. The protocol allows a requester to obtain
information about the other DOE features supported by the device.

The kernel is already querying the DOE features supported and cacheing
the values. Expose the values in sysfs to allow user space to
determine which DOE features are supported by the PCIe device.

By exposing the information to userspace tools like lspci can relay the
information to users. By listing all of the supported features we can
allow userspace to parse the list, which might include
vendor specific features as well as yet to be supported features.

Signed-off-by: Alistair Francis <[email protected]>
Reviewed-by: Jonathan Cameron <[email protected]>
---
v9:
- Add a teardown function
- Rename functions to be clearer
- Tidy up the commit message
- Remove #ifdef from header
v8:
- Inlucde an example in the docs
- Fixup removing a file that wasn't added
- Remove a blank line
v7:
- Fixup the #ifdefs to keep the test robot happy
v6:
- Use "feature" instead of protocol
- Don't use any devm_* functions
- Add two more patches to the series
v5:
- Return the file name as the file contents
- Code cleanups and simplifications
v4:
- Fixup typos in the documentation
- Make it clear that the file names contain the information
- Small code cleanups
- Remove most #ifdefs
- Remove extra NULL assignment
v3:
- Expose each DOE feature as a separate file
v2:
- Add documentation
- Code cleanups

Documentation/ABI/testing/sysfs-bus-pci | 24 +++++
drivers/pci/doe.c | 134 ++++++++++++++++++++++++
drivers/pci/pci-sysfs.c | 14 +++
drivers/pci/pci.h | 1 +
include/linux/pci-doe.h | 2 +
5 files changed, 175 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index ecf47559f495..66d9d66e3584 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -500,3 +500,27 @@ Description:
console drivers from the device. Raw users of pci-sysfs
resourceN attributes must be terminated prior to resizing.
Success of the resizing operation is not guaranteed.
+
+What: /sys/bus/pci/devices/.../doe_features
+Date: October 2023
+Contact: Linux PCI developers <[email protected]>
+Description:
+ This directory contains a list of the supported
+ Data Object Exchange (DOE) features. The feature values are in
+ the file name. The contents of each file are the same as the
+ name.
+
+ The value comes from the device and specifies the vendor and
+ data object type supported. The lower (RHS of the colon) is
+ the data object type in hex. The upper (LHS of the colon)
+ is the vendor ID.
+
+ As all DOE devices must support the DOE discovery protocol, if
+ DOE is supported you will at least see this file, with this
+ contents
+
+ # cat doe_features/0x0001:00
+ 0x0001:00
+
+ If the device supports other protocols you will see other files
+ as well.
diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index db2a86edf2e1..1ffc5441e6d1 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -47,6 +47,7 @@
* @wq: Wait queue for work item
* @work_queue: Queue of pci_doe_work items
* @flags: Bit array of PCI_DOE_FLAG_* flags
+ * @sysfs_attrs: Array of sysfs device attributes
*/
struct pci_doe_mb {
struct pci_dev *pdev;
@@ -56,6 +57,10 @@ struct pci_doe_mb {
wait_queue_head_t wq;
struct workqueue_struct *work_queue;
unsigned long flags;
+
+#ifdef CONFIG_SYSFS
+ struct device_attribute *sysfs_attrs;
+#endif
};

struct pci_doe_feature {
@@ -92,6 +97,135 @@ struct pci_doe_task {
struct pci_doe_mb *doe_mb;
};

+#ifdef CONFIG_SYSFS
+static umode_t pci_doe_sysfs_attr_is_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+ struct pci_doe_mb *doe_mb;
+ unsigned long index, j;
+ void *entry;
+
+ xa_for_each(&pdev->doe_mbs, index, doe_mb) {
+ xa_for_each(&doe_mb->feats, j, entry)
+ return a->mode;
+ }
+
+ return 0;
+}
+
+static struct attribute *pci_dev_doe_feature_attrs[] = {
+ NULL,
+};
+
+const struct attribute_group pci_dev_doe_feature_group = {
+ .name = "doe_features",
+ .attrs = pci_dev_doe_feature_attrs,
+ .is_visible = pci_doe_sysfs_attr_is_visible,
+};
+
+static ssize_t pci_doe_sysfs_feature_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%s\n", attr->attr.name);
+}
+
+static void pci_doe_sysfs_feature_remove(struct pci_dev *pdev,
+ struct pci_doe_mb *doe_mb)
+{
+ struct device *dev = &pdev->dev;
+ struct device_attribute *attrs = doe_mb->sysfs_attrs;
+ unsigned long i;
+ void *entry;
+
+ if (!attrs)
+ return;
+
+ doe_mb->sysfs_attrs = NULL;
+ xa_for_each(&doe_mb->feats, i, entry) {
+ if (attrs[i].show)
+ sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr,
+ pci_dev_doe_feature_group.name);
+ kfree(attrs[i].attr.name);
+ }
+ kfree(attrs);
+}
+
+static int pci_doe_sysfs_feature_populate(struct pci_dev *pdev,
+ struct pci_doe_mb *doe_mb)
+{
+ struct device *dev = &pdev->dev;
+ struct device_attribute *attrs;
+ unsigned long num_features = 0;
+ unsigned long vid, type;
+ unsigned long i;
+ void *entry;
+ int ret;
+
+ xa_for_each(&doe_mb->feats, i, entry)
+ num_features++;
+
+ attrs = kcalloc(num_features, sizeof(*attrs), GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+
+ doe_mb->sysfs_attrs = attrs;
+ xa_for_each(&doe_mb->feats, i, entry) {
+ sysfs_attr_init(&attrs[i].attr);
+ vid = xa_to_value(entry) >> 8;
+ type = xa_to_value(entry) & 0xFF;
+ attrs[i].attr.name = kasprintf(GFP_KERNEL,
+ "0x%04lX:%02lX", vid, type);
+ if (!attrs[i].attr.name) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ attrs[i].attr.mode = 0444;
+ attrs[i].show = pci_doe_sysfs_feature_show;
+
+ ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr,
+ pci_dev_doe_feature_group.name);
+ if (ret) {
+ attrs[i].show = NULL;
+ goto fail;
+ }
+ }
+
+ return 0;
+
+fail:
+ pci_doe_sysfs_feature_remove(pdev, doe_mb);
+ return ret;
+}
+
+void pci_doe_sysfs_teardown(struct pci_dev *pdev)
+{
+ struct pci_doe_mb *doe_mb;
+ unsigned long index;
+
+ xa_for_each(&pdev->doe_mbs, index, doe_mb) {
+ pci_doe_sysfs_feature_remove(pdev, doe_mb);
+ }
+}
+
+int pci_doe_sysfs_init(struct pci_dev *pdev)
+{
+ struct pci_doe_mb *doe_mb;
+ unsigned long index;
+ int ret;
+
+ xa_for_each(&pdev->doe_mbs, index, doe_mb) {
+ ret = pci_doe_sysfs_feature_populate(pdev, doe_mb);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
{
if (wait_event_timeout(doe_mb->wq,
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index d9eede2dbc0e..64bf765df5e2 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -16,6 +16,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/pci.h>
+#include <linux/pci-doe.h>
#include <linux/stat.h>
#include <linux/export.h>
#include <linux/topology.h>
@@ -1153,6 +1154,10 @@ static void pci_remove_resource_files(struct pci_dev *pdev)
{
int i;

+ if (IS_ENABLED(CONFIG_PCI_DOE)) {
+ pci_doe_sysfs_teardown(pdev);
+ }
+
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
struct bin_attribute *res_attr;

@@ -1230,6 +1235,12 @@ static int pci_create_resource_files(struct pci_dev *pdev)
int i;
int retval;

+ if (IS_ENABLED(CONFIG_PCI_DOE)) {
+ retval = pci_doe_sysfs_init(pdev);
+ if (retval)
+ return retval;
+ }
+
/* Expose the PCI resources from this device as files */
for (i = 0; i < PCI_STD_NUM_BARS; i++) {

@@ -1655,6 +1666,9 @@ static const struct attribute_group *pci_dev_attr_groups[] = {
#endif
#ifdef CONFIG_PCIEASPM
&aspm_ctrl_attr_group,
+#endif
+#ifdef CONFIG_PCI_DOE
+ &pci_dev_doe_feature_group,
#endif
NULL,
};
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 39a8932dc340..83065e07c491 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -186,6 +186,7 @@ extern const struct attribute_group *pci_dev_groups[];
extern const struct attribute_group *pcibus_groups[];
extern const struct device_type pci_dev_type;
extern const struct attribute_group *pci_bus_groups[];
+extern const struct attribute_group pci_dev_doe_feature_group;

extern unsigned long pci_hotplug_io_size;
extern unsigned long pci_hotplug_mmio_size;
diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
index 1f14aed4354b..c5529ae604ff 100644
--- a/include/linux/pci-doe.h
+++ b/include/linux/pci-doe.h
@@ -22,4 +22,6 @@ int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
const void *request, size_t request_sz,
void *response, size_t response_sz);

+int pci_doe_sysfs_init(struct pci_dev *pci_dev);
+void pci_doe_sysfs_teardown(struct pci_dev *pdev);
#endif
--
2.40.1

2023-10-17 08:34:38

by Lukas Wunner

[permalink] [raw]
Subject: Re: [PATCH v9 2/3] PCI/DOE: Expose the DOE features via sysfs

On Fri, Oct 13, 2023 at 01:41:57PM +1000, Alistair Francis wrote:
> +#ifdef CONFIG_SYSFS
> +static umode_t pci_doe_sysfs_attr_is_visible(struct kobject *kobj,
> + struct attribute *a, int n)
> +{
> + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
> + struct pci_doe_mb *doe_mb;
> + unsigned long index, j;
> + void *entry;
> +
> + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> + xa_for_each(&doe_mb->feats, j, entry)
> + return a->mode;
> + }
> +
> + return 0;
> +}

Out of curiosity: Does this method prevent creation of a
"doe_features" directory for devices which don't have any
DOE mailboxes?

(If it does, a code comment explaining that might be helpful.)


> +const struct attribute_group pci_dev_doe_feature_group = {
> + .name = "doe_features",
> + .attrs = pci_dev_doe_feature_attrs,
> + .is_visible = pci_doe_sysfs_attr_is_visible,
> +};

Nit: Wondering why the "=" is aligned for .name and .attrs
but not for .is_visible?


> +static void pci_doe_sysfs_feature_remove(struct pci_dev *pdev,
> + struct pci_doe_mb *doe_mb)
> +{
> + struct device *dev = &pdev->dev;
> + struct device_attribute *attrs = doe_mb->sysfs_attrs;
> + unsigned long i;
> + void *entry;

Nit: Inverse Christmas tree?


> +static int pci_doe_sysfs_feature_populate(struct pci_dev *pdev,
> + struct pci_doe_mb *doe_mb)
> +{
> + struct device *dev = &pdev->dev;
> + struct device_attribute *attrs;
> + unsigned long num_features = 0;
> + unsigned long vid, type;
> + unsigned long i;
> + void *entry;
> + int ret;
> +
> + xa_for_each(&doe_mb->feats, i, entry)
> + num_features++;
> +
> + attrs = kcalloc(num_features, sizeof(*attrs), GFP_KERNEL);
> + if (!attrs)
> + return -ENOMEM;
> +
> + doe_mb->sysfs_attrs = attrs;
> + xa_for_each(&doe_mb->feats, i, entry) {
> + sysfs_attr_init(&attrs[i].attr);
> + vid = xa_to_value(entry) >> 8;
> + type = xa_to_value(entry) & 0xFF;
> + attrs[i].attr.name = kasprintf(GFP_KERNEL,
> + "0x%04lX:%02lX", vid, type);

Nit: Capital X conversion specifier will result in upper case hex
characters in filename and contents, whereas existing sysfs attributes
such as "vendor", "device" contain lower case hex characters.

Might want to consider lower case x for consistency.


> +void pci_doe_sysfs_teardown(struct pci_dev *pdev)
> +{
> + struct pci_doe_mb *doe_mb;
> + unsigned long index;
> +
> + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> + pci_doe_sysfs_feature_remove(pdev, doe_mb);
> + }

Nit: Curly braces not necessary.


> @@ -1153,6 +1154,10 @@ static void pci_remove_resource_files(struct pci_dev *pdev)
> {
> int i;
>
> + if (IS_ENABLED(CONFIG_PCI_DOE)) {
> + pci_doe_sysfs_teardown(pdev);
> + }

Nit: Curly braces not necessary.


> @@ -1230,6 +1235,12 @@ static int pci_create_resource_files(struct pci_dev *pdev)
> int i;
> int retval;
>
> + if (IS_ENABLED(CONFIG_PCI_DOE)) {
> + retval = pci_doe_sysfs_init(pdev);
> + if (retval)
> + return retval;
> + }
> +
> /* Expose the PCI resources from this device as files */
> for (i = 0; i < PCI_STD_NUM_BARS; i++) {

I think this needs to be added to pci_create_sysfs_dev_files() instead.

pci_create_resource_files() only deals with creation of resource files,
as the name implies, which is unrelated to DOE features.

Worse, pci_create_resource_files() is also called from
pci_dev_resource_resize_attr(), i.e. every time user space
writes to the "resource##n##_resize" attributes.

Similarly, the call to pci_doe_sysfs_teardown() belongs in
pci_remove_sysfs_dev_files().

Thanks,

Lukas

2023-10-18 22:24:38

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v9 1/3] PCI/DOE: Rename DOE protocol to feature

On Fri, Oct 13, 2023 at 01:41:56PM +1000, Alistair Francis wrote:
> DOE r1.1 replaced all occurrences of "protocol" with the term "feature"
> or "Data Object Type".
>
> PCIe r6.1 (which was published July 24) incorporated that change.
>
> Rename the existing terms protocol with feature. This patch
> also clears up some confusion of type vs protocol/feature, where
> previously feature would cover the vid and type pair.

Would you mind splitting this up? It sounds like:

- A purely textual change from "protocol" to "feature", and

- Clearing up some confusion about a vendor ID and a type?

I don't want to have sort through the cosmetic changes to find the
confusion fix, which sounds like it should have a closer look.

The words "this patch also" in a commit log is always a hint that
there are two separate things going on that might not need to be in
the same patch.

> Signed-off-by: Alistair Francis <[email protected]>
> Reviewed-by: Jonathan Cameron <[email protected]>
> Reviewed-by: Lukas Wunner <[email protected]>
> ---
> v9:
> - Rename two more DOE macros
> v8:
> - Rename prot to feat as well
> v7:
> - Initial patch
>
>
> drivers/pci/doe.c | 94 +++++++++++++++++------------------
> include/uapi/linux/pci_regs.h | 2 +-
> 2 files changed, 48 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> index e3aab5edaf70..db2a86edf2e1 100644
> --- a/drivers/pci/doe.c
> +++ b/drivers/pci/doe.c
> @@ -22,7 +22,7 @@
>
> #include "pci.h"
>
> -#define PCI_DOE_PROTOCOL_DISCOVERY 0
> +#define PCI_DOE_FEATURE_DISCOVERY 0
>
> /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
> #define PCI_DOE_TIMEOUT HZ
> @@ -43,7 +43,7 @@
> *
> * @pdev: PCI device this mailbox belongs to
> * @cap_offset: Capability offset
> - * @prots: Array of protocols supported (encoded as long values)
> + * @feats: Array of features supported (encoded as long values)
> * @wq: Wait queue for work item
> * @work_queue: Queue of pci_doe_work items
> * @flags: Bit array of PCI_DOE_FLAG_* flags
> @@ -51,14 +51,14 @@
> struct pci_doe_mb {
> struct pci_dev *pdev;
> u16 cap_offset;
> - struct xarray prots;
> + struct xarray feats;
>
> wait_queue_head_t wq;
> struct workqueue_struct *work_queue;
> unsigned long flags;
> };
>
> -struct pci_doe_protocol {
> +struct pci_doe_feature {
> u16 vid;
> u8 type;
> };
> @@ -66,7 +66,7 @@ struct pci_doe_protocol {
> /**
> * struct pci_doe_task - represents a single query/response
> *
> - * @prot: DOE Protocol
> + * @feat: DOE Feature
> * @request_pl: The request payload
> * @request_pl_sz: Size of the request payload (bytes)
> * @response_pl: The response payload
> @@ -78,7 +78,7 @@ struct pci_doe_protocol {
> * @doe_mb: Used internally by the mailbox
> */
> struct pci_doe_task {
> - struct pci_doe_protocol prot;
> + struct pci_doe_feature feat;
> const __le32 *request_pl;
> size_t request_pl_sz;
> __le32 *response_pl;
> @@ -171,8 +171,8 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
> length = 0;
>
> /* Write DOE Header */
> - val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) |
> - FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type);
> + val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->feat.vid) |
> + FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->feat.type);
> pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
> pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
> FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
> @@ -217,12 +217,12 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
> int i = 0;
> u32 val;
>
> - /* Read the first dword to get the protocol */
> + /* Read the first dword to get the feature */
> pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
> - if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->prot.vid) ||
> - (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->prot.type)) {
> - dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Protocol] = [%04x, %02x], got [%04x, %02x]\n",
> - doe_mb->cap_offset, task->prot.vid, task->prot.type,
> + if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->feat.vid) ||
> + (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->feat.type)) {
> + dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Feature] = [%04x, %02x], got [%04x, %02x]\n",
> + doe_mb->cap_offset, task->feat.vid, task->feat.type,
> FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val),
> FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val));
> return -EIO;
> @@ -384,7 +384,7 @@ static void pci_doe_task_complete(struct pci_doe_task *task)
> }
>
> static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> - u8 *protocol)
> + u8 *feature)
> {
> u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
> *index);
> @@ -393,7 +393,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> u32 response_pl;
> int rc;
>
> - rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_PROTOCOL_DISCOVERY,
> + rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_FEATURE_DISCOVERY,
> &request_pl_le, sizeof(request_pl_le),
> &response_pl_le, sizeof(response_pl_le));
> if (rc < 0)
> @@ -404,7 +404,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
>
> response_pl = le32_to_cpu(response_pl_le);
> *vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
> - *protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
> + *feature = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE,
> response_pl);
> *index = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX,
> response_pl);
> @@ -412,12 +412,12 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> return 0;
> }
>
> -static void *pci_doe_xa_prot_entry(u16 vid, u8 prot)
> +static void *pci_doe_xa_feat_entry(u16 vid, u8 type)
> {
> - return xa_mk_value((vid << 8) | prot);
> + return xa_mk_value((vid << 8) | type);
> }
>
> -static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
> +static int pci_doe_cache_features(struct pci_doe_mb *doe_mb)
> {
> u8 index = 0;
> u8 xa_idx = 0;
> @@ -425,18 +425,18 @@ static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
> do {
> int rc;
> u16 vid;
> - u8 prot;
> + u8 type;
>
> - rc = pci_doe_discovery(doe_mb, &index, &vid, &prot);
> + rc = pci_doe_discovery(doe_mb, &index, &vid, &type);
> if (rc)
> return rc;
>
> pci_dbg(doe_mb->pdev,
> - "[%x] Found protocol %d vid: %x prot: %x\n",
> - doe_mb->cap_offset, xa_idx, vid, prot);
> + "[%x] Found feature %d vid: %x type: %x\n",
> + doe_mb->cap_offset, xa_idx, vid, type);
>
> - rc = xa_insert(&doe_mb->prots, xa_idx++,
> - pci_doe_xa_prot_entry(vid, prot), GFP_KERNEL);
> + rc = xa_insert(&doe_mb->feats, xa_idx++,
> + pci_doe_xa_feat_entry(vid, type), GFP_KERNEL);
> if (rc)
> return rc;
> } while (index);
> @@ -460,7 +460,7 @@ static void pci_doe_cancel_tasks(struct pci_doe_mb *doe_mb)
> * @pdev: PCI device to create the DOE mailbox for
> * @cap_offset: Offset of the DOE mailbox
> *
> - * Create a single mailbox object to manage the mailbox protocol at the
> + * Create a single mailbox object to manage the mailbox feature at the
> * cap_offset specified.
> *
> * RETURNS: created mailbox object on success
> @@ -479,7 +479,7 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
> doe_mb->pdev = pdev;
> doe_mb->cap_offset = cap_offset;
> init_waitqueue_head(&doe_mb->wq);
> - xa_init(&doe_mb->prots);
> + xa_init(&doe_mb->feats);
>
> doe_mb->work_queue = alloc_ordered_workqueue("%s %s DOE [%x]", 0,
> dev_bus_name(&pdev->dev),
> @@ -502,11 +502,11 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
>
> /*
> * The state machine and the mailbox should be in sync now;
> - * Use the mailbox to query protocols.
> + * Use the mailbox to query features.
> */
> - rc = pci_doe_cache_protocols(doe_mb);
> + rc = pci_doe_cache_features(doe_mb);
> if (rc) {
> - pci_err(pdev, "[%x] failed to cache protocols : %d\n",
> + pci_err(pdev, "[%x] failed to cache features : %d\n",
> doe_mb->cap_offset, rc);
> goto err_cancel;
> }
> @@ -515,7 +515,7 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
>
> err_cancel:
> pci_doe_cancel_tasks(doe_mb);
> - xa_destroy(&doe_mb->prots);
> + xa_destroy(&doe_mb->feats);
> err_destroy_wq:
> destroy_workqueue(doe_mb->work_queue);
> err_free:
> @@ -533,31 +533,31 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
> static void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb)
> {
> pci_doe_cancel_tasks(doe_mb);
> - xa_destroy(&doe_mb->prots);
> + xa_destroy(&doe_mb->feats);
> destroy_workqueue(doe_mb->work_queue);
> kfree(doe_mb);
> }
>
> /**
> - * pci_doe_supports_prot() - Return if the DOE instance supports the given
> - * protocol
> + * pci_doe_supports_feat() - Return if the DOE instance supports the given
> + * feature
> * @doe_mb: DOE mailbox capability to query
> - * @vid: Protocol Vendor ID
> - * @type: Protocol type
> + * @vid: Feature Vendor ID
> + * @type: Feature type
> *
> - * RETURNS: True if the DOE mailbox supports the protocol specified
> + * RETURNS: True if the DOE mailbox supports the feature specified
> */
> -static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> +static bool pci_doe_supports_feat(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> {
> unsigned long index;
> void *entry;
>
> - /* The discovery protocol must always be supported */
> - if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_PROTOCOL_DISCOVERY)
> + /* The discovery feature must always be supported */
> + if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_FEATURE_DISCOVERY)
> return true;
>
> - xa_for_each(&doe_mb->prots, index, entry)
> - if (entry == pci_doe_xa_prot_entry(vid, type))
> + xa_for_each(&doe_mb->feats, index, entry)
> + if (entry == pci_doe_xa_feat_entry(vid, type))
> return true;
>
> return false;
> @@ -585,7 +585,7 @@ static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> static int pci_doe_submit_task(struct pci_doe_mb *doe_mb,
> struct pci_doe_task *task)
> {
> - if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
> + if (!pci_doe_supports_feat(doe_mb, task->feat.vid, task->feat.type))
> return -EINVAL;
>
> if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
> @@ -631,8 +631,8 @@ int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
> {
> DECLARE_COMPLETION_ONSTACK(c);
> struct pci_doe_task task = {
> - .prot.vid = vendor,
> - .prot.type = type,
> + .feat.vid = vendor,
> + .feat.type = type,
> .request_pl = request,
> .request_pl_sz = request_sz,
> .response_pl = response,
> @@ -659,7 +659,7 @@ EXPORT_SYMBOL_GPL(pci_doe);
> * @vendor: Vendor ID
> * @type: Data Object Type
> *
> - * Find first DOE mailbox of a PCI device which supports the given protocol.
> + * Find first DOE mailbox of a PCI device which supports the given feature.
> *
> * RETURNS: Pointer to the DOE mailbox or NULL if none was found.
> */
> @@ -670,7 +670,7 @@ struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor,
> unsigned long index;
>
> xa_for_each(&pdev->doe_mbs, index, doe_mb)
> - if (pci_doe_supports_prot(doe_mb, vendor, type))
> + if (pci_doe_supports_feat(doe_mb, vendor, type))
> return doe_mb;
>
> return NULL;
> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
> index e5f558d96493..26ca62bd3473 100644
> --- a/include/uapi/linux/pci_regs.h
> +++ b/include/uapi/linux/pci_regs.h
> @@ -1131,7 +1131,7 @@
>
> #define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff
> #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff
> -#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000
> +#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE 0x00ff0000
> #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000
>
> #endif /* LINUX_PCI_REGS_H */
> --
> 2.40.1
>

2023-10-19 16:58:53

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v9 2/3] PCI/DOE: Expose the DOE features via sysfs

On Fri, Oct 13, 2023 at 01:41:57PM +1000, Alistair Francis wrote:
> The PCIe 6 specification added support for the Data Object Exchange (DOE).
> When DOE is supported the DOE Discovery Feature must be implemented per
> PCIe r6.1 sec 6.30.1.1. The protocol allows a requester to obtain
> information about the other DOE features supported by the device.
> ...

> +static umode_t pci_doe_sysfs_attr_is_visible(struct kobject *kobj,
> + struct attribute *a, int n)
> +{
> + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
> + struct pci_doe_mb *doe_mb;
> + unsigned long index, j;
> + void *entry;
> +
> + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> + xa_for_each(&doe_mb->feats, j, entry)
> + return a->mode;
> + }
> +
> + return 0;

The nested loops that don't test anything look a little weird and
maybe I'm missing something, but this looks like it returns a->mode if
any mailbox with a feature exists, and 0 otherwise.

Is that the same as this:

if (pdev->doe_mbs)
return a->mode;

return 0;

since it sounds like a mailbox must support at least one feature?

> +}
> +
> +static struct attribute *pci_dev_doe_feature_attrs[] = {
> + NULL,
> +};
> +
> +const struct attribute_group pci_dev_doe_feature_group = {
> + .name = "doe_features",
> + .attrs = pci_dev_doe_feature_attrs,
> + .is_visible = pci_doe_sysfs_attr_is_visible,
> +};
> +
> +static ssize_t pci_doe_sysfs_feature_show(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + return sysfs_emit(buf, "%s\n", attr->attr.name);
> +}
> +
> +static void pci_doe_sysfs_feature_remove(struct pci_dev *pdev,
> + struct pci_doe_mb *doe_mb)
> +{
> + struct device *dev = &pdev->dev;
> + struct device_attribute *attrs = doe_mb->sysfs_attrs;
> + unsigned long i;
> + void *entry;
> +
> + if (!attrs)
> + return;
> +
> + doe_mb->sysfs_attrs = NULL;
> + xa_for_each(&doe_mb->feats, i, entry) {
> + if (attrs[i].show)
> + sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr,
> + pci_dev_doe_feature_group.name);
> + kfree(attrs[i].attr.name);
> + }
> + kfree(attrs);
> +}
> +
> +static int pci_doe_sysfs_feature_populate(struct pci_dev *pdev,
> + struct pci_doe_mb *doe_mb)
> +{
> + struct device *dev = &pdev->dev;
> + struct device_attribute *attrs;
> + unsigned long num_features = 0;
> + unsigned long vid, type;
> + unsigned long i;
> + void *entry;
> + int ret;
> +
> + xa_for_each(&doe_mb->feats, i, entry)
> + num_features++;
> +
> + attrs = kcalloc(num_features, sizeof(*attrs), GFP_KERNEL);
> + if (!attrs)
> + return -ENOMEM;
> +
> + doe_mb->sysfs_attrs = attrs;
> + xa_for_each(&doe_mb->feats, i, entry) {
> + sysfs_attr_init(&attrs[i].attr);
> + vid = xa_to_value(entry) >> 8;
> + type = xa_to_value(entry) & 0xFF;
> + attrs[i].attr.name = kasprintf(GFP_KERNEL,
> + "0x%04lX:%02lX", vid, type);

What's the rationale for using "0x" on the vendor ID but not on the
type? "0x1234:10" hints that the "10" might be decimal since it lacks
"0x".

Suggest lower-case "%04lx:%02lx" either way.

FWIW, there's no "0x" prefix on the hex vendor IDs in "lspci -n"
output and dmesg messages like this:

pci 0000:01:00.0: [10de:13b6] type 00

> + if (!attrs[i].attr.name) {
> + ret = -ENOMEM;
> + goto fail;
> + }
> +
> + attrs[i].attr.mode = 0444;
> + attrs[i].show = pci_doe_sysfs_feature_show;
> +
> + ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr,
> + pci_dev_doe_feature_group.name);
> + if (ret) {
> + attrs[i].show = NULL;
> + goto fail;
> + }
> + }
> +
> + return 0;
> +
> +fail:
> + pci_doe_sysfs_feature_remove(pdev, doe_mb);
> + return ret;

Not sure we should treat this failure this seriously. Looks like it
will prevent creation of the BAR resource files, and possibly even
abort pci_sysfs_init() early. I think the pci_dev will still be
usable (lacking DOE sysfs) even if this fails.

> +}
> +
> +void pci_doe_sysfs_teardown(struct pci_dev *pdev)
> +{
> + struct pci_doe_mb *doe_mb;
> + unsigned long index;
> +
> + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> + pci_doe_sysfs_feature_remove(pdev, doe_mb);
> + }
> +}
> +
> +int pci_doe_sysfs_init(struct pci_dev *pdev)
> +{
> + struct pci_doe_mb *doe_mb;
> + unsigned long index;
> + int ret;
> +
> + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> + ret = pci_doe_sysfs_feature_populate(pdev, doe_mb);
> + if (ret)
> + return ret;
> + }

I agree with Lukas that pci_create_resource_files() is not the right
place to call this.

I try hard to avoid calling *anything* from the
pci_create_sysfs_dev_files() path because it has the nasty
"sysfs_initialized" check and the associated pci_sysfs_init()
initcall.

It's so much cleaner when we can set up static attributes that are
automatically added in the device_add() path. I don't know whether
that's possible. I see lots of discussion with Greg KH that might be
related, but I'm not sure.

I do know that we enumerate the mailboxes and features during
pci_init_capabilities(), which happens before device_add(), so the
information about which attributes should be present is at least
*available* early enough:

pci_host_probe
pci_scan_root_bus_bridge
...
pci_scan_single_device
pci_device_add
pci_init_capabilities
pci_doe_init
while (pci_find_next_ext_capability(PCI_EXT_CAP_ID_DOE))
pci_doe_create_mb
pci_doe_cache_features
pci_doe_discovery
xa_insert(&doe_mb->feats) <--
device_add
device_add_attrs
device_add_groups
pci_bus_add_devices
pci_bus_add_device
pci_create_sysfs_dev_files
...
pci_doe_sysfs_init <--
xa_for_each(&pdev->doe_mbs)
pci_doe_sysfs_feature_populate
xa_for_each(&doe_mb->feats)
sysfs_add_file_to_group(pci_dev_doe_feature_group.name)

Is it feasible to build an attribute group in pci_doe_init() and add
it to dev->groups so device_add() will automatically add them?

It looks like __power_supply_register() does something like that:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/power/supply/power_supply_core.c?id=v6.5#n1375

> --- a/include/linux/pci-doe.h
> +++ b/include/linux/pci-doe.h
> @@ -22,4 +22,6 @@ int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
> const void *request, size_t request_sz,
> void *response, size_t response_sz);
>
> +int pci_doe_sysfs_init(struct pci_dev *pci_dev);
> +void pci_doe_sysfs_teardown(struct pci_dev *pdev);

These declarations look like they should be in drivers/pci/pci.h as
well. I don't think anything outside the PCI core should need these.

Bjorn

2023-10-19 19:33:03

by Lukas Wunner

[permalink] [raw]
Subject: Re: [PATCH v9 2/3] PCI/DOE: Expose the DOE features via sysfs

On Thu, Oct 19, 2023 at 11:58:29AM -0500, Bjorn Helgaas wrote:
> On Fri, Oct 13, 2023 at 01:41:57PM +1000, Alistair Francis wrote:
> > + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> > + xa_for_each(&doe_mb->feats, j, entry)
> > + return a->mode;
> > + }
> > +
> > + return 0;
>
> The nested loops that don't test anything look a little weird and
> maybe I'm missing something, but this looks like it returns a->mode if
> any mailbox with a feature exists, and 0 otherwise.
>
> Is that the same as this:
>
> if (pdev->doe_mbs)
> return a->mode;
>
> return 0;
>
> since it sounds like a mailbox must support at least one feature?

In theory it's the same, in practice there *might* be non-compliant
devices which lack support for the discovery feature.


> > + attrs[i].attr.name = kasprintf(GFP_KERNEL,
> > + "0x%04lX:%02lX", vid, type);
>
> What's the rationale for using "0x" on the vendor ID but not on the
> type? "0x1234:10" hints that the "10" might be decimal since it lacks
> "0x".
>
> Suggest lower-case "%04lx:%02lx" either way.
>
> FWIW, there's no "0x" prefix on the hex vendor IDs in "lspci -n"
> output and dmesg messages like this:
>
> pci 0000:01:00.0: [10de:13b6] type 00

The existing attributes "vendor", "device" etc do emit the "0x".

From drivers/pci/pci-sysfs.c:

pci_config_attr(vendor, "0x%04x\n");
pci_config_attr(device, "0x%04x\n");
pci_config_attr(subsystem_vendor, "0x%04x\n");
pci_config_attr(subsystem_device, "0x%04x\n");
pci_config_attr(revision, "0x%02x\n");
pci_config_attr(class, "0x%06x\n");


> I try hard to avoid calling *anything* from the
> pci_create_sysfs_dev_files() path because it has the nasty
> "sysfs_initialized" check and the associated pci_sysfs_init()
> initcall.

What's the purpose of sysfs_initialized anyway?

It was introduced by this historic commit:
https://git.kernel.org/tglx/history/c/f6d553444da2

Can PCI_ROM_RESOURCEs appear after device enumeration but before
the late_initcall stage?

If sysfs_initialized is only needed for PCI_ROM_RESOURCEs, can we
constrain pci_sysfs_init() to those and avoid creating all the
other runtime sysfs attributes in the initcall?

Thanks,

Lukas

2023-10-19 20:01:33

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v9 2/3] PCI/DOE: Expose the DOE features via sysfs

On Thu, Oct 19, 2023 at 09:32:46PM +0200, Lukas Wunner wrote:
> On Thu, Oct 19, 2023 at 11:58:29AM -0500, Bjorn Helgaas wrote:
> > On Fri, Oct 13, 2023 at 01:41:57PM +1000, Alistair Francis wrote:
> > > + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> > > + xa_for_each(&doe_mb->feats, j, entry)
> > > + return a->mode;
> > > + }
> > > +
> > > + return 0;
> >
> > The nested loops that don't test anything look a little weird and
> > maybe I'm missing something, but this looks like it returns a->mode if
> > any mailbox with a feature exists, and 0 otherwise.
> >
> > Is that the same as this:
> >
> > if (pdev->doe_mbs)
> > return a->mode;
> >
> > return 0;
> >
> > since it sounds like a mailbox must support at least one feature?
>
> In theory it's the same, in practice there *might* be non-compliant
> devices which lack support for the discovery feature.

Is there any point in setting ->doe_mbs if there's no feature?

> > > + attrs[i].attr.name = kasprintf(GFP_KERNEL,
> > > + "0x%04lX:%02lX", vid, type);
> >
> > What's the rationale for using "0x" on the vendor ID but not on the
> > type? "0x1234:10" hints that the "10" might be decimal since it lacks
> > "0x".

This is my main question. Seems like it should be both or neither.

> > I try hard to avoid calling *anything* from the
> > pci_create_sysfs_dev_files() path because it has the nasty
> > "sysfs_initialized" check and the associated pci_sysfs_init()
> > initcall.
>
> What's the purpose of sysfs_initialized anyway?
>
> It was introduced by this historic commit:
> https://git.kernel.org/tglx/history/c/f6d553444da2
>
> Can PCI_ROM_RESOURCEs appear after device enumeration but before
> the late_initcall stage?
>
> If sysfs_initialized is only needed for PCI_ROM_RESOURCEs, can we
> constrain pci_sysfs_init() to those and avoid creating all the
> other runtime sysfs attributes in the initcall?

I think pci_sysfs_init() is already constrained to only the BARs and
ROM. Constraining it to only the ROM would be an improvement, but I'd
really like to get rid of it altogether. Krzysztof W. moved a lot of
stuff out of pci_sysfs_init() a while ago, but the BARs are harder
because of some arch/alpha wrinkles, IIRC.

I think the reason for pci_sysfs_init() exists in the first place is
because those resources may be assigned after pci_device_add(), and
(my memory is hazy here) it seems like changing the size of binary
attributes is hard, which might fit with the
pci_remove_resource_files() and pci_create_resource_files() in the
resource##n##_resize_store() macro:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/pci/pci-sysfs.c?id=v6.5#n1440

Bjorn

2023-11-03 00:19:32

by Alistair Francis

[permalink] [raw]
Subject: Re: [PATCH v9 1/3] PCI/DOE: Rename DOE protocol to feature

On Thu, Oct 19, 2023 at 8:24 AM Bjorn Helgaas <[email protected]> wrote:
>
> On Fri, Oct 13, 2023 at 01:41:56PM +1000, Alistair Francis wrote:
> > DOE r1.1 replaced all occurrences of "protocol" with the term "feature"
> > or "Data Object Type".
> >
> > PCIe r6.1 (which was published July 24) incorporated that change.
> >
> > Rename the existing terms protocol with feature. This patch
> > also clears up some confusion of type vs protocol/feature, where
> > previously feature would cover the vid and type pair.
>
> Would you mind splitting this up? It sounds like:
>
> - A purely textual change from "protocol" to "feature", and
>
> - Clearing up some confusion about a vendor ID and a type?

These are both just renaming.

- protocol -> feature
- prot -> type

The newest PCIe spec doesn't use protocol any more, instead they use
feature and Data Object Type. So this patch is just renaming things to
match.

I have split the patch up into two rename operations in the next version

Alistair

>
> I don't want to have sort through the cosmetic changes to find the
> confusion fix, which sounds like it should have a closer look.
>
> The words "this patch also" in a commit log is always a hint that
> there are two separate things going on that might not need to be in
> the same patch.
>
> > Signed-off-by: Alistair Francis <[email protected]>
> > Reviewed-by: Jonathan Cameron <[email protected]>
> > Reviewed-by: Lukas Wunner <[email protected]>
> > ---
> > v9:
> > - Rename two more DOE macros
> > v8:
> > - Rename prot to feat as well
> > v7:
> > - Initial patch
> >
> >
> > drivers/pci/doe.c | 94 +++++++++++++++++------------------
> > include/uapi/linux/pci_regs.h | 2 +-
> > 2 files changed, 48 insertions(+), 48 deletions(-)
> >
> > diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> > index e3aab5edaf70..db2a86edf2e1 100644
> > --- a/drivers/pci/doe.c
> > +++ b/drivers/pci/doe.c
> > @@ -22,7 +22,7 @@
> >
> > #include "pci.h"
> >
> > -#define PCI_DOE_PROTOCOL_DISCOVERY 0
> > +#define PCI_DOE_FEATURE_DISCOVERY 0
> >
> > /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
> > #define PCI_DOE_TIMEOUT HZ
> > @@ -43,7 +43,7 @@
> > *
> > * @pdev: PCI device this mailbox belongs to
> > * @cap_offset: Capability offset
> > - * @prots: Array of protocols supported (encoded as long values)
> > + * @feats: Array of features supported (encoded as long values)
> > * @wq: Wait queue for work item
> > * @work_queue: Queue of pci_doe_work items
> > * @flags: Bit array of PCI_DOE_FLAG_* flags
> > @@ -51,14 +51,14 @@
> > struct pci_doe_mb {
> > struct pci_dev *pdev;
> > u16 cap_offset;
> > - struct xarray prots;
> > + struct xarray feats;
> >
> > wait_queue_head_t wq;
> > struct workqueue_struct *work_queue;
> > unsigned long flags;
> > };
> >
> > -struct pci_doe_protocol {
> > +struct pci_doe_feature {
> > u16 vid;
> > u8 type;
> > };
> > @@ -66,7 +66,7 @@ struct pci_doe_protocol {
> > /**
> > * struct pci_doe_task - represents a single query/response
> > *
> > - * @prot: DOE Protocol
> > + * @feat: DOE Feature
> > * @request_pl: The request payload
> > * @request_pl_sz: Size of the request payload (bytes)
> > * @response_pl: The response payload
> > @@ -78,7 +78,7 @@ struct pci_doe_protocol {
> > * @doe_mb: Used internally by the mailbox
> > */
> > struct pci_doe_task {
> > - struct pci_doe_protocol prot;
> > + struct pci_doe_feature feat;
> > const __le32 *request_pl;
> > size_t request_pl_sz;
> > __le32 *response_pl;
> > @@ -171,8 +171,8 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
> > length = 0;
> >
> > /* Write DOE Header */
> > - val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) |
> > - FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type);
> > + val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->feat.vid) |
> > + FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->feat.type);
> > pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
> > pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
> > FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
> > @@ -217,12 +217,12 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
> > int i = 0;
> > u32 val;
> >
> > - /* Read the first dword to get the protocol */
> > + /* Read the first dword to get the feature */
> > pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
> > - if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->prot.vid) ||
> > - (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->prot.type)) {
> > - dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Protocol] = [%04x, %02x], got [%04x, %02x]\n",
> > - doe_mb->cap_offset, task->prot.vid, task->prot.type,
> > + if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->feat.vid) ||
> > + (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->feat.type)) {
> > + dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Feature] = [%04x, %02x], got [%04x, %02x]\n",
> > + doe_mb->cap_offset, task->feat.vid, task->feat.type,
> > FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val),
> > FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val));
> > return -EIO;
> > @@ -384,7 +384,7 @@ static void pci_doe_task_complete(struct pci_doe_task *task)
> > }
> >
> > static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> > - u8 *protocol)
> > + u8 *feature)
> > {
> > u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
> > *index);
> > @@ -393,7 +393,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> > u32 response_pl;
> > int rc;
> >
> > - rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_PROTOCOL_DISCOVERY,
> > + rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_FEATURE_DISCOVERY,
> > &request_pl_le, sizeof(request_pl_le),
> > &response_pl_le, sizeof(response_pl_le));
> > if (rc < 0)
> > @@ -404,7 +404,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> >
> > response_pl = le32_to_cpu(response_pl_le);
> > *vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
> > - *protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
> > + *feature = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE,
> > response_pl);
> > *index = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX,
> > response_pl);
> > @@ -412,12 +412,12 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
> > return 0;
> > }
> >
> > -static void *pci_doe_xa_prot_entry(u16 vid, u8 prot)
> > +static void *pci_doe_xa_feat_entry(u16 vid, u8 type)
> > {
> > - return xa_mk_value((vid << 8) | prot);
> > + return xa_mk_value((vid << 8) | type);
> > }
> >
> > -static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
> > +static int pci_doe_cache_features(struct pci_doe_mb *doe_mb)
> > {
> > u8 index = 0;
> > u8 xa_idx = 0;
> > @@ -425,18 +425,18 @@ static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
> > do {
> > int rc;
> > u16 vid;
> > - u8 prot;
> > + u8 type;
> >
> > - rc = pci_doe_discovery(doe_mb, &index, &vid, &prot);
> > + rc = pci_doe_discovery(doe_mb, &index, &vid, &type);
> > if (rc)
> > return rc;
> >
> > pci_dbg(doe_mb->pdev,
> > - "[%x] Found protocol %d vid: %x prot: %x\n",
> > - doe_mb->cap_offset, xa_idx, vid, prot);
> > + "[%x] Found feature %d vid: %x type: %x\n",
> > + doe_mb->cap_offset, xa_idx, vid, type);
> >
> > - rc = xa_insert(&doe_mb->prots, xa_idx++,
> > - pci_doe_xa_prot_entry(vid, prot), GFP_KERNEL);
> > + rc = xa_insert(&doe_mb->feats, xa_idx++,
> > + pci_doe_xa_feat_entry(vid, type), GFP_KERNEL);
> > if (rc)
> > return rc;
> > } while (index);
> > @@ -460,7 +460,7 @@ static void pci_doe_cancel_tasks(struct pci_doe_mb *doe_mb)
> > * @pdev: PCI device to create the DOE mailbox for
> > * @cap_offset: Offset of the DOE mailbox
> > *
> > - * Create a single mailbox object to manage the mailbox protocol at the
> > + * Create a single mailbox object to manage the mailbox feature at the
> > * cap_offset specified.
> > *
> > * RETURNS: created mailbox object on success
> > @@ -479,7 +479,7 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
> > doe_mb->pdev = pdev;
> > doe_mb->cap_offset = cap_offset;
> > init_waitqueue_head(&doe_mb->wq);
> > - xa_init(&doe_mb->prots);
> > + xa_init(&doe_mb->feats);
> >
> > doe_mb->work_queue = alloc_ordered_workqueue("%s %s DOE [%x]", 0,
> > dev_bus_name(&pdev->dev),
> > @@ -502,11 +502,11 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
> >
> > /*
> > * The state machine and the mailbox should be in sync now;
> > - * Use the mailbox to query protocols.
> > + * Use the mailbox to query features.
> > */
> > - rc = pci_doe_cache_protocols(doe_mb);
> > + rc = pci_doe_cache_features(doe_mb);
> > if (rc) {
> > - pci_err(pdev, "[%x] failed to cache protocols : %d\n",
> > + pci_err(pdev, "[%x] failed to cache features : %d\n",
> > doe_mb->cap_offset, rc);
> > goto err_cancel;
> > }
> > @@ -515,7 +515,7 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
> >
> > err_cancel:
> > pci_doe_cancel_tasks(doe_mb);
> > - xa_destroy(&doe_mb->prots);
> > + xa_destroy(&doe_mb->feats);
> > err_destroy_wq:
> > destroy_workqueue(doe_mb->work_queue);
> > err_free:
> > @@ -533,31 +533,31 @@ static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
> > static void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb)
> > {
> > pci_doe_cancel_tasks(doe_mb);
> > - xa_destroy(&doe_mb->prots);
> > + xa_destroy(&doe_mb->feats);
> > destroy_workqueue(doe_mb->work_queue);
> > kfree(doe_mb);
> > }
> >
> > /**
> > - * pci_doe_supports_prot() - Return if the DOE instance supports the given
> > - * protocol
> > + * pci_doe_supports_feat() - Return if the DOE instance supports the given
> > + * feature
> > * @doe_mb: DOE mailbox capability to query
> > - * @vid: Protocol Vendor ID
> > - * @type: Protocol type
> > + * @vid: Feature Vendor ID
> > + * @type: Feature type
> > *
> > - * RETURNS: True if the DOE mailbox supports the protocol specified
> > + * RETURNS: True if the DOE mailbox supports the feature specified
> > */
> > -static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> > +static bool pci_doe_supports_feat(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> > {
> > unsigned long index;
> > void *entry;
> >
> > - /* The discovery protocol must always be supported */
> > - if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_PROTOCOL_DISCOVERY)
> > + /* The discovery feature must always be supported */
> > + if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_FEATURE_DISCOVERY)
> > return true;
> >
> > - xa_for_each(&doe_mb->prots, index, entry)
> > - if (entry == pci_doe_xa_prot_entry(vid, type))
> > + xa_for_each(&doe_mb->feats, index, entry)
> > + if (entry == pci_doe_xa_feat_entry(vid, type))
> > return true;
> >
> > return false;
> > @@ -585,7 +585,7 @@ static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
> > static int pci_doe_submit_task(struct pci_doe_mb *doe_mb,
> > struct pci_doe_task *task)
> > {
> > - if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
> > + if (!pci_doe_supports_feat(doe_mb, task->feat.vid, task->feat.type))
> > return -EINVAL;
> >
> > if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
> > @@ -631,8 +631,8 @@ int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
> > {
> > DECLARE_COMPLETION_ONSTACK(c);
> > struct pci_doe_task task = {
> > - .prot.vid = vendor,
> > - .prot.type = type,
> > + .feat.vid = vendor,
> > + .feat.type = type,
> > .request_pl = request,
> > .request_pl_sz = request_sz,
> > .response_pl = response,
> > @@ -659,7 +659,7 @@ EXPORT_SYMBOL_GPL(pci_doe);
> > * @vendor: Vendor ID
> > * @type: Data Object Type
> > *
> > - * Find first DOE mailbox of a PCI device which supports the given protocol.
> > + * Find first DOE mailbox of a PCI device which supports the given feature.
> > *
> > * RETURNS: Pointer to the DOE mailbox or NULL if none was found.
> > */
> > @@ -670,7 +670,7 @@ struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor,
> > unsigned long index;
> >
> > xa_for_each(&pdev->doe_mbs, index, doe_mb)
> > - if (pci_doe_supports_prot(doe_mb, vendor, type))
> > + if (pci_doe_supports_feat(doe_mb, vendor, type))
> > return doe_mb;
> >
> > return NULL;
> > diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
> > index e5f558d96493..26ca62bd3473 100644
> > --- a/include/uapi/linux/pci_regs.h
> > +++ b/include/uapi/linux/pci_regs.h
> > @@ -1131,7 +1131,7 @@
> >
> > #define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff
> > #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff
> > -#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000
> > +#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE 0x00ff0000
> > #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000
> >
> > #endif /* LINUX_PCI_REGS_H */
> > --
> > 2.40.1
> >

2023-11-03 01:28:18

by Alistair Francis

[permalink] [raw]
Subject: Re: [PATCH v9 2/3] PCI/DOE: Expose the DOE features via sysfs

On Tue, Oct 17, 2023 at 6:34 PM Lukas Wunner <[email protected]> wrote:
>
> On Fri, Oct 13, 2023 at 01:41:57PM +1000, Alistair Francis wrote:
> > +#ifdef CONFIG_SYSFS
> > +static umode_t pci_doe_sysfs_attr_is_visible(struct kobject *kobj,
> > + struct attribute *a, int n)
> > +{
> > + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
> > + struct pci_doe_mb *doe_mb;
> > + unsigned long index, j;
> > + void *entry;
> > +
> > + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> > + xa_for_each(&doe_mb->feats, j, entry)
> > + return a->mode;
> > + }
> > +
> > + return 0;
> > +}
>
> Out of curiosity: Does this method prevent creation of a
> "doe_features" directory for devices which don't have any
> DOE mailboxes?

It does once this patch (or something similar) is applied:

https://lkml.org/lkml/2022/8/24/607

GKH and I are working on getting a patch like that working and merged

Alistair

>
> (If it does, a code comment explaining that might be helpful.)
>
>
> > +const struct attribute_group pci_dev_doe_feature_group = {
> > + .name = "doe_features",
> > + .attrs = pci_dev_doe_feature_attrs,
> > + .is_visible = pci_doe_sysfs_attr_is_visible,
> > +};
>
> Nit: Wondering why the "=" is aligned for .name and .attrs
> but not for .is_visible?
>
>
> > +static void pci_doe_sysfs_feature_remove(struct pci_dev *pdev,
> > + struct pci_doe_mb *doe_mb)
> > +{
> > + struct device *dev = &pdev->dev;
> > + struct device_attribute *attrs = doe_mb->sysfs_attrs;
> > + unsigned long i;
> > + void *entry;
>
> Nit: Inverse Christmas tree?
>
>
> > +static int pci_doe_sysfs_feature_populate(struct pci_dev *pdev,
> > + struct pci_doe_mb *doe_mb)
> > +{
> > + struct device *dev = &pdev->dev;
> > + struct device_attribute *attrs;
> > + unsigned long num_features = 0;
> > + unsigned long vid, type;
> > + unsigned long i;
> > + void *entry;
> > + int ret;
> > +
> > + xa_for_each(&doe_mb->feats, i, entry)
> > + num_features++;
> > +
> > + attrs = kcalloc(num_features, sizeof(*attrs), GFP_KERNEL);
> > + if (!attrs)
> > + return -ENOMEM;
> > +
> > + doe_mb->sysfs_attrs = attrs;
> > + xa_for_each(&doe_mb->feats, i, entry) {
> > + sysfs_attr_init(&attrs[i].attr);
> > + vid = xa_to_value(entry) >> 8;
> > + type = xa_to_value(entry) & 0xFF;
> > + attrs[i].attr.name = kasprintf(GFP_KERNEL,
> > + "0x%04lX:%02lX", vid, type);
>
> Nit: Capital X conversion specifier will result in upper case hex
> characters in filename and contents, whereas existing sysfs attributes
> such as "vendor", "device" contain lower case hex characters.
>
> Might want to consider lower case x for consistency.
>
>
> > +void pci_doe_sysfs_teardown(struct pci_dev *pdev)
> > +{
> > + struct pci_doe_mb *doe_mb;
> > + unsigned long index;
> > +
> > + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> > + pci_doe_sysfs_feature_remove(pdev, doe_mb);
> > + }
>
> Nit: Curly braces not necessary.
>
>
> > @@ -1153,6 +1154,10 @@ static void pci_remove_resource_files(struct pci_dev *pdev)
> > {
> > int i;
> >
> > + if (IS_ENABLED(CONFIG_PCI_DOE)) {
> > + pci_doe_sysfs_teardown(pdev);
> > + }
>
> Nit: Curly braces not necessary.
>
>
> > @@ -1230,6 +1235,12 @@ static int pci_create_resource_files(struct pci_dev *pdev)
> > int i;
> > int retval;
> >
> > + if (IS_ENABLED(CONFIG_PCI_DOE)) {
> > + retval = pci_doe_sysfs_init(pdev);
> > + if (retval)
> > + return retval;
> > + }
> > +
> > /* Expose the PCI resources from this device as files */
> > for (i = 0; i < PCI_STD_NUM_BARS; i++) {
>
> I think this needs to be added to pci_create_sysfs_dev_files() instead.
>
> pci_create_resource_files() only deals with creation of resource files,
> as the name implies, which is unrelated to DOE features.
>
> Worse, pci_create_resource_files() is also called from
> pci_dev_resource_resize_attr(), i.e. every time user space
> writes to the "resource##n##_resize" attributes.
>
> Similarly, the call to pci_doe_sysfs_teardown() belongs in
> pci_remove_sysfs_dev_files().
>
> Thanks,
>
> Lukas

2023-11-03 02:18:19

by Alistair Francis

[permalink] [raw]
Subject: Re: [PATCH v9 2/3] PCI/DOE: Expose the DOE features via sysfs

On Fri, Oct 20, 2023 at 2:58 AM Bjorn Helgaas <[email protected]> wrote:
>
> On Fri, Oct 13, 2023 at 01:41:57PM +1000, Alistair Francis wrote:
> > The PCIe 6 specification added support for the Data Object Exchange (DOE).
> > When DOE is supported the DOE Discovery Feature must be implemented per
> > PCIe r6.1 sec 6.30.1.1. The protocol allows a requester to obtain
> > information about the other DOE features supported by the device.
> > ...
>
> > +static umode_t pci_doe_sysfs_attr_is_visible(struct kobject *kobj,
> > + struct attribute *a, int n)
> > +{
> > + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
> > + struct pci_doe_mb *doe_mb;
> > + unsigned long index, j;
> > + void *entry;
> > +
> > + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> > + xa_for_each(&doe_mb->feats, j, entry)
> > + return a->mode;
> > + }
> > +
> > + return 0;
>
> The nested loops that don't test anything look a little weird and
> maybe I'm missing something, but this looks like it returns a->mode if
> any mailbox with a feature exists, and 0 otherwise.
>
> Is that the same as this:
>
> if (pdev->doe_mbs)
> return a->mode;
>
> return 0;
>
> since it sounds like a mailbox must support at least one feature?

I don't think this is the exact same.

pdev->doe_mbs exist (created by xa_init()) even if there are no
features supported.

I do think it's important we make sure DOE features exist before we
show the property.

>
> > +}
> > +
> > +static struct attribute *pci_dev_doe_feature_attrs[] = {
> > + NULL,
> > +};
> > +
> > +const struct attribute_group pci_dev_doe_feature_group = {
> > + .name = "doe_features",
> > + .attrs = pci_dev_doe_feature_attrs,
> > + .is_visible = pci_doe_sysfs_attr_is_visible,
> > +};
> > +
> > +static ssize_t pci_doe_sysfs_feature_show(struct device *dev,
> > + struct device_attribute *attr,
> > + char *buf)
> > +{
> > + return sysfs_emit(buf, "%s\n", attr->attr.name);
> > +}
> > +
> > +static void pci_doe_sysfs_feature_remove(struct pci_dev *pdev,
> > + struct pci_doe_mb *doe_mb)
> > +{
> > + struct device *dev = &pdev->dev;
> > + struct device_attribute *attrs = doe_mb->sysfs_attrs;
> > + unsigned long i;
> > + void *entry;
> > +
> > + if (!attrs)
> > + return;
> > +
> > + doe_mb->sysfs_attrs = NULL;
> > + xa_for_each(&doe_mb->feats, i, entry) {
> > + if (attrs[i].show)
> > + sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr,
> > + pci_dev_doe_feature_group.name);
> > + kfree(attrs[i].attr.name);
> > + }
> > + kfree(attrs);
> > +}
> > +
> > +static int pci_doe_sysfs_feature_populate(struct pci_dev *pdev,
> > + struct pci_doe_mb *doe_mb)
> > +{
> > + struct device *dev = &pdev->dev;
> > + struct device_attribute *attrs;
> > + unsigned long num_features = 0;
> > + unsigned long vid, type;
> > + unsigned long i;
> > + void *entry;
> > + int ret;
> > +
> > + xa_for_each(&doe_mb->feats, i, entry)
> > + num_features++;
> > +
> > + attrs = kcalloc(num_features, sizeof(*attrs), GFP_KERNEL);
> > + if (!attrs)
> > + return -ENOMEM;
> > +
> > + doe_mb->sysfs_attrs = attrs;
> > + xa_for_each(&doe_mb->feats, i, entry) {
> > + sysfs_attr_init(&attrs[i].attr);
> > + vid = xa_to_value(entry) >> 8;
> > + type = xa_to_value(entry) & 0xFF;
> > + attrs[i].attr.name = kasprintf(GFP_KERNEL,
> > + "0x%04lX:%02lX", vid, type);
>
> What's the rationale for using "0x" on the vendor ID but not on the
> type? "0x1234:10" hints that the "10" might be decimal since it lacks
> "0x".
>
> Suggest lower-case "%04lx:%02lx" either way.

Fixed!

>
> FWIW, there's no "0x" prefix on the hex vendor IDs in "lspci -n"
> output and dmesg messages like this:
>
> pci 0000:01:00.0: [10de:13b6] type 00
>
> > + if (!attrs[i].attr.name) {
> > + ret = -ENOMEM;
> > + goto fail;
> > + }
> > +
> > + attrs[i].attr.mode = 0444;
> > + attrs[i].show = pci_doe_sysfs_feature_show;
> > +
> > + ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr,
> > + pci_dev_doe_feature_group.name);
> > + if (ret) {
> > + attrs[i].show = NULL;
> > + goto fail;
> > + }
> > + }
> > +
> > + return 0;
> > +
> > +fail:
> > + pci_doe_sysfs_feature_remove(pdev, doe_mb);
> > + return ret;
>
> Not sure we should treat this failure this seriously. Looks like it
> will prevent creation of the BAR resource files, and possibly even
> abort pci_sysfs_init() early. I think the pci_dev will still be
> usable (lacking DOE sysfs) even if this fails.

I can change the call in pci_create_resource_files() to not return?

>
> > +}
> > +
> > +void pci_doe_sysfs_teardown(struct pci_dev *pdev)
> > +{
> > + struct pci_doe_mb *doe_mb;
> > + unsigned long index;
> > +
> > + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> > + pci_doe_sysfs_feature_remove(pdev, doe_mb);
> > + }
> > +}
> > +
> > +int pci_doe_sysfs_init(struct pci_dev *pdev)
> > +{
> > + struct pci_doe_mb *doe_mb;
> > + unsigned long index;
> > + int ret;
> > +
> > + xa_for_each(&pdev->doe_mbs, index, doe_mb) {
> > + ret = pci_doe_sysfs_feature_populate(pdev, doe_mb);
> > + if (ret)
> > + return ret;
> > + }
>
> I agree with Lukas that pci_create_resource_files() is not the right
> place to call this.
>
> I try hard to avoid calling *anything* from the
> pci_create_sysfs_dev_files() path because it has the nasty
> "sysfs_initialized" check and the associated pci_sysfs_init()
> initcall.
>
> It's so much cleaner when we can set up static attributes that are
> automatically added in the device_add() path. I don't know whether
> that's possible. I see lots of discussion with Greg KH that might be
> related, but I'm not sure.

I don't think it's possible, at least not that I or anyone else has
been able to figure out yet.

>
> I do know that we enumerate the mailboxes and features during
> pci_init_capabilities(), which happens before device_add(), so the
> information about which attributes should be present is at least
> *available* early enough:
>
> pci_host_probe
> pci_scan_root_bus_bridge
> ...
> pci_scan_single_device
> pci_device_add
> pci_init_capabilities
> pci_doe_init
> while (pci_find_next_ext_capability(PCI_EXT_CAP_ID_DOE))
> pci_doe_create_mb
> pci_doe_cache_features
> pci_doe_discovery
> xa_insert(&doe_mb->feats) <--
> device_add
> device_add_attrs
> device_add_groups
> pci_bus_add_devices
> pci_bus_add_device
> pci_create_sysfs_dev_files
> ...
> pci_doe_sysfs_init <--
> xa_for_each(&pdev->doe_mbs)
> pci_doe_sysfs_feature_populate
> xa_for_each(&doe_mb->feats)
> sysfs_add_file_to_group(pci_dev_doe_feature_group.name)
>
> Is it feasible to build an attribute group in pci_doe_init() and add
> it to dev->groups so device_add() will automatically add them?

That doesn't work as the sysfs_add_file_to_group() function will seg
fault when trying to find the parent as I don't think it exists yet.

[ 0.767581] BUG: kernel NULL pointer dereference, address: 0000000000000008
[ 0.767835] #PF: supervisor read access in kernel mode
[ 0.767835] #PF: error_code(0x0000) - not-present page
[ 0.767835] PGD 0 P4D 0
[ 0.767835] Oops: 0000 [#1] PREEMPT SMP NOPTI
[ 0.767835] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
6.6.0-10270-g5dda351a02c8-dirty #10
[ 0.767835] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
BIOS rel-1.16.2-14-g1e1da7a96300-prebuilt.qemu.org 04/01/2014
[ 0.767835] RIP: 0010:kernfs_find_and_get_ns+0x10/0x70
[ 0.767835] Code: 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90
90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 41 55 49 89 d5 41 54 49 89
f4 55 53 <48> 8b 0
[ 0.767835] RSP: 0018:ffff96f9c00138a8 EFLAGS: 00000246
[ 0.767835] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000030
[ 0.767835] RDX: 0000000000000000 RSI: ffffffffafec53d9 RDI: 0000000000000000
[ 0.767835] RBP: ffff957b4180e0b8 R08: 0000000000000000 R09: 0000000000ffff10
[ 0.767835] R10: 0000000000000000 R11: ffffffffaf677c80 R12: ffffffffafec53d9
[ 0.767835] R13: 0000000000000000 R14: ffff957b413c1ea0 R15: ffff957b4180e000
[ 0.767835] FS: 0000000000000000(0000) GS:ffff957bbdc00000(0000)
knlGS:0000000000000000
[ 0.767835] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 0.767835] CR2: 0000000000000008 CR3: 000000004442e000 CR4: 00000000000006f0
[ 0.767835] Call Trace:
[ 0.767835] <TASK>
[ 0.767835] ? __die+0x1e/0x60
[ 0.767835] ? page_fault_oops+0x17c/0x470
[ 0.767835] ? search_module_extables+0x14/0x50
[ 0.767835] ? exc_page_fault+0x67/0x150
[ 0.767835] ? asm_exc_page_fault+0x26/0x30
[ 0.767835] ? __pfx_pci_mmcfg_read+0x10/0x10
[ 0.767835] ? kernfs_find_and_get_ns+0x10/0x70
[ 0.767835] ? kasprintf+0x5a/0x80
[ 0.767835] sysfs_add_file_to_group+0x4c/0x110
[ 0.767835] pci_doe_sysfs_init+0x13b/0x240
[ 0.767835] pci_device_add+0x1d7/0x620
[ 0.767835] pci_scan_single_device+0xc8/0x100
[ 0.767835] pci_scan_slot+0x6f/0x1e0
[ 0.767835] pci_scan_child_bus_extend+0x30/0x210
[ 0.767835] pci_scan_bridge_extend+0x5f4/0x710
[ 0.767835] pci_scan_child_bus_extend+0xc2/0x210
[ 0.767835] acpi_pci_root_create+0x283/0x2f0
[ 0.767835] pci_acpi_scan_root+0x199/0x200
[ 0.767835] acpi_pci_root_add+0x1ba/0x370
[ 0.767835] acpi_bus_attach+0x140/0x260
[ 0.767835] ? __pfx_acpi_dev_for_one_check+0x10/0x10
[ 0.767835] device_for_each_child+0x68/0xa0
[ 0.767835] acpi_dev_for_each_child+0x37/0x60
[ 0.767835] ? __pfx_acpi_bus_attach+0x10/0x10
[ 0.767835] acpi_bus_attach+0x21e/0x260
[ 0.767835] ? __pfx_acpi_dev_for_one_check+0x10/0x10
[ 0.767835] device_for_each_child+0x68/0xa0
[ 0.767835] acpi_dev_for_each_child+0x37/0x60
[ 0.767835] ? __pfx_acpi_bus_attach+0x10/0x10
[ 0.767835] acpi_bus_attach+0x21e/0x260
[ 0.767835] acpi_bus_scan+0x6b/0x1e0
[ 0.767835] acpi_scan_init+0xdc/0x290
[ 0.767835] acpi_init+0x22b/0x500
[ 0.767835] ? __pfx_acpi_init+0x10/0x10
[ 0.767835] do_one_initcall+0x56/0x220
[ 0.767835] kernel_init_freeable+0x19e/0x2d0
[ 0.767835] ? __pfx_kernel_init+0x10/0x10
[ 0.767835] kernel_init+0x15/0x1b0
[ 0.767835] ret_from_fork+0x2f/0x50
[ 0.767835] ? __pfx_kernel_init+0x10/0x10
[ 0.767835] ret_from_fork_asm+0x1b/0x30

I can move this to pci_create_sysfs_dev_files() instead if that's at
least better?

>
> It looks like __power_supply_register() does something like that:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/power/supply/power_supply_core.c?id=v6.5#n1375
>
> > --- a/include/linux/pci-doe.h
> > +++ b/include/linux/pci-doe.h
> > @@ -22,4 +22,6 @@ int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
> > const void *request, size_t request_sz,
> > void *response, size_t response_sz);
> >
> > +int pci_doe_sysfs_init(struct pci_dev *pci_dev);
> > +void pci_doe_sysfs_teardown(struct pci_dev *pdev);
>
> These declarations look like they should be in drivers/pci/pci.h as
> well. I don't think anything outside the PCI core should need these.

I will move these.

Alistair

>
> Bjorn