2019-09-25 23:09:26

by Hanna Hawa

[permalink] [raw]
Subject: [PATCH v4 0/2] Add an API for edac device, for mulriple errors

Add an API for EDAC device to report for multiple errors, and move the
old report function to use the new API.

Changes from v3:
----------------
- Move count check to inline function
- Fix count variable describtion
(Reported-by: kbuild test robot <[email protected]>)

Changes from v2:
----------------
- Remove copy of edac_device_handle_*() functions, modify the existing
functions.

Changes from v1:
----------------
- use 'unsigned int' instead of u16
- update variable name to be count
- remove WARN_ON and simply exit if count is zero
- add inline functions in header file

Hanna Hawa (2):
edac: Add an API for edac device to report for multiple errors
edac: move edac_device_handle_*() API functions to header

drivers/edac/edac_device.c | 44 +++++++++++++++++----------------
drivers/edac/edac_device.h | 50 +++++++++++++++++++++++++++++++++-----
2 files changed, 67 insertions(+), 27 deletions(-)

--
2.17.1


2019-09-25 23:09:54

by Hanna Hawa

[permalink] [raw]
Subject: [PATCH v4 2/2] edac: move edac_device_handle_*() API functions to header

Move edac_device_handle_*() functions from source file to header file as
inline funtcion that use the new API with single error.

Signed-off-by: Hanna Hawa <[email protected]>
Acked-by: Robert Richter <[email protected]>
---
drivers/edac/edac_device.c | 14 --------------
drivers/edac/edac_device.h | 35 +++++++++++++----------------------
2 files changed, 13 insertions(+), 36 deletions(-)

diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 6701fd3a8ce0..9460d892d222 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -645,17 +645,3 @@ void __edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
block ? block->name : "N/A", count, msg);
}
EXPORT_SYMBOL_GPL(__edac_device_handle_ue);
-
-void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
-{
- __edac_device_handle_ce(edac_dev, 1, inst_nr, block_nr, msg);
-}
-EXPORT_SYMBOL_GPL(edac_device_handle_ce);
-
-void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
-{
- __edac_device_handle_ue(edac_dev, 1, inst_nr, block_nr, msg);
-}
-EXPORT_SYMBOL_GPL(edac_device_handle_ue);
diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
index 7869f036138a..e1d4e2544e24 100644
--- a/drivers/edac/edac_device.h
+++ b/drivers/edac/edac_device.h
@@ -312,28 +312,19 @@ void __edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
unsigned int count, int inst_nr, int block_nr,
const char *msg);

-/**
- * edac_device_handle_ue():
- * perform a common output and handling of an 'edac_dev' UE event
- *
- * @edac_dev: pointer to struct &edac_device_ctl_info
- * @inst_nr: number of the instance where the UE error happened
- * @block_nr: number of the block where the UE error happened
- * @msg: message to be printed
- */
-extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg);
-/**
- * edac_device_handle_ce():
- * perform a common output and handling of an 'edac_dev' CE event
- *
- * @edac_dev: pointer to struct &edac_device_ctl_info
- * @inst_nr: number of the instance where the CE error happened
- * @block_nr: number of the block where the CE error happened
- * @msg: message to be printed
- */
-extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg);
+static inline void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
+ int inst_nr, int block_nr,
+ const char *msg)
+{
+ __edac_device_handle_ce(edac_dev, 1, inst_nr, block_nr, msg);
+}
+
+static inline void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
+ int inst_nr, int block_nr,
+ const char *msg)
+{
+ __edac_device_handle_ue(edac_dev, 1, inst_nr, block_nr, msg);
+}

static inline void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
unsigned int count, int inst_nr,
--
2.17.1

2019-09-26 03:52:36

by Hanna Hawa

[permalink] [raw]
Subject: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors

Add an API for EDAC device to report multiple errors with same type.

Signed-off-by: Hanna Hawa <[email protected]>
Acked-by: Robert Richter <[email protected]>
---
drivers/edac/edac_device.c | 56 ++++++++++++++++++++++++--------------
drivers/edac/edac_device.h | 47 ++++++++++++++++++++++++++++++++
2 files changed, 83 insertions(+), 20 deletions(-)

diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 65cf2b9355c4..6701fd3a8ce0 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -555,8 +555,9 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
return edac_dev->panic_on_ue;
}

-void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
+void __edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;
@@ -582,23 +583,24 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,

if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
- block->counters.ce_count++;
+ block->counters.ce_count += count;
}

/* Propagate the count up the 'totals' tree */
- instance->counters.ce_count++;
- edac_dev->counters.ce_count++;
+ instance->counters.ce_count += count;
+ edac_dev->counters.ce_count += count;

if (edac_device_get_log_ce(edac_dev))
edac_device_printk(edac_dev, KERN_WARNING,
- "CE: %s instance: %s block: %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ "CE: %s instance: %s block: %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);
}
-EXPORT_SYMBOL_GPL(edac_device_handle_ce);
+EXPORT_SYMBOL_GPL(__edac_device_handle_ce);

-void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
+void __edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;
@@ -624,22 +626,36 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,

if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
- block->counters.ue_count++;
+ block->counters.ue_count += count;
}

/* Propagate the count up the 'totals' tree */
- instance->counters.ue_count++;
- edac_dev->counters.ue_count++;
+ instance->counters.ue_count += count;
+ edac_dev->counters.ue_count += count;

if (edac_device_get_log_ue(edac_dev))
edac_device_printk(edac_dev, KERN_EMERG,
- "UE: %s instance: %s block: %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ "UE: %s instance: %s block: %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);

if (edac_device_get_panic_on_ue(edac_dev))
- panic("EDAC %s: UE instance: %s block %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);
+}
+EXPORT_SYMBOL_GPL(__edac_device_handle_ue);
+
+void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
+ int inst_nr, int block_nr, const char *msg)
+{
+ __edac_device_handle_ce(edac_dev, 1, inst_nr, block_nr, msg);
+}
+EXPORT_SYMBOL_GPL(edac_device_handle_ce);
+
+void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
+ int inst_nr, int block_nr, const char *msg)
+{
+ __edac_device_handle_ue(edac_dev, 1, inst_nr, block_nr, msg);
}
EXPORT_SYMBOL_GPL(edac_device_handle_ue);
diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
index 1aaba74ae411..7869f036138a 100644
--- a/drivers/edac/edac_device.h
+++ b/drivers/edac/edac_device.h
@@ -285,6 +285,33 @@ extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
*/
extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);

+/**
+ * __edac_device_handle_ue():
+ * perform a common output and handling of an 'edac_dev' UE event
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @count: number of errors of the same type
+ * @inst_nr: number of the instance where the UE error happened
+ * @block_nr: number of the block where the UE error happened
+ * @msg: message to be printed
+ */
+void __edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg);
+/**
+ * __edac_device_handle_ce():
+ * perform a common output and handling of an 'edac_dev' CE event
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @count: number of errors of the same type
+ * @inst_nr: number of the instance where the CE error happened
+ * @block_nr: number of the block where the CE error happened
+ * @msg: message to be printed
+ */
+void __edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg);
+
/**
* edac_device_handle_ue():
* perform a common output and handling of an 'edac_dev' UE event
@@ -308,6 +335,26 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg);

+static inline void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr,
+ int block_nr, const char *msg)
+{
+ if (!count)
+ return;
+
+ __edac_device_handle_ce(edac_dev, count, inst_nr, block_nr, msg);
+}
+
+static inline void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr,
+ int block_nr, const char *msg)
+{
+ if (!count)
+ return;
+
+ __edac_device_handle_ue(edac_dev, count, inst_nr, block_nr, msg);
+}
+
/**
* edac_device_alloc_index: Allocate a unique device index number
*
--
2.17.1

2019-09-26 07:37:26

by Robert Richter

[permalink] [raw]
Subject: Re: [PATCH v4 0/2] Add an API for edac device, for mulriple errors

On 23.09.19 20:17:39, Hanna Hawa wrote:
> Add an API for EDAC device to report for multiple errors, and move the
> old report function to use the new API.
>
> Changes from v3:
> ----------------
> - Move count check to inline function
> - Fix count variable describtion
> (Reported-by: kbuild test robot <[email protected]>)

Looks good to me. If another respin happens, please fix the 80 char
limitation for the static inline functions, you could line break after
the type definition.

Thanks,

-Robert

2019-09-30 14:52:48

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors

On Mon, Sep 23, 2019 at 08:17:40PM +0100, Hanna Hawa wrote:
> +void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> + int inst_nr, int block_nr, const char *msg)
> +{
> + __edac_device_handle_ce(edac_dev, 1, inst_nr, block_nr, msg);
> +}
> +EXPORT_SYMBOL_GPL(edac_device_handle_ce);

Eww, I don't like that: exporting the function *and* the __ counterpart.
The user will get confused and that is unnecessary.

See below for a better version. This way you solve the whole deal with a
single patch.

---
From: Hanna Hawa <[email protected]>
Date: Mon, 23 Sep 2019 20:17:40 +0100
Subject: [PATCH] EDAC/device: Rework error logging API

Make the main workhorse the "count" functions which can log a @count
of errors. Have the current APIs edac_device_handle_{ce,ue}() call
the _count() variants and this way keep the exported symbols number
unchanged.

[ bp: Rewrite. ]

Signed-off-by: Hanna Hawa <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: James Morse <[email protected]>
Cc: [email protected]
Cc: linux-edac <[email protected]>
Cc: Mauro Carvalho Chehab <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: Tony Luck <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
drivers/edac/edac_device.c | 44 ++++++++++++++++---------------
drivers/edac/edac_device.h | 54 ++++++++++++++++++++++++++++++--------
2 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 65cf2b9355c4..d4d8bed5b55d 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -555,8 +555,9 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
return edac_dev->panic_on_ue;
}

-void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;
@@ -582,23 +583,24 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,

if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
- block->counters.ce_count++;
+ block->counters.ce_count += count;
}

/* Propagate the count up the 'totals' tree */
- instance->counters.ce_count++;
- edac_dev->counters.ce_count++;
+ instance->counters.ce_count += count;
+ edac_dev->counters.ce_count += count;

if (edac_device_get_log_ce(edac_dev))
edac_device_printk(edac_dev, KERN_WARNING,
- "CE: %s instance: %s block: %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ "CE: %s instance: %s block: %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);
}
-EXPORT_SYMBOL_GPL(edac_device_handle_ce);
+EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);

-void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;
@@ -624,22 +626,22 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,

if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
- block->counters.ue_count++;
+ block->counters.ue_count += count;
}

/* Propagate the count up the 'totals' tree */
- instance->counters.ue_count++;
- edac_dev->counters.ue_count++;
+ instance->counters.ue_count += count;
+ edac_dev->counters.ue_count += count;

if (edac_device_get_log_ue(edac_dev))
edac_device_printk(edac_dev, KERN_EMERG,
- "UE: %s instance: %s block: %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ "UE: %s instance: %s block: %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);

if (edac_device_get_panic_on_ue(edac_dev))
- panic("EDAC %s: UE instance: %s block %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);
}
-EXPORT_SYMBOL_GPL(edac_device_handle_ue);
+EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
index 1aaba74ae411..c4c0e0bdce14 100644
--- a/drivers/edac/edac_device.h
+++ b/drivers/edac/edac_device.h
@@ -286,27 +286,60 @@ extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);

/**
- * edac_device_handle_ue():
- * perform a common output and handling of an 'edac_dev' UE event
+ * Log correctable errors.
*
* @edac_dev: pointer to struct &edac_device_ctl_info
- * @inst_nr: number of the instance where the UE error happened
- * @block_nr: number of the block where the UE error happened
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
+ * @msg: message to be printed
+ */
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg);
+
+/**
+ * Log uncorrectable errors.
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
* @msg: message to be printed
*/
-extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg);
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg);
+
/**
- * edac_device_handle_ce():
- * perform a common output and handling of an 'edac_dev' CE event
+ * edac_device_handle_ce(): Log a single correctable error
*
* @edac_dev: pointer to struct &edac_device_ctl_info
* @inst_nr: number of the instance where the CE error happened
* @block_nr: number of the block where the CE error happened
* @msg: message to be printed
*/
-extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg);
+static inline void
+edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
+ int block_nr, const char *msg)
+{
+ edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
+
+/**
+ * edac_device_handle_ue(): Log a single uncorrectable error
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the UE error happened
+ * @block_nr: number of the block where the UE error happened
+ * @msg: message to be printed
+ */
+static inline void
+edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
+ int block_nr, const char *msg)
+{
+ edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
+}

/**
* edac_device_alloc_index: Allocate a unique device index number
@@ -316,5 +349,4 @@ extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
*/
extern int edac_device_alloc_index(void);
extern const char *edac_layer_name[];
-
#endif
--
2.21.0

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2019-10-01 06:59:15

by Robert Richter

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors

On 30.09.19 16:50:46, Borislav Petkov wrote:
> ----------------------------------------------------------------------
> On Mon, Sep 23, 2019 at 08:17:40PM +0100, Hanna Hawa wrote:
> > +void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> > + int inst_nr, int block_nr, const char *msg)
> > +{
> > + __edac_device_handle_ce(edac_dev, 1, inst_nr, block_nr, msg);
> > +}
> > +EXPORT_SYMBOL_GPL(edac_device_handle_ce);
>
> Eww, I don't like that: exporting the function *and* the __ counterpart.
> The user will get confused and that is unnecessary.

It is *not* the counterpart. The __* version already has the
additional count argument in. There are 2 patches:

1) introduce new function __edac_device_handle_ce/ue() including the
*_count() inline functions, but keep the old symbols (note the
count=1 parameter).

2) remove old symbol edac_device_handle_ce/ue() and replace it with an
inline function to use the counterpart symbol too.

The first patch only adds functionality but keeps the abi. Thus it
makes a backport easier. The 2nd switches completely to the new
function.

-Robert

2019-10-01 08:36:24

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors

On Tue, Oct 01, 2019 at 06:56:58AM +0000, Robert Richter wrote:
> It is *not* the counterpart. The __* version already has the...

Lemme cut to the chase:

"Make the main workhorse the "count" functions which can log a @count
of errors. Have the current APIs edac_device_handle_{ce,ue}() call
the _count() variants and this way keep the exported symbols number
unchanged."

I want to see exactly *two* pairs of functions:

edac_device_handle_{ce,ue}_count - logs a @count of errors
edac_device_handle_{ce,ue} - logs a single error

Not three pairs. I.e., the "__" versions are not needed.

> The first patch only adds functionality but keeps the abi. Thus it
> makes a backport easier.

Works just the same with my version - single backport.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2019-10-01 09:49:12

by Robert Richter

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors

On 01.10.19 10:32:42, Borislav Petkov wrote:

> ----------------------------------------------------------------------
> On Tue, Oct 01, 2019 at 06:56:58AM +0000, Robert Richter wrote:
> > It is *not* the counterpart. The __* version already has the...
>
> Lemme cut to the chase:
>
> "Make the main workhorse the "count" functions which can log a @count
> of errors. Have the current APIs edac_device_handle_{ce,ue}() call
> the _count() variants and this way keep the exported symbols number
> unchanged."
>
> I want to see exactly *two* pairs of functions:
>
> edac_device_handle_{ce,ue}_count - logs a @count of errors
> edac_device_handle_{ce,ue} - logs a single error
>
> Not three pairs. I.e., the "__" versions are not needed.
>
> > The first patch only adds functionality but keeps the abi. Thus it
> > makes a backport easier.
>
> Works just the same with my version - single backport.

If you move to static inline for edac_device_handle_{ce,ue} the
symbols vanish and this breaks the abi. That's why the split in two
patches.

Your comment to not have a __ version as a third variant of the
interface makes sense to me. But to keep ABI your patch still needs to
be split. The first patch still must contain symbols for
edac_device_handle_{ce,ue}. I am not sure if ABI compatability is
something we want to make easier here. I personally like the approach.

-Robert

2019-10-01 10:26:51

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors

On Tue, Oct 01, 2019 at 09:47:07AM +0000, Robert Richter wrote:
> If you move to static inline for edac_device_handle_{ce,ue} the
> symbols vanish and this breaks the abi. That's why the split in two
> patches.

ABI issues do not concern upstream. And that coming from me working at a
company who dance a lot to make ABI happy.

Also, I'm missing the reasoning why you use the ABI as an argument at
all: do you know of a particular case where people are thinking of
backporting this or this is all hypothetical.

> Your comment to not have a __ version as a third variant of the
> interface makes sense to me. But to keep ABI your patch still needs to
> be split.

Not really - normally, when you fix ABI issues with symbols
disappearing, all of a sudden, you add dummy ones so that the ABI
checker is happy.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2019-10-01 10:56:48

by Robert Richter

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors

On 01.10.19 12:25:39, Borislav Petkov wrote:
> On Tue, Oct 01, 2019 at 09:47:07AM +0000, Robert Richter wrote:
> > If you move to static inline for edac_device_handle_{ce,ue} the
> > symbols vanish and this breaks the abi. That's why the split in two
> > patches.
>
> ABI issues do not concern upstream. And that coming from me working at a
> company who dance a lot to make ABI happy.
>
> Also, I'm missing the reasoning why you use the ABI as an argument at
> all: do you know of a particular case where people are thinking of
> backporting this or this is all hypothetical.
>
> > Your comment to not have a __ version as a third variant of the
> > interface makes sense to me. But to keep ABI your patch still needs to
> > be split.
>
> Not really - normally, when you fix ABI issues with symbols
> disappearing, all of a sudden, you add dummy ones so that the ABI
> checker is happy.

Let's go with a single patch then and the function naming you
suggested before.

Thanks,

-Robert

2019-10-02 09:31:01

by Hanna Hawa

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors



On 9/30/2019 5:50 PM, Borislav Petkov wrote:
> On Mon, Sep 23, 2019 at 08:17:40PM +0100, Hanna Hawa wrote:
>> +void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
>> + int inst_nr, int block_nr, const char *msg)
>> +{
>> + __edac_device_handle_ce(edac_dev, 1, inst_nr, block_nr, msg);
>> +}
>> +EXPORT_SYMBOL_GPL(edac_device_handle_ce);
>
> Eww, I don't like that: exporting the function *and* the __ counterpart.
> The user will get confused and that is unnecessary.
>
> See below for a better version. This way you solve the whole deal with a
> single patch.
I'm okay with this version, minor comment below.

>
> ---
> From: Hanna Hawa <[email protected]>
> Date: Mon, 23 Sep 2019 20:17:40 +0100
> Subject: [PATCH] EDAC/device: Rework error logging API
>
> Make the main workhorse the "count" functions which can log a @count
> of errors. Have the current APIs edac_device_handle_{ce,ue}() call
> the _count() variants and this way keep the exported symbols number
> unchanged.
>
> [ bp: Rewrite. ]
>
> Signed-off-by: Hanna Hawa <[email protected]>
> Signed-off-by: Borislav Petkov <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Cc: James Morse <[email protected]>
> Cc: [email protected]
> Cc: linux-edac <[email protected]>
> Cc: Mauro Carvalho Chehab <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: Tony Luck <[email protected]>
> Link: https://lkml.kernel.org/r/[email protected]
> ---
> drivers/edac/edac_device.c | 44 ++++++++++++++++---------------
> drivers/edac/edac_device.h | 54 ++++++++++++++++++++++++++++++--------
> 2 files changed, 66 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
> index 65cf2b9355c4..d4d8bed5b55d 100644
> --- a/drivers/edac/edac_device.c
> +++ b/drivers/edac/edac_device.c
> @@ -555,8 +555,9 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
> return edac_dev->panic_on_ue;
> }
>
> -void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> - int inst_nr, int block_nr, const char *msg)
> +void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
> + unsigned int count, int inst_nr, int block_nr,
> + const char *msg)
> {
> struct edac_device_instance *instance;
> struct edac_device_block *block = NULL;

Missing count check, same in *_ue_count():
if (count)
return;

> @@ -582,23 +583,24 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
>
> if (instance->nr_blocks > 0) {
> block = instance->blocks + block_nr;
> - block->counters.ce_count++;
> + block->counters.ce_count += count;
> }
>
> /* Propagate the count up the 'totals' tree */
> - instance->counters.ce_count++;
> - edac_dev->counters.ce_count++;
> + instance->counters.ce_count += count;
> + edac_dev->counters.ce_count += count;
>
> if (edac_device_get_log_ce(edac_dev))
> edac_device_printk(edac_dev, KERN_WARNING,
> - "CE: %s instance: %s block: %s '%s'\n",
> - edac_dev->ctl_name, instance->name,
> - block ? block->name : "N/A", msg);
> + "CE: %s instance: %s block: %s count: %d '%s'\n",
> + edac_dev->ctl_name, instance->name,
> + block ? block->name : "N/A", count, msg);
> }
> -EXPORT_SYMBOL_GPL(edac_device_handle_ce);
> +EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);
>
> -void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
> - int inst_nr, int block_nr, const char *msg)
> +void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
> + unsigned int count, int inst_nr, int block_nr,
> + const char *msg)
> {
> struct edac_device_instance *instance;
> struct edac_device_block *block = NULL;
> @@ -624,22 +626,22 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
>
> if (instance->nr_blocks > 0) {
> block = instance->blocks + block_nr;
> - block->counters.ue_count++;
> + block->counters.ue_count += count;
> }
>
> /* Propagate the count up the 'totals' tree */
> - instance->counters.ue_count++;
> - edac_dev->counters.ue_count++;
> + instance->counters.ue_count += count;
> + edac_dev->counters.ue_count += count;
>
> if (edac_device_get_log_ue(edac_dev))
> edac_device_printk(edac_dev, KERN_EMERG,
> - "UE: %s instance: %s block: %s '%s'\n",
> - edac_dev->ctl_name, instance->name,
> - block ? block->name : "N/A", msg);
> + "UE: %s instance: %s block: %s count: %d '%s'\n",
> + edac_dev->ctl_name, instance->name,
> + block ? block->name : "N/A", count, msg);
>
> if (edac_device_get_panic_on_ue(edac_dev))
> - panic("EDAC %s: UE instance: %s block %s '%s'\n",
> - edac_dev->ctl_name, instance->name,
> - block ? block->name : "N/A", msg);
> + panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
> + edac_dev->ctl_name, instance->name,
> + block ? block->name : "N/A", count, msg);
> }
> -EXPORT_SYMBOL_GPL(edac_device_handle_ue);
> +EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
> diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
> index 1aaba74ae411..c4c0e0bdce14 100644
> --- a/drivers/edac/edac_device.h
> +++ b/drivers/edac/edac_device.h
> @@ -286,27 +286,60 @@ extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
> extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
>
> /**
> - * edac_device_handle_ue():
> - * perform a common output and handling of an 'edac_dev' UE event
> + * Log correctable errors.
> *
> * @edac_dev: pointer to struct &edac_device_ctl_info
> - * @inst_nr: number of the instance where the UE error happened
> - * @block_nr: number of the block where the UE error happened
> + * @inst_nr: number of the instance where the CE error happened
> + * @count: Number of errors to log.
> + * @block_nr: number of the block where the CE error happened
> + * @msg: message to be printed
> + */
> +void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
> + unsigned int count, int inst_nr, int block_nr,
> + const char *msg);
> +
> +/**
> + * Log uncorrectable errors.
> + *
> + * @edac_dev: pointer to struct &edac_device_ctl_info
> + * @inst_nr: number of the instance where the CE error happened
> + * @count: Number of errors to log.
> + * @block_nr: number of the block where the CE error happened
> * @msg: message to be printed
> */
> -extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
> - int inst_nr, int block_nr, const char *msg);
> +void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
> + unsigned int count, int inst_nr, int block_nr,
> + const char *msg);
> +
> /**
> - * edac_device_handle_ce():
> - * perform a common output and handling of an 'edac_dev' CE event
> + * edac_device_handle_ce(): Log a single correctable error
> *
> * @edac_dev: pointer to struct &edac_device_ctl_info
> * @inst_nr: number of the instance where the CE error happened
> * @block_nr: number of the block where the CE error happened
> * @msg: message to be printed
> */
> -extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> - int inst_nr, int block_nr, const char *msg);
> +static inline void
> +edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
> + int block_nr, const char *msg)
> +{
> + edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
> +}
> +
> +/**
> + * edac_device_handle_ue(): Log a single uncorrectable error
> + *
> + * @edac_dev: pointer to struct &edac_device_ctl_info
> + * @inst_nr: number of the instance where the UE error happened
> + * @block_nr: number of the block where the UE error happened
> + * @msg: message to be printed
> + */
> +static inline void
> +edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
> + int block_nr, const char *msg)
> +{
> + edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
> +}
>
> /**
> * edac_device_alloc_index: Allocate a unique device index number
> @@ -316,5 +349,4 @@ extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> */
> extern int edac_device_alloc_index(void);
> extern const char *edac_layer_name[];
> -
> #endif
>

2019-10-04 16:57:56

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH -v2] EDAC/device: Rework error logging API

On Wed, Oct 02, 2019 at 12:25:55PM +0300, Hawa, Hanna wrote:
> Missing count check, same in *_ue_count():
> if (count)

I think you meant:

if (!count)

Anyway, fixed:

---
From 0e49a27859b947d2abded91ee3558639bf8ec0bd Mon Sep 17 00:00:00 2001
From: Hanna Hawa <[email protected]>
Date: Mon, 23 Sep 2019 20:17:40 +0100
Subject: [PATCH] EDAC/device: Rework error logging API

Make the main workhorse the "count" functions which can log a @count
of errors. Have the current APIs edac_device_handle_{ce,ue}() call
the _count() variants and this way keep the exported symbols number
unchanged.

[ bp: Rewrite. ]

Signed-off-by: Hanna Hawa <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: James Morse <[email protected]>
Cc: [email protected]
Cc: linux-edac <[email protected]>
Cc: Mauro Carvalho Chehab <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: Tony Luck <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
drivers/edac/edac_device.c | 50 ++++++++++++++++++++---------------
drivers/edac/edac_device.h | 54 ++++++++++++++++++++++++++++++--------
2 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 65cf2b9355c4..8c4d947fb848 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -555,12 +555,16 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
return edac_dev->panic_on_ue;
}

-void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;

+ if (!count)
+ return;
+
if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
edac_device_printk(edac_dev, KERN_ERR,
"INTERNAL ERROR: 'instance' out of range "
@@ -582,27 +586,31 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,

if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
- block->counters.ce_count++;
+ block->counters.ce_count += count;
}

/* Propagate the count up the 'totals' tree */
- instance->counters.ce_count++;
- edac_dev->counters.ce_count++;
+ instance->counters.ce_count += count;
+ edac_dev->counters.ce_count += count;

if (edac_device_get_log_ce(edac_dev))
edac_device_printk(edac_dev, KERN_WARNING,
- "CE: %s instance: %s block: %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ "CE: %s instance: %s block: %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);
}
-EXPORT_SYMBOL_GPL(edac_device_handle_ce);
+EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);

-void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;

+ if (!count)
+ return;
+
if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
edac_device_printk(edac_dev, KERN_ERR,
"INTERNAL ERROR: 'instance' out of range "
@@ -624,22 +632,22 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,

if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
- block->counters.ue_count++;
+ block->counters.ue_count += count;
}

/* Propagate the count up the 'totals' tree */
- instance->counters.ue_count++;
- edac_dev->counters.ue_count++;
+ instance->counters.ue_count += count;
+ edac_dev->counters.ue_count += count;

if (edac_device_get_log_ue(edac_dev))
edac_device_printk(edac_dev, KERN_EMERG,
- "UE: %s instance: %s block: %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ "UE: %s instance: %s block: %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);

if (edac_device_get_panic_on_ue(edac_dev))
- panic("EDAC %s: UE instance: %s block %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);
}
-EXPORT_SYMBOL_GPL(edac_device_handle_ue);
+EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
index 1aaba74ae411..c4c0e0bdce14 100644
--- a/drivers/edac/edac_device.h
+++ b/drivers/edac/edac_device.h
@@ -286,27 +286,60 @@ extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);

/**
- * edac_device_handle_ue():
- * perform a common output and handling of an 'edac_dev' UE event
+ * Log correctable errors.
*
* @edac_dev: pointer to struct &edac_device_ctl_info
- * @inst_nr: number of the instance where the UE error happened
- * @block_nr: number of the block where the UE error happened
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
+ * @msg: message to be printed
+ */
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg);
+
+/**
+ * Log uncorrectable errors.
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
* @msg: message to be printed
*/
-extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg);
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg);
+
/**
- * edac_device_handle_ce():
- * perform a common output and handling of an 'edac_dev' CE event
+ * edac_device_handle_ce(): Log a single correctable error
*
* @edac_dev: pointer to struct &edac_device_ctl_info
* @inst_nr: number of the instance where the CE error happened
* @block_nr: number of the block where the CE error happened
* @msg: message to be printed
*/
-extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg);
+static inline void
+edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
+ int block_nr, const char *msg)
+{
+ edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
+
+/**
+ * edac_device_handle_ue(): Log a single uncorrectable error
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the UE error happened
+ * @block_nr: number of the block where the UE error happened
+ * @msg: message to be printed
+ */
+static inline void
+edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
+ int block_nr, const char *msg)
+{
+ edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
+}

/**
* edac_device_alloc_index: Allocate a unique device index number
@@ -316,5 +349,4 @@ extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
*/
extern int edac_device_alloc_index(void);
extern const char *edac_layer_name[];
-
#endif
--
2.21.0


--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette