2009-12-15 20:10:15

by Keith Mannthey

[permalink] [raw]
Subject: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

This driver supports the Real-Time Linux (RTL) BIOS feature. The RTL
feature allows non-fatal System Management Interrupts (SMIs) to be
disabled on supported IBM platforms.


The Device is presented as a special "_rtl_" table to the OS in the
Extended BIOS Data Area. There is a simple protocol for entering and
exiting the mode at runtime. This driver creates a simple sysfs
interface to allow a simple entry and exit from RTL mode in the
UFI/BIOS.


Signed-off-by: Keith Mannthey <[email protected]>

---

--- linux-2.6.32/drivers/misc/ibmrtl/ibm_rtl.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.32-rtl/drivers/misc/ibmrtl/ibm_rtl.c 2009-12-14 16:37:19.000000000 -0800
@@ -0,0 +1,189 @@
+/*
+ * IBM Premium Real-Time Mode Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2008, 2009
+ *
+ * Author: Keith Mannthey <[email protected]>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/sysdev.h>
+#include <asm/byteorder.h>
+#include "rtl.h"
+
+static spinlock_t rtl_lock;
+static void __iomem *rtl_table;
+static void __iomem *edba_map;
+
+static int ibm_rtl_write(u8 value)
+{
+ int count = 0, ret = 0;
+ u32 cmd_prt_value, cmd_port_addr;
+
+ if ((value != RTL_ENABLE) && (value != RTL_DISABLE))
+ return -EINVAL;
+
+ spin_lock(&rtl_lock);
+
+ if (readb(rtl_table+RTL_STATE) != value) {
+ writeb(value, (rtl_table+RTL_CMD));
+
+ /* Trigger the command to be processed*/
+ cmd_prt_value = readw(rtl_table + RTL_CMD_PORT_VALUE);
+ cmd_port_addr = readw(rtl_table + RTL_CMD_PORT_ADDR);
+ outb(cmd_prt_value, cmd_port_addr);
+
+ /* Wait for the command to finish */
+ while (readb(rtl_table+RTL_CMD)) {
+ mdelay(10);
+ if (count++ > 10000) {
+ printk(KERN_ERR "IBM_RTL: Hardware not "
+ "responding to mode switch request\n");
+ spin_unlock(&rtl_lock);
+ return -EIO;
+ }
+ }
+
+ if (readb(rtl_table + RTL_CMD_STATUS))
+ ret = -EIO;
+ }
+
+ spin_unlock(&rtl_lock);
+
+ return ret;
+}
+
+static ssize_t rtl_show_version(struct sysdev_class *dev, char *buf)
+{
+ return sprintf(buf, "%d\n", (int) readb(rtl_table+RTL_VERSION));
+}
+
+static ssize_t rtl_show_state(struct sysdev_class *dev, char *buf)
+{
+ return sprintf(buf, "%d\n", readb(rtl_table+RTL_STATE));
+}
+
+static ssize_t rtl_set_state(struct sysdev_class *dev, const char *buf,
+ size_t size)
+{
+ ssize_t ret;
+
+ if (size != 2)
+ return -EINVAL;
+
+ switch (buf[0]) {
+ case '0':
+ ret = ibm_rtl_write(RTL_DISABLE);
+ break;
+ case '1':
+ ret = ibm_rtl_write(RTL_ENABLE);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ if (ret >= 0)
+ ret = size;
+
+ return ret;
+}
+
+static struct sysdev_class class_rtl = {
+ .name = "ibm_rtl",
+};
+
+static SYSDEV_CLASS_ATTR(version, S_IRUGO, rtl_show_version, NULL);
+static SYSDEV_CLASS_ATTR(state, 0600, rtl_show_state, rtl_set_state);
+
+static struct sysdev_class_attribute *rtl_attributes[] = {
+ &attr_version,
+ &attr_state,
+ NULL
+};
+
+static int rtl_setup_sysfs(void)
+{
+ int ret, i;
+
+ ret = sysdev_class_register(&class_rtl);
+
+ if (!ret) {
+ for (i = 0; rtl_attributes[i]; i++)
+ sysdev_class_create_file(&class_rtl, rtl_attributes[i]);
+ }
+ return ret;
+}
+
+static void rtl_teardown_sysfs(void)
+{
+ int i;
+
+ for (i = 0; rtl_attributes[i]; i++)
+ sysdev_class_remove_file(&class_rtl, rtl_attributes[i]);
+
+ sysdev_class_unregister(&class_rtl);
+ return;
+}
+
+/* Only allow the modules to load if the _RTL_ table can be found */
+int init_module(void)
+{
+ unsigned long ebda_size, ebda_addr;
+ unsigned int ebda_kb;
+ u32 *table;
+ int ret;
+
+ /* Get the address for the Extende Biso Data Area */
+ ebda_addr = *(u16 *) phys_to_virt(EDBA_ADDR);
+ ebda_addr <<= 4;
+ edba_map = ioremap(ebda_addr, 4);
+ if (!edba_map)
+ return -ENOMEM;
+
+ /* First word in the EDBA is the Size in KB */
+ ebda_kb = *(u32 *) edba_map;
+ ebda_size = ebda_kb*1024;
+ iounmap(edba_map);
+
+ /* Remap the whole table */
+ edba_map = ioremap(ebda_addr, ebda_size);
+ if (!edba_map)
+ return -ENOMEM;
+
+ for (table = (u32 *) edba_map ; \
+ table < (u32 *) edba_map + ebda_size/4; table++)
+ if (*table == RTL_MAGIC_IDENT) {
+ rtl_table = table;
+ ret = rtl_setup_sysfs();
+ return ret;
+ }
+
+ ret = -ENODEV;
+ iounmap(edba_map);
+ return ret;
+}
+
+void cleanup_module(void)
+{
+ rtl_teardown_sysfs();
+ iounmap(edba_map);
+}
+
+MODULE_LICENSE("GPL");
diff -urN linux-2.6.32/drivers/misc/ibmrtl/Makefile linux-2.6.32-rtl/drivers/misc/ibmrtl/Makefile
--- linux-2.6.32/drivers/misc/ibmrtl/Makefile 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.32-rtl/drivers/misc/ibmrtl/Makefile 2009-12-11 10:24:23.000000000 -0800
@@ -0,0 +1 @@
+obj-$(CONFIG_IBM_RTL) := ibm_rtl.o
diff -urN linux-2.6.32/drivers/misc/ibmrtl/rtl.h linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h
--- linux-2.6.32/drivers/misc/ibmrtl/rtl.h 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h 2009-12-11 10:24:23.000000000 -0800
@@ -0,0 +1,27 @@
+#include <linux/io.h>
+
+/* The RTL table looks something like
+ u8 signature[5];
+ u8 version;
+ u8 RT_Status;
+ u8 Command;
+ u8 CommandStatus;
+ u8 CMDAddressType;
+ u8 CmdGranularity;
+ u8 CmdOffset;
+ u16 Reserve1;
+ u8 CmdPortAddress[4];
+ u8 CmdPortValue[4];
+*/
+#define RTL_TABLE_SIZE 0x16
+#define RTL_MAGIC_IDENT (('L'<<24)|('T'<<16)|('R'<<8)|'_')
+#define RTL_VERSION 0x5
+#define RTL_STATE 0x6
+#define RTL_CMD 0x7
+#define RTL_CMD_STATUS 0x8
+#define RTL_CMD_PORT_ADDR 0xE
+#define RTL_CMD_PORT_VALUE 0x12
+
+#define EDBA_ADDR 0x40E
+#define RTL_ENABLE 1
+#define RTL_DISABLE 2
diff -urN linux-2.6.32/drivers/misc/Kconfig linux-2.6.32-rtl/drivers/misc/Kconfig
--- linux-2.6.32/drivers/misc/Kconfig 2009-12-02 19:51:21.000000000 -0800
+++ linux-2.6.32-rtl/drivers/misc/Kconfig 2009-12-11 10:24:23.000000000 -0800
@@ -76,6 +76,22 @@
information on the specific driver level and support statement
for your IBM server.

+config IBM_RTL
+ tristate "Device driver to enable IBM PRTL support"
+ depends on X86_MPPARSE && PCI && EXPERIMENTAL
+ ---help---
+ Enable support for IBM Premium Real Time Mode (PRTM).
+ This module will allow you the enter and exit PRTM in the BIOS via
+ sysfs on platforms that support this feature. System in PRTM will
+ not receive cpu generated SMIs for recoverable errors. Use of this
+ feature without proper support may void your hardware warranty.
+
+ If the proper bios support is found the driver will load and create
+ /sys/devices/system/ibm_rtl/. The "state" variable will indicate
+ weather or not the BIOS is in PRTM.
+ state = 0 (BIOS SMI's on)
+ state = 1 (BIOS SMI's off)
+
config PHANTOM
tristate "Sensable PHANToM (PCI)"
depends on PCI
diff -urN linux-2.6.32/drivers/misc/Makefile linux-2.6.32-rtl/drivers/misc/Makefile
--- linux-2.6.32/drivers/misc/Makefile 2009-12-02 19:51:21.000000000 -0800
+++ linux-2.6.32-rtl/drivers/misc/Makefile 2009-12-11 10:24:23.000000000 -0800
@@ -3,6 +3,7 @@
#

obj-$(CONFIG_IBM_ASM) += ibmasm/
+obj-$(CONFIG_IBM_RTL) += ibmrtl/
obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/
obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o
obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o


2009-12-15 23:13:41

by Randy Dunlap

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Tue, 15 Dec 2009 12:09:48 -0800 Keith Mannthey wrote:

> diff -urN linux-2.6.32/drivers/misc/ibmrtl/rtl.h linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h
> --- linux-2.6.32/drivers/misc/ibmrtl/rtl.h 1969-12-31 16:00:00.000000000 -0800
> +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h 2009-12-11 10:24:23.000000000 -0800
> @@ -0,0 +1,27 @@
> +#include <linux/io.h>
> +
> +/* The RTL table looks something like

Is this documented somewhere? (besides here ;)

> + u8 signature[5];
> + u8 version;
> + u8 RT_Status;
> + u8 Command;
> + u8 CommandStatus;
> + u8 CMDAddressType;
> + u8 CmdGranularity;
> + u8 CmdOffset;
> + u16 Reserve1;
> + u8 CmdPortAddress[4];
> + u8 CmdPortValue[4];
> +*/
> +#define RTL_TABLE_SIZE 0x16
> +#define RTL_MAGIC_IDENT (('L'<<24)|('T'<<16)|('R'<<8)|'_')
> +#define RTL_VERSION 0x5
> +#define RTL_STATE 0x6
> +#define RTL_CMD 0x7
> +#define RTL_CMD_STATUS 0x8
> +#define RTL_CMD_PORT_ADDR 0xE
> +#define RTL_CMD_PORT_VALUE 0x12
> +
> +#define EDBA_ADDR 0x40E
> +#define RTL_ENABLE 1
> +#define RTL_DISABLE 2
> diff -urN linux-2.6.32/drivers/misc/Kconfig linux-2.6.32-rtl/drivers/misc/Kconfig
> --- linux-2.6.32/drivers/misc/Kconfig 2009-12-02 19:51:21.000000000 -0800
> +++ linux-2.6.32-rtl/drivers/misc/Kconfig 2009-12-11 10:24:23.000000000 -0800
> @@ -76,6 +76,22 @@
> information on the specific driver level and support statement
> for your IBM server.
>
> +config IBM_RTL
> + tristate "Device driver to enable IBM PRTL support"
> + depends on X86_MPPARSE && PCI && EXPERIMENTAL
> + ---help---
> + Enable support for IBM Premium Real Time Mode (PRTM).
> + This module will allow you the enter and exit PRTM in the BIOS via
> + sysfs on platforms that support this feature. System in PRTM will
> + not receive cpu generated SMIs for recoverable errors. Use of this

CPU-generated

> + feature without proper support may void your hardware warranty.
> +
> + If the proper bios support is found the driver will load and create

BIOS

> + /sys/devices/system/ibm_rtl/. The "state" variable will indicate
> + weather or not the BIOS is in PRTM.
> + state = 0 (BIOS SMI's on)
> + state = 1 (BIOS SMI's off)
> +
> config PHANTOM
> tristate "Sensable PHANToM (PCI)"
> depends on PCI


---
~Randy

2009-12-15 23:49:57

by Jonathan Corbet

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Tue, 15 Dec 2009 12:09:48 -0800
Keith Mannthey <[email protected]> wrote:

> This driver supports the Real-Time Linux (RTL) BIOS feature. The RTL
> feature allows non-fatal System Management Interrupts (SMIs) to be
> disabled on supported IBM platforms.

...and that seems like a really useful thing to be able to do.

A few notes, while I was in the neighborhood...


> --- linux-2.6.32/drivers/misc/ibmrtl/ibm_rtl.c 1969-12-31 16:00:00.000000000 -0800
> +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/ibm_rtl.c 2009-12-14 16:37:19.000000000 -0800

Do you need to create a new directory for a single .c file?

> @@ -0,0 +1,189 @@
> +/*
> + * IBM Premium Real-Time Mode Device Driver
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright (C) IBM Corporation, 2008, 2009
> + *
> + * Author: Keith Mannthey <[email protected]>
> + *
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/delay.h>
> +#include <linux/module.h>
> +#include <linux/io.h>
> +#include <linux/sysdev.h>
> +#include <asm/byteorder.h>
> +#include "rtl.h"
> +
> +static spinlock_t rtl_lock;

You should probably include <linux/spinlock.h> for this.

> +static void __iomem *rtl_table;
> +static void __iomem *edba_map;
> +
> +static int ibm_rtl_write(u8 value)
> +{
> + int count = 0, ret = 0;
> + u32 cmd_prt_value, cmd_port_addr;
> +
> + if ((value != RTL_ENABLE) && (value != RTL_DISABLE))
> + return -EINVAL;
> +
> + spin_lock(&rtl_lock);
> +
> + if (readb(rtl_table+RTL_STATE) != value) {
> + writeb(value, (rtl_table+RTL_CMD));
> +
> + /* Trigger the command to be processed*/
> + cmd_prt_value = readw(rtl_table + RTL_CMD_PORT_VALUE);
> + cmd_port_addr = readw(rtl_table + RTL_CMD_PORT_ADDR);
> + outb(cmd_prt_value, cmd_port_addr);
> +
> + /* Wait for the command to finish */
> + while (readb(rtl_table+RTL_CMD)) {
> + mdelay(10);

For a driver which is intended to help reduce latencies, a 10ms delay with
a spinlock held seems a little harsh. It seems like maybe you could drop
the lock and use msleep()?

> + if (count++ > 10000) {

...that's 100 seconds total - a *long* time...

> + printk(KERN_ERR "IBM_RTL: Hardware not "
> + "responding to mode switch request\n");
> + spin_unlock(&rtl_lock);
> + return -EIO;
> + }
> + }
> +
> + if (readb(rtl_table + RTL_CMD_STATUS))
> + ret = -EIO;
> + }
> +
> + spin_unlock(&rtl_lock);
> +
> + return ret;
> +}
> +
> +static ssize_t rtl_show_version(struct sysdev_class *dev, char *buf)
> +{
> + return sprintf(buf, "%d\n", (int) readb(rtl_table+RTL_VERSION));
> +}
> +
> +static ssize_t rtl_show_state(struct sysdev_class *dev, char *buf)
> +{
> + return sprintf(buf, "%d\n", readb(rtl_table+RTL_STATE));
> +}
> +
> +static ssize_t rtl_set_state(struct sysdev_class *dev, const char *buf,
> + size_t size)
> +{
> + ssize_t ret;
> +
> + if (size != 2)
> + return -EINVAL;

People do use "echo -n" to write to sysfs files sometimes.

> +
> + switch (buf[0]) {
> + case '0':
> + ret = ibm_rtl_write(RTL_DISABLE);
> + break;
> + case '1':
> + ret = ibm_rtl_write(RTL_ENABLE);
> + break;
> + default:
> + ret = -EINVAL;
> + }
> + if (ret >= 0)
> + ret = size;
> +
> + return ret;
> +}
> +
> +static struct sysdev_class class_rtl = {
> + .name = "ibm_rtl",
> +};
> +
> +static SYSDEV_CLASS_ATTR(version, S_IRUGO, rtl_show_version, NULL);
> +static SYSDEV_CLASS_ATTR(state, 0600, rtl_show_state, rtl_set_state);
> +
> +static struct sysdev_class_attribute *rtl_attributes[] = {
> + &attr_version,
> + &attr_state,
> + NULL
> +};
> +
> +static int rtl_setup_sysfs(void)
> +{
> + int ret, i;
> +
> + ret = sysdev_class_register(&class_rtl);
> +
> + if (!ret) {
> + for (i = 0; rtl_attributes[i]; i++)
> + sysdev_class_create_file(&class_rtl, rtl_attributes[i]);
> + }
> + return ret;
> +}
> +
> +static void rtl_teardown_sysfs(void)
> +{
> + int i;
> +
> + for (i = 0; rtl_attributes[i]; i++)
> + sysdev_class_remove_file(&class_rtl, rtl_attributes[i]);
> +
> + sysdev_class_unregister(&class_rtl);
> + return;
> +}
> +
> +/* Only allow the modules to load if the _RTL_ table can be found */
> +int init_module(void)

This needs to be static.

> +{
> + unsigned long ebda_size, ebda_addr;
> + unsigned int ebda_kb;
> + u32 *table;
> + int ret;
> +
> + /* Get the address for the Extende Biso Data Area */
> + ebda_addr = *(u16 *) phys_to_virt(EDBA_ADDR);
> + ebda_addr <<= 4;
> + edba_map = ioremap(ebda_addr, 4);

Me confused. Where is this data area, and how are you remapping it? This
could use an explanatory comment.

> + if (!edba_map)
> + return -ENOMEM;
> +
> + /* First word in the EDBA is the Size in KB */
> + ebda_kb = *(u32 *) edba_map;

Probably better to use ioread32() here.

> + ebda_size = ebda_kb*1024;
> + iounmap(edba_map);
> +
> + /* Remap the whole table */
> + edba_map = ioremap(ebda_addr, ebda_size);
> + if (!edba_map)
> + return -ENOMEM;
> +
> + for (table = (u32 *) edba_map ; \
> + table < (u32 *) edba_map + ebda_size/4; table++)
> + if (*table == RTL_MAGIC_IDENT) {

Again, you should use ioread32() here.

> + rtl_table = table;
> + ret = rtl_setup_sysfs();
> + return ret;
> + }
> +
> + ret = -ENODEV;
> + iounmap(edba_map);
> + return ret;
> +}
> +
> +void cleanup_module(void)

static

> +{
> + rtl_teardown_sysfs();
> + iounmap(edba_map);
> +}
> +
> +MODULE_LICENSE("GPL");
> diff -urN linux-2.6.32/drivers/misc/ibmrtl/Makefile linux-2.6.32-rtl/drivers/misc/ibmrtl/Makefile
> --- linux-2.6.32/drivers/misc/ibmrtl/Makefile 1969-12-31 16:00:00.000000000 -0800
> +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/Makefile 2009-12-11 10:24:23.000000000 -0800
> @@ -0,0 +1 @@
> +obj-$(CONFIG_IBM_RTL) := ibm_rtl.o
> diff -urN linux-2.6.32/drivers/misc/ibmrtl/rtl.h linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h
> --- linux-2.6.32/drivers/misc/ibmrtl/rtl.h 1969-12-31 16:00:00.000000000 -0800
> +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h 2009-12-11 10:24:23.000000000 -0800
> @@ -0,0 +1,27 @@
> +#include <linux/io.h>
> +
> +/* The RTL table looks something like
> + u8 signature[5];
> + u8 version;
> + u8 RT_Status;
> + u8 Command;
> + u8 CommandStatus;
> + u8 CMDAddressType;
> + u8 CmdGranularity;
> + u8 CmdOffset;
> + u16 Reserve1;
> + u8 CmdPortAddress[4];
> + u8 CmdPortValue[4];
> +*/
> +#define RTL_TABLE_SIZE 0x16
> +#define RTL_MAGIC_IDENT (('L'<<24)|('T'<<16)|('R'<<8)|'_')
> +#define RTL_VERSION 0x5
> +#define RTL_STATE 0x6
> +#define RTL_CMD 0x7
> +#define RTL_CMD_STATUS 0x8
> +#define RTL_CMD_PORT_ADDR 0xE
> +#define RTL_CMD_PORT_VALUE 0x12
> +
> +#define EDBA_ADDR 0x40E
> +#define RTL_ENABLE 1
> +#define RTL_DISABLE 2

These seem like a bit of a strange obfuscation; why not just use a normal,
zero-or-one int value?

jon

2009-12-15 23:58:29

by Keith Mannthey

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Tue, 2009-12-15 at 15:12 -0800, Randy Dunlap wrote:
> On Tue, 15 Dec 2009 12:09:48 -0800 Keith Mannthey wrote:
>
> > diff -urN linux-2.6.32/drivers/misc/ibmrtl/rtl.h linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h
> > --- linux-2.6.32/drivers/misc/ibmrtl/rtl.h 1969-12-31 16:00:00.000000000 -0800
> > +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h 2009-12-11 10:24:23.000000000 -0800
> > @@ -0,0 +1,27 @@
> > +#include <linux/io.h>
> > +
> > +/* The RTL table looks something like
>
> Is this documented somewhere? (besides here ;)

Sorry not at this time to my knowledge.

Thanks for the review and the comment suggestions. I will integrate
them.

Thanks,
Keith Mannthey
LTC Real-Time

2009-12-16 00:01:29

by Alan

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

> --- linux-2.6.32/drivers/misc/ibmrtl/ibm_rtl.c 1969-12-31 16:00:00.000000000 -0800
> +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/ibm_rtl.c 2009-12-14 16:37:19.000000000 -0800

Probably belongs in drivers/firmware as its not a misc_* interface user.

> + /* Get the address for the Extende Biso Data Area */
> + ebda_addr = *(u16 *) phys_to_virt(EDBA_ADDR);
> + ebda_addr <<= 4;
> + edba_map = ioremap(ebda_addr, 4);
> + if (!edba_map)
> + return -ENOMEM;

This is wrong. I wish we had a single proper EBDA handling function
because this keeps coming up.

The rules for the EBDA on PC class hardware as that the EBDA address
provides a real segment offset for the EBDA *if one is present*. Lots of
machines including quite a few suprisingly modern boxes (eg AMD76x
systems without a PS/2 mouse plugged in) don't have an EBDA so you need
to check if the word you read is zero before doing an ioremap.

> + for (table = (u32 *) edba_map ; \
> + table < (u32 *) edba_map + ebda_size/4; table++)

You need to check the table is valid and has a correct signature. Not
doing so is completely unsafe.

The rest of the code looks pretty sane to me.

Alan

2009-12-16 00:09:26

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Tue, 15 Dec 2009, Jonathan Corbet wrote:
> On Tue, 15 Dec 2009 12:09:48 -0800
> Keith Mannthey <[email protected]> wrote:
>
> > +#include <linux/sysdev.h>
> > +#include <asm/byteorder.h>
> > +#include "rtl.h"
> > +
> > +static spinlock_t rtl_lock;
>
> You should probably include <linux/spinlock.h> for this.

And use:

DEFINE_SPINLOCK(rtl_lock);

because the lock is uninitialized otherwise. Running that code with
lockdep would have warned about that.

Thanks,

tglx

2009-12-16 00:38:10

by Keith Mannthey

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Tue, 2009-12-15 at 16:49 -0700, Jonathan Corbet wrote:
> On Tue, 15 Dec 2009 12:09:48 -0800
> Keith Mannthey <[email protected]> wrote:
>
> > This driver supports the Real-Time Linux (RTL) BIOS feature. The RTL
> > feature allows non-fatal System Management Interrupts (SMIs) to be
> > disabled on supported IBM platforms.
>
> ...and that seems like a really useful thing to be able to do.
>
> A few notes, while I was in the neighborhood...
>
>
> > --- linux-2.6.32/drivers/misc/ibmrtl/ibm_rtl.c 1969-12-31 16:00:00.000000000 -0800
> > +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/ibm_rtl.c 2009-12-14 16:37:19.000000000 -0800
>
> Do you need to create a new directory for a single .c file?

Perhaps not. I have a single .h and a single .c file.
Do you think I should move them right into misc (with maybe a better
name the rtl.h)?

> > @@ -0,0 +1,189 @@
> > +/*
> > + * IBM Premium Real-Time Mode Device Driver
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> > + *
> > + * Copyright (C) IBM Corporation, 2008, 2009
> > + *
> > + * Author: Keith Mannthey <[email protected]>
> > + *
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/delay.h>
> > +#include <linux/module.h>
> > +#include <linux/io.h>
> > +#include <linux/sysdev.h>
> > +#include <asm/byteorder.h>
> > +#include "rtl.h"
> > +
> > +static spinlock_t rtl_lock;
>
> You should probably include <linux/spinlock.h> for this.

ack.

> > +static void __iomem *rtl_table;
> > +static void __iomem *edba_map;
> > +
> > +static int ibm_rtl_write(u8 value)
> > +{
> > + int count = 0, ret = 0;
> > + u32 cmd_prt_value, cmd_port_addr;
> > +
> > + if ((value != RTL_ENABLE) && (value != RTL_DISABLE))
> > + return -EINVAL;
> > +
> > + spin_lock(&rtl_lock);
> > +
> > + if (readb(rtl_table+RTL_STATE) != value) {
> > + writeb(value, (rtl_table+RTL_CMD));
> > +
> > + /* Trigger the command to be processed*/
> > + cmd_prt_value = readw(rtl_table + RTL_CMD_PORT_VALUE);
> > + cmd_port_addr = readw(rtl_table + RTL_CMD_PORT_ADDR);
> > + outb(cmd_prt_value, cmd_port_addr);
> > +
> > + /* Wait for the command to finish */
> > + while (readb(rtl_table+RTL_CMD)) {
> > + mdelay(10);
>
> For a driver which is intended to help reduce latencies, a 10ms delay with
> a spinlock held seems a little harsh. It seems like maybe you could drop
> the lock and use msleep()?

Irony is that doing this special write actually triggers an SMI.

Well I really don't want any other possible action to happen until after
the command finishes. I can definitely shorten the amount of time of
the mdelay but I don't want any possible way to start another write
until the command finishes.

> > + if (count++ > 10000) {
>
> ...that's 100 seconds total - a *long* time...

The is a "possible" error condition that I have given a large window
too. More than happy to shorten it up to say a small human amount of
time maybe 2 seconds?

> > + printk(KERN_ERR "IBM_RTL: Hardware not "
> > + "responding to mode switch request\n");
> > + spin_unlock(&rtl_lock);
> > + return -EIO;
> > + }
> > + }
> > +
> > + if (readb(rtl_table + RTL_CMD_STATUS))
> > + ret = -EIO;
> > + }
> > +
> > + spin_unlock(&rtl_lock);
> > +
> > + return ret;
> > +}
> > +
> > +static ssize_t rtl_show_version(struct sysdev_class *dev, char *buf)
> > +{
> > + return sprintf(buf, "%d\n", (int) readb(rtl_table+RTL_VERSION));
> > +}
> > +
> > +static ssize_t rtl_show_state(struct sysdev_class *dev, char *buf)
> > +{
> > + return sprintf(buf, "%d\n", readb(rtl_table+RTL_STATE));
> > +}
> > +
> > +static ssize_t rtl_set_state(struct sysdev_class *dev, const char *buf,
> > + size_t size)
> > +{
> > + ssize_t ret;
> > +
> > + if (size != 2)
> > + return -EINVAL;
>
> People do use "echo -n" to write to sysfs files sometimes.

I fill fix this up to allow for non null terminated inputs.

What is the proper return? Always whatever was inputted should I always
be retuning 1?

> > +
> > + switch (buf[0]) {
> > + case '0':
> > + ret = ibm_rtl_write(RTL_DISABLE);
> > + break;
> > + case '1':
> > + ret = ibm_rtl_write(RTL_ENABLE);
> > + break;
> > + default:
> > + ret = -EINVAL;
> > + }
> > + if (ret >= 0)
> > + ret = size;
> > +
> > + return ret;
> > +}
> > +
> > +static struct sysdev_class class_rtl = {
> > + .name = "ibm_rtl",
> > +};
> > +
> > +static SYSDEV_CLASS_ATTR(version, S_IRUGO, rtl_show_version, NULL);
> > +static SYSDEV_CLASS_ATTR(state, 0600, rtl_show_state, rtl_set_state);
> > +
> > +static struct sysdev_class_attribute *rtl_attributes[] = {
> > + &attr_version,
> > + &attr_state,
> > + NULL
> > +};
> > +
> > +static int rtl_setup_sysfs(void)
> > +{
> > + int ret, i;
> > +
> > + ret = sysdev_class_register(&class_rtl);
> > +
> > + if (!ret) {
> > + for (i = 0; rtl_attributes[i]; i++)
> > + sysdev_class_create_file(&class_rtl, rtl_attributes[i]);
> > + }
> > + return ret;
> > +}
> > +
> > +static void rtl_teardown_sysfs(void)
> > +{
> > + int i;
> > +
> > + for (i = 0; rtl_attributes[i]; i++)
> > + sysdev_class_remove_file(&class_rtl, rtl_attributes[i]);
> > +
> > + sysdev_class_unregister(&class_rtl);
> > + return;
> > +}
> > +
> > +/* Only allow the modules to load if the _RTL_ table can be found */
> > +int init_module(void)
>
> This needs to be static.
>
> > +{
> > + unsigned long ebda_size, ebda_addr;
> > + unsigned int ebda_kb;
> > + u32 *table;
> > + int ret;
> > +
> > + /* Get the address for the Extende Biso Data Area */
> > + ebda_addr = *(u16 *) phys_to_virt(EDBA_ADDR);
> > + ebda_addr <<= 4;
> > + edba_map = ioremap(ebda_addr, 4);
>
> Me confused. Where is this data area, and how are you remapping it? This
> could use an explanatory comment.

Sorry I foobrrd that comment. This table is the EDBA (Extened BIOS Data
Area). As Alan has pointed up I am still not accessing it correctly.
On machines where it is present the EDBA is at 0x40E physical.

> > + if (!edba_map)
> > + return -ENOMEM;
> > +
> > + /* First word in the EDBA is the Size in KB */
> > + ebda_kb = *(u32 *) edba_map;
>
> Probably better to use ioread32() here.

Yes I appears I need to do an ioread to even tell if the table is
there.

> > + ebda_size = ebda_kb*1024;
> > + iounmap(edba_map);
> > +
> > + /* Remap the whole table */
> > + edba_map = ioremap(ebda_addr, ebda_size);
> > + if (!edba_map)
> > + return -ENOMEM;
> > +
> > + for (table = (u32 *) edba_map ; \
> > + table < (u32 *) edba_map + ebda_size/4; table++)
> > + if (*table == RTL_MAGIC_IDENT) {
>
> Again, you should use ioread32() here.

I should never directly access the ioremap region?

> > + rtl_table = table;
> > + ret = rtl_setup_sysfs();
> > + return ret;
> > + }
> > +
> > + ret = -ENODEV;
> > + iounmap(edba_map);
> > + return ret;
> > +}
> > +
> > +void cleanup_module(void)
>
> static
>
> > +{
> > + rtl_teardown_sysfs();
> > + iounmap(edba_map);
> > +}
> > +
> > +MODULE_LICENSE("GPL");
> > diff -urN linux-2.6.32/drivers/misc/ibmrtl/Makefile linux-2.6.32-rtl/drivers/misc/ibmrtl/Makefile
> > --- linux-2.6.32/drivers/misc/ibmrtl/Makefile 1969-12-31 16:00:00.000000000 -0800
> > +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/Makefile 2009-12-11 10:24:23.000000000 -0800
> > @@ -0,0 +1 @@
> > +obj-$(CONFIG_IBM_RTL) := ibm_rtl.o
> > diff -urN linux-2.6.32/drivers/misc/ibmrtl/rtl.h linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h
> > --- linux-2.6.32/drivers/misc/ibmrtl/rtl.h 1969-12-31 16:00:00.000000000 -0800
> > +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/rtl.h 2009-12-11 10:24:23.000000000 -0800
> > @@ -0,0 +1,27 @@
> > +#include <linux/io.h>
> > +
> > +/* The RTL table looks something like
> > + u8 signature[5];
> > + u8 version;
> > + u8 RT_Status;
> > + u8 Command;
> > + u8 CommandStatus;
> > + u8 CMDAddressType;
> > + u8 CmdGranularity;
> > + u8 CmdOffset;
> > + u16 Reserve1;
> > + u8 CmdPortAddress[4];
> > + u8 CmdPortValue[4];
> > +*/
> > +#define RTL_TABLE_SIZE 0x16
> > +#define RTL_MAGIC_IDENT (('L'<<24)|('T'<<16)|('R'<<8)|'_')
> > +#define RTL_VERSION 0x5
> > +#define RTL_STATE 0x6
> > +#define RTL_CMD 0x7
> > +#define RTL_CMD_STATUS 0x8
> > +#define RTL_CMD_PORT_ADDR 0xE
> > +#define RTL_CMD_PORT_VALUE 0x12
> > +
> > +#define EDBA_ADDR 0x40E
> > +#define RTL_ENABLE 1
> > +#define RTL_DISABLE 2
>
> These seem like a bit of a strange obfuscation; why not just use a normal,
> zero-or-one int value?

The firmware uses 1 and 2 to represent "on and off". The driver uses 0
and 1 at the sysfs level as it is more analogous to me for "on and off".
Passing "1" or "2" as arguments into the function seems more unclear to
me at least.

Thanks,
Keith Mannthey
LTC Real-Time

2009-12-16 00:50:34

by Jonathan Corbet

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Tue, 15 Dec 2009 16:38:05 -0800
Keith Mannthey <[email protected]> wrote:

> > Do you need to create a new directory for a single .c file?
>
> Perhaps not. I have a single .h and a single .c file.
> Do you think I should move them right into misc (with maybe a better
> name the rtl.h)?

That's what I would do, but this is a detail.

> > For a driver which is intended to help reduce latencies, a 10ms delay with
> > a spinlock held seems a little harsh. It seems like maybe you could drop
> > the lock and use msleep()?
>
> Irony is that doing this special write actually triggers an SMI.
>
> Well I really don't want any other possible action to happen until after
> the command finishes. I can definitely shorten the amount of time of
> the mdelay but I don't want any possible way to start another write
> until the command finishes.

Why not protect it with a mutex and use msleep()? I see no reason why you
need a spinlock there.

> > > + if (count++ > 10000) {
> >
> > ...that's 100 seconds total - a *long* time...
>
> The is a "possible" error condition that I have given a large window
> too. More than happy to shorten it up to say a small human amount of
> time maybe 2 seconds?

Whatever makes sense for the hardware. Certainly there comes a point where
you can conclude it's not going to get back to you. If you get rid of the
busy wait, you can delay a lot longer here without creating trouble.

> > Again, you should use ioread32() here.
>
> I should never directly access the ioremap region?

No, you really shouldn't. Probably, on any machine where this driver is
relevant you get away with it just fine, but it's not the way to write
portable code.

jon

2009-12-16 23:10:13

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Tue, 2009-12-15 at 12:09 -0800, Keith Mannthey wrote:
> This driver supports the Real-Time Linux (RTL) BIOS feature. The RTL
> feature allows non-fatal System Management Interrupts (SMIs) to be
> disabled on supported IBM platforms.
>
>
> The Device is presented as a special "_rtl_" table to the OS in the
> Extended BIOS Data Area. There is a simple protocol for entering and
> exiting the mode at runtime. This driver creates a simple sysfs
> interface to allow a simple entry and exit from RTL mode in the
> UFI/BIOS.

Why not simply always run with these non-fatal SMIs disabled and provide
their function through the OS proper?

That way you don't need no silly switches and gain consistent platform
behaviour.

2009-12-17 01:39:29

by john stultz

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Thu, 2009-12-17 at 00:09 +0100, Peter Zijlstra wrote:
> On Tue, 2009-12-15 at 12:09 -0800, Keith Mannthey wrote:
> > This driver supports the Real-Time Linux (RTL) BIOS feature. The RTL
> > feature allows non-fatal System Management Interrupts (SMIs) to be
> > disabled on supported IBM platforms.
> >
> >
> > The Device is presented as a special "_rtl_" table to the OS in the
> > Extended BIOS Data Area. There is a simple protocol for entering and
> > exiting the mode at runtime. This driver creates a simple sysfs
> > interface to allow a simple entry and exit from RTL mode in the
> > UFI/BIOS.
>
> Why not simply always run with these non-fatal SMIs disabled and provide
> their function through the OS proper?
>
> That way you don't need no silly switches and gain consistent platform
> behaviour.

Keith can probably correct me here if I'm wrong, but my understanding is
that the SMIs provide hardware error detection, that while non-fatal
are still important to the management of the system.

The hardware may be used with other OSes or older Linux distros that do
not provide a replacement for the SMI functionality. Further, disabling
the SMIs can limit other features like power-throttling by the hardware,
so its not something that can be always disabled in the hardware.

So this driver provides a switch to allow the System to notify the
hardware that the OS is capable of providing the error detection and is
taking that responsibility over.

The second piece required, which uses this interface to notify the BIOS
its taking over this responsibility, is the ibm-prtmd daemon. This is
the userland app that monitors the edac driver and sends the ipmi
messages to the management module.

thanks
-john

2009-12-17 01:51:24

by Keith Mannthey

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Thu, 2009-12-17 at 00:09 +0100, Peter Zijlstra wrote:
> On Tue, 2009-12-15 at 12:09 -0800, Keith Mannthey wrote:
> > This driver supports the Real-Time Linux (RTL) BIOS feature. The RTL
> > feature allows non-fatal System Management Interrupts (SMIs) to be
> > disabled on supported IBM platforms.
> >
> >
> > The Device is presented as a special "_rtl_" table to the OS in the
> > Extended BIOS Data Area. There is a simple protocol for entering and
> > exiting the mode at runtime. This driver creates a simple sysfs
> > interface to allow a simple entry and exit from RTL mode in the
> > UFI/BIOS.
>
> Why not simply always run with these non-fatal SMIs disabled and provide
> their function through the OS proper?
>
> That way you don't need no silly switches and gain consistent platform
> behaviour.

Peter,

This driver will load and work on widely used Intel and AMD based IBM
hardware. A decent amount of the IBM blades and a few of the rack-mount
servers with regular firmware support this mode. This is not special
hardware with special features is is regular hardware/firmware that can
enter this special mode.

There are 2 major things that change when you in this mode that keep it
from being a good fit for many situations.

1. ECC memory error detection and reporting is the sole responsibility
of the OS. I have practical experience with this and let just say this
does not "just work" everywhere even within Linux.

2. The cpu can no longer be externally throttled. This means externally
implemented cooling or power saving options just don't work.

These 2 things make it impractical for it to be an all the time
everywhere type of thing. I think a run time state change the OS can
request is a good fit for the issues and complications at hand.

Thanks,
Keith Mannthey
LTC Real-Time

2009-12-17 02:06:31

by Keith Mannthey

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Wed, 2009-12-16 at 17:37 -0800, john stultz wrote:
> On Thu, 2009-12-17 at 00:09 +0100, Peter Zijlstra wrote:
> > On Tue, 2009-12-15 at 12:09 -0800, Keith Mannthey wrote:
> > > This driver supports the Real-Time Linux (RTL) BIOS feature. The RTL
> > > feature allows non-fatal System Management Interrupts (SMIs) to be
> > > disabled on supported IBM platforms.
> > >
> > >
> > > The Device is presented as a special "_rtl_" table to the OS in the
> > > Extended BIOS Data Area. There is a simple protocol for entering and
> > > exiting the mode at runtime. This driver creates a simple sysfs
> > > interface to allow a simple entry and exit from RTL mode in the
> > > UFI/BIOS.
> >
> > Why not simply always run with these non-fatal SMIs disabled and provide
> > their function through the OS proper?
> >
> > That way you don't need no silly switches and gain consistent platform
> > behaviour.
>
> Keith can probably correct me here if I'm wrong, but my understanding is
> that the SMIs provide hardware error detection, that while non-fatal
> are still important to the management of the system.
>
> The hardware may be used with other OSes or older Linux distros that do
> not provide a replacement for the SMI functionality. Further, disabling
> the SMIs can limit other features like power-throttling by the hardware,
> so its not something that can be always disabled in the hardware.
>
> So this driver provides a switch to allow the System to notify the
> hardware that the OS is capable of providing the error detection and is
> taking that responsibility over.
>
> The second piece required, which uses this interface to notify the BIOS
> its taking over this responsibility, is the ibm-prtmd daemon. This is
> the userland app that monitors the edac driver and sends the ipmi
> messages to the management module.

Just to clarify a bit:

To properly enter the mode you need to ask the BIOS to enter the mode
(this driver) and then ask the BMC (service processor via ipmi), then
you need to start real ecc detection and reporting services. With all
that working you are running with comparable hardware RAS.

The 3 parts are:
RTL kernel driver (this patch)
Working ECC error detection (EDAC drivers presently)
Userspace to manage the mode changes and ecc error reporting (ibm-prtm)

https://sourceforge.net/projects/ibm-prtm there is not much more than
the code there but that is the user space part of the equation.

Thanks,
Keith Mannthey
LTC Real-Time

2009-12-17 02:09:38

by Keith Mannthey

[permalink] [raw]
Subject: Re: [RFC][Patch] IBM Real-Time "SMI Free" mode drive -v2

On Wed, 2009-12-16 at 00:02 +0000, Alan Cox wrote:
> > --- linux-2.6.32/drivers/misc/ibmrtl/ibm_rtl.c 1969-12-31 16:00:00.000000000 -0800
> > +++ linux-2.6.32-rtl/drivers/misc/ibmrtl/ibm_rtl.c 2009-12-14 16:37:19.000000000 -0800
>
> Probably belongs in drivers/firmware as its not a misc_* interface user.

Ack.

> > + /* Get the address for the Extende Biso Data Area */
> > + ebda_addr = *(u16 *) phys_to_virt(EDBA_ADDR);
> > + ebda_addr <<= 4;
> > + edba_map = ioremap(ebda_addr, 4);
> > + if (!edba_map)
> > + return -ENOMEM;
>
> This is wrong. I wish we had a single proper EBDA handling function
> because this keeps coming up.
>
> The rules for the EBDA on PC class hardware as that the EBDA address
> provides a real segment offset for the EBDA *if one is present*. Lots of
> machines including quite a few suprisingly modern boxes (eg AMD76x
> systems without a PS/2 mouse plugged in) don't have an EBDA so you need
> to check if the word you read is zero before doing an ioremap.

Ok so if I can read a word at 0x40E that means an EDBA table is present?

> > + for (table = (u32 *) edba_map ; \
> > + table < (u32 *) edba_map + ebda_size/4; table++)
>
> You need to check the table is valid and has a correct signature. Not
> doing so is completely unsafe.

What does a valid signature on an EDBA look like, I really do not have
much info on the table.

How do I check the signature? I don't really have lot of into about the
EDBA region any info is helpful.

Thanks,
Keith Mannthey
LTC Real-Time