2020-01-16 14:19:27

by Guenter Roeck

[permalink] [raw]
Subject: [RFT PATCH 0/4] hwmon: k10temp driver improvements

This patch series implements various improvements for the k10temp driver.

Patch 1/4 introduces the use of bit operations.

Patch 2/4 converts the driver to use the devm_hwmon_device_register_with_info
API. This not only simplifies the code and reduces its size, it also
makes the code easier to maintain and enhance.

Patch 3/4 adds support for reporting Core Complex Die (CCD) temperatures
on Ryzen 3 (Zen2) CPUs.

Patch 4/4 adds support for reporting core and SoC current and voltage
information on Ryzen CPUs.

With all patches in place, output on Ryzen 3900 CPUs looks as follows
(with the system under load).

k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.36 V
Vsoc: +1.18 V
Tdie: +86.8°C (high = +70.0°C)
Tctl: +86.8°C
Tccd1: +80.0°C
Tccd2: +81.8°C
Icore: +44.14 A
Isoc: +13.83 A

The patch series has only been tested with Ryzen 3900 CPUs. Further test
coverage will be necessary before the changes can be applied to the Linux
kernel.


2020-01-16 14:19:28

by Guenter Roeck

[permalink] [raw]
Subject: [RFT PATCH 1/4] hwmon: (k10temp) Use bitops

Using bitops makes bit masks and shifts easier to read.

Signed-off-by: Guenter Roeck <[email protected]>
---
drivers/hwmon/k10temp.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 5c1dddde193c..8807d7da68db 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -5,6 +5,7 @@
* Copyright (c) 2009 Clemens Ladisch <[email protected]>
*/

+#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
@@ -31,22 +32,22 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
#endif

/* CPUID function 0x80000001, ebx */
-#define CPUID_PKGTYPE_MASK 0xf0000000
+#define CPUID_PKGTYPE_MASK GENMASK(31, 28)
#define CPUID_PKGTYPE_F 0x00000000
#define CPUID_PKGTYPE_AM2R2_AM3 0x10000000

/* DRAM controller (PCI function 2) */
#define REG_DCT0_CONFIG_HIGH 0x094
-#define DDR3_MODE 0x00000100
+#define DDR3_MODE BIT(8)

/* miscellaneous (PCI function 3) */
#define REG_HARDWARE_THERMAL_CONTROL 0x64
-#define HTC_ENABLE 0x00000001
+#define HTC_ENABLE BIT(0)

#define REG_REPORTED_TEMPERATURE 0xa4

#define REG_NORTHBRIDGE_CAPABILITIES 0xe8
-#define NB_CAP_HTC 0x00000400
+#define NB_CAP_HTC BIT(10)

/*
* For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL
@@ -60,6 +61,9 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
/* F17h M01h Access througn SMN */
#define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET 0x00059800

+#define CUR_TEMP_SHIFT 21
+#define CUR_TEMP_RANGE_SEL_MASK BIT(19)
+
struct k10temp_data {
struct pci_dev *pdev;
void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
@@ -129,7 +133,7 @@ static unsigned int get_raw_temp(struct k10temp_data *data)
u32 regval;

data->read_tempreg(data->pdev, &regval);
- temp = (regval >> 21) * 125;
+ temp = (regval >> CUR_TEMP_SHIFT) * 125;
if (regval & data->temp_adjust_mask)
temp -= 49000;
return temp;
@@ -312,7 +316,7 @@ static int k10temp_probe(struct pci_dev *pdev,
data->read_htcreg = read_htcreg_nb_f15;
data->read_tempreg = read_tempreg_nb_f15;
} else if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
- data->temp_adjust_mask = 0x80000;
+ data->temp_adjust_mask = CUR_TEMP_RANGE_SEL_MASK;
data->read_tempreg = read_tempreg_nb_f17;
data->show_tdie = true;
} else {
--
2.17.1

2020-01-16 14:19:40

by Guenter Roeck

[permalink] [raw]
Subject: [RFT PATCH 2/4] hmon: (k10temp) Convert to use devm_hwmon_device_register_with_info

Convert driver to use devm_hwmon_device_register_with_info to simplify
the code and to reduce its size.

Old size (x86_64):
text data bss dec hex filename
8247 4488 64 12799 31ff drivers/hwmon/k10temp.o
New size:
text data bss dec hex filename
6778 2792 64 9634 25a2 drivers/hwmon/k10temp.o

Signed-off-by: Guenter Roeck <[email protected]>
---
drivers/hwmon/k10temp.c | 213 +++++++++++++++++++++-------------------
1 file changed, 112 insertions(+), 101 deletions(-)

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 8807d7da68db..c45f6498a59b 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -1,14 +1,15 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * k10temp.c - AMD Family 10h/11h/12h/14h/15h/16h processor hardware monitoring
+ * k10temp.c - AMD Family 10h/11h/12h/14h/15h/16h/17h
+ * processor hardware monitoring
*
* Copyright (c) 2009 Clemens Ladisch <[email protected]>
+ * Copyright (c) 2020 Guenter Roeck <[email protected]>
*/

#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -127,10 +128,10 @@ static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval)
F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
}

-static unsigned int get_raw_temp(struct k10temp_data *data)
+static long get_raw_temp(struct k10temp_data *data)
{
- unsigned int temp;
u32 regval;
+ long temp;

data->read_tempreg(data->pdev, &regval);
temp = (regval >> CUR_TEMP_SHIFT) * 125;
@@ -139,118 +140,108 @@ static unsigned int get_raw_temp(struct k10temp_data *data)
return temp;
}

-static ssize_t temp1_input_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct k10temp_data *data = dev_get_drvdata(dev);
- unsigned int temp = get_raw_temp(data);
-
- if (temp > data->temp_offset)
- temp -= data->temp_offset;
- else
- temp = 0;
-
- return sprintf(buf, "%u\n", temp);
-}
-
-static ssize_t temp2_input_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- struct k10temp_data *data = dev_get_drvdata(dev);
- unsigned int temp = get_raw_temp(data);
-
- return sprintf(buf, "%u\n", temp);
-}
-
-static ssize_t temp_label_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-
- return sprintf(buf, "%s\n", attr->index ? "Tctl" : "Tdie");
-}
+const char *k10temp_temp_label[] = {
+ "Tdie",
+ "Tctl",
+};

-static ssize_t temp1_max_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static int k10temp_read_labels(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **str)
{
- return sprintf(buf, "%d\n", 70 * 1000);
+ *str = k10temp_temp_label[channel];
+ return 0;
}

-static ssize_t temp_crit_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
+static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct k10temp_data *data = dev_get_drvdata(dev);
- int show_hyst = attr->index;
u32 regval;
- int value;

- data->read_htcreg(data->pdev, &regval);
- value = ((regval >> 16) & 0x7f) * 500 + 52000;
- if (show_hyst)
- value -= ((regval >> 24) & 0xf) * 500;
- return sprintf(buf, "%d\n", value);
+ switch (attr) {
+ case hwmon_temp_input:
+ switch (channel) {
+ case 0: /* Tdie */
+ *val = get_raw_temp(data) - data->temp_offset;
+ if (*val < 0)
+ *val = 0;
+ break;
+ case 1: /* Tctl */
+ *val = get_raw_temp(data);
+ if (*val < 0)
+ *val = 0;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ break;
+ case hwmon_temp_max:
+ *val = 70 * 1000;
+ break;
+ case hwmon_temp_crit:
+ data->read_htcreg(data->pdev, &regval);
+ *val = ((regval >> 16) & 0x7f) * 500 + 52000;
+ break;
+ case hwmon_temp_crit_hyst:
+ data->read_htcreg(data->pdev, &regval);
+ *val = (((regval >> 16) & 0x7f)
+ - ((regval >> 24) & 0xf)) * 500 + 52000;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
}

-static DEVICE_ATTR_RO(temp1_input);
-static DEVICE_ATTR_RO(temp1_max);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit, temp_crit, 0);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit_hyst, temp_crit, 1);
-
-static SENSOR_DEVICE_ATTR_RO(temp1_label, temp_label, 0);
-static DEVICE_ATTR_RO(temp2_input);
-static SENSOR_DEVICE_ATTR_RO(temp2_label, temp_label, 1);
-
-static umode_t k10temp_is_visible(struct kobject *kobj,
- struct attribute *attr, int index)
+static umode_t k10temp_is_visible(const void *_data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
{
- struct device *dev = container_of(kobj, struct device, kobj);
- struct k10temp_data *data = dev_get_drvdata(dev);
+ const struct k10temp_data *data = _data;
struct pci_dev *pdev = data->pdev;
u32 reg;

- switch (index) {
- case 0 ... 1: /* temp1_input, temp1_max */
- default:
- break;
- case 2 ... 3: /* temp1_crit, temp1_crit_hyst */
- if (!data->read_htcreg)
- return 0;
-
- pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES,
- &reg);
- if (!(reg & NB_CAP_HTC))
- return 0;
-
- data->read_htcreg(data->pdev, &reg);
- if (!(reg & HTC_ENABLE))
- return 0;
- break;
- case 4 ... 6: /* temp1_label, temp2_input, temp2_label */
- if (!data->show_tdie)
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ if (channel && !data->show_tdie)
+ return 0;
+ break;
+ case hwmon_temp_max:
+ if (channel)
+ return 0;
+ break;
+ case hwmon_temp_crit:
+ case hwmon_temp_crit_hyst:
+ if (channel || !data->read_htcreg)
+ return 0;
+
+ pci_read_config_dword(pdev,
+ REG_NORTHBRIDGE_CAPABILITIES,
+ &reg);
+ if (!(reg & NB_CAP_HTC))
+ return 0;
+
+ data->read_htcreg(data->pdev, &reg);
+ if (!(reg & HTC_ENABLE))
+ return 0;
+ break;
+ case hwmon_temp_label:
+ if (!data->show_tdie)
+ return 0;
+ break;
+ default:
return 0;
+ }
break;
+ default:
+ return 0;
}
- return attr->mode;
+ return 0444;
}

-static struct attribute *k10temp_attrs[] = {
- &dev_attr_temp1_input.attr,
- &dev_attr_temp1_max.attr,
- &sensor_dev_attr_temp1_crit.dev_attr.attr,
- &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
- &sensor_dev_attr_temp1_label.dev_attr.attr,
- &dev_attr_temp2_input.attr,
- &sensor_dev_attr_temp2_label.dev_attr.attr,
- NULL
-};
-
-static const struct attribute_group k10temp_group = {
- .attrs = k10temp_attrs,
- .is_visible = k10temp_is_visible,
-};
-__ATTRIBUTE_GROUPS(k10temp);
-
static bool has_erratum_319(struct pci_dev *pdev)
{
u32 pkg_type, reg_dram_cfg;
@@ -285,8 +276,27 @@ static bool has_erratum_319(struct pci_dev *pdev)
(boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2);
}

-static int k10temp_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static const struct hwmon_channel_info *k10temp_info[] = {
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_MAX |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL),
+ NULL
+};
+
+static const struct hwmon_ops k10temp_hwmon_ops = {
+ .is_visible = k10temp_is_visible,
+ .read = k10temp_read,
+ .read_string = k10temp_read_labels,
+};
+
+static const struct hwmon_chip_info k10temp_chip_info = {
+ .ops = &k10temp_hwmon_ops,
+ .info = k10temp_info,
+};
+
+static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int unreliable = has_erratum_319(pdev);
struct device *dev = &pdev->dev;
@@ -334,8 +344,9 @@ static int k10temp_probe(struct pci_dev *pdev,
}
}

- hwmon_dev = devm_hwmon_device_register_with_groups(dev, "k10temp", data,
- k10temp_groups);
+ hwmon_dev = devm_hwmon_device_register_with_info(dev, "k10temp", data,
+ &k10temp_chip_info,
+ NULL);
return PTR_ERR_OR_ZERO(hwmon_dev);
}

--
2.17.1

2020-01-16 14:20:52

by Guenter Roeck

[permalink] [raw]
Subject: [RFT PATCH 3/4] hwmon: (k10temp) Report temperatures per CPU die

Zen2 reports reporting temperatures per CPU die (called Core Complex Dies,
or CCD, by AMD). Add support for it to the k10temp driver.

Signed-off-by: Guenter Roeck <[email protected]>
---
drivers/hwmon/k10temp.c | 79 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index c45f6498a59b..944ba8008bc4 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -5,6 +5,12 @@
*
* Copyright (c) 2009 Clemens Ladisch <[email protected]>
* Copyright (c) 2020 Guenter Roeck <[email protected]>
+ *
+ * Implementation notes:
+ * - CCD1 and CCD2 register address information as well as the calculation to
+ * convert raw register values is from https://github.com/ocerman/zenpower.
+ * The information is not confirmed from chip datasheets, but experiments
+ * suggest that it provides reasonable temperature values.
*/

#include <linux/bitops.h>
@@ -61,6 +67,8 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);

/* F17h M01h Access througn SMN */
#define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET 0x00059800
+#define F17H_M70H_CCD1_TEMP 0x00059954
+#define F17H_M70H_CCD2_TEMP 0x00059958

#define CUR_TEMP_SHIFT 21
#define CUR_TEMP_RANGE_SEL_MASK BIT(19)
@@ -72,6 +80,8 @@ struct k10temp_data {
int temp_offset;
u32 temp_adjust_mask;
bool show_tdie;
+ bool show_tccd1;
+ bool show_tccd2;
};

struct tctl_offset {
@@ -143,6 +153,8 @@ static long get_raw_temp(struct k10temp_data *data)
const char *k10temp_temp_label[] = {
"Tdie",
"Tctl",
+ "Tccd1",
+ "Tccd2",
};

static int k10temp_read_labels(struct device *dev,
@@ -172,6 +184,16 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
if (*val < 0)
*val = 0;
break;
+ case 2: /* Tccd1 */
+ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ F17H_M70H_CCD1_TEMP, &regval);
+ *val = (regval & 0xfff) * 125 - 305000;
+ break;
+ case 3: /* Tccd2 */
+ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ F17H_M70H_CCD2_TEMP, &regval);
+ *val = (regval & 0xfff) * 125 - 305000;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -206,8 +228,24 @@ static umode_t k10temp_is_visible(const void *_data,
case hwmon_temp:
switch (attr) {
case hwmon_temp_input:
- if (channel && !data->show_tdie)
+ switch (channel) {
+ case 0: /* Tdie, or Tctl if we don't show it */
+ break;
+ case 1: /* Tctl */
+ if (!data->show_tdie)
+ return 0;
+ break;
+ case 2: /* Tccd1 */
+ if (!data->show_tccd1)
+ return 0;
+ break;
+ case 3: /* Tccd2 */
+ if (!data->show_tccd2)
+ return 0;
+ break;
+ default:
return 0;
+ }
break;
case hwmon_temp_max:
if (channel)
@@ -229,8 +267,24 @@ static umode_t k10temp_is_visible(const void *_data,
return 0;
break;
case hwmon_temp_label:
+ /* No labels if we don't show the die temperature */
if (!data->show_tdie)
return 0;
+ switch (channel) {
+ case 0: /* Tdie */
+ case 1: /* Tctl */
+ break;
+ case 2: /* Tccd1 */
+ if (!data->show_tccd1)
+ return 0;
+ break;
+ case 3: /* Tccd2 */
+ if (!data->show_tccd2)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
break;
default:
return 0;
@@ -281,6 +335,8 @@ static const struct hwmon_channel_info *k10temp_info[] = {
HWMON_T_INPUT | HWMON_T_MAX |
HWMON_T_CRIT | HWMON_T_CRIT_HYST |
HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_INPUT | HWMON_T_LABEL),
NULL
};
@@ -326,9 +382,30 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
data->read_htcreg = read_htcreg_nb_f15;
data->read_tempreg = read_tempreg_nb_f15;
} else if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
+ u32 regval;
+
data->temp_adjust_mask = CUR_TEMP_RANGE_SEL_MASK;
data->read_tempreg = read_tempreg_nb_f17;
data->show_tdie = true;
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x1: /* Zen */
+ case 0x8: /* Zen+ */
+ case 0x11: /* Zen APU */
+ case 0x18: /* Zen+ APU */
+ break;
+ case 0x71: /* Zen2 */
+ amd_smn_read(amd_pci_dev_to_node_id(pdev),
+ F17H_M70H_CCD1_TEMP, &regval);
+ if (regval & 0xfff)
+ data->show_tccd1 = true;
+
+ amd_smn_read(amd_pci_dev_to_node_id(pdev),
+ F17H_M70H_CCD2_TEMP, &regval);
+ if (regval & 0xfff)
+ data->show_tccd2 = true;
+ break;
+ }
} else {
data->read_htcreg = read_htcreg_pci;
data->read_tempreg = read_tempreg_pci;
--
2.17.1

2020-01-16 14:21:03

by Guenter Roeck

[permalink] [raw]
Subject: [RFT PATCH 4/4] hwmon: (k10temp) Show core and SoC current and voltages on Zen CPUs

Zen CPUs report core and SoC voltages and currents. Add support for it
to the k10temp driver.

Signed-off-by: Guenter Roeck <[email protected]>
---
drivers/hwmon/k10temp.c | 116 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 113 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 944ba8008bc4..bce862bacbc5 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -11,6 +11,13 @@
* convert raw register values is from https://github.com/ocerman/zenpower.
* The information is not confirmed from chip datasheets, but experiments
* suggest that it provides reasonable temperature values.
+ * - Register addresses to read chip voltage and current is also from
+ * https://github.com/ocerman/zenpower, and not confirmed from chip
+ * datasheets. Experiments suggest that reported current and voltage
+ * information is reasonable.
+ * - It is unknown if the mechanism to read CCD1/CCD2 temperature as well as
+ * current and voltage information works on higher-end Ryzen CPUs, or if
+ * additional information is available on those CPUs.
*/

#include <linux/bitops.h>
@@ -70,6 +77,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
#define F17H_M70H_CCD1_TEMP 0x00059954
#define F17H_M70H_CCD2_TEMP 0x00059958

+#define F17H_M01H_SVI 0x0005A000
+#define F17H_M01H_SVI_TEL_PLANE0 (F17H_M01H_SVI + 0xc)
+#define F17H_M01H_SVI_TEL_PLANE1 (F17H_M01H_SVI + 0x10)
+
#define CUR_TEMP_SHIFT 21
#define CUR_TEMP_RANGE_SEL_MASK BIT(19)

@@ -82,6 +93,9 @@ struct k10temp_data {
bool show_tdie;
bool show_tccd1;
bool show_tccd2;
+ u32 svi_addr[2];
+ bool show_current;
+ int cfactor[2];
};

struct tctl_offset {
@@ -157,16 +171,76 @@ const char *k10temp_temp_label[] = {
"Tccd2",
};

+const char *k10temp_in_label[] = {
+ "Vcore",
+ "Vsoc",
+};
+
+const char *k10temp_curr_label[] = {
+ "Icore",
+ "Isoc",
+};
+
static int k10temp_read_labels(struct device *dev,
enum hwmon_sensor_types type,
u32 attr, int channel, const char **str)
{
- *str = k10temp_temp_label[channel];
+ switch (type) {
+ case hwmon_temp:
+ *str = k10temp_temp_label[channel];
+ break;
+ case hwmon_in:
+ *str = k10temp_in_label[channel];
+ break;
+ case hwmon_curr:
+ *str = k10temp_curr_label[channel];
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
return 0;
}

-static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
- u32 attr, int channel, long *val)
+static int k10temp_read_curr(struct device *dev, u32 attr, int channel,
+ long *val)
+{
+ struct k10temp_data *data = dev_get_drvdata(dev);
+ u32 regval;
+
+ switch (attr) {
+ case hwmon_curr_input:
+ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ data->svi_addr[channel], &regval);
+ *val = DIV_ROUND_CLOSEST(data->cfactor[channel] *
+ (regval & 0xff),
+ 1000);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int k10temp_read_in(struct device *dev, u32 attr, int channel, long *val)
+{
+ struct k10temp_data *data = dev_get_drvdata(dev);
+ u32 regval;
+
+ switch (attr) {
+ case hwmon_in_input:
+ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ data->svi_addr[channel], &regval);
+ regval = (regval >> 16) & 0xff;
+ *val = 1550 - DIV_ROUND_CLOSEST(625 * regval, 100);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
+ long *val)
{
struct k10temp_data *data = dev_get_drvdata(dev);
u32 regval;
@@ -216,6 +290,21 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
return 0;
}

+static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ switch (type) {
+ case hwmon_temp:
+ return k10temp_read_temp(dev, attr, channel, val);
+ case hwmon_in:
+ return k10temp_read_in(dev, attr, channel, val);
+ case hwmon_curr:
+ return k10temp_read_curr(dev, attr, channel, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static umode_t k10temp_is_visible(const void *_data,
enum hwmon_sensor_types type,
u32 attr, int channel)
@@ -290,6 +379,11 @@ static umode_t k10temp_is_visible(const void *_data,
return 0;
}
break;
+ case hwmon_in:
+ case hwmon_curr:
+ if (!data->show_current)
+ return 0;
+ break;
default:
return 0;
}
@@ -338,6 +432,12 @@ static const struct hwmon_channel_info *k10temp_info[] = {
HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_INPUT | HWMON_T_LABEL),
+ HWMON_CHANNEL_INFO(in,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL),
+ HWMON_CHANNEL_INFO(curr,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL),
NULL
};

@@ -393,8 +493,18 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
case 0x8: /* Zen+ */
case 0x11: /* Zen APU */
case 0x18: /* Zen+ APU */
+ data->show_current = true;
+ data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE0;
+ data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE1;
+ data->cfactor[0] = 1039211; /* core */
+ data->cfactor[1] = 360772; /* SoC */
break;
case 0x71: /* Zen2 */
+ data->show_current = true;
+ data->cfactor[0] = 658823; /* core */
+ data->cfactor[1] = 294300; /* SoC */
+ data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE1;
+ data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE0;
amd_smn_read(amd_pci_dev_to_node_id(pdev),
F17H_M70H_CCD1_TEMP, &regval);
if (regval & 0xfff)
--
2.17.1

2020-01-16 23:25:07

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On Thu, Jan 16, 2020 at 08:55:16PM +0000, Darren Salt wrote:
> Tested-By: Darren Salt <[email protected]>
>
Thanks!

Guenter

> Linux 5.4.12, Ryzen 5 1600. Patches were applied cleanly. No problems noticed in
>
> $ sensors k10temp-pci-00c3
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.11 V
> Vsoc: +0.94 V
> Tdie: +42.8?C (high = +70.0?C)
> Tctl: +42.8?C
> Icore: +15.59 A
> Isoc: +12.63 A
>
> $
>
> --
> | _ | Darren Salt, using Debian GNU/Linux (and Android)
> | ( ) |
> | X | ASCII Ribbon campaign against HTML e-mail
> | / \ |

2020-01-16 23:28:25

by Bernhard Gebetsberger

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

Tested-by: Bernhard Gebetsberger <[email protected]>

Patches applied cleanly on top of 5.5-rc6, no issues using a Ryzen 3 2200G:
k10temp-pci-00c3
Adapter: PCI adapter
Vcore:         1.29 V 
Vsoc:          1.12 V 
Tdie:         +28.2°C  (high = +70.0°C)
Tctl:         +28.2°C 
Icore:        23.90 A 
Isoc:          6.49 A

- Bernhard

2020-01-17 01:07:42

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On Thu, Jan 16, 2020 at 11:46:47PM +0100, Bernhard Gebetsberger wrote:
> Tested-by: Bernhard Gebetsberger <[email protected]>
>
> Patches applied cleanly on top of 5.5-rc6, no issues using a Ryzen 3 2200G:
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore:???????? 1.29 V?
> Vsoc:????????? 1.12 V?
> Tdie:???????? +28.2?C? (high = +70.0?C)
> Tctl:???????? +28.2?C?
> Icore:??????? 23.90 A?
> Isoc:????????? 6.49 A
>

Thanks!

Guenter

2020-01-17 02:05:54

by Darren Salt

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

Tested-By: Darren Salt <[email protected]>

Linux 5.4.12, Ryzen 5 1600. Patches were applied cleanly. No problems noticed in

$ sensors k10temp-pci-00c3
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.11 V
Vsoc: +0.94 V
Tdie: +42.8°C (high = +70.0°C)
Tctl: +42.8°C
Icore: +15.59 A
Isoc: +12.63 A

$

--
| _ | Darren Salt, using Debian GNU/Linux (and Android)
| ( ) |
| X | ASCII Ribbon campaign against HTML e-mail
| / \ |

2020-01-17 03:53:16

by Ken Moffat

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On Thu, 16 Jan 2020 at 14:18, Guenter Roeck <[email protected]> wrote:
>
> This patch series implements various improvements for the k10temp driver.
>
> Patch 1/4 introduces the use of bit operations.
>
> Patch 2/4 converts the driver to use the devm_hwmon_device_register_with_info
> API. This not only simplifies the code and reduces its size, it also
> makes the code easier to maintain and enhance.
>
> Patch 3/4 adds support for reporting Core Complex Die (CCD) temperatures
> on Ryzen 3 (Zen2) CPUs.
>
> Patch 4/4 adds support for reporting core and SoC current and voltage
> information on Ryzen CPUs.
>

> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.36 V
> Vsoc: +1.18 V
> Tdie: +86.8°C (high = +70.0°C)
> Tctl: +86.8°C
> Tccd1: +80.0°C
> Tccd2: +81.8°C
> Icore: +44.14 A
> Isoc: +13.83 A
>
> The patch series has only been tested with Ryzen 3900 CPUs. Further test
> coverage will be necessary before the changes can be applied to the Linux
> kernel.

I have some Zen1 and Zen1+ here.

My Ryzen 3 1300X, applied to 5.5.0-rc5

machine idle, I thought at first the temperature may be a bit low, so
I've added other reported temperatures. I now think it is maybe ok.

k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.41 V
Vsoc: +0.89 V
Tdie: +21.2°C (high = +70.0°C)
Tctl: +21.2°C
Icore: +30.14 A
Isoc: +8.66 A

SYSTIN: +29.0°C (high = +0.0°C, hyst = +0.0°C)
ALARM sensor = thermistor
CPUTIN: +25.5°C (high = +80.0°C, hyst = +75.0°C)
sensor = thermistor
AUXTIN0: -1.5°C sensor = thermistor
AUXTIN1: +87.0°C sensor = thermistor
AUXTIN2: +23.0°C sensor = thermistor
AUXTIN3: -27.0°C sensor = thermistor
SMBUSMASTER 0: +20.5°C

After about 2 minutes of make -j8 on kernel, to load it

k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.26 V
Vsoc: +0.89 V
Tdie: +46.2°C (high = +70.0°C)
Tctl: +46.2°C
Icore: +45.73 A
Isoc: +11.18 A

SYSTIN: +29.0°C (high = +0.0°C, hyst = +0.0°C)
ALARM sensor = thermistor
CPUTIN: +38.5°C (high = +80.0°C, hyst = +75.0°C)
sensor = thermistor
AUXTIN0: -7.5°C sensor = thermistor
AUXTIN1: +85.0°C sensor = thermistor
AUXTIN2: +23.0°C sensor = thermistor
AUXTIN3: -27.0°C sensor = thermistor
SMBUSMASTER 0: +46.0°C

So I guess the temperatures *are* in the right area.
Interestingly, the Vcore restores to above +1.4V when idle.

And my Ryzen 5 3400G (Zen+), applied to 5.4.12, box is idle,
also showing the gpu measurements of this APU to confirm the
temperature:

k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +0.94 V
Vsoc: +1.09 V
Tdie: +34.8°C (high = +70.0°C)
Tctl: +34.8°C
Icore: +6.24 A
Isoc: +8.30 A

amdgpu-pci-0900
Adapter: PCI adapter
vddgfx: N/A
vddnb: N/A
edge: +34.0°C (crit = +80.0°C, hyst = +0.0°C)

For my Ryzen 5 2500u laptop (Zen1), again showing the gpu:

k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +0.97 V
Vsoc: +0.93 V
Tdie: +37.2°C (high = +70.0°C)
Tctl: +37.2°C
Icore: +19.75 A
Isoc: +8.66 A

amdgpu-pci-0300
Adapter: PCI adapter
vddgfx: N/A
vddnb: N/A
edge: +37.0°C (crit = +80.0°C, hyst = +0.0°C)

Thanks.
ĸen

--
We hope and trust that our values and loyal customers will bear with
us in the coming months as we interact synergistically with change
management in our striving for excellence. That is our mission.

2020-01-17 05:27:09

by Ken Moffat

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On Fri, 17 Jan 2020 at 03:58, Guenter Roeck <[email protected]> wrote:
>
> Hi Ken,
>
> SMBUSMASTER 0 is the CPU, so we have a match with the temperatures.
>
OK, thanks for that information.

>
> Both Vcore and Icore should be much less when idle, and higher under
> load. The data from the Super-IO chip suggests that it is a Nuvoton
> chip. Can you report its first voltage (in0) ? That should roughly
> match Vcore.
>
> All other data looks ok.
>
> Thanks,
> Guenter

Hi Guenter,

unfortunately I don't have any report of in0. I'm guessing I need some
module(s) which did not seem to do anything useful in the past.

All I have in the 'in' area is
nct6779-isa-0290
Adapter: ISA adapter
Vcore: +0.30 V (min = +0.00 V, max = +1.74 V)
in1: +0.00 V (min = +0.00 V, max = +0.00 V)
AVCC: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
+3.3V: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
in4: +1.90 V (min = +0.00 V, max = +0.00 V) ALARM
in5: +0.90 V (min = +0.00 V, max = +0.00 V) ALARM
in6: +1.50 V (min = +0.00 V, max = +0.00 V) ALARM
3VSB: +3.47 V (min = +0.00 V, max = +0.00 V) ALARM
Vbat: +3.26 V (min = +0.00 V, max = +0.00 V) ALARM
in9: +0.00 V (min = +0.00 V, max = +0.00 V)
in10: +0.32 V (min = +0.00 V, max = +0.00 V) ALARM
in11: +1.06 V (min = +0.00 V, max = +0.00 V) ALARM
in12: +1.70 V (min = +0.00 V, max = +0.00 V) ALARM
in13: +0.94 V (min = +0.00 V, max = +0.00 V) ALARM
in14: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM

and at that point Vcore was reported as 1.41V (system idle)

ĸen
--
Also Spuke Zerothruster
(Finnegans Wake)

2020-01-17 05:34:08

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

Hi Ken,

On 1/16/20 4:38 PM, Ken Moffat wrote:
> On Thu, 16 Jan 2020 at 14:18, Guenter Roeck <[email protected]> wrote:
[ ... ]
> I have some Zen1 and Zen1+ here.
>
> My Ryzen 3 1300X, applied to 5.5.0-rc5
>
> machine idle, I thought at first the temperature may be a bit low, so
> I've added other reported temperatures. I now think it is maybe ok.
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.41 V
> Vsoc: +0.89 V
> Tdie: +21.2°C (high = +70.0°C)
> Tctl: +21.2°C
> Icore: +30.14 A
> Isoc: +8.66 A
>
> SYSTIN: +29.0°C (high = +0.0°C, hyst = +0.0°C)
> ALARM sensor = thermistor
> CPUTIN: +25.5°C (high = +80.0°C, hyst = +75.0°C)
> sensor = thermistor
> AUXTIN0: -1.5°C sensor = thermistor
> AUXTIN1: +87.0°C sensor = thermistor
> AUXTIN2: +23.0°C sensor = thermistor
> AUXTIN3: -27.0°C sensor = thermistor
> SMBUSMASTER 0: +20.5°C
>
SMBUSMASTER 0 is the CPU, so we have a match with the temperatures.

> After about 2 minutes of make -j8 on kernel, to load it
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.26 V
> Vsoc: +0.89 V
> Tdie: +46.2°C (high = +70.0°C)
> Tctl: +46.2°C
> Icore: +45.73 A
> Isoc: +11.18 A
>

Both Vcore and Icore should be much less when idle, and higher under
load. The data from the Super-IO chip suggests that it is a Nuvoton
chip. Can you report its first voltage (in0) ? That should roughly
match Vcore.

> SYSTIN: +29.0°C (high = +0.0°C, hyst = +0.0°C)
> ALARM sensor = thermistor
> CPUTIN: +38.5°C (high = +80.0°C, hyst = +75.0°C)
> sensor = thermistor
> AUXTIN0: -7.5°C sensor = thermistor
> AUXTIN1: +85.0°C sensor = thermistor
> AUXTIN2: +23.0°C sensor = thermistor
> AUXTIN3: -27.0°C sensor = thermistor
> SMBUSMASTER 0: +46.0°C
>
> So I guess the temperatures *are* in the right area.
> Interestingly, the Vcore restores to above +1.4V when idle.
>
It should be much lower when idle, actually, not higher.

All other data looks ok.

Thanks,
Guenter

2020-01-17 10:00:02

by Ondrej Čerman

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

Dňa 16. 1. 2020 o 15:17 Guenter Roeck napísal(a):
> This patch series implements various improvements for the k10temp driver.
>
> Patch 1/4 introduces the use of bit operations.
>
> Patch 2/4 converts the driver to use the devm_hwmon_device_register_with_info
> API. This not only simplifies the code and reduces its size, it also
> makes the code easier to maintain and enhance.
>
> Patch 3/4 adds support for reporting Core Complex Die (CCD) temperatures
> on Ryzen 3 (Zen2) CPUs.
>
> Patch 4/4 adds support for reporting core and SoC current and voltage
> information on Ryzen CPUs.
>
> With all patches in place, output on Ryzen 3900 CPUs looks as follows
> (with the system under load).
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.36 V
> Vsoc: +1.18 V
> Tdie: +86.8°C (high = +70.0°C)
> Tctl: +86.8°C
> Tccd1: +80.0°C
> Tccd2: +81.8°C
> Icore: +44.14 A
> Isoc: +13.83 A
>
> The patch series has only been tested with Ryzen 3900 CPUs. Further test
> coverage will be necessary before the changes can be applied to the Linux
> kernel.
>
Hello everyone, I am the author of https://github.com/ocerman/zenpower/ .

It is nice to see this merged.

I just want to warn you that there have been reported issues with
Threadripper CPUs to zenpower issue tracker. Also I think that no-one
tested EPYC CPUs.

Most of the stuff I was able to figure out by trial-and-error approach
and unfortunately because I do not own any Threadripper CPU I was not
able to test and fix reported problems.

Ondrej.

2020-01-17 10:00:07

by Holger Kiehl

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On Thu, 16 Jan 2020, Guenter Roeck wrote:

> This patch series implements various improvements for the k10temp driver.
>
> Patch 1/4 introduces the use of bit operations.
>
> Patch 2/4 converts the driver to use the devm_hwmon_device_register_with_info
> API. This not only simplifies the code and reduces its size, it also
> makes the code easier to maintain and enhance.
>
> Patch 3/4 adds support for reporting Core Complex Die (CCD) temperatures
> on Ryzen 3 (Zen2) CPUs.
>
> Patch 4/4 adds support for reporting core and SoC current and voltage
> information on Ryzen CPUs.
>
> With all patches in place, output on Ryzen 3900 CPUs looks as follows
> (with the system under load).
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.36 V
> Vsoc: +1.18 V
> Tdie: +86.8°C (high = +70.0°C)
> Tctl: +86.8°C
> Tccd1: +80.0°C
> Tccd2: +81.8°C
> Icore: +44.14 A
> Isoc: +13.83 A
>
> The patch series has only been tested with Ryzen 3900 CPUs. Further test
> coverage will be necessary before the changes can be applied to the Linux
> kernel.
>
Here from my little Asrock A300 with a Ryzen 2400G:

sensors
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +0.78 V
Vsoc: +1.11 V
Tdie: +44.8°C (high = +70.0°C)
Tctl: +44.8°C
Icore: +5.20 A
Isoc: +2.17 A

nvme-pci-0100
Adapter: PCI adapter
Composite: +41.9°C (low = -273.1°C, high = +80.8°C)
(crit = +80.8°C)
Sensor 1: +41.9°C (low = -273.1°C, high = +65261.8°C)
Sensor 2: +44.9°C (low = -273.1°C, high = +65261.8°C)

nct6793-isa-0290
Adapter: ISA adapter
in0: +0.34 V (min = +0.00 V, max = +1.74 V)
in1: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
in2: +3.41 V (min = +0.00 V, max = +0.00 V) ALARM
in3: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
in4: +0.26 V (min = +0.00 V, max = +0.00 V) ALARM
in5: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
in6: +0.67 V (min = +0.00 V, max = +0.00 V) ALARM
in7: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
in8: +3.26 V (min = +0.00 V, max = +0.00 V) ALARM
in9: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
in10: +0.19 V (min = +0.00 V, max = +0.00 V) ALARM
in11: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
in12: +1.85 V (min = +0.00 V, max = +0.00 V) ALARM
in13: +1.72 V (min = +0.00 V, max = +0.00 V) ALARM
in14: +0.20 V (min = +0.00 V, max = +0.00 V) ALARM
fan1: 0 RPM (min = 0 RPM)
fan2: 317 RPM (min = 0 RPM)
fan3: 0 RPM (min = 0 RPM)
fan4: 0 RPM (min = 0 RPM)
fan5: 0 RPM (min = 0 RPM)
SYSTIN: +113.0°C (high = +0.0°C, hyst = +0.0°C) sensor = thermistor
CPUTIN: +59.5°C (high = +80.0°C, hyst = +75.0°C) sensor = thermistor
AUXTIN0: +45.0°C (high = +0.0°C, hyst = +0.0°C) ALARM sensor = thermistor
AUXTIN1: +107.0°C sensor = thermistor
AUXTIN2: +106.0°C sensor = thermistor
AUXTIN3: +103.0°C sensor = thermistor
SMBUSMASTER 0: +44.5°C
PCH_CHIP_CPU_MAX_TEMP: +0.0°C
PCH_CHIP_TEMP: +0.0°C
PCH_CPU_TEMP: +0.0°C
intrusion0: OK
intrusion1: ALARM
beep_enable: disabled

amdgpu-pci-0300
Adapter: PCI adapter
vddgfx: N/A
vddnb: N/A
edge: +44.0°C (crit = +80.0°C, hyst = +0.0°C)

Patches applied without any problem against Linus git tree.

Many thanks for this work!

Regards,
Holger

2020-01-17 14:16:59

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On 1/16/20 8:47 PM, Ken Moffat wrote:
> On Fri, 17 Jan 2020 at 03:58, Guenter Roeck <[email protected]> wrote:
>>
>> Hi Ken,
>>
>> SMBUSMASTER 0 is the CPU, so we have a match with the temperatures.
>>
> OK, thanks for that information.
>
>>
>> Both Vcore and Icore should be much less when idle, and higher under
>> load. The data from the Super-IO chip suggests that it is a Nuvoton
>> chip. Can you report its first voltage (in0) ? That should roughly
>> match Vcore.
>>
>> All other data looks ok.
>>
>> Thanks,
>> Guenter
>
> Hi Guenter,
>
> unfortunately I don't have any report of in0. I'm guessing I need some
> module(s) which did not seem to do anything useful in the past.
>
> All I have in the 'in' area is
> nct6779-isa-0290
> Adapter: ISA adapter
> Vcore: +0.30 V (min = +0.00 V, max = +1.74 V)
> in1: +0.00 V (min = +0.00 V, max = +0.00 V)
> AVCC: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> +3.3V: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> in4: +1.90 V (min = +0.00 V, max = +0.00 V) ALARM
> in5: +0.90 V (min = +0.00 V, max = +0.00 V) ALARM
> in6: +1.50 V (min = +0.00 V, max = +0.00 V) ALARM
> 3VSB: +3.47 V (min = +0.00 V, max = +0.00 V) ALARM
> Vbat: +3.26 V (min = +0.00 V, max = +0.00 V) ALARM
> in9: +0.00 V (min = +0.00 V, max = +0.00 V)
> in10: +0.32 V (min = +0.00 V, max = +0.00 V) ALARM
> in11: +1.06 V (min = +0.00 V, max = +0.00 V) ALARM
> in12: +1.70 V (min = +0.00 V, max = +0.00 V) ALARM
> in13: +0.94 V (min = +0.00 V, max = +0.00 V) ALARM
> in14: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
>
> and at that point Vcore was reported as 1.41V (system idle)
>

Looks like someone configured /etc/sensors3.conf on that system which tells it
to report in0 as Vcore. So there is a very clear mismatch. Can you report
the values seen when the system is under load ?

Thanks,
Guenter

2020-01-17 18:47:24

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On Fri, Jan 17, 2020 at 10:46:25AM +0100, Ondrej Čerman wrote:
> Dňa 16. 1. 2020 o 15:17 Guenter Roeck napísal(a):
> > This patch series implements various improvements for the k10temp driver.
> >
> > Patch 1/4 introduces the use of bit operations.
> >
> > Patch 2/4 converts the driver to use the devm_hwmon_device_register_with_info
> > API. This not only simplifies the code and reduces its size, it also
> > makes the code easier to maintain and enhance.
> >
> > Patch 3/4 adds support for reporting Core Complex Die (CCD) temperatures
> > on Ryzen 3 (Zen2) CPUs.
> >
> > Patch 4/4 adds support for reporting core and SoC current and voltage
> > information on Ryzen CPUs.
> >
> > With all patches in place, output on Ryzen 3900 CPUs looks as follows
> > (with the system under load).
> >
> > k10temp-pci-00c3
> > Adapter: PCI adapter
> > Vcore: +1.36 V
> > Vsoc: +1.18 V
> > Tdie: +86.8°C (high = +70.0°C)
> > Tctl: +86.8°C
> > Tccd1: +80.0°C
> > Tccd2: +81.8°C
> > Icore: +44.14 A
> > Isoc: +13.83 A
> >
> > The patch series has only been tested with Ryzen 3900 CPUs. Further test
> > coverage will be necessary before the changes can be applied to the Linux
> > kernel.
> >
> Hello everyone, I am the author of https://github.com/ocerman/zenpower/ .
>
> It is nice to see this merged.
>
> I just want to warn you that there have been reported issues with
> Threadripper CPUs to zenpower issue tracker. Also I think that no-one tested
> EPYC CPUs.
>
> Most of the stuff I was able to figure out by trial-and-error approach and
> unfortunately because I do not own any Threadripper CPU I was not able to
> test and fix reported problems.
>
Thanks a lot for the note. The key problem seems to be that Threadripper
doesn't report SoC current and voltage. Is that correct ? If so, that
should be easy to solve.

On a side note, drivers/gpu/drm/amd/include/asic_reg/thm/thm_10_0_offset.h
suggests that two more temperature sensors might be available at 0x0005995C
and 0x00059960 (DIE3_TEMP and SW_TEMP). Have you ever tried that ?

Thanks,
Guenter

2020-01-17 18:59:52

by Ken Moffat

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On Fri, 17 Jan 2020 at 14:14, Guenter Roeck <[email protected]> wrote:
>
> On 1/16/20 8:47 PM, Ken Moffat wrote:
> > unfortunately I don't have any report of in0. I'm guessing I need some
> > module(s) which did not seem to do anything useful in the past.
> >
> > All I have in the 'in' area is
> > nct6779-isa-0290
> > Adapter: ISA adapter
> > Vcore: +0.30 V (min = +0.00 V, max = +1.74 V)
> > in1: +0.00 V (min = +0.00 V, max = +0.00 V)
> > AVCC: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> > +3.3V: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM

>
> Looks like someone configured /etc/sensors3.conf on that system which tells it
> to report in0 as Vcore. So there is a very clear mismatch. Can you report
> the values seen when the system is under load ?
>
> Thanks,
> Guenter

I do have sensors3.conf from lm_sensors-3.4.0. Here are the figures
under load.

Vcore: +0.65 V (min = +0.00 V, max = +1.74 V)

k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.27 V
Vsoc: +0.89 V
Tdie: +46.2°C (high = +70.0°C)
Tctl: +46.2°C
Icore: +48.84 A
Isoc: +10.10 A

ĸen
--
Before the universe began, there was a sound. It went: "One, two, ONE,
two, three, four" [...] The cataclysmic power chord that followed was
the creation of time and space and matter and it does Not Fade Away.

2020-01-17 19:16:26

by Sebastian Reichel

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

Hi,

On Thu, Jan 16, 2020 at 06:17:56AM -0800, Guenter Roeck wrote:
> This patch series implements various improvements for the k10temp driver.
>
> Patch 1/4 introduces the use of bit operations.
>
> Patch 2/4 converts the driver to use the devm_hwmon_device_register_with_info
> API. This not only simplifies the code and reduces its size, it also
> makes the code easier to maintain and enhance.
>
> Patch 3/4 adds support for reporting Core Complex Die (CCD) temperatures
> on Ryzen 3 (Zen2) CPUs.
>
> Patch 4/4 adds support for reporting core and SoC current and voltage
> information on Ryzen CPUs.
>
> With all patches in place, output on Ryzen 3900 CPUs looks as follows
> (with the system under load).
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.36 V
> Vsoc: +1.18 V
> Tdie: +86.8?C (high = +70.0?C)
> Tctl: +86.8?C
> Tccd1: +80.0?C
> Tccd2: +81.8?C
> Icore: +44.14 A
> Isoc: +13.83 A
>
> The patch series has only been tested with Ryzen 3900 CPUs. Further test
> coverage will be necessary before the changes can be applied to the Linux
> kernel.

Looks ok on 3800X (idle):

$ lscpu | grep "Model name"
Model name: AMD Ryzen 7 3800X 8-Core Processor
$ sensors "k10temp-*"
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: 937.00 mV
Vsoc: 1.01 V
Tdie: +35.2?C (high = +70.0?C)
Tctl: +35.2?C
Tccd1: +35.8?C
Icore: 4.61 A
Isoc: 6.18 A

And after compiling the kernel with 32 threads for 1 minute:

$ sensors "k10temp-*"
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: 1.29 V
Vsoc: 1.01 V
Tdie: +77.1?C (high = +70.0?C)
Tctl: +77.1?C
Tccd1: +78.8?C
Icore: 39.53 A
Isoc: 6.18 A

Board Information during the idle check:

$ sudo dmidecode -s system-manufacturer
Gigabyte Technology Co., Ltd.
$ sudo dmidecode -s system-product-name
X570 AORUS ULTRA
$ sensors "it8792-*"
it8792-isa-0a60
Adapter: ISA adapter
in0: 1.79 V (min = +0.00 V, max = +2.78 V)
in1: 589.00 mV (min = +0.00 V, max = +2.78 V)
in2: 981.00 mV (min = +0.00 V, max = +2.78 V)
+3.3V: 1.68 V (min = +0.00 V, max = +2.78 V)
in4: 1.79 V (min = +0.00 V, max = +2.78 V)
in5: 1.18 V (min = +0.00 V, max = +2.78 V)
in6: 2.78 V (min = +0.00 V, max = +2.78 V) ALARM
3VSB: 1.68 V (min = +0.00 V, max = +2.78 V)
Vbat: 1.61 V
fan1: 0 RPM (min = 0 RPM)
fan2: 0 RPM (min = 0 RPM)
fan3: 0 RPM (min = 0 RPM)
temp1: +37.0?C (low = +127.0?C, high = +127.0?C) sensor = thermistor
temp3: +36.0?C (low = +127.0?C, high = +127.0?C) sensor = thermistor
intrusion0: ALARM

-- Sebastian


Attachments:
(No filename) (2.85 kB)
signature.asc (849.00 B)
Download all attachments

2020-01-17 22:50:04

by Ondrej Čerman

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements


Dňa 17. 1. 2020 o 19:46 Guenter Roeck napísal(a):
> On Fri, Jan 17, 2020 at 10:46:25AM +0100, Ondrej Čerman wrote:
>> Dňa 16. 1. 2020 o 15:17 Guenter Roeck napísal(a):
>>> This patch series implements various improvements for the k10temp driver.
>>>
>>> Patch 1/4 introduces the use of bit operations.
>>>
>>> Patch 2/4 converts the driver to use the devm_hwmon_device_register_with_info
>>> API. This not only simplifies the code and reduces its size, it also
>>> makes the code easier to maintain and enhance.
>>>
>>> Patch 3/4 adds support for reporting Core Complex Die (CCD) temperatures
>>> on Ryzen 3 (Zen2) CPUs.
>>>
>>> Patch 4/4 adds support for reporting core and SoC current and voltage
>>> information on Ryzen CPUs.
>>>
>>> With all patches in place, output on Ryzen 3900 CPUs looks as follows
>>> (with the system under load).
>>>
>>> k10temp-pci-00c3
>>> Adapter: PCI adapter
>>> Vcore: +1.36 V
>>> Vsoc: +1.18 V
>>> Tdie: +86.8°C (high = +70.0°C)
>>> Tctl: +86.8°C
>>> Tccd1: +80.0°C
>>> Tccd2: +81.8°C
>>> Icore: +44.14 A
>>> Isoc: +13.83 A
>>>
>>> The patch series has only been tested with Ryzen 3900 CPUs. Further test
>>> coverage will be necessary before the changes can be applied to the Linux
>>> kernel.
>>>
>> Hello everyone, I am the author of https://github.com/ocerman/zenpower/ .
>>
>> It is nice to see this merged.
>>
>> I just want to warn you that there have been reported issues with
>> Threadripper CPUs to zenpower issue tracker. Also I think that no-one tested
>> EPYC CPUs.
>>
>> Most of the stuff I was able to figure out by trial-and-error approach and
>> unfortunately because I do not own any Threadripper CPU I was not able to
>> test and fix reported problems.
>>
> Thanks a lot for the note. The key problem seems to be that Threadripper
> doesn't report SoC current and voltage. Is that correct ? If so, that
> should be easy to solve.

Hello,

I thought that initially, but I was wrong. It seems like that these
multi-node CPUs are reporting SOC and Core voltage/current data at
particular node. Look at this HWiNFO64 screenshot of 2990WX for
reference: https://i.imgur.com/yM9X5nd.jpg . They also may be using
different addresses and/or factors.

> On a side note, drivers/gpu/drm/amd/include/asic_reg/thm/thm_10_0_offset.h
> suggests that two more temperature sensors might be available at 0x0005995C
> and 0x00059960 (DIE3_TEMP and SW_TEMP). Have you ever tried that ?
>
> Thanks,
> Guenter

I was aware of 0005995c and I thought that it could be Tdie3 (that's why
I have included it in debug output, someone already shared that 3960X is
reporting data on that address). I think this one can be safely included.

I was not aware of the other address, I will try it.

Ondrej.

2020-01-18 09:18:44

by Brad Campbell

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On 16/1/20 10:17 pm, Guenter Roeck wrote:
> This patch series implements various improvements for the k10temp driver.
>

Looks good here. Identical motherboards (ASUS x370 Prime-Pro), different
CPUs.

3950x

k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.38 V
Vsoc: +1.08 V
Tdie: +69.1°C (high = +70.0°C)
Tctl: +69.1°C
Tccd1: +54.2°C
Tccd2: +57.0°C
Icore: +27.67 A
Isoc: +14.13 A

it8665-isa-0290
Adapter: ISA adapter
Vcore: +1.41 V (min = +0.83 V, max = +1.65 V)
in1: +2.51 V (min = +1.98 V, max = +2.73 V)
+12V: +11.98 V (min = +11.20 V, max = +12.40 V)
+5V: +5.01 V (min = +4.74 V, max = +5.61 V)
3VSB: +6.67 V (min = +2.83 V, max = +3.40 V)
Vbat: +6.58 V
+3.3V: +3.33 V
CPU Fan: 3409 RPM (min = 1500 RPM)
Back Fan: 0 RPM (min = 0 RPM)
MB CPU Temp: +56.0°C (low = +13.0°C, high = +88.0°C)
Ambient: +35.0°C (low = +13.0°C, high = +43.0°C) sensor = thermistor
PCH: +46.0°C (low = +18.0°C, high = +61.0°C) sensor = thermistor

1800x

k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.26 V
Vsoc: +0.91 V
Tdie: +36.0°C (high = +70.0°C)
Tctl: +56.0°C
Icore: +15.59 A
Isoc: +7.94 A

it8665-isa-0290
Adapter: ISA adapter
Vcore: +1.25 V (min = +0.83 V, max = +1.65 V)
in1: +2.48 V (min = +1.98 V, max = +2.73 V)
+12V: +11.98 V (min = +11.20 V, max = +12.40 V)
+5V: +4.96 V (min = +4.74 V, max = +5.61 V)
3VSB: +6.54 V (min = +2.83 V, max = +3.40 V)
Vbat: +6.37 V
+3.3V: +3.31 V
CPU Fan: 1171 RPM (min = 1500 RPM) ALARM
Back Fan: 0 RPM (min = 0 RPM)
MB CPU Temp: +36.0°C (low = +13.0°C, high = +88.0°C)
Ambient: +44.0°C (low = +13.0°C, high = +43.0°C) sensor = thermistor
PCH: +38.0°C (low = +18.0°C, high = +61.0°C) sensor = thermistor

Regards,
Brad

2020-01-18 17:15:41

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On 1/18/20 12:52 AM, Brad Campbell wrote:
> On 16/1/20 10:17 pm, Guenter Roeck wrote:
>> This patch series implements various improvements for the k10temp driver.
>>
>
> Looks good here. Identical motherboards (ASUS x370 Prime-Pro), different CPUs.
>
> 3950x
>
Interesting. I thought the 3950X needs a newer motherboard. Is that CPU as amazing
as everyone says it is ? And does it really need liquid cooling ?

Anyway, thanks a lot for testing!

Guenter

2020-01-19 02:00:27

by Brad Campbell

[permalink] [raw]
Subject: Re: [RFT PATCH 0/4] hwmon: k10temp driver improvements

On 19/1/20 1:14 am, Guenter Roeck wrote:
> On 1/18/20 12:52 AM, Brad Campbell wrote:
>> On 16/1/20 10:17 pm, Guenter Roeck wrote:
>>> This patch series implements various improvements for the k10temp
>>> driver.
>>>
>>
>> Looks good here. Identical motherboards (ASUS x370 Prime-Pro),
>> different CPUs.
>>
>> 3950x
>>
> Interesting. I thought the 3950X needs a newer motherboard.

This board has the 3950x listed on the compatibility matrix, so I took
the punt. I am running a beta BIOS with the 1.0.0.4 AGESA but it's
running in a stock production environment and has been stable. I don't
overclock or game, it's predominantly a VM host.

> Is that CPU as amazing as everyone says it is ?

It is fairly impressive and a significant update over the 1800x it
replaced. Kernel compiles are pretty quick :)

> And does it really need liquid cooling ?

No. I'm using a stock AMD Wraith Prism cooler in a 4U rack case.

It might reach higher boost clocks with better cooling, but under an
all-core load I still see > 4.1GHz across all cores.

Regards,
Brad