This patch series implements various improvements for the k10temp driver.
Patch 1/5 introduces the use of bit operations.
Patch 2/5 converts the driver to use the devm_hwmon_device_register_with_info
API. This not only simplifies the code and reduces its size, it also
makes the code easier to maintain and enhance.
Patch 3/5 adds support for reporting Core Complex Die (CCD) temperatures
on Ryzen 3 (Zen2) CPUs.
Patch 4/5 adds support for reporting core and SoC current and voltage
information on Ryzen CPUs.
Patch 5/5 removes the maximum temperature from Tdie for Ryzen CPUs.
It is inaccurate, misleading, and it just doesn't make sense to report
wrong information.
With all patches in place, output on Ryzen 3900X CPUs looks as follows
(with the system under load).
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.36 V
Vsoc: +1.18 V
Tdie: +86.8°C
Tctl: +86.8°C
Tccd1: +80.0°C
Tccd2: +81.8°C
Icore: +44.14 A
Isoc: +13.83 A
The voltage and current information is limited to Ryzen CPUs. Voltage
and current reporting on Threadripper and EPYC CPUs is different, and the
reported information is either incomplete or wrong. Exclude it for the time
being; it can always be added if/when more information becomes available.
Tested with the following Ryzen CPUs:
1300X A user with this CPU in the system reported somewhat unexpected
values for Vcore; it isn't entirely if at all clear why that is
the case. Overall this does not warrant holding up the series.
1600
1800X
2200G
2400G
3800X
3900X
3950X
v2: Added tested-by: tags as received.
Don't display voltage and current information for Threadripper and EPYC.
Stop displaying the fixed (and wrong) maximum temperature of 70 degrees C
for Tdie on model 17h/18h CPUs.
Convert driver to use devm_hwmon_device_register_with_info to simplify
the code and to reduce its size.
Old size (x86_64):
text data bss dec hex filename
8247 4488 64 12799 31ff drivers/hwmon/k10temp.o
New size:
text data bss dec hex filename
6778 2792 64 9634 25a2 drivers/hwmon/k10temp.o
Tested-by: Bernhard Gebetsberger <[email protected]>
Tested-by: Darren Salt <[email protected]>
Signed-off-by: Guenter Roeck <[email protected]>
---
v2: Added Tested-by: tags
drivers/hwmon/k10temp.c | 213 +++++++++++++++++++++-------------------
1 file changed, 112 insertions(+), 101 deletions(-)
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 8807d7da68db..c45f6498a59b 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -1,14 +1,15 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * k10temp.c - AMD Family 10h/11h/12h/14h/15h/16h processor hardware monitoring
+ * k10temp.c - AMD Family 10h/11h/12h/14h/15h/16h/17h
+ * processor hardware monitoring
*
* Copyright (c) 2009 Clemens Ladisch <[email protected]>
+ * Copyright (c) 2020 Guenter Roeck <[email protected]>
*/
#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -127,10 +128,10 @@ static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval)
F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
}
-static unsigned int get_raw_temp(struct k10temp_data *data)
+static long get_raw_temp(struct k10temp_data *data)
{
- unsigned int temp;
u32 regval;
+ long temp;
data->read_tempreg(data->pdev, ®val);
temp = (regval >> CUR_TEMP_SHIFT) * 125;
@@ -139,118 +140,108 @@ static unsigned int get_raw_temp(struct k10temp_data *data)
return temp;
}
-static ssize_t temp1_input_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct k10temp_data *data = dev_get_drvdata(dev);
- unsigned int temp = get_raw_temp(data);
-
- if (temp > data->temp_offset)
- temp -= data->temp_offset;
- else
- temp = 0;
-
- return sprintf(buf, "%u\n", temp);
-}
-
-static ssize_t temp2_input_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- struct k10temp_data *data = dev_get_drvdata(dev);
- unsigned int temp = get_raw_temp(data);
-
- return sprintf(buf, "%u\n", temp);
-}
-
-static ssize_t temp_label_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
-{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-
- return sprintf(buf, "%s\n", attr->index ? "Tctl" : "Tdie");
-}
+const char *k10temp_temp_label[] = {
+ "Tdie",
+ "Tctl",
+};
-static ssize_t temp1_max_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static int k10temp_read_labels(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **str)
{
- return sprintf(buf, "%d\n", 70 * 1000);
+ *str = k10temp_temp_label[channel];
+ return 0;
}
-static ssize_t temp_crit_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
+static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct k10temp_data *data = dev_get_drvdata(dev);
- int show_hyst = attr->index;
u32 regval;
- int value;
- data->read_htcreg(data->pdev, ®val);
- value = ((regval >> 16) & 0x7f) * 500 + 52000;
- if (show_hyst)
- value -= ((regval >> 24) & 0xf) * 500;
- return sprintf(buf, "%d\n", value);
+ switch (attr) {
+ case hwmon_temp_input:
+ switch (channel) {
+ case 0: /* Tdie */
+ *val = get_raw_temp(data) - data->temp_offset;
+ if (*val < 0)
+ *val = 0;
+ break;
+ case 1: /* Tctl */
+ *val = get_raw_temp(data);
+ if (*val < 0)
+ *val = 0;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ break;
+ case hwmon_temp_max:
+ *val = 70 * 1000;
+ break;
+ case hwmon_temp_crit:
+ data->read_htcreg(data->pdev, ®val);
+ *val = ((regval >> 16) & 0x7f) * 500 + 52000;
+ break;
+ case hwmon_temp_crit_hyst:
+ data->read_htcreg(data->pdev, ®val);
+ *val = (((regval >> 16) & 0x7f)
+ - ((regval >> 24) & 0xf)) * 500 + 52000;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
}
-static DEVICE_ATTR_RO(temp1_input);
-static DEVICE_ATTR_RO(temp1_max);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit, temp_crit, 0);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit_hyst, temp_crit, 1);
-
-static SENSOR_DEVICE_ATTR_RO(temp1_label, temp_label, 0);
-static DEVICE_ATTR_RO(temp2_input);
-static SENSOR_DEVICE_ATTR_RO(temp2_label, temp_label, 1);
-
-static umode_t k10temp_is_visible(struct kobject *kobj,
- struct attribute *attr, int index)
+static umode_t k10temp_is_visible(const void *_data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
{
- struct device *dev = container_of(kobj, struct device, kobj);
- struct k10temp_data *data = dev_get_drvdata(dev);
+ const struct k10temp_data *data = _data;
struct pci_dev *pdev = data->pdev;
u32 reg;
- switch (index) {
- case 0 ... 1: /* temp1_input, temp1_max */
- default:
- break;
- case 2 ... 3: /* temp1_crit, temp1_crit_hyst */
- if (!data->read_htcreg)
- return 0;
-
- pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES,
- ®);
- if (!(reg & NB_CAP_HTC))
- return 0;
-
- data->read_htcreg(data->pdev, ®);
- if (!(reg & HTC_ENABLE))
- return 0;
- break;
- case 4 ... 6: /* temp1_label, temp2_input, temp2_label */
- if (!data->show_tdie)
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ if (channel && !data->show_tdie)
+ return 0;
+ break;
+ case hwmon_temp_max:
+ if (channel)
+ return 0;
+ break;
+ case hwmon_temp_crit:
+ case hwmon_temp_crit_hyst:
+ if (channel || !data->read_htcreg)
+ return 0;
+
+ pci_read_config_dword(pdev,
+ REG_NORTHBRIDGE_CAPABILITIES,
+ ®);
+ if (!(reg & NB_CAP_HTC))
+ return 0;
+
+ data->read_htcreg(data->pdev, ®);
+ if (!(reg & HTC_ENABLE))
+ return 0;
+ break;
+ case hwmon_temp_label:
+ if (!data->show_tdie)
+ return 0;
+ break;
+ default:
return 0;
+ }
break;
+ default:
+ return 0;
}
- return attr->mode;
+ return 0444;
}
-static struct attribute *k10temp_attrs[] = {
- &dev_attr_temp1_input.attr,
- &dev_attr_temp1_max.attr,
- &sensor_dev_attr_temp1_crit.dev_attr.attr,
- &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
- &sensor_dev_attr_temp1_label.dev_attr.attr,
- &dev_attr_temp2_input.attr,
- &sensor_dev_attr_temp2_label.dev_attr.attr,
- NULL
-};
-
-static const struct attribute_group k10temp_group = {
- .attrs = k10temp_attrs,
- .is_visible = k10temp_is_visible,
-};
-__ATTRIBUTE_GROUPS(k10temp);
-
static bool has_erratum_319(struct pci_dev *pdev)
{
u32 pkg_type, reg_dram_cfg;
@@ -285,8 +276,27 @@ static bool has_erratum_319(struct pci_dev *pdev)
(boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2);
}
-static int k10temp_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static const struct hwmon_channel_info *k10temp_info[] = {
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_MAX |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL),
+ NULL
+};
+
+static const struct hwmon_ops k10temp_hwmon_ops = {
+ .is_visible = k10temp_is_visible,
+ .read = k10temp_read,
+ .read_string = k10temp_read_labels,
+};
+
+static const struct hwmon_chip_info k10temp_chip_info = {
+ .ops = &k10temp_hwmon_ops,
+ .info = k10temp_info,
+};
+
+static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int unreliable = has_erratum_319(pdev);
struct device *dev = &pdev->dev;
@@ -334,8 +344,9 @@ static int k10temp_probe(struct pci_dev *pdev,
}
}
- hwmon_dev = devm_hwmon_device_register_with_groups(dev, "k10temp", data,
- k10temp_groups);
+ hwmon_dev = devm_hwmon_device_register_with_info(dev, "k10temp", data,
+ &k10temp_chip_info,
+ NULL);
return PTR_ERR_OR_ZERO(hwmon_dev);
}
--
2.17.1
Zen2 reports reporting temperatures per CPU die (called Core Complex Dies,
or CCD, by AMD). Add support for it to the k10temp driver.
Tested-by: Bernhard Gebetsberger <[email protected]>
Tested-by: Darren Salt <[email protected]>
Signed-off-by: Guenter Roeck <[email protected]>
---
v2: Added Tested-by: tags
drivers/hwmon/k10temp.c | 79 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 78 insertions(+), 1 deletion(-)
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index c45f6498a59b..944ba8008bc4 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -5,6 +5,12 @@
*
* Copyright (c) 2009 Clemens Ladisch <[email protected]>
* Copyright (c) 2020 Guenter Roeck <[email protected]>
+ *
+ * Implementation notes:
+ * - CCD1 and CCD2 register address information as well as the calculation to
+ * convert raw register values is from https://github.com/ocerman/zenpower.
+ * The information is not confirmed from chip datasheets, but experiments
+ * suggest that it provides reasonable temperature values.
*/
#include <linux/bitops.h>
@@ -61,6 +67,8 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
/* F17h M01h Access througn SMN */
#define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET 0x00059800
+#define F17H_M70H_CCD1_TEMP 0x00059954
+#define F17H_M70H_CCD2_TEMP 0x00059958
#define CUR_TEMP_SHIFT 21
#define CUR_TEMP_RANGE_SEL_MASK BIT(19)
@@ -72,6 +80,8 @@ struct k10temp_data {
int temp_offset;
u32 temp_adjust_mask;
bool show_tdie;
+ bool show_tccd1;
+ bool show_tccd2;
};
struct tctl_offset {
@@ -143,6 +153,8 @@ static long get_raw_temp(struct k10temp_data *data)
const char *k10temp_temp_label[] = {
"Tdie",
"Tctl",
+ "Tccd1",
+ "Tccd2",
};
static int k10temp_read_labels(struct device *dev,
@@ -172,6 +184,16 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
if (*val < 0)
*val = 0;
break;
+ case 2: /* Tccd1 */
+ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ F17H_M70H_CCD1_TEMP, ®val);
+ *val = (regval & 0xfff) * 125 - 305000;
+ break;
+ case 3: /* Tccd2 */
+ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ F17H_M70H_CCD2_TEMP, ®val);
+ *val = (regval & 0xfff) * 125 - 305000;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -206,8 +228,24 @@ static umode_t k10temp_is_visible(const void *_data,
case hwmon_temp:
switch (attr) {
case hwmon_temp_input:
- if (channel && !data->show_tdie)
+ switch (channel) {
+ case 0: /* Tdie, or Tctl if we don't show it */
+ break;
+ case 1: /* Tctl */
+ if (!data->show_tdie)
+ return 0;
+ break;
+ case 2: /* Tccd1 */
+ if (!data->show_tccd1)
+ return 0;
+ break;
+ case 3: /* Tccd2 */
+ if (!data->show_tccd2)
+ return 0;
+ break;
+ default:
return 0;
+ }
break;
case hwmon_temp_max:
if (channel)
@@ -229,8 +267,24 @@ static umode_t k10temp_is_visible(const void *_data,
return 0;
break;
case hwmon_temp_label:
+ /* No labels if we don't show the die temperature */
if (!data->show_tdie)
return 0;
+ switch (channel) {
+ case 0: /* Tdie */
+ case 1: /* Tctl */
+ break;
+ case 2: /* Tccd1 */
+ if (!data->show_tccd1)
+ return 0;
+ break;
+ case 3: /* Tccd2 */
+ if (!data->show_tccd2)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
break;
default:
return 0;
@@ -281,6 +335,8 @@ static const struct hwmon_channel_info *k10temp_info[] = {
HWMON_T_INPUT | HWMON_T_MAX |
HWMON_T_CRIT | HWMON_T_CRIT_HYST |
HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_INPUT | HWMON_T_LABEL),
NULL
};
@@ -326,9 +382,30 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
data->read_htcreg = read_htcreg_nb_f15;
data->read_tempreg = read_tempreg_nb_f15;
} else if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
+ u32 regval;
+
data->temp_adjust_mask = CUR_TEMP_RANGE_SEL_MASK;
data->read_tempreg = read_tempreg_nb_f17;
data->show_tdie = true;
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x1: /* Zen */
+ case 0x8: /* Zen+ */
+ case 0x11: /* Zen APU */
+ case 0x18: /* Zen+ APU */
+ break;
+ case 0x71: /* Zen2 */
+ amd_smn_read(amd_pci_dev_to_node_id(pdev),
+ F17H_M70H_CCD1_TEMP, ®val);
+ if (regval & 0xfff)
+ data->show_tccd1 = true;
+
+ amd_smn_read(amd_pci_dev_to_node_id(pdev),
+ F17H_M70H_CCD2_TEMP, ®val);
+ if (regval & 0xfff)
+ data->show_tccd2 = true;
+ break;
+ }
} else {
data->read_htcreg = read_htcreg_pci;
data->read_tempreg = read_tempreg_pci;
--
2.17.1
Ryzen CPUs report core and SoC voltages and currents. Add support
for it to the k10temp driver.
For the time being, only report voltages and currents for Ryzen
CPUs. Threadripper and EPYC appear to use a different mechanism.
Tested-by: Bernhard Gebetsberger <[email protected]>
Tested-by: Darren Salt <[email protected]>
Signed-off-by: Guenter Roeck <[email protected]>
---
v2: Added Tested-by: tags.
Don't try to report voltage and current information on Threadripper
and EPYC CPUs.
drivers/hwmon/k10temp.c | 126 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 123 insertions(+), 3 deletions(-)
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 944ba8008bc4..a4313b662a3a 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -11,6 +11,13 @@
* convert raw register values is from https://github.com/ocerman/zenpower.
* The information is not confirmed from chip datasheets, but experiments
* suggest that it provides reasonable temperature values.
+ * - Register addresses to read chip voltage and current is also from
+ * https://github.com/ocerman/zenpower, and not confirmed from chip
+ * datasheets. Experiments suggest that reported current and voltage
+ * information is reasonable.
+ * - It is unknown if the mechanism to read CCD1/CCD2 temperature as well as
+ * current and voltage information works on higher-end Ryzen CPUs, or if
+ * additional information is available on those CPUs.
*/
#include <linux/bitops.h>
@@ -70,6 +77,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
#define F17H_M70H_CCD1_TEMP 0x00059954
#define F17H_M70H_CCD2_TEMP 0x00059958
+#define F17H_M01H_SVI 0x0005A000
+#define F17H_M01H_SVI_TEL_PLANE0 (F17H_M01H_SVI + 0xc)
+#define F17H_M01H_SVI_TEL_PLANE1 (F17H_M01H_SVI + 0x10)
+
#define CUR_TEMP_SHIFT 21
#define CUR_TEMP_RANGE_SEL_MASK BIT(19)
@@ -82,6 +93,9 @@ struct k10temp_data {
bool show_tdie;
bool show_tccd1;
bool show_tccd2;
+ u32 svi_addr[2];
+ bool show_current;
+ int cfactor[2];
};
struct tctl_offset {
@@ -99,6 +113,16 @@ static const struct tctl_offset tctl_offset_table[] = {
{ 0x17, "AMD Ryzen Threadripper 29", 27000 }, /* 29{20,50,70,90}[W]X */
};
+static bool is_threadripper(void)
+{
+ return strstr(boot_cpu_data.x86_model_id, "Threadripper");
+}
+
+static bool is_epyc(void)
+{
+ return strstr(boot_cpu_data.x86_model_id, "EPYC");
+}
+
static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
{
pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
@@ -157,16 +181,76 @@ const char *k10temp_temp_label[] = {
"Tccd2",
};
+const char *k10temp_in_label[] = {
+ "Vcore",
+ "Vsoc",
+};
+
+const char *k10temp_curr_label[] = {
+ "Icore",
+ "Isoc",
+};
+
static int k10temp_read_labels(struct device *dev,
enum hwmon_sensor_types type,
u32 attr, int channel, const char **str)
{
- *str = k10temp_temp_label[channel];
+ switch (type) {
+ case hwmon_temp:
+ *str = k10temp_temp_label[channel];
+ break;
+ case hwmon_in:
+ *str = k10temp_in_label[channel];
+ break;
+ case hwmon_curr:
+ *str = k10temp_curr_label[channel];
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
return 0;
}
-static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
- u32 attr, int channel, long *val)
+static int k10temp_read_curr(struct device *dev, u32 attr, int channel,
+ long *val)
+{
+ struct k10temp_data *data = dev_get_drvdata(dev);
+ u32 regval;
+
+ switch (attr) {
+ case hwmon_curr_input:
+ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ data->svi_addr[channel], ®val);
+ *val = DIV_ROUND_CLOSEST(data->cfactor[channel] *
+ (regval & 0xff),
+ 1000);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int k10temp_read_in(struct device *dev, u32 attr, int channel, long *val)
+{
+ struct k10temp_data *data = dev_get_drvdata(dev);
+ u32 regval;
+
+ switch (attr) {
+ case hwmon_in_input:
+ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ data->svi_addr[channel], ®val);
+ regval = (regval >> 16) & 0xff;
+ *val = DIV_ROUND_CLOSEST(155000 - regval * 625, 100);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
+ long *val)
{
struct k10temp_data *data = dev_get_drvdata(dev);
u32 regval;
@@ -216,6 +300,21 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
return 0;
}
+static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ switch (type) {
+ case hwmon_temp:
+ return k10temp_read_temp(dev, attr, channel, val);
+ case hwmon_in:
+ return k10temp_read_in(dev, attr, channel, val);
+ case hwmon_curr:
+ return k10temp_read_curr(dev, attr, channel, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static umode_t k10temp_is_visible(const void *_data,
enum hwmon_sensor_types type,
u32 attr, int channel)
@@ -290,6 +389,11 @@ static umode_t k10temp_is_visible(const void *_data,
return 0;
}
break;
+ case hwmon_in:
+ case hwmon_curr:
+ if (!data->show_current)
+ return 0;
+ break;
default:
return 0;
}
@@ -338,6 +442,12 @@ static const struct hwmon_channel_info *k10temp_info[] = {
HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_INPUT | HWMON_T_LABEL),
+ HWMON_CHANNEL_INFO(in,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL),
+ HWMON_CHANNEL_INFO(curr,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL),
NULL
};
@@ -393,8 +503,18 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
case 0x8: /* Zen+ */
case 0x11: /* Zen APU */
case 0x18: /* Zen+ APU */
+ data->show_current = !is_threadripper() && !is_epyc();
+ data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE0;
+ data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE1;
+ data->cfactor[0] = 1039211; /* core */
+ data->cfactor[1] = 360772; /* SoC */
break;
case 0x71: /* Zen2 */
+ data->show_current = !is_threadripper() && !is_epyc();
+ data->cfactor[0] = 658823; /* core */
+ data->cfactor[1] = 294300; /* SoC */
+ data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE1;
+ data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE0;
amd_smn_read(amd_pci_dev_to_node_id(pdev),
F17H_M70H_CCD1_TEMP, ®val);
if (regval & 0xfff)
--
2.17.1
The maximum Tdie or Tctl is not published for Ryzen CPUs. What is
known, however, is that the traditional value of 70 degrees C is no
longer correct. Displaying it is meaningless, confusing, and wrong.
Stop doing it.
Signed-off-by: Guenter Roeck <[email protected]>
---
v2: Added patch.
drivers/hwmon/k10temp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index a4313b662a3a..cdfebe435003 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -347,7 +347,7 @@ static umode_t k10temp_is_visible(const void *_data,
}
break;
case hwmon_temp_max:
- if (channel)
+ if (channel || data->show_tdie)
return 0;
break;
case hwmon_temp_crit:
--
2.17.1
On Sat, 18 Jan 2020 at 17:26, Guenter Roeck <[email protected]> wrote:
>
> This patch series implements various improvements for the k10temp driver.
>
> Patch 1/5 introduces the use of bit operations.
>
> Patch 2/5 converts the driver to use the devm_hwmon_device_register_with_info
> API. This not only simplifies the code and reduces its size, it also
> makes the code easier to maintain and enhance.
>
> Patch 3/5 adds support for reporting Core Complex Die (CCD) temperatures
> on Ryzen 3 (Zen2) CPUs.
>
> Patch 4/5 adds support for reporting core and SoC current and voltage
> information on Ryzen CPUs.
>
> Patch 5/5 removes the maximum temperature from Tdie for Ryzen CPUs.
> It is inaccurate, misleading, and it just doesn't make sense to report
> wrong information.
>
> With all patches in place, output on Ryzen 3900X CPUs looks as follows
> (with the system under load).
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.36 V
> Vsoc: +1.18 V
> Tdie: +86.8°C
> Tctl: +86.8°C
> Tccd1: +80.0°C
> Tccd2: +81.8°C
> Icore: +44.14 A
> Isoc: +13.83 A
>
> The voltage and current information is limited to Ryzen CPUs. Voltage
> and current reporting on Threadripper and EPYC CPUs is different, and the
> reported information is either incomplete or wrong. Exclude it for the time
> being; it can always be added if/when more information becomes available.
>
> Tested with the following Ryzen CPUs:
> 1300X A user with this CPU in the system reported somewhat unexpected
> values for Vcore; it isn't entirely if at all clear why that is
> the case. Overall this does not warrant holding up the series.
As the owner of that machine, very much agreed.
> 1600
> 1800X
> 2200G
> 2400G
> 3800X
> 3900X
> 3950X
>
I also had sensible results for v1 on 2500U and 3400G
> v2: Added tested-by: tags as received.
> Don't display voltage and current information for Threadripper and EPYC.
> Stop displaying the fixed (and wrong) maximum temperature of 70 degrees C
> for Tdie on model 17h/18h CPUs.
For v2 on my 2500U, system idle and then under load -
--- k10temp-idle 2020-01-19 00:16:18.812002121 +0000
+++ k10temp-load 2020-01-19 00:22:05.595470877 +0000
@@ -1,15 +1,15 @@
k10temp-pci-00c3
Adapter: PCI adapter
-Vcore: +0.98 V
+Vcore: +1.15 V
Vsoc: +0.93 V
-Tdie: +38.2°C
-Tctl: +38.2°C
-Icore: +10.39 A
-Isoc: +6.49 A
+Tdie: +76.2°C
+Tctl: +76.2°C
+Icore: +51.96 A
+Isoc: +7.58 A
amdgpu-pci-0300
Adapter: PCI adapter
vddgfx: N/A
vddnb: N/A
-edge: +38.0°C (crit = +80.0°C, hyst = +0.0°C)
+edge: +76.0°C (crit = +80.0°C, hyst = +0.0°C)
I'll ony test v2 on the 3400G if you think the results would add something.
ĸen
--
If a man stands before a mirror and sees in it his reflection, what
he sees is not a true reproduction, but a picture of himself when he
was a younger man. -- de Selby
On 1/18/20 4:33 PM, Ken Moffat wrote:
> On Sat, 18 Jan 2020 at 17:26, Guenter Roeck <[email protected]> wrote:
>>
>> This patch series implements various improvements for the k10temp driver.
>>
>> Patch 1/5 introduces the use of bit operations.
>>
>> Patch 2/5 converts the driver to use the devm_hwmon_device_register_with_info
>> API. This not only simplifies the code and reduces its size, it also
>> makes the code easier to maintain and enhance.
>>
>> Patch 3/5 adds support for reporting Core Complex Die (CCD) temperatures
>> on Ryzen 3 (Zen2) CPUs.
>>
>> Patch 4/5 adds support for reporting core and SoC current and voltage
>> information on Ryzen CPUs.
>>
>> Patch 5/5 removes the maximum temperature from Tdie for Ryzen CPUs.
>> It is inaccurate, misleading, and it just doesn't make sense to report
>> wrong information.
>>
>> With all patches in place, output on Ryzen 3900X CPUs looks as follows
>> (with the system under load).
>>
>> k10temp-pci-00c3
>> Adapter: PCI adapter
>> Vcore: +1.36 V
>> Vsoc: +1.18 V
>> Tdie: +86.8°C
>> Tctl: +86.8°C
>> Tccd1: +80.0°C
>> Tccd2: +81.8°C
>> Icore: +44.14 A
>> Isoc: +13.83 A
>>
>> The voltage and current information is limited to Ryzen CPUs. Voltage
>> and current reporting on Threadripper and EPYC CPUs is different, and the
>> reported information is either incomplete or wrong. Exclude it for the time
>> being; it can always be added if/when more information becomes available.
>>
>> Tested with the following Ryzen CPUs:
>> 1300X A user with this CPU in the system reported somewhat unexpected
>> values for Vcore; it isn't entirely if at all clear why that is
>> the case. Overall this does not warrant holding up the series.
>
> As the owner of that machine, very much agreed.
> >> 1600
>> 1800X
>> 2200G
>> 2400G
>> 3800X
>> 3900X
>> 3950X
>>
>
> I also had sensible results for v1 on 2500U and 3400G
>
Sorry, I somehow missed that.
>> v2: Added tested-by: tags as received.
>> Don't display voltage and current information for Threadripper and EPYC.
>> Stop displaying the fixed (and wrong) maximum temperature of 70 degrees C
>> for Tdie on model 17h/18h CPUs.
>
> For v2 on my 2500U, system idle and then under load -
>
> --- k10temp-idle 2020-01-19 00:16:18.812002121 +0000
> +++ k10temp-load 2020-01-19 00:22:05.595470877 +0000
> @@ -1,15 +1,15 @@
> k10temp-pci-00c3
> Adapter: PCI adapter
> -Vcore: +0.98 V
> +Vcore: +1.15 V
> Vsoc: +0.93 V
> -Tdie: +38.2°C
> -Tctl: +38.2°C
> -Icore: +10.39 A
> -Isoc: +6.49 A
> +Tdie: +76.2°C
> +Tctl: +76.2°C
> +Icore: +51.96 A
> +Isoc: +7.58 A
>
> amdgpu-pci-0300
> Adapter: PCI adapter
> vddgfx: N/A
> vddnb: N/A
> -edge: +38.0°C (crit = +80.0°C, hyst = +0.0°C)
> +edge: +76.0°C (crit = +80.0°C, hyst = +0.0°C)
>
> I'll ony test v2 on the 3400G if you think the results would add something.
>
Thanks a lot for the additional testing! I don't think we need another
test on 3400G; after all, the actual measurement code didn't change.
Everyone: I'll be happy to add Tested-by: tags with your name and e-mail
address to the series, but you'll have to send it to me. I appreciate
all your testing and would like to acknowledge it, but I can not add
Tested-by: tags (or any other tags, for that matter) on my own.
Thanks,
Guenter
On Sun, 19 Jan 2020 at 00:49, Guenter Roeck <[email protected]> wrote:
>
> On 1/18/20 4:33 PM, Ken Moffat wrote:
> > On Sat, 18 Jan 2020 at 17:26, Guenter Roeck <[email protected]> wrote:
> >>
> >> This patch series implements various improvements for the k10temp driver.
> >>
> >> Patch 1/5 introduces the use of bit operations.
> >>
> >> Patch 2/5 converts the driver to use the devm_hwmon_device_register_with_info
> >> API. This not only simplifies the code and reduces its size, it also
> >> makes the code easier to maintain and enhance.
> >>
> >> Patch 3/5 adds support for reporting Core Complex Die (CCD) temperatures
> >> on Ryzen 3 (Zen2) CPUs.
> >>
> >> Patch 4/5 adds support for reporting core and SoC current and voltage
> >> information on Ryzen CPUs.
> >>
> >> Patch 5/5 removes the maximum temperature from Tdie for Ryzen CPUs.
> >> It is inaccurate, misleading, and it just doesn't make sense to report
> >> wrong information.
> >>
> >> With all patches in place, output on Ryzen 3900X CPUs looks as follows
> >> (with the system under load).
> >>
> >> k10temp-pci-00c3
> >> Adapter: PCI adapter
> >> Vcore: +1.36 V
> >> Vsoc: +1.18 V
> >> Tdie: +86.8°C
> >> Tctl: +86.8°C
> >> Tccd1: +80.0°C
> >> Tccd2: +81.8°C
> >> Icore: +44.14 A
> >> Isoc: +13.83 A
> >>
> >> The voltage and current information is limited to Ryzen CPUs. Voltage
> >> and current reporting on Threadripper and EPYC CPUs is different, and the
> >> reported information is either incomplete or wrong. Exclude it for the time
> >> being; it can always be added if/when more information becomes available.
> >>
> >> Tested with the following Ryzen CPUs:
> >> 1300X A user with this CPU in the system reported somewhat unexpected
> >> values for Vcore; it isn't entirely if at all clear why that is
> >> the case. Overall this does not warrant holding up the series.
> >
> > As the owner of that machine, very much agreed.
> > >> 1600
> >> 1800X
> >> 2200G
> >> 2400G
> >> 3800X
> >> 3900X
> >> 3950X
> >>
> >
> > I also had sensible results for v1 on 2500U and 3400G
> >
> Sorry, I somehow missed that.
>
> >> v2: Added tested-by: tags as received.
> >> Don't display voltage and current information for Threadripper and EPYC.
> >> Stop displaying the fixed (and wrong) maximum temperature of 70 degrees C
> >> for Tdie on model 17h/18h CPUs.
> >
> > For v2 on my 2500U, system idle and then under load -
> >
> > --- k10temp-idle 2020-01-19 00:16:18.812002121 +0000
> > +++ k10temp-load 2020-01-19 00:22:05.595470877 +0000
> > @@ -1,15 +1,15 @@
> > k10temp-pci-00c3
> > Adapter: PCI adapter
> > -Vcore: +0.98 V
> > +Vcore: +1.15 V
> > Vsoc: +0.93 V
> > -Tdie: +38.2°C
> > -Tctl: +38.2°C
> > -Icore: +10.39 A
> > -Isoc: +6.49 A
> > +Tdie: +76.2°C
> > +Tctl: +76.2°C
> > +Icore: +51.96 A
> > +Isoc: +7.58 A
> >
> > amdgpu-pci-0300
> > Adapter: PCI adapter
> > vddgfx: N/A
> > vddnb: N/A
> > -edge: +38.0°C (crit = +80.0°C, hyst = +0.0°C)
> > +edge: +76.0°C (crit = +80.0°C, hyst = +0.0°C)
> >
> > I'll ony test v2 on the 3400G if you think the results would add something.
> >
>
> Thanks a lot for the additional testing! I don't think we need another
> test on 3400G; after all, the actual measurement code didn't change.
>
> Everyone: I'll be happy to add Tested-by: tags with your name and e-mail
> address to the series, but you'll have to send it to me. I appreciate
> all your testing and would like to acknowledge it, but I can not add
> Tested-by: tags (or any other tags, for that matter) on my own.
>
> Thanks,
> Guenter
For the little it is worth:
Tested-by Ken Moffat <[email protected]>
On 19/1/20 8:48 am, Guenter Roeck wrote:
> Everyone: I'll be happy to add Tested-by: tags with your name and e-mail
> address to the series, but you'll have to send it to me. I appreciate
> all your testing and would like to acknowledge it, but I can not add
> Tested-by: tags (or any other tags, for that matter) on my own.
>
> Thanks,
> Guenter
>
Tested-by: Brad Campbell <[email protected]>
In article <[email protected]> (earth.lists.linux-kernel) you wrote:
> This patch series implements various improvements for the k10temp driver.
...
> The voltage and current information is limited to Ryzen CPUs. Voltage
> and current reporting on Threadripper and EPYC CPUs is different, and the
> reported information is either incomplete or wrong. Exclude it for the time
> being; it can always be added if/when more information becomes available.
> Tested with the following Ryzen CPUs:
Tested-By: Jonathan McDowell <[email protected]>
Tested on a Ryzen 7 2700 (patched on top of 5.4.13):
| k10temp-pci-00c3
| Adapter: PCI adapter
| Vcore: +0.80 V
| Vsoc: +0.81 V
| Tdie: +37.0?C
| Tctl: +37.0?C
| Icore: +8.31 A
| Isoc: +6.86 A
Like the 1300X case I see a discrepancy compared to what the nct6779
driver says Vcore is:
| nct6779-isa-0290
| Adapter: ISA adapter
| Vcore: +0.33 V (min = +0.00 V, max = +1.74 V)
| in1: +0.32 V (min = +0.00 V, max = +0.00 V) ALARM
| AVCC: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
| +3.3V: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
| in4: +1.88 V (min = +0.00 V, max = +0.00 V) ALARM
| in5: +0.82 V (min = +0.00 V, max = +0.00 V) ALARM
| in6: +0.30 V (min = +0.00 V, max = +0.00 V) ALARM
| 3VSB: +3.42 V (min = +0.00 V, max = +0.00 V) ALARM
| Vbat: +3.25 V (min = +0.00 V, max = +0.00 V) ALARM
| in9: +0.00 V (min = +0.00 V, max = +0.00 V)
| in10: +0.22 V (min = +0.00 V, max = +0.00 V) ALARM
| in11: +1.06 V (min = +0.00 V, max = +0.00 V) ALARM
| in12: +1.70 V (min = +0.00 V, max = +0.00 V) ALARM
| in13: +1.04 V (min = +0.00 V, max = +0.00 V) ALARM
| in14: +1.79 V (min = +0.00 V, max = +0.00 V) ALARM
| fan1: 0 RPM (min = 0 RPM)
| fan2: 1708 RPM (min = 0 RPM)
| fan3: 0 RPM (min = 0 RPM)
| fan4: 0 RPM (min = 0 RPM)
| fan5: 0 RPM (min = 0 RPM)
| SYSTIN: +33.0?C (high = +0.0?C, hyst = +0.0?C) ALARM
| sensor = thermistor
| CPUTIN: -62.5?C (high = +80.0?C, hyst = +75.0?C)
| sensor = thermistor
| AUXTIN0: +79.0?C sensor = thermistor
| AUXTIN1: +96.0?C sensor = thermistor
| AUXTIN2: +23.0?C sensor = thermistor
| AUXTIN3: -22.0?C sensor = thermistor
| SMBUSMASTER 0: +39.0?C
| PCH_CHIP_CPU_MAX_TEMP: +0.0?C
| PCH_CHIP_TEMP: +0.0?C
| PCH_CPU_TEMP: +0.0?C
| intrusion0: ALARM
| intrusion1: ALARM
| beep_enable: disabled
I suspect the nct6779 is not reporting correctly (or needs some
configuration) here, as I see that's what Ken is using with his 1300X as
well.
(ASRock B450M Pro4 motherboard, fwiw.)
J.
--
Verily go thee not unto the internet for they will tell you both yea
and nay
On Sat, 18 Jan 2020, Guenter Roeck wrote:
> This patch series implements various improvements for the k10temp driver.
>
> Patch 1/5 introduces the use of bit operations.
>
> Patch 2/5 converts the driver to use the devm_hwmon_device_register_with_info
> API. This not only simplifies the code and reduces its size, it also
> makes the code easier to maintain and enhance.
>
> Patch 3/5 adds support for reporting Core Complex Die (CCD) temperatures
> on Ryzen 3 (Zen2) CPUs.
>
> Patch 4/5 adds support for reporting core and SoC current and voltage
> information on Ryzen CPUs.
>
> Patch 5/5 removes the maximum temperature from Tdie for Ryzen CPUs.
> It is inaccurate, misleading, and it just doesn't make sense to report
> wrong information.
>
> With all patches in place, output on Ryzen 3900X CPUs looks as follows
> (with the system under load).
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.36 V
> Vsoc: +1.18 V
> Tdie: +86.8?C
> Tctl: +86.8?C
> Tccd1: +80.0?C
> Tccd2: +81.8?C
> Icore: +44.14 A
> Isoc: +13.83 A
>
> The voltage and current information is limited to Ryzen CPUs. Voltage
> and current reporting on Threadripper and EPYC CPUs is different, and the
> reported information is either incomplete or wrong. Exclude it for the time
> being; it can always be added if/when more information becomes available.
>
> Tested with the following Ryzen CPUs:
> 1300X A user with this CPU in the system reported somewhat unexpected
> values for Vcore; it isn't entirely if at all clear why that is
> the case. Overall this does not warrant holding up the series.
> 1600
> 1800X
> 2200G
> 2400G
> 3800X
> 3900X
> 3950X
>
> v2: Added tested-by: tags as received.
> Don't display voltage and current information for Threadripper and EPYC.
> Stop displaying the fixed (and wrong) maximum temperature of 70 degrees C
> for Tdie on model 17h/18h CPUs.
>
Just tested this on a 2400G. Here idle values:
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +0.77 V
Vsoc: +1.11 V
Tdie: +45.0?C
Tctl: +45.0?C
Icore: +10.39 A
Isoc: +2.89 A
nvme-pci-0100
Adapter: PCI adapter
Composite: +43.9?C (low = -273.1?C, high = +80.8?C)
(crit = +80.8?C)
Sensor 1: +43.9?C (low = -273.1?C, high = +65261.8?C)
Sensor 2: +48.9?C (low = -273.1?C, high = +65261.8?C)
nct6793-isa-0290
Adapter: ISA adapter
in0: +0.35 V (min = +0.00 V, max = +1.74 V)
in1: +1.85 V (min = +0.00 V, max = +0.00 V) ALARM
in2: +3.41 V (min = +0.00 V, max = +0.00 V) ALARM
in3: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
in4: +0.26 V (min = +0.00 V, max = +0.00 V) ALARM
in5: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
in6: +0.66 V (min = +0.00 V, max = +0.00 V) ALARM
in7: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
in8: +3.26 V (min = +0.00 V, max = +0.00 V) ALARM
in9: +1.83 V (min = +0.00 V, max = +0.00 V) ALARM
in10: +0.19 V (min = +0.00 V, max = +0.00 V) ALARM
in11: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
in12: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
in13: +1.72 V (min = +0.00 V, max = +0.00 V) ALARM
in14: +0.21 V (min = +0.00 V, max = +0.00 V) ALARM
fan1: 0 RPM (min = 0 RPM)
fan2: 323 RPM (min = 0 RPM)
fan3: 0 RPM (min = 0 RPM)
fan4: 0 RPM (min = 0 RPM)
fan5: 0 RPM (min = 0 RPM)
SYSTIN: +112.0?C (high = +0.0?C, hyst = +0.0?C) sensor = thermistor
CPUTIN: +60.0?C (high = +80.0?C, hyst = +75.0?C) sensor = thermistor
AUXTIN0: +46.0?C (high = +0.0?C, hyst = +0.0?C) ALARM sensor = thermistor
AUXTIN1: +106.0?C sensor = thermistor
AUXTIN2: +105.0?C sensor = thermistor
AUXTIN3: +102.0?C sensor = thermistor
SMBUSMASTER 0: +45.0?C
PCH_CHIP_CPU_MAX_TEMP: +0.0?C
PCH_CHIP_TEMP: +0.0?C
PCH_CPU_TEMP: +0.0?C
intrusion0: OK
intrusion1: ALARM
beep_enable: disabled
amdgpu-pci-0300
Adapter: PCI adapter
vddgfx: N/A
vddnb: N/A
edge: +45.0?C (crit = +80.0?C, hyst = +0.0?C)
And here with some high load:
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.32 V
Vsoc: +1.11 V
Tdie: +77.1?C
Tctl: +77.1?C
Icore: +85.22 A
Isoc: +3.61 A
nvme-pci-0100
Adapter: PCI adapter
Composite: +42.9?C (low = -273.1?C, high = +80.8?C)
(crit = +80.8?C)
Sensor 1: +42.9?C (low = -273.1?C, high = +65261.8?C)
Sensor 2: +45.9?C (low = -273.1?C, high = +65261.8?C)
nct6793-isa-0290
Adapter: ISA adapter
in0: +0.68 V (min = +0.00 V, max = +1.74 V)
in1: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
in2: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
in3: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
in4: +0.26 V (min = +0.00 V, max = +0.00 V) ALARM
in5: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
in6: +0.66 V (min = +0.00 V, max = +0.00 V) ALARM
in7: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
in8: +3.26 V (min = +0.00 V, max = +0.00 V) ALARM
in9: +1.83 V (min = +0.00 V, max = +0.00 V) ALARM
in10: +0.19 V (min = +0.00 V, max = +0.00 V) ALARM
in11: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
in12: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
in13: +1.72 V (min = +0.00 V, max = +0.00 V) ALARM
in14: +0.20 V (min = +0.00 V, max = +0.00 V) ALARM
fan1: 0 RPM (min = 0 RPM)
fan2: 1931 RPM (min = 0 RPM)
fan3: 0 RPM (min = 0 RPM)
fan4: 0 RPM (min = 0 RPM)
fan5: 0 RPM (min = 0 RPM)
SYSTIN: +113.0?C (high = +0.0?C, hyst = +0.0?C) sensor = thermistor
CPUTIN: +64.5?C (high = +80.0?C, hyst = +75.0?C) sensor = thermistor
AUXTIN0: +45.0?C (high = +0.0?C, hyst = +0.0?C) ALARM sensor = thermistor
AUXTIN1: +107.0?C sensor = thermistor
AUXTIN2: +105.0?C sensor = thermistor
AUXTIN3: +102.0?C sensor = thermistor
SMBUSMASTER 0: +77.0?C
PCH_CHIP_CPU_MAX_TEMP: +0.0?C
PCH_CHIP_TEMP: +0.0?C
PCH_CPU_TEMP: +0.0?C
intrusion0: OK
intrusion1: ALARM
beep_enable: disabled
amdgpu-pci-0300
Adapter: PCI adapter
vddgfx: N/A
vddnb: N/A
edge: +77.0?C (crit = +80.0?C, hyst = +0.0?C)
Have also tried this on a EPYC 7302. Before the patch:
k10temp-pci-00c3
Adapter: PCI adapter
Tdie: +28.1?C (high = +70.0?C)
Tctl: +28.1?C
and after:
k10temp-pci-00c3
Adapter: PCI adapter
Tdie: +28.2?C
Tctl: +28.2?C
No extra values shown, but I think this is expected.
Tested-by Holger Kiehl <[email protected]>
Holger
On 1/19/20 2:18 AM, Jonathan McDowell wrote:
>
> In article <[email protected]> (earth.lists.linux-kernel) you wrote:
>> This patch series implements various improvements for the k10temp driver.
> ...
>> The voltage and current information is limited to Ryzen CPUs. Voltage
>> and current reporting on Threadripper and EPYC CPUs is different, and the
>> reported information is either incomplete or wrong. Exclude it for the time
>> being; it can always be added if/when more information becomes available.
>
>> Tested with the following Ryzen CPUs:
>
> Tested-By: Jonathan McDowell <[email protected]>
>
Thanks!
> Tested on a Ryzen 7 2700 (patched on top of 5.4.13):
>
> | k10temp-pci-00c3
> | Adapter: PCI adapter
> | Vcore: +0.80 V
> | Vsoc: +0.81 V
> | Tdie: +37.0°C
> | Tctl: +37.0°C
> | Icore: +8.31 A
> | Isoc: +6.86 A
>
> Like the 1300X case I see a discrepancy compared to what the nct6779
> driver says Vcore is:
>
> | nct6779-isa-0290
> | Adapter: ISA adapter
> | Vcore: +0.33 V (min = +0.00 V, max = +1.74 V)
I see that on all of my boards as well (3900X, different boards and board vendors),
with temperatures reported by the Super-IO chip sometimes as low as 0.18V (!).
Yet, there is a clear correlation of that voltage with CPU load.
I suspect the measurement by the Super-IO chip is a different voltage.
I don't think there is anything we can do about that without access to more
information.
> | in1: +0.32 V (min = +0.00 V, max = +0.00 V) ALARM
> | AVCC: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> | +3.3V: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> | in4: +1.88 V (min = +0.00 V, max = +0.00 V) ALARM
> | in5: +0.82 V (min = +0.00 V, max = +0.00 V) ALARM
> | in6: +0.30 V (min = +0.00 V, max = +0.00 V) ALARM
> | 3VSB: +3.42 V (min = +0.00 V, max = +0.00 V) ALARM
> | Vbat: +3.25 V (min = +0.00 V, max = +0.00 V) ALARM
> | in9: +0.00 V (min = +0.00 V, max = +0.00 V)
> | in10: +0.22 V (min = +0.00 V, max = +0.00 V) ALARM
> | in11: +1.06 V (min = +0.00 V, max = +0.00 V) ALARM
> | in12: +1.70 V (min = +0.00 V, max = +0.00 V) ALARM
> | in13: +1.04 V (min = +0.00 V, max = +0.00 V) ALARM
> | in14: +1.79 V (min = +0.00 V, max = +0.00 V) ALARM
> | fan1: 0 RPM (min = 0 RPM)
> | fan2: 1708 RPM (min = 0 RPM)
> | fan3: 0 RPM (min = 0 RPM)
> | fan4: 0 RPM (min = 0 RPM)
> | fan5: 0 RPM (min = 0 RPM)
> | SYSTIN: +33.0°C (high = +0.0°C, hyst = +0.0°C) ALARM
> | sensor = thermistor
> | CPUTIN: -62.5°C (high = +80.0°C, hyst = +75.0°C)
> | sensor = thermistor
> | AUXTIN0: +79.0°C sensor = thermistor
> | AUXTIN1: +96.0°C sensor = thermistor
> | AUXTIN2: +23.0°C sensor = thermistor
> | AUXTIN3: -22.0°C sensor = thermistor
> | SMBUSMASTER 0: +39.0°C
> | PCH_CHIP_CPU_MAX_TEMP: +0.0°C
> | PCH_CHIP_TEMP: +0.0°C
> | PCH_CPU_TEMP: +0.0°C
> | intrusion0: ALARM
> | intrusion1: ALARM
> | beep_enable: disabled
>
> I suspect the nct6779 is not reporting correctly (or needs some
> configuration) here, as I see that's what Ken is using with his 1300X as
> well.
>
Initially I thought the voltage reported by the Super-IO chip would help
us understand what is going on, but that is not really the case.
The problem with Ken's board is that idle current and voltage are very high.
The idle voltage claims to be higher than the voltage under load, which
doesn't really make sense. This is only reflected in the voltage and current
reported by the CPU, but not by the voltage reported by the Super-IO chip.
Thanks,
Guenter
On 1/19/20 5:38 AM, Holger Kiehl wrote:
> On Sat, 18 Jan 2020, Guenter Roeck wrote:
>
>> This patch series implements various improvements for the k10temp driver.
>>
>> Patch 1/5 introduces the use of bit operations.
>>
>> Patch 2/5 converts the driver to use the devm_hwmon_device_register_with_info
>> API. This not only simplifies the code and reduces its size, it also
>> makes the code easier to maintain and enhance.
>>
>> Patch 3/5 adds support for reporting Core Complex Die (CCD) temperatures
>> on Ryzen 3 (Zen2) CPUs.
>>
>> Patch 4/5 adds support for reporting core and SoC current and voltage
>> information on Ryzen CPUs.
>>
>> Patch 5/5 removes the maximum temperature from Tdie for Ryzen CPUs.
>> It is inaccurate, misleading, and it just doesn't make sense to report
>> wrong information.
>>
>> With all patches in place, output on Ryzen 3900X CPUs looks as follows
>> (with the system under load).
>>
>> k10temp-pci-00c3
>> Adapter: PCI adapter
>> Vcore: +1.36 V
>> Vsoc: +1.18 V
>> Tdie: +86.8?C
>> Tctl: +86.8?C
>> Tccd1: +80.0?C
>> Tccd2: +81.8?C
>> Icore: +44.14 A
>> Isoc: +13.83 A
>>
>> The voltage and current information is limited to Ryzen CPUs. Voltage
>> and current reporting on Threadripper and EPYC CPUs is different, and the
>> reported information is either incomplete or wrong. Exclude it for the time
>> being; it can always be added if/when more information becomes available.
>>
>> Tested with the following Ryzen CPUs:
>> 1300X A user with this CPU in the system reported somewhat unexpected
>> values for Vcore; it isn't entirely if at all clear why that is
>> the case. Overall this does not warrant holding up the series.
>> 1600
>> 1800X
>> 2200G
>> 2400G
>> 3800X
>> 3900X
>> 3950X
>>
>> v2: Added tested-by: tags as received.
>> Don't display voltage and current information for Threadripper and EPYC.
>> Stop displaying the fixed (and wrong) maximum temperature of 70 degrees C
>> for Tdie on model 17h/18h CPUs.
>>
> Just tested this on a 2400G. Here idle values:
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +0.77 V
> Vsoc: +1.11 V
> Tdie: +45.0?C
> Tctl: +45.0?C
> Icore: +10.39 A
> Isoc: +2.89 A
>
> nvme-pci-0100
> Adapter: PCI adapter
> Composite: +43.9?C (low = -273.1?C, high = +80.8?C)
> (crit = +80.8?C)
> Sensor 1: +43.9?C (low = -273.1?C, high = +65261.8?C)
> Sensor 2: +48.9?C (low = -273.1?C, high = +65261.8?C)
>
> nct6793-isa-0290
> Adapter: ISA adapter
> in0: +0.35 V (min = +0.00 V, max = +1.74 V)
> in1: +1.85 V (min = +0.00 V, max = +0.00 V) ALARM
> in2: +3.41 V (min = +0.00 V, max = +0.00 V) ALARM
> in3: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> in4: +0.26 V (min = +0.00 V, max = +0.00 V) ALARM
> in5: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
> in6: +0.66 V (min = +0.00 V, max = +0.00 V) ALARM
> in7: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> in8: +3.26 V (min = +0.00 V, max = +0.00 V) ALARM
> in9: +1.83 V (min = +0.00 V, max = +0.00 V) ALARM
> in10: +0.19 V (min = +0.00 V, max = +0.00 V) ALARM
> in11: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
> in12: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
> in13: +1.72 V (min = +0.00 V, max = +0.00 V) ALARM
> in14: +0.21 V (min = +0.00 V, max = +0.00 V) ALARM
> fan1: 0 RPM (min = 0 RPM)
> fan2: 323 RPM (min = 0 RPM)
> fan3: 0 RPM (min = 0 RPM)
> fan4: 0 RPM (min = 0 RPM)
> fan5: 0 RPM (min = 0 RPM)
> SYSTIN: +112.0?C (high = +0.0?C, hyst = +0.0?C) sensor = thermistor
> CPUTIN: +60.0?C (high = +80.0?C, hyst = +75.0?C) sensor = thermistor
> AUXTIN0: +46.0?C (high = +0.0?C, hyst = +0.0?C) ALARM sensor = thermistor
> AUXTIN1: +106.0?C sensor = thermistor
> AUXTIN2: +105.0?C sensor = thermistor
> AUXTIN3: +102.0?C sensor = thermistor
> SMBUSMASTER 0: +45.0?C
> PCH_CHIP_CPU_MAX_TEMP: +0.0?C
> PCH_CHIP_TEMP: +0.0?C
> PCH_CPU_TEMP: +0.0?C
> intrusion0: OK
> intrusion1: ALARM
> beep_enable: disabled
>
> amdgpu-pci-0300
> Adapter: PCI adapter
> vddgfx: N/A
> vddnb: N/A
> edge: +45.0?C (crit = +80.0?C, hyst = +0.0?C)
>
> And here with some high load:
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Vcore: +1.32 V
> Vsoc: +1.11 V
> Tdie: +77.1?C
> Tctl: +77.1?C
> Icore: +85.22 A
> Isoc: +3.61 A
>
> nvme-pci-0100
> Adapter: PCI adapter
> Composite: +42.9?C (low = -273.1?C, high = +80.8?C)
> (crit = +80.8?C)
> Sensor 1: +42.9?C (low = -273.1?C, high = +65261.8?C)
> Sensor 2: +45.9?C (low = -273.1?C, high = +65261.8?C)
>
> nct6793-isa-0290
> Adapter: ISA adapter
> in0: +0.68 V (min = +0.00 V, max = +1.74 V)
> in1: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
> in2: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> in3: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> in4: +0.26 V (min = +0.00 V, max = +0.00 V) ALARM
> in5: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
> in6: +0.66 V (min = +0.00 V, max = +0.00 V) ALARM
> in7: +3.39 V (min = +0.00 V, max = +0.00 V) ALARM
> in8: +3.26 V (min = +0.00 V, max = +0.00 V) ALARM
> in9: +1.83 V (min = +0.00 V, max = +0.00 V) ALARM
> in10: +0.19 V (min = +0.00 V, max = +0.00 V) ALARM
> in11: +0.14 V (min = +0.00 V, max = +0.00 V) ALARM
> in12: +1.84 V (min = +0.00 V, max = +0.00 V) ALARM
> in13: +1.72 V (min = +0.00 V, max = +0.00 V) ALARM
> in14: +0.20 V (min = +0.00 V, max = +0.00 V) ALARM
> fan1: 0 RPM (min = 0 RPM)
> fan2: 1931 RPM (min = 0 RPM)
> fan3: 0 RPM (min = 0 RPM)
> fan4: 0 RPM (min = 0 RPM)
> fan5: 0 RPM (min = 0 RPM)
> SYSTIN: +113.0?C (high = +0.0?C, hyst = +0.0?C) sensor = thermistor
> CPUTIN: +64.5?C (high = +80.0?C, hyst = +75.0?C) sensor = thermistor
> AUXTIN0: +45.0?C (high = +0.0?C, hyst = +0.0?C) ALARM sensor = thermistor
> AUXTIN1: +107.0?C sensor = thermistor
> AUXTIN2: +105.0?C sensor = thermistor
> AUXTIN3: +102.0?C sensor = thermistor
> SMBUSMASTER 0: +77.0?C
> PCH_CHIP_CPU_MAX_TEMP: +0.0?C
> PCH_CHIP_TEMP: +0.0?C
> PCH_CPU_TEMP: +0.0?C
> intrusion0: OK
> intrusion1: ALARM
> beep_enable: disabled
>
> amdgpu-pci-0300
> Adapter: PCI adapter
> vddgfx: N/A
> vddnb: N/A
> edge: +77.0?C (crit = +80.0?C, hyst = +0.0?C)
>
> Have also tried this on a EPYC 7302. Before the patch:
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Tdie: +28.1?C (high = +70.0?C)
> Tctl: +28.1?C
>
> and after:
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Tdie: +28.2?C
> Tctl: +28.2?C
>
> No extra values shown, but I think this is expected.
>
Unfortunately yes, but it helps to confirm that the detection works.
> Tested-by Holger Kiehl <[email protected]>
>
Thanks again!
Guenter
On Sun, Jan 19, 2020 at 07:46:11AM -0800, Guenter Roeck wrote:
> On 1/19/20 2:18 AM, Jonathan McDowell wrote:
> >
> > In article <[email protected]> (earth.lists.linux-kernel) you wrote:
> > > This patch series implements various improvements for the k10temp driver.
> > ...
> > > The voltage and current information is limited to Ryzen CPUs. Voltage
> > > and current reporting on Threadripper and EPYC CPUs is different, and the
> > > reported information is either incomplete or wrong. Exclude it for the time
> > > being; it can always be added if/when more information becomes available.
> >
> > > Tested with the following Ryzen CPUs:
> >
> > Tested-By: Jonathan McDowell <[email protected]>
> >
> Thanks!
>
> > Tested on a Ryzen 7 2700 (patched on top of 5.4.13):
> >
> > | k10temp-pci-00c3
> > | Adapter: PCI adapter
> > | Vcore: +0.80 V
> > | Vsoc: +0.81 V
> > | Tdie: +37.0?C
> > | Tctl: +37.0?C
> > | Icore: +8.31 A
> > | Isoc: +6.86 A
> >
> > Like the 1300X case I see a discrepancy compared to what the nct6779
> > driver says Vcore is:
> >
> > | nct6779-isa-0290
> > | Adapter: ISA adapter
> > | Vcore: +0.33 V (min = +0.00 V, max = +1.74 V)
>
> I see that on all of my boards as well (3900X, different boards and board vendors),
> with temperatures reported by the Super-IO chip sometimes as low as 0.18V (!).
> Yet, there is a clear correlation of that voltage with CPU load.
> I suspect the measurement by the Super-IO chip is a different voltage.
>
> I don't think there is anything we can do about that without access to more
> information.
...
> The problem with Ken's board is that idle current and voltage are very high.
> The idle voltage claims to be higher than the voltage under load, which
> doesn't really make sense. This is only reflected in the voltage and current
> reported by the CPU, but not by the voltage reported by the Super-IO chip.
I see clear correlation between load/Vcore/Icore/Tdie from your patched
k10temp driver which leads me to believe these numbers are valid for the
2700. Vsoc is fairly consistent and Isoc doesn't vary much either
(6.3-8.1A range over the past 8 hours).
J.
--
... "f u cn rd ths, u cn gt a gd jb n cmptr prgrmmng." -- Simon Cozens,
ox.os.linux
On 18.01.20 at 18:26 Guenter Roeck wrote:
> This patch series implements various improvements for the k10temp driver.
Tested with Asus Prime X370-PRO / 1700X / Linux 5.4.x
Tested-By: Andreas Hartmann <[email protected]>
Idle:
it8665-isa-0290
Adapter: ISA adapter
in0: +0.88 V (min = +1.27 V, max = +2.00 V)
in1: +2.52 V (min = +2.76 V, max = +2.55 V)
in2: +2.05 V (min = +1.23 V, max = +0.16 V)
in3: +2.01 V (min = +1.53 V, max = +2.25 V)
in4: +0.03 V (min = +2.67 V, max = +2.43 V)
in5: +0.03 V (min = +2.39 V, max = +1.21 V)
in6: +0.03 V (min = +1.67 V, max = +0.56 V)
3VSB: +3.31 V (min = +2.46 V, max = +5.43 V)
Vbat: +3.25 V
+3.3V: +3.31 V
Fan CPU: 594 RPM (min = 10 RPM)
Fan back: 380 RPM (min = 11 RPM)
Fan front: 415 RPM (min = 12 RPM)
fan5: 0 RPM (min = -1 RPM) ALARM
fan6: 0 RPM (min = -1 RPM) ALARM
CPU: +28.0°C (low = +112.0°C, high = -5.0°C)
Motherboard: +30.0°C (low = -33.0°C, high = -18.0°C) sensor = thermistor
temp3: +29.0°C (low = -1.0°C, high = -105.0°C) sensor = thermistor
temp4: +29.0°C (low = -74.0°C, high = +126.0°C) sensor = thermistor
temp5: +29.0°C (low = -49.0°C, high = +127.0°C) sensor = thermistor
temp6: +29.0°C (low = -82.0°C, high = -79.0°C) sensor = thermistor
intrusion0: ALARM
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +0.90 V
Vsoc: +0.90 V
CPU: +28.6°C (high = +70.0°C)
Tctl: +48.6°C
Icore: +5.20 A
Isoc: +7.21 A
Load:
it8665-isa-0290
Adapter: ISA adapter
in0: +1.38 V (min = +1.27 V, max = +2.00 V)
in1: +2.52 V (min = +2.76 V, max = +2.55 V)
in2: +2.04 V (min = +1.23 V, max = +0.16 V)
in3: +2.01 V (min = +1.53 V, max = +2.25 V)
in4: +0.03 V (min = +2.67 V, max = +2.43 V)
in5: +0.03 V (min = +2.39 V, max = +1.21 V)
in6: +0.03 V (min = +1.67 V, max = +0.56 V)
3VSB: +3.33 V (min = +2.46 V, max = +5.43 V)
Vbat: +3.25 V
+3.3V: +3.33 V
Fan CPU: 798 RPM (min = 10 RPM)
Fan back: 481 RPM (min = 11 RPM)
Fan front: 460 RPM (min = 12 RPM)
fan5: 0 RPM (min = -1 RPM) ALARM
fan6: 0 RPM (min = -1 RPM) ALARM
CPU: +43.0°C (low = +112.0°C, high = -5.0°C)
Motherboard: +30.0°C (low = -33.0°C, high = -18.0°C) sensor = thermistor
temp3: +33.0°C (low = -1.0°C, high = -105.0°C) sensor = thermistor
temp4: +33.0°C (low = -74.0°C, high = +126.0°C) sensor = thermistor
temp5: +33.0°C (low = -49.0°C, high = +127.0°C) sensor = thermistor
temp6: +33.0°C (low = -82.0°C, high = -79.0°C) sensor = thermistor
intrusion0: ALARM
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.30 V
Vsoc: +0.90 V
CPU: +49.5°C (high = +70.0°C)
Tctl: +69.5°C
Icore: +77.94 A
Isoc: +9.02 A
it8665-isa-0290
Adapter: ISA adapter
in0: +1.36 V (min = +1.27 V, max = +2.00 V)
in1: +2.52 V (min = +2.76 V, max = +2.55 V)
in2: +2.04 V (min = +1.23 V, max = +0.16 V)
in3: +2.00 V (min = +1.53 V, max = +2.25 V)
in4: +0.03 V (min = +2.67 V, max = +2.43 V)
in5: +0.03 V (min = +2.39 V, max = +1.21 V)
in6: +0.03 V (min = +1.67 V, max = +0.56 V)
3VSB: +3.33 V (min = +2.46 V, max = +5.43 V)
Vbat: +3.25 V
+3.3V: +3.33 V
Fan CPU: 1044 RPM (min = 10 RPM)
Fan back: 722 RPM (min = 11 RPM)
Fan front: 633 RPM (min = 12 RPM)
fan5: 0 RPM (min = -1 RPM) ALARM
fan6: 0 RPM (min = -1 RPM) ALARM
CPU: +49.0°C (low = +112.0°C, high = -5.0°C)
Motherboard: +30.0°C (low = -33.0°C, high = -18.0°C) sensor = thermistor
temp3: +39.0°C (low = -1.0°C, high = -105.0°C) sensor = thermistor
temp4: +39.0°C (low = -74.0°C, high = +126.0°C) sensor = thermistor
temp5: +39.0°C (low = -49.0°C, high = +127.0°C) sensor = thermistor
temp6: +39.0°C (low = -82.0°C, high = -79.0°C) sensor = thermistor
intrusion0: ALARM
k10temp-pci-00c3
Adapter: PCI adapter
Vcore: +1.30 V
Vsoc: +0.90 V
CPU: +52.1°C (high = +70.0°C)
Tctl: +72.1°C
Icore: +78.98 A
Isoc: +9.02 A
=> I can see slight differences regarding CPU temperature and Vcore between it8665 and k10temp - especially on high load (kernel compile).
Thanks
Andreas