Date: Tue, 8 Mar 2016 10:22:39 +0100 (CET)
From: Thomas Gleixner <tglx@linutronix.de>
To: Vikas Shivappa <vikas.shivappa@linux.intel.com>
cc: vikas.shivappa@intel.com, linux-kernel@vger.kernel.org, x86@kernel.org,
        hpa@zytor.com, mingo@kernel.org, peterz@infradead.org,
        ravi.v.shankar@intel.com, tony.luck@intel.com, fenghua.yu@intel.com,
        h.peter.anvin@intel.com
Subject: Re: [PATCH 2/6] x86,perf/cqm: Fix cqm memory leak and notifier
 leak
In-Reply-To: <1456962839-29170-1-git-send-email-vikas.shivappa@linux.intel.com>
Message-ID: <alpine.DEB.2.11.1603081012170.6083@nanos>
References: <1456876108-28770-3-git-send-email-vikas.shivappa@linux.intel.com> <1456962839-29170-1-git-send-email-vikas.shivappa@linux.intel.com>
User-Agent: Alpine 2.11 (DEB 23 2013-08-11)
MIME-Version: 1.0
Content-Type: TEXT/PLAIN; charset=US-ASCII
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 4465
Lines: 183

On Wed, 2 Mar 2016, Vikas Shivappa wrote:

Please fix the subject line prefix: "x86/perf/intel/cqm:"

> Fixes the hotcpu notifier leak and other global variable memory leaks
> during cqm(cache quality of service monitoring) initialization.
> 
> Reviewed-by: Tony Luck <tony.luck@intel.com>
> Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
> ---
> 
> Fixed the memory leak for cqm_rmid_ptrs as per Thomas feedback.
>   
>  arch/x86/kernel/cpu/perf_event_intel_cqm.c | 34 ++++++++++++++++++++++++------
>  1 file changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> index e6be335..37a93fa 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> @@ -250,10 +250,13 @@ static int intel_cqm_setup_rmid_cache(void)
>  
>  	return 0;
>  fail:
> -	while (r--)
> +	while (r--) {
>  		kfree(cqm_rmid_ptrs[r]);
> +		cqm_rmid_ptrs[r] = NULL;
> +	}
>  
>  	kfree(cqm_rmid_ptrs);
> +	cqm_rmid_ptrs = NULL;

This partial rollback is crap. Why can't you utilize cqm_cleanup() ?
Performance certainly is not an issue here.

>  	return -ENOMEM;
>  }
>  
> @@ -1320,9 +1323,19 @@ static const struct x86_cpu_id intel_cqm_match[] = {
>  	{}
>  };
>  
> +static void cqm_cleanup(void)
> +{
> +	int r = cqm_max_rmid;
> +
> +	while (r--)
> +		kfree(cqm_rmid_ptrs[r]);
> +
> +	kfree(cqm_rmid_ptrs);
> +}
> +
>  static int __init intel_cqm_init(void)
>  {
> -	char *str, scale[20];
> +	char *str = NULL, scale[20];
>  	int i, cpu, ret;
>  
>  	if (!x86_match_cpu(intel_cqm_match))
> @@ -1382,16 +1395,25 @@ static int __init intel_cqm_init(void)
>  		cqm_pick_event_reader(i);
>  	}
>  
> -	__perf_cpu_notifier(intel_cqm_cpu_notifier);
> -
>  	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
> -	if (ret)
> +	if (ret) {
>  		pr_err("Intel CQM perf registration failed: %d\n", ret);
> -	else
> +		goto out;
> +	} else {
>  		pr_info("Intel CQM monitoring enabled\n");
> +	}
>  
> +	/*
> +	 * Register the hot cpu notifier once we are sure cqm
> +	 * is enabled to avoid notifier leak.
> +	 */
> +	__perf_cpu_notifier(intel_cqm_cpu_notifier);
>  out:
>  	cpu_notifier_register_done();
> +	if (ret) {
> +		kfree(str);
> +		cqm_cleanup();

This is still broken. If intel_cqm_setup_rmid_cache() fails, you clear
cqm_rmid_ptrs and then jump to out. ret is -ENOMEM, so you will call
cqm_cleanup which will dereference cqm_rmid_ptrs .....

Find below a delta patch, which fixes that proper and safe.

Thanks,

	tglx

8<----------------

 arch/x86/events/intel/cqm.c |   40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -211,6 +211,20 @@ static void __put_rmid(u32 rmid)
 	list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
 }
 
+static void cqm_cleanup(void)
+{
+	int i;
+
+	if (!cqm_rmid_ptrs)
+		return;
+
+	for (i = 0; i < cqm_max_rmid; i++)
+		kfree(cqm_rmid_ptrs[i]);
+
+	kfree(cqm_rmid_ptrs);
+	cqm_rmid_ptrs = NULL;
+}
+
 static int intel_cqm_setup_rmid_cache(void)
 {
 	struct cqm_rmid_entry *entry;
@@ -218,7 +232,7 @@ static int intel_cqm_setup_rmid_cache(vo
 	int r = 0;
 
 	nr_rmids = cqm_max_rmid + 1;
-	cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+	cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
 				nr_rmids, GFP_KERNEL);
 	if (!cqm_rmid_ptrs)
 		return -ENOMEM;
@@ -247,16 +261,10 @@ static int intel_cqm_setup_rmid_cache(vo
 	mutex_lock(&cache_mutex);
 	intel_cqm_rotation_rmid = __get_rmid();
 	mutex_unlock(&cache_mutex);
-
 	return 0;
-fail:
-	while (r--) {
-		kfree(cqm_rmid_ptrs[r]);
-		cqm_rmid_ptrs[r] = NULL;
-	}
 
-	kfree(cqm_rmid_ptrs);
-	cqm_rmid_ptrs = NULL;
+fail:
+	cqm_cleanup();
 	return -ENOMEM;
 }
 
@@ -1313,16 +1321,6 @@ static const struct x86_cpu_id intel_cqm
 	{}
 };
 
-static void cqm_cleanup(void)
-{
-	int r = cqm_max_rmid;
-
-	while (r--)
-		kfree(cqm_rmid_ptrs[r]);
-
-	kfree(cqm_rmid_ptrs);
-}
-
 static int __init intel_cqm_init(void)
 {
 	char *str = NULL, scale[20];
@@ -1389,10 +1387,10 @@ static int __init intel_cqm_init(void)
 	if (ret) {
 		pr_err("Intel CQM perf registration failed: %d\n", ret);
 		goto out;
-	} else {
-		pr_info("Intel CQM monitoring enabled\n");
 	}
 
+	pr_info("Intel CQM monitoring enabled\n");
+
 	/*
 	 * Register the hot cpu notifier once we are sure cqm
 	 * is enabled to avoid notifier leak.