Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751841AbaL3EiR (ORCPT ); Mon, 29 Dec 2014 23:38:17 -0500 Received: from LGEMRELSE7Q.lge.com ([156.147.1.151]:38010 "EHLO lgemrelse7q.lge.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751459AbaL3EiQ (ORCPT ); Mon, 29 Dec 2014 23:38:16 -0500 X-Original-SENDERIP: 10.177.220.158 X-Original-MAILFROM: iamjoonsoo.kim@lge.com Date: Tue, 30 Dec 2014 13:38:14 +0900 From: Joonsoo Kim To: "Stefan I. Strogin" Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Andrew Morton , Marek Szyprowski , Michal Nazarewicz , aneesh.kumar@linux.vnet.ibm.com, Laurent Pinchart , Dmitry Safonov , Pintu Kumar , Weijie Yang , Laura Abbott , SeongJae Park , Hui Zhu , Minchan Kim , Dyasly Sergey , Vyacheslav Tyrtov Subject: Re: [PATCH 2/3] mm: cma: introduce /proc/cmainfo Message-ID: <20141230043814.GB4588@js1304-P5Q-DELUXE> References: <264ce8ad192124f2afec9a71a2fc28779d453ba7.1419602920.git.s.strogin@partner.samsung.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <264ce8ad192124f2afec9a71a2fc28779d453ba7.1419602920.git.s.strogin@partner.samsung.com> User-Agent: Mutt/1.5.21 (2010-09-15) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Fri, Dec 26, 2014 at 05:39:03PM +0300, Stefan I. Strogin wrote: > /proc/cmainfo contains a list of currently allocated CMA buffers for every > CMA area when CONFIG_CMA_DEBUG is enabled. Hello, I think that providing this information looks useful, but we need a better implementation. As Laura said, it is better to use debugfs. And, instead of reinventing the wheel, how about using tracepoints to print this information? See the comments below. > > Format is: > > - ( kB), allocated by \ > (), latency us > > > Signed-off-by: Stefan I. 
Strogin > --- > mm/cma.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 202 insertions(+) > > diff --git a/mm/cma.c b/mm/cma.c > index a85ae28..ffaea26 100644 > --- a/mm/cma.c > +++ b/mm/cma.c > @@ -34,6 +34,10 @@ > #include > #include > #include > +#include > +#include > +#include > +#include > > struct cma { > unsigned long base_pfn; > @@ -41,8 +45,25 @@ struct cma { > unsigned long *bitmap; > unsigned int order_per_bit; /* Order of pages represented by one bit */ > struct mutex lock; > +#ifdef CONFIG_CMA_DEBUG > + struct list_head buffers_list; > + struct mutex list_lock; > +#endif > }; > > +#ifdef CONFIG_CMA_DEBUG > +struct cma_buffer { > + unsigned long pfn; > + unsigned long count; > + pid_t pid; > + char comm[TASK_COMM_LEN]; > + unsigned int latency; > + unsigned long trace_entries[16]; > + unsigned int nr_entries; > + struct list_head list; > +}; > +#endif > + > static struct cma cma_areas[MAX_CMA_AREAS]; > static unsigned cma_area_count; > static DEFINE_MUTEX(cma_mutex); > @@ -132,6 +153,10 @@ static int __init cma_activate_area(struct cma *cma) > } while (--i); > > mutex_init(&cma->lock); > +#ifdef CONFIG_CMA_DEBUG > + INIT_LIST_HEAD(&cma->buffers_list); > + mutex_init(&cma->list_lock); > +#endif > return 0; > > err: > @@ -347,6 +372,86 @@ err: > return ret; > } > > +#ifdef CONFIG_CMA_DEBUG > +/** > + * cma_buffer_list_add() - add a new entry to a list of allocated buffers > + * @cma: Contiguous memory region for which the allocation is performed. > + * @pfn: Base PFN of the allocated buffer. > + * @count: Number of allocated pages. > + * @latency: Nanoseconds spent to allocate the buffer. > + * > + * This function adds a new entry to the list of allocated contiguous memory > + * buffers in a CMA area. It uses the CMA area specificated by the device > + * if available or the default global one otherwise. 
> + */ > +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn, > + int count, s64 latency) > +{ > + struct cma_buffer *cmabuf; > + struct stack_trace trace; > + > + cmabuf = kmalloc(sizeof(struct cma_buffer), GFP_KERNEL); > + if (!cmabuf) > + return -ENOMEM; > + > + trace.nr_entries = 0; > + trace.max_entries = ARRAY_SIZE(cmabuf->trace_entries); > + trace.entries = &cmabuf->trace_entries[0]; > + trace.skip = 2; > + save_stack_trace(&trace); > + > + cmabuf->pfn = pfn; > + cmabuf->count = count; > + cmabuf->pid = task_pid_nr(current); > + cmabuf->nr_entries = trace.nr_entries; > + get_task_comm(cmabuf->comm, current); > + cmabuf->latency = (unsigned int) div_s64(latency, NSEC_PER_USEC); > + > + mutex_lock(&cma->list_lock); > + list_add_tail(&cmabuf->list, &cma->buffers_list); > + mutex_unlock(&cma->list_lock); > + > + return 0; > +} > + > +/** > + * cma_buffer_list_del() - delete an entry from a list of allocated buffers > + * @cma: Contiguous memory region for which the allocation was performed. > + * @pfn: Base PFN of the released buffer. > + * > + * This function deletes a list entry added by cma_buffer_list_add(). > + */ > +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn) > +{ > + struct cma_buffer *cmabuf; > + > + mutex_lock(&cma->list_lock); > + > + list_for_each_entry(cmabuf, &cma->buffers_list, list) > + if (cmabuf->pfn == pfn) { > + list_del(&cmabuf->list); > + kfree(cmabuf); > + goto out; > + } > + Is there a more elegant way to find the buffer? This linear search overhead would change system behaviour if there are lots of buffers. 
> + pr_err("%s(pfn %lu): couldn't find buffers list entry\n", > + __func__, pfn); > + > +out: > + mutex_unlock(&cma->list_lock); > +} > +#else > +static int cma_buffer_list_add(struct cma *cma, unsigned long pfn, > + int count, s64 latency) > +{ > + return 0; > +} > + > +static void cma_buffer_list_del(struct cma *cma, unsigned long pfn) > +{ > +} > +#endif /* CONFIG_CMA_DEBUG */ > + > /** > * cma_alloc() - allocate pages from contiguous area > * @cma: Contiguous memory region for which the allocation is performed. > @@ -361,11 +466,15 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align) > unsigned long mask, offset, pfn, start = 0; > unsigned long bitmap_maxno, bitmap_no, bitmap_count; > struct page *page = NULL; > + struct timespec ts1, ts2; > + s64 latency; > int ret; > > if (!cma || !cma->count) > return NULL; > > + getnstimeofday(&ts1); > + > pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, > count, align); > > @@ -413,6 +522,19 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align) > start = bitmap_no + mask + 1; > } > > + getnstimeofday(&ts2); > + latency = timespec_to_ns(&ts2) - timespec_to_ns(&ts1); > + > + if (page) { > + ret = cma_buffer_list_add(cma, pfn, count, latency); > + if (ret) { > + pr_warn("%s(): cma_buffer_list_add() returned %d\n", > + __func__, ret); > + cma_release(cma, page, count); > + page = NULL; > + } So, we would fail to allocate CMA memory if we can't allocate a buffer for debugging. I don't think that makes sense. With tracepoints, we don't need to allocate a buffer at runtime. Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/