Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932794AbcCCRFU (ORCPT ); Thu, 3 Mar 2016 12:05:20 -0500 Received: from mga11.intel.com ([192.55.52.93]:46709 "EHLO mga11.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1758161AbcCCQwk (ORCPT ); Thu, 3 Mar 2016 11:52:40 -0500 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.22,532,1449561600"; d="scan'208";a="59242012" From: "Kirill A. Shutemov" To: Hugh Dickins , Andrea Arcangeli , Andrew Morton Cc: Dave Hansen , Vlastimil Babka , Christoph Lameter , Naoya Horiguchi , Jerome Marchand , Yang Shi , Sasha Levin , linux-kernel@vger.kernel.org, linux-mm@kvack.org, "Kirill A. Shutemov" Subject: [PATCHv3 26/29] shmem: prepare huge= mount option and sysfs knob Date: Thu, 3 Mar 2016 19:52:16 +0300 Message-Id: <1457023939-98083-27-git-send-email-kirill.shutemov@linux.intel.com> X-Mailer: git-send-email 2.7.0 In-Reply-To: <1457023939-98083-1-git-send-email-kirill.shutemov@linux.intel.com> References: <1457023939-98083-1-git-send-email-kirill.shutemov@linux.intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8657 Lines: 296 This patch adds new mount option "huge=". It can have following values: - "always": Attempt to allocate huge pages every time we need a new page; - "never": Do not allocate huge pages; - "within_size": Only allocate huge page if it will be fully within i_size. Also respect fadvise()/madvise() hints; - "advise: Only allocate huge pages if requested with fadvise()/madvise(); Default is "never" for now. "mount -o remount,huge= /mountpoint" works fine after mount: remounting huge=never will not attempt to break up huge pages at all, just stop more from being allocated. No new config option: put this under CONFIG_TRANSPARENT_HUGEPAGE, which is the appropriate option to protect those who don't want the new bloat, and with which we shall share some pmd code. Prohibit the option when !CONFIG_TRANSPARENT_HUGEPAGE, just as mpol is invalid without CONFIG_NUMA (was hidden in mpol_parse_str(): make it explicit). Allow enabling THP only if the machine has_transparent_hugepage(). But what about Shmem with no user-visible mount? SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem. Though unlikely to suit all usages, provide sysfs knob /sys/kernel/mm/transparent_hugepage/shmem_enabled to experiment with huge on those. And allow shmem_enabled two further values: - "deny": For use in emergencies, to force the huge option off from all mounts; - "force": Force the huge option on for all - very useful for testing; Based on patch by Hugh Dickins. Signed-off-by: Kirill A. Shutemov --- include/linux/huge_mm.h | 2 + include/linux/shmem_fs.h | 3 +- mm/huge_memory.c | 3 + mm/shmem.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+), 1 deletion(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index c958e3db0a0e..ac6dc46dc65a 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -42,6 +42,8 @@ enum transparent_hugepage_flag { #endif }; +extern struct kobj_attribute shmem_enabled_attr; + #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<gid = make_kgid(current_user_ns(), gid); if (!gid_valid(sbinfo->gid)) goto bad_val; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + } else if (!strcmp(this_char, "huge")) { + int huge; + huge = shmem_parse_huge(value); + if (huge < 0) + goto bad_val; + if (!has_transparent_hugepage() && + huge != SHMEM_HUGE_NEVER) + goto bad_val; + sbinfo->huge = huge; +#endif +#ifdef CONFIG_NUMA } else if (!strcmp(this_char,"mpol")) { mpol_put(mpol); mpol = NULL; if (mpol_parse_str(value, &mpol)) goto bad_val; +#endif } else { printk(KERN_ERR "tmpfs: Bad mount option %s\n", this_char); @@ -2966,6 +3060,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) goto out; error = 0; + sbinfo->huge = config.huge; sbinfo->max_blocks = config.max_blocks; sbinfo->max_inodes = config.max_inodes; sbinfo->free_inodes = config.max_inodes - inodes; @@ -2999,6 +3094,11 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, sbinfo->gid)); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ + if (sbinfo->huge) + seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); +#endif shmem_show_mpol(seq, sbinfo->mpol); return 0; } @@ -3347,6 +3447,58 @@ out3: return error; } +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) +static ssize_t shmem_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int values[] = { + SHMEM_HUGE_ALWAYS, + SHMEM_HUGE_WITHIN_SIZE, + SHMEM_HUGE_ADVISE, + SHMEM_HUGE_NEVER, + SHMEM_HUGE_DENY, + SHMEM_HUGE_FORCE, + }; + int i, count; + + for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) { + const char *fmt = shmem_huge == values[i] ? "[%s] " : "%s "; + + count += sprintf(buf + count, fmt, + shmem_format_huge(values[i])); + } + buf[count - 1] = '\n'; + return count; +} + +static ssize_t shmem_enabled_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + char tmp[16]; + int huge; + + if (count + 1 > sizeof(tmp)) + return -EINVAL; + memcpy(tmp, buf, count); + tmp[count] = '\0'; + if (count && tmp[count - 1] == '\n') + tmp[count - 1] = '\0'; + + huge = shmem_parse_huge(tmp); + if (huge == -EINVAL) + return -EINVAL; + if (!has_transparent_hugepage() && + huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY) + return -EINVAL; + + shmem_huge = huge; + return count; +} + +struct kobj_attribute shmem_enabled_attr = + __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */ + #else /* !CONFIG_SHMEM */ /* -- 2.7.0