changes since v3:
- patch 3: "-Exattr-name-filter" option rather than "--xattr-filter"
option is newly introduced to enable this feature (Gao Xiang)
changes since v2:
- patch 2: introduce xattr_filter_reserved in on-disk superblock;
remove EROFS_XATTR_FILTER_MASK
- patch 3: xattr_filter_reserved is always initialized to 0 by default
changes since RFC:
- the number of hash functions is 1, and now it's implemented as:
xxh32(name, strlen(name), EROFS_XATTR_FILTER_SEED + index),
where the constant magic number EROFS_XATTR_FILTER_SEED [*] is used to
give a better spread for the mapping. (Alexander Larsson)
- fix the value of EROFS_FEATURE_COMPAT_XATTR_FILTER; rename
EROFS_XATTR_BLOOM_* to EROFS_XATTR_FILTER_* (Gao Xiang)
RFC: https://lore.kernel.org/all/[email protected]/
v2: https://lore.kernel.org/all/[email protected]/
v3: https://lore.kernel.org/all/[email protected]/
The xattr bloom filter feature is used to boost the negative xattr
lookup.
Refer to the kernel patch set [*] for more details.
[*] https://lore.kernel.org/all/[email protected]/
Jingbo Xu (3):
erofs-utils: add xxh32 library
erofs-utils: update on-disk format for xattr name filter
erofs-utils: mkfs: enable xattr name filter
include/erofs/config.h | 1 +
include/erofs/internal.h | 1 +
include/erofs/xxhash.h | 35 +++++++++++++++++
include/erofs_fs.h | 12 +++++-
lib/Makefile.am | 3 +-
lib/xattr.c | 74 ++++++++++++++++++++++++++--------
lib/xxhash.c | 85 ++++++++++++++++++++++++++++++++++++++++
mkfs/main.c | 7 ++++
8 files changed, 199 insertions(+), 19 deletions(-)
create mode 100644 include/erofs/xxhash.h
create mode 100644 lib/xxhash.c
--
2.19.1.6.gb485710b
The xattr name bloom filter feature is going to be introduced to speed
up the negative xattr lookup, e.g. system.posix_acl_[access|default]
lookup when running "ls -lR" workload.
There are some commonly used extended attributes (n) and the total
number of these is approximately 30.
trusted.overlay.opaque
trusted.overlay.redirect
trusted.overlay.origin
trusted.overlay.impure
trusted.overlay.nlink
trusted.overlay.upper
trusted.overlay.metacopy
trusted.overlay.protattr
user.overlay.opaque
user.overlay.redirect
user.overlay.origin
user.overlay.impure
user.overlay.nlink
user.overlay.upper
user.overlay.metacopy
user.overlay.protattr
security.evm
security.ima
security.selinux
security.SMACK64
security.SMACK64IPIN
security.SMACK64IPOUT
security.SMACK64EXEC
security.SMACK64TRANSMUTE
security.SMACK64MMAP
security.apparmor
security.capability
system.posix_acl_access
system.posix_acl_default
user.mime_type
Given the number of bits of the bloom filter (m) is 32, the optimal
value for the number of the hash functions (k) is 1 (ln2 * m/n = 0.74).
The single hash function is implemented as:
xxh32(name, strlen(name), EROFS_XATTR_FILTER_SEED + index)
where `index` represents the index of corresponding predefined short name
prefix, while `name` represents the name string after stripping the above
predefined name prefix.
The constant magic number EROFS_XATTR_FILTER_SEED, i.e. 0x25BBE08F, is
used to give a better spread when mapping these 30 extended attributes
into 32-bit bloom filter as:
bit 0: security.ima
bit 1:
bit 2: trusted.overlay.nlink
bit 3:
bit 4: user.overlay.nlink
bit 5: trusted.overlay.upper
bit 6: user.overlay.origin
bit 7: trusted.overlay.protattr
bit 8: security.apparmor
bit 9: user.overlay.protattr
bit 10: user.overlay.opaque
bit 11: security.selinux
bit 12: security.SMACK64TRANSMUTE
bit 13: security.SMACK64
bit 14: security.SMACK64MMAP
bit 15: user.overlay.impure
bit 16: security.SMACK64IPIN
bit 17: trusted.overlay.redirect
bit 18: trusted.overlay.origin
bit 19: security.SMACK64IPOUT
bit 20: trusted.overlay.opaque
bit 21: system.posix_acl_default
bit 22:
bit 23: user.mime_type
bit 24: trusted.overlay.impure
bit 25: security.SMACK64EXEC
bit 26: user.overlay.redirect
bit 27: user.overlay.upper
bit 28: security.evm
bit 29: security.capability
bit 30: system.posix_acl_access
bit 31: trusted.overlay.metacopy, user.overlay.metacopy
h_name_filter is introduced to the on-disk per-inode xattr header to
place the corresponding xattr name filter, where bit value 1 indicates
non-existence for compatibility.
This feature is indicated by EROFS_FEATURE_COMPAT_XATTR_FILTER
compatible feature bit.
Reserve one byte in on-disk superblock as the on-disk format for xattr
name filter may change in the future. With this flag we don't need
bothering these compatible bits again at that time.
Suggested-by: Alexander Larsson <[email protected]>
Signed-off-by: Jingbo Xu <[email protected]>
---
include/erofs_fs.h | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 3697882..1789a37 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -14,6 +14,7 @@
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
+#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004
/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -82,7 +83,8 @@ struct erofs_super_block {
__u8 xattr_prefix_count; /* # of long xattr name prefixes */
__le32 xattr_prefix_start; /* start of long xattr prefixes */
__le64 packed_nid; /* nid of the special packed inode */
- __u8 reserved2[24];
+ __u8 xattr_filter_reserved; /* reserved for xattr name filter */
+ __u8 reserved2[23];
};
/*
@@ -201,7 +203,7 @@ struct erofs_inode_extended {
* for read-only fs, no need to introduce h_refcount
*/
struct erofs_xattr_ibody_header {
- __le32 h_reserved;
+ __le32 h_name_filter; /* bit value 1 indicates not-present */
__u8 h_shared_count;
__u8 h_reserved2[7];
__le32 h_shared_xattrs[0]; /* shared xattr id array */
@@ -222,6 +224,12 @@ struct erofs_xattr_ibody_header {
#define EROFS_XATTR_LONG_PREFIX 0x80
#define EROFS_XATTR_LONG_PREFIX_MASK 0x7f
+#define EROFS_XATTR_NAME_LEN_MAX UCHAR_MAX
+
+#define EROFS_XATTR_FILTER_BITS 32
+#define EROFS_XATTR_FILTER_DEFAULT UINT32_MAX
+#define EROFS_XATTR_FILTER_SEED 0x25BBE08F
+
/* xattr entry (for both inline & shared xattrs) */
struct erofs_xattr_entry {
__u8 e_name_len; /* length of name */
--
2.19.1.6.gb485710b
Add xxh32 library which could be used by following xattr bloom filter
feature.
Signed-off-by: Jingbo Xu <[email protected]>
---
include/erofs/xxhash.h | 35 +++++++++++++++++
lib/Makefile.am | 3 +-
lib/xxhash.c | 85 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 122 insertions(+), 1 deletion(-)
create mode 100644 include/erofs/xxhash.h
create mode 100644 lib/xxhash.c
diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h
new file mode 100644
index 0000000..fd9384e
--- /dev/null
+++ b/include/erofs/xxhash.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __EROFS_XXHASH_H
+#define __EROFS_XXHASH_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "defs.h"
+
+/*
+ * Copied from
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ * xxHash - Extremely Fast Hash algorithm
+ */
+
+/**
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+ *
+ * Return: The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index faa7311..a049af6 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -23,13 +23,14 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
$(top_srcdir)/include/erofs/xattr.h \
$(top_srcdir)/include/erofs/compress_hints.h \
$(top_srcdir)/include/erofs/fragments.h \
+ $(top_srcdir)/include/erofs/xxhash.h \
$(top_srcdir)/lib/liberofs_private.h
noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
- fragments.c rb_tree.c dedupe.c
+ fragments.c rb_tree.c dedupe.c xxhash.c
liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include
if ENABLE_LZ4
diff --git a/lib/xxhash.c b/lib/xxhash.c
new file mode 100644
index 0000000..e5f511c
--- /dev/null
+++ b/lib/xxhash.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ * xxHash - Extremely Fast Hash algorithm
+ */
+#include "erofs/xxhash.h"
+
+/*-*************************************
+ * Macros
+ **************************************/
+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+
+/*-*************************************
+ * Constants
+ **************************************/
+static const uint32_t PRIME32_1 = 2654435761U;
+static const uint32_t PRIME32_2 = 2246822519U;
+static const uint32_t PRIME32_3 = 3266489917U;
+static const uint32_t PRIME32_4 = 668265263U;
+static const uint32_t PRIME32_5 = 374761393U;
+
+/*-***************************
+ * Simple Hash Functions
+ ****************************/
+static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
+{
+ seed += input * PRIME32_2;
+ seed = xxh_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *b_end = p + len;
+ uint32_t h32;
+
+ if (len >= 16) {
+ const uint8_t *const limit = b_end - 16;
+ uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+ uint32_t v2 = seed + PRIME32_2;
+ uint32_t v3 = seed + 0;
+ uint32_t v4 = seed - PRIME32_1;
+
+ do {
+ v1 = xxh32_round(v1, get_unaligned_le32(p));
+ p += 4;
+ v2 = xxh32_round(v2, get_unaligned_le32(p));
+ p += 4;
+ v3 = xxh32_round(v3, get_unaligned_le32(p));
+ p += 4;
+ v4 = xxh32_round(v4, get_unaligned_le32(p));
+ p += 4;
+ } while (p <= limit);
+
+ h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
+ xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (uint32_t)len;
+
+ while (p + 4 <= b_end) {
+ h32 += get_unaligned_le32(p) * PRIME32_3;
+ h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h32 += (*p) * PRIME32_5;
+ h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
--
2.19.1.6.gb485710b