by Jeffrey Layton

[permalink] [raw]

Subject: [PATCH v12 54/54] ceph: fscrypt support for writepages

Add the appropriate machinery to write back dirty data with encryption.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/ceph/addr.c | 62 ++++++++++++++++++++++++++++++++++++++----------
fs/ceph/crypto.h | 18 +++++++++++++-
2 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 403e7a960a4e..cc4f561bd03c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -556,10 +556,12 @@ static u64 get_writepages_data_length(struct inode *inode,
struct page *page, u64 start)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_snap_context *snapc = page_snap_context(page);
+ struct ceph_snap_context *snapc;
struct ceph_cap_snap *capsnap = NULL;
u64 end = i_size_read(inode);
+ u64 ret;

+ snapc = page_snap_context(ceph_fscrypt_pagecache_page(page));
if (snapc != ci->i_head_snapc) {
bool found = false;
spin_lock(&ci->i_ceph_lock);
@@ -574,9 +576,12 @@ static u64 get_writepages_data_length(struct inode *inode,
spin_unlock(&ci->i_ceph_lock);
WARN_ON(!found);
}
- if (end > page_offset(page) + thp_size(page))
- end = page_offset(page) + thp_size(page);
- return end > start ? end - start : 0;
+ if (end > ceph_fscrypt_page_offset(page) + thp_size(page))
+ end = ceph_fscrypt_page_offset(page) + thp_size(page);
+ ret = end > start ? end - start : 0;
+ if (ret && fscrypt_is_bounce_page(page))
+ ret = round_up(ret, CEPH_FSCRYPT_BLOCK_SIZE);
+ return ret;
}

/*
@@ -792,6 +797,11 @@ static void writepages_finish(struct ceph_osd_request *req)
total_pages += num_pages;
for (j = 0; j < num_pages; j++) {
page = osd_data->pages[j];
+ if (fscrypt_is_bounce_page(page)) {
+ page = fscrypt_pagecache_page(page);
+ fscrypt_free_bounce_page(osd_data->pages[j]);
+ osd_data->pages[j] = page;
+ }
BUG_ON(!page);
WARN_ON(!PageUptodate(page));

@@ -1050,10 +1060,28 @@ static int ceph_writepages_start(struct address_space *mapping,
BLK_RW_ASYNC);
}

+ if (IS_ENCRYPTED(inode)) {
+ pages[locked_pages] =
+ fscrypt_encrypt_pagecache_blocks(page,
+ PAGE_SIZE, 0,
+ locked_pages ? GFP_NOWAIT : GFP_NOFS);
+ if (IS_ERR(pages[locked_pages])) {
+ if (PTR_ERR(pages[locked_pages]) == -EINVAL)
+ pr_err("%s: inode->i_blkbits=%hhu\n",
+ __func__, inode->i_blkbits);
+ /* better not fail on first page! */
+ BUG_ON(locked_pages == 0);
+ pages[locked_pages] = NULL;
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ break;
+ }
+ ++locked_pages;
+ } else {
+ pages[locked_pages++] = page;
+ }

- pages[locked_pages++] = page;
pvec.pages[i] = NULL;
-
len += thp_size(page);
}

@@ -1081,7 +1109,7 @@ static int ceph_writepages_start(struct address_space *mapping,
}

new_request:
- offset = page_offset(pages[0]);
+ offset = ceph_fscrypt_page_offset(pages[0]);
len = wsize;

req = ceph_osdc_new_request(&fsc->client->osdc,
@@ -1102,8 +1130,8 @@ static int ceph_writepages_start(struct address_space *mapping,
ceph_wbc.truncate_size, true);
BUG_ON(IS_ERR(req));
}
- BUG_ON(len < page_offset(pages[locked_pages - 1]) +
- thp_size(page) - offset);
+ BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) +
+ thp_size(pages[locked_pages - 1]) - offset);

req->r_callback = writepages_finish;
req->r_inode = inode;
@@ -1113,7 +1141,9 @@ static int ceph_writepages_start(struct address_space *mapping,
data_pages = pages;
op_idx = 0;
for (i = 0; i < locked_pages; i++) {
- u64 cur_offset = page_offset(pages[i]);
+ struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
+
+ u64 cur_offset = page_offset(page);
/*
* Discontinuity in page range? Ceph can handle that by just passing
* multiple extents in the write op.
@@ -1142,9 +1172,9 @@ static int ceph_writepages_start(struct address_space *mapping,
op_idx++;
}

- set_page_writeback(pages[i]);
+ set_page_writeback(page);
if (caching)
- ceph_set_page_fscache(pages[i]);
+ ceph_set_page_fscache(page);
len += thp_size(page);
}
ceph_fscache_write_to_cache(inode, offset, len, caching);
@@ -1160,8 +1190,16 @@ static int ceph_writepages_start(struct address_space *mapping,
offset);
len = max(len, min_len);
}
+ if (IS_ENCRYPTED(inode))
+ len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE);
+
dout("writepages got pages at %llu~%llu\n", offset, len);

+ if (IS_ENCRYPTED(inode) &&
+ ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK))
+ pr_warn("%s: bad encrypted write offset=%lld len=%llu\n",
+ __func__, offset, len);
+
osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
0, from_pool, false);
osd_req_op_extent_update(req, op_idx, len);
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h
index 92a7b221a975..0cf526f07567 100644
--- a/fs/ceph/crypto.h
+++ b/fs/ceph/crypto.h
@@ -146,6 +146,12 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, u64 of
struct ceph_sparse_extent *map, u32 ext_cnt);
int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off,
int len, gfp_t gfp);
+
+static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
+{
+ return fscrypt_is_bounce_page(page) ? fscrypt_pagecache_page(page) : page;
+}
+
#else /* CONFIG_FS_ENCRYPTION */

static inline void ceph_fscrypt_set_ops(struct super_block *sb)
@@ -235,6 +241,16 @@ static inline int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **
{
return 0;
}
+
+static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
+{
+ return page;
+}
#endif /* CONFIG_FS_ENCRYPTION */

-#endif
+static inline loff_t ceph_fscrypt_page_offset(struct page *page)
+{
+ return page_offset(ceph_fscrypt_pagecache_page(page));
+}
+
+#endif /* _CEPH_CRYPTO_H */
--
2.35.1

2022-04-05 00:17:55

by Jeffrey Layton

[permalink] [raw]

Subject: [PATCH v12 28/54] ceph: add support to readdir for encrypted filenames

From: Xiubo Li <[email protected]>

Once we've decrypted the names in a readdir reply, we no longer need the
crypttext, so overwrite the names in ceph_mds_reply_dir_entry with the
decrypted ones. Then both ceph_readdir_prepopulate() and ceph_readdir()
can use the decrypted names directly.

[ jlayton: convert some BUG_ONs into error returns ]

Signed-off-by: Xiubo Li <[email protected]>
Signed-off-by: Jeff Layton <[email protected]>
---
fs/ceph/crypto.c | 12 +++++--
fs/ceph/crypto.h | 1 +
fs/ceph/dir.c | 35 +++++++++++++++----
fs/ceph/inode.c | 12 ++++---
fs/ceph/mds_client.c | 81 ++++++++++++++++++++++++++++++++++++++++----
fs/ceph/mds_client.h | 4 +--
6 files changed, 124 insertions(+), 21 deletions(-)

diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
index 7cf45d374c1b..c86cc4a7eaf6 100644
--- a/fs/ceph/crypto.c
+++ b/fs/ceph/crypto.c
@@ -142,7 +142,10 @@ int ceph_encode_encrypted_dname(const struct inode *parent, struct qstr *d_name,
int ret;
u8 *cryptbuf;

- WARN_ON_ONCE(!fscrypt_has_encryption_key(parent));
+ if (!fscrypt_has_encryption_key(parent)) {
+ memcpy(buf, d_name->name, d_name->len);
+ return d_name->len;
+ }

/*
* Convert cleartext d_name to ciphertext. If result is longer than
@@ -184,6 +187,8 @@ int ceph_encode_encrypted_dname(const struct inode *parent, struct qstr *d_name,

int ceph_encode_encrypted_fname(const struct inode *parent, struct dentry *dentry, char *buf)
{
+ WARN_ON_ONCE(!fscrypt_has_encryption_key(parent));
+
return ceph_encode_encrypted_dname(parent, &dentry->d_name, buf);
}

@@ -228,7 +233,10 @@ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
* generating a nokey name via fscrypt.
*/
if (!fscrypt_has_encryption_key(fname->dir)) {
- memcpy(oname->name, fname->name, fname->name_len);
+ if (fname->no_copy)
+ oname->name = fname->name;
+ else
+ memcpy(oname->name, fname->name, fname->name_len);
oname->len = fname->name_len;
if (is_nokey)
*is_nokey = true;
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h
index e54150260eba..080905b0c73c 100644
--- a/fs/ceph/crypto.h
+++ b/fs/ceph/crypto.h
@@ -19,6 +19,7 @@ struct ceph_fname {
unsigned char *ctext; // binary crypttext (if any)
u32 name_len; // length of name buffer
u32 ctext_len; // length of crypttext
+ bool no_copy;
};

struct ceph_fscrypt_auth {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index caf2547c3fe1..5ce2a6384e55 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -9,6 +9,7 @@

#include "super.h"
#include "mds_client.h"
+#include "crypto.h"

/*
* Directory operations: readdir, lookup, create, link, unlink,
@@ -241,7 +242,9 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
di = ceph_dentry(dentry);
if (d_unhashed(dentry) ||
d_really_is_negative(dentry) ||
- di->lease_shared_gen != shared_gen) {
+ di->lease_shared_gen != shared_gen ||
+ ((dentry->d_flags & DCACHE_NOKEY_NAME) &&
+ fscrypt_has_encryption_key(dir))) {
spin_unlock(&dentry->d_lock);
dput(dentry);
err = -EAGAIN;
@@ -340,6 +343,10 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
ctx->pos = 2;
}

+ err = fscrypt_prepare_readdir(inode);
+ if (err)
+ return err;
+
spin_lock(&ci->i_ceph_lock);
/* request Fx cap. if have Fx, we don't need to release Fs cap
* for later create/unlink. */
@@ -389,6 +396,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
+
err = ceph_alloc_readdir_reply_buffer(req, inode);
if (err) {
ceph_mdsc_put_request(req);
@@ -402,11 +410,20 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
req->r_inode_drop = CEPH_CAP_FILE_EXCL;
}
if (dfi->last_name) {
- req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
+ struct qstr d_name = { .name = dfi->last_name,
+ .len = strlen(dfi->last_name) };
+
+ req->r_path2 = kzalloc(NAME_MAX + 1, GFP_KERNEL);
if (!req->r_path2) {
ceph_mdsc_put_request(req);
return -ENOMEM;
}
+
+ err = ceph_encode_encrypted_dname(inode, &d_name, req->r_path2);
+ if (err < 0) {
+ ceph_mdsc_put_request(req);
+ return err;
+ }
} else if (is_hash_order(ctx->pos)) {
req->r_args.readdir.offset_hash =
cpu_to_le32(fpos_hash(ctx->pos));
@@ -511,15 +528,20 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
for (; i < rinfo->dir_nr; i++) {
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;

- BUG_ON(rde->offset < ctx->pos);
+ if (rde->offset < ctx->pos) {
+ pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n",
+ __func__, rde->offset, ctx->pos);
+ return -EIO;
+ }
+
+ if (WARN_ON_ONCE(!rde->inode.in))
+ return -EIO;

ctx->pos = rde->offset;
dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
i, rinfo->dir_nr, ctx->pos,
rde->name_len, rde->name, &rde->inode.in);

- BUG_ON(!rde->inode.in);
-
if (!dir_emit(ctx, rde->name, rde->name_len,
ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
le32_to_cpu(rde->inode.in->mode) >> 12)) {
@@ -532,6 +554,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
dout("filldir stopping us...\n");
return 0;
}
+
+ /* Reset the lengths to their original allocated vals */
ctx->pos++;
}

@@ -586,7 +610,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
dfi->dir_ordered_count);
spin_unlock(&ci->i_ceph_lock);
}
-
dout("readdir %p file %p done.\n", inode, file);
return 0;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 684dfc3f006c..98ac1369b353 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1750,7 +1750,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct ceph_mds_session *session)
{
struct dentry *parent = req->r_dentry;
- struct ceph_inode_info *ci = ceph_inode(d_inode(parent));
+ struct inode *inode = d_inode(parent);
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct qstr dname;
struct dentry *dn;
@@ -1824,9 +1825,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
tvino.snap = le64_to_cpu(rde->inode.in->snapid);

if (rinfo->hash_order) {
- u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
- rde->name, rde->name_len);
- hash = ceph_frag_value(hash);
+ u32 hash = ceph_frag_value(rde->raw_hash);
if (hash != last_hash)
fpos_offset = 2;
last_hash = hash;
@@ -1849,6 +1848,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
err = -ENOMEM;
goto out;
}
+ if (rde->is_nokey) {
+ spin_lock(&dn->d_lock);
+ dn->d_flags |= DCACHE_NOKEY_NAME;
+ spin_unlock(&dn->d_lock);
+ }
} else if (d_really_is_positive(dn) &&
(ceph_ino(d_inode(dn)) != tvino.ino ||
ceph_snap(d_inode(dn)) != tvino.snap)) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0a7f18d4df73..50fe77768295 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -439,20 +439,87 @@ static int parse_reply_info_readdir(void **p, void *end,

info->dir_nr = num;
while (num) {
+ struct inode *inode = d_inode(req->r_dentry);
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
+ struct fscrypt_str tname = FSTR_INIT(NULL, 0);
+ struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+ struct ceph_fname fname;
+ u32 altname_len, _name_len;
+ u8 *altname, *_name;
+
/* dentry */
- ceph_decode_32_safe(p, end, rde->name_len, bad);
- ceph_decode_need(p, end, rde->name_len, bad);
- rde->name = *p;
- *p += rde->name_len;
- dout("parsed dir dname '%.*s'\n", rde->name_len, rde->name);
+ ceph_decode_32_safe(p, end, _name_len, bad);
+ ceph_decode_need(p, end, _name_len, bad);
+ _name = *p;
+ *p += _name_len;
+ dout("parsed dir dname '%.*s'\n", _name_len, _name);
+
+ if (info->hash_order)
+ rde->raw_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
+ _name, _name_len);

/* dentry lease */
err = parse_reply_info_lease(p, end, &rde->lease, features,
- &rde->altname_len, &rde->altname);
+ &altname_len, &altname);
if (err)
goto out_bad;

+ /*
+ * Try to decrypt the dentry names and update them
+ * in the ceph_mds_reply_dir_entry struct.
+ */
+ fname.dir = inode;
+ fname.name = _name;
+ fname.name_len = _name_len;
+ fname.ctext = altname;
+ fname.ctext_len = altname_len;
+ /*
+ * The _name_len may be larger than altname_len, such as
+ * when the human readable name length is in the range
+ * (CEPH_NOHASH_NAME_MAX, CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE);
+ * the copy in ceph_fname_to_usr would then corrupt the
+ * data if there is no encryption key.
+ *
+ * Just set the no_copy flag, so that if there is no
+ * encryption key oname.name is always pointed at
+ * _name instead of being copied.
+ */
+ fname.no_copy = true;
+ if (altname_len == 0) {
+ /*
+ * Set tname to _name, and this will be used
+ * to do the base64_decode in-place. It's
+ * safe because the decoded string should
+ * always be shorter, which is 3/4 of origin
+ * string.
+ */
+ tname.name = _name;
+
+ /*
+ * Set oname to _name too, and this will be
+ * used to do the decryption in-place.
+ */
+ oname.name = _name;
+ oname.len = _name_len;
+ } else {
+ /*
+ * This will do the decryption in-place,
+ * directly on the altname crypttext.
+ */
+ oname.name = altname;
+ oname.len = altname_len;
+ }
+ rde->is_nokey = false;
+ err = ceph_fname_to_usr(&fname, &tname, &oname, &rde->is_nokey);
+ if (err) {
+ pr_err("%s unable to decode %.*s, got %d\n", __func__,
+ _name_len, _name, err);
+ goto out_bad;
+ }
+ rde->name = oname.name;
+ rde->name_len = oname.len;
+
/* inode */
err = parse_reply_info_in(p, end, &rde->inode, features);
if (err < 0)
@@ -3501,7 +3568,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
req->r_op == CEPH_MDS_OP_LSSNAP))
- ceph_readdir_prepopulate(req, req->r_session);
+ err = ceph_readdir_prepopulate(req, req->r_session);
}
current->journal_info = NULL;
mutex_unlock(&req->r_fill_mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index cd719691a86d..046a9368c4a9 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -96,10 +96,10 @@ struct ceph_mds_reply_info_in {
};

struct ceph_mds_reply_dir_entry {
+ bool is_nokey;
char *name;
- u8 *altname;
u32 name_len;
- u32 altname_len;
+ u32 raw_hash;
struct ceph_mds_reply_lease *lease;
struct ceph_mds_reply_info_in inode;
loff_t offset;
--
2.35.1