This patchset implements code to support encryption of Ext4 filesystem
instances that have blocksize less than pagesize. The patchset has
been tested on both ppc64 and x86_64 machines.
This patchset changes the prototype of the function
fscrypt_encrypt_page(). I will make the relevant changes to the rest
of the filesystems (e.g. f2fs) and post them in the next version of
the patchset.
Chandan Rajendra (8):
ext4: use EXT4_INODE_ENCRYPT flag to detect encrypted bio
fs/buffer.c: make some functions non-static
ext4: decrypt all contiguous blocks in a page
ext4: decrypt all boundary blocks when doing buffered write
ext4: decrypt the block that needs to be partially zeroed
ext4: encrypt blocks whose size is less than page size
ext4: decrypt blocks whose size is less than page size
ext4: enable encryption for blocksize less than page size
fs/buffer.c | 6 +-
fs/crypto/bio.c | 25 +++-
fs/crypto/crypto.c | 80 ++++++++----
fs/ext4/inode.c | 32 +++--
fs/ext4/page-io.c | 58 ++++++---
fs/ext4/readpage.c | 275 +++++++++++++++++++++++++++++++++++++++-
fs/ext4/super.c | 9 +-
include/linux/buffer_head.h | 4 +
include/linux/fscrypt.h | 1 +
include/linux/fscrypt_notsupp.h | 19 +--
include/linux/fscrypt_supp.h | 14 +-
11 files changed, 436 insertions(+), 87 deletions(-)
--
2.9.5
For supporting encryption in the blocksize < pagesize scenario, the
bio->bi_private field will be needed to hold the address of the
encryption context structure, so it can no longer be used to infer a
bio's encryption status. Hence this commit uses ext4_encrypted_inode()
to detect the encryption status of a file instead.
Signed-off-by: Chandan Rajendra <[email protected]>
---
fs/ext4/readpage.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 9ffa6fa..0be590b 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -50,7 +50,13 @@
static inline bool ext4_bio_encrypted(struct bio *bio)
{
#ifdef CONFIG_EXT4_FS_ENCRYPTION
- return unlikely(bio->bi_private != NULL);
+ if (bio->bi_vcnt) {
+ struct inode *inode = bio->bi_io_vec->bv_page->mapping->host;
+ if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+ return true;
+ }
+
+ return false;
#else
return false;
#endif
--
2.9.5
The functions end_buffer_async_read(), block_size_bits() and
create_page_buffers() will be needed by ext4 to implement encryption for
blocksize < pagesize scenario.
Signed-off-by: Chandan Rajendra <[email protected]>
---
fs/buffer.c | 6 +++---
include/linux/buffer_head.h | 4 ++++
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 551b781..4360250 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -256,7 +256,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
* I/O completion handler for block_read_full_page() - pages
* which come unlocked at the end of I/O.
*/
-static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
+void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
unsigned long flags;
struct buffer_head *first;
@@ -1642,12 +1642,12 @@ EXPORT_SYMBOL(clean_bdev_aliases);
* constraints in mind (relevant mostly if some
* architecture has a slow bit-scan instruction)
*/
-static inline int block_size_bits(unsigned int blocksize)
+int block_size_bits(unsigned int blocksize)
{
return ilog2(blocksize);
}
-static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
{
BUG_ON(!PageLocked(page));
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index f1aed01..c26d16c 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -159,9 +159,13 @@ void set_bh_page(struct buffer_head *bh,
int try_to_free_buffers(struct page *);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bool retry);
+int block_size_bits(unsigned int blocksize);
void create_empty_buffers(struct page *, unsigned long,
unsigned long b_state);
+struct buffer_head *create_page_buffers(struct page *page, struct inode *inode,
+ unsigned int b_state);
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
+void end_buffer_async_read(struct buffer_head *bh, int uptodate);
void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
void end_buffer_async_write(struct buffer_head *bh, int uptodate);
--
2.9.5
Currently, __ext4_block_zero_page_range() decrypts the entire page. This
commit instead decrypts only the block that is to be partially zeroed,
rather than the whole page.
Signed-off-by: Chandan Rajendra <[email protected]>
---
fs/ext4/inode.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d3baa15..db47f6d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4030,9 +4030,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
ext4_encrypted_inode(inode)) {
/* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode));
- BUG_ON(blocksize != PAGE_SIZE);
WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
- page, PAGE_SIZE, 0, page->index));
+ page, blocksize, round_down(offset, blocksize), iblock));
}
}
if (ext4_should_journal_data(inode)) {
--
2.9.5
With blocksize < pagesize, a page can contain more than one block. Hence
this commit changes completion_pages() to invoke fscrypt_decrypt_page()
in order to decrypt all the contiguous blocks mapped by the page.
Signed-off-by: Chandan Rajendra <[email protected]>
---
fs/crypto/bio.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 0d5e6a5..eb6e06a 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -40,8 +40,23 @@ static void completion_pages(struct work_struct *work)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- int ret = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
+ struct inode *inode = page->mapping->host;
+ const unsigned long blocksize = inode->i_sb->s_blocksize;
+ const unsigned blkbits = inode->i_blkbits;
+ int page_blk = page->index << (PAGE_SHIFT - blkbits);
+ int blk = page_blk + (bv->bv_offset >> blkbits);
+ int nr_blks = bv->bv_len >> blkbits;
+ int ret = 0;
+ int j;
+
+ for (j = 0; j < nr_blks; j++, blk++) {
+ ret = fscrypt_decrypt_page(page->mapping->host,
+ page, blocksize,
+ bv->bv_offset + (j << blkbits),
+ blk);
+ if (ret)
+ break;
+ }
if (ret) {
WARN_ON_ONCE(1);
--
2.9.5
This commit adds code to decrypt all file blocks mapped by a page.
Signed-off-by: Chandan Rajendra <[email protected]>
---
fs/crypto/bio.c | 6 +-
fs/ext4/readpage.c | 267 +++++++++++++++++++++++++++++++++++++++-
include/linux/fscrypt.h | 1 +
include/linux/fscrypt_notsupp.h | 4 +-
include/linux/fscrypt_supp.h | 3 +-
5 files changed, 274 insertions(+), 7 deletions(-)
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index eb6e06a..2744e55 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -70,9 +70,11 @@ static void completion_pages(struct work_struct *work)
bio_put(bio);
}
-void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
+void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio,
+ void (*process_bio)(struct work_struct *))
{
- INIT_WORK(&ctx->r.work, completion_pages);
+ INIT_WORK(&ctx->r.work,
+ process_bio ? process_bio : completion_pages);
ctx->r.bio = bio;
queue_work(fscrypt_read_workqueue, &ctx->r.work);
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 0be590b..e494e2d 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -62,6 +62,143 @@ static inline bool ext4_bio_encrypted(struct bio *bio)
#endif
}
+static void ext4_complete_block(struct work_struct *work)
+{
+ struct fscrypt_ctx *ctx =
+ container_of(work, struct fscrypt_ctx, r.work);
+ struct buffer_head *first, *bh, *tmp;
+ struct bio *bio;
+ struct bio_vec *bv;
+ struct page *page;
+ struct inode *inode;
+ u64 blk_nr;
+ unsigned long flags;
+ int page_uptodate = 1;
+ int ret;
+
+ bio = ctx->r.bio;
+ BUG_ON(bio->bi_vcnt != 1);
+
+ bv = bio->bi_io_vec;
+ page = bv->bv_page;
+ inode = page->mapping->host;
+
+ BUG_ON(bv->bv_len != i_blocksize(inode));
+
+ blk_nr = page->index << (PAGE_SHIFT - inode->i_blkbits);
+ blk_nr += bv->bv_offset >> inode->i_blkbits;
+
+ bh = ctx->r.bh;
+
+ ret = fscrypt_decrypt_page(inode, page, bv->bv_len,
+ bv->bv_offset, blk_nr);
+ if (ret) {
+ WARN_ON_ONCE(1);
+ SetPageError(page);
+ } else {
+ set_buffer_uptodate(bh);
+ }
+
+ fscrypt_release_ctx(ctx);
+ bio_put(bio);
+
+ first = page_buffers(page);
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+
+ clear_buffer_async_read(bh);
+ unlock_buffer(bh);
+ tmp = bh;
+ do {
+ if (!buffer_uptodate(tmp))
+ page_uptodate = 0;
+ if (buffer_async_read(tmp)) {
+ BUG_ON(!buffer_locked(tmp));
+ goto still_busy;
+ }
+ tmp = tmp->b_this_page;
+ } while (tmp != bh);
+
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
+
+ if (page_uptodate && !PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ return;
+
+still_busy:
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
+ return;
+}
+
+static void block_end_io(struct bio *bio)
+{
+ struct buffer_head *bh;
+ struct buffer_head *first;
+ struct buffer_head *tmp;
+ unsigned long flags;
+ struct page *page;
+ int page_uptodate = 1;
+
+ if (ext4_bio_encrypted(bio)) {
+ struct fscrypt_ctx *ctx = bio->bi_private;
+ bh = ctx->r.bh;
+ if (bio->bi_status) {
+ fscrypt_release_ctx(ctx);
+ } else {
+ fscrypt_decrypt_bio_pages(ctx, bio,
+ ext4_complete_block);
+ return;
+ }
+ } else {
+ bh = bio->bi_private;
+ }
+
+ page = bh->b_page;
+
+ if (!bio->bi_status) {
+ set_buffer_uptodate(bh);
+ } else {
+ clear_buffer_uptodate(bh);
+ /* chandan: buffer_io_error(bh); */
+ SetPageError(page);
+ }
+
+ first = page_buffers(page);
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+ clear_buffer_async_read(bh);
+ unlock_buffer(bh);
+ tmp = bh;
+ do {
+ if (!buffer_uptodate(tmp))
+ page_uptodate = 0;
+ if (buffer_async_read(tmp)) {
+ BUG_ON(!buffer_locked(tmp));
+ goto still_busy;
+ }
+ tmp = tmp->b_this_page;
+ } while (tmp != bh);
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
+
+ /*
+ * If none of the buffers had errors and they are all
+ * uptodate then we can set the page uptodate.
+ */
+ if (page_uptodate && !PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ return;
+
+still_busy:
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
+ return;
+}
+
/*
* I/O completion handler for multipage BIOs.
*
@@ -83,7 +220,7 @@ static void mpage_end_io(struct bio *bio)
if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
- fscrypt_decrypt_bio_pages(bio->bi_private, bio);
+ fscrypt_decrypt_bio_pages(bio->bi_private, bio, NULL);
return;
}
}
@@ -102,6 +239,132 @@ static void mpage_end_io(struct bio *bio)
bio_put(bio);
}
+int ext4_block_read_full_page(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct fscrypt_ctx *ctx;
+ struct bio *bio;
+ sector_t iblock, lblock;
+ struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+ unsigned int blocksize, bbits;
+ int nr, i;
+ int fully_mapped = 1;
+ int ret;
+
+ head = create_page_buffers(page, inode, 0);
+ blocksize = head->b_size;
+ bbits = block_size_bits(blocksize);
+
+ iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
+ lblock = (i_size_read(inode)+blocksize-1) >> bbits;
+ bh = head;
+ nr = 0;
+ i = 0;
+
+ do {
+ if (buffer_uptodate(bh))
+ continue;
+
+ if (!buffer_mapped(bh)) {
+ int err = 0;
+
+ fully_mapped = 0;
+ if (iblock < lblock) {
+ WARN_ON(bh->b_size != blocksize);
+ err = ext4_get_block(inode, iblock, bh, 0);
+ if (err)
+ SetPageError(page);
+ }
+ if (!buffer_mapped(bh)) {
+ zero_user(page, i << bbits, blocksize);
+ if (!err)
+ set_buffer_uptodate(bh);
+ continue;
+ }
+ /*
+ * get_block() might have updated the buffer
+ * synchronously
+ */
+ if (buffer_uptodate(bh))
+ continue;
+ }
+ arr[nr++] = bh;
+ } while (i++, iblock++, (bh = bh->b_this_page) != head);
+
+ if (fully_mapped)
+ SetPageMappedToDisk(page);
+
+ if (!nr) {
+ /*
+ * All buffers are uptodate - we can set the page uptodate
+ * as well. But not if ext4_get_block() returned an error.
+ */
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+ }
+
+ /* Stage two: lock the buffers */
+ for (i = 0; i < nr; i++) {
+ bh = arr[i];
+ lock_buffer(bh);
+ set_buffer_async_read(bh);
+ }
+
+ /*
+ * Stage 3: start the IO. Check for uptodateness
+ * inside the buffer lock in case another process reading
+ * the underlying blockdev brought it uptodate (the sct fix).
+ */
+ for (i = 0; i < nr; i++) {
+ ctx = NULL;
+ bh = arr[i];
+
+ if (buffer_uptodate(bh)) {
+ end_buffer_async_read(bh, 1);
+ continue;
+ }
+
+ if (ext4_encrypted_inode(inode)
+ && S_ISREG(inode->i_mode)) {
+ ctx = fscrypt_get_ctx(inode, GFP_NOFS);
+ if (IS_ERR(ctx)) {
+ set_page_error:
+ SetPageError(page);
+ zero_user_segment(page, bh_offset(bh), blocksize);
+ continue;
+ }
+ ctx->r.bh = bh;
+ }
+
+ bio = bio_alloc(GFP_KERNEL, 1);
+ if (!bio) {
+ if (ctx)
+ fscrypt_release_ctx(ctx);
+ goto set_page_error;
+ }
+
+ bio->bi_iter.bi_sector = bh->b_blocknr * (blocksize >> 9);
+ bio_set_dev(bio, bh->b_bdev);
+ bio->bi_write_hint = 0;
+
+ ret = bio_add_page(bio, bh->b_page, blocksize, bh_offset(bh));
+ BUG_ON(bio->bi_iter.bi_size != blocksize);
+
+ bio->bi_end_io = block_end_io;
+ if (ctx)
+ bio->bi_private = ctx;
+ else
+ bio->bi_private = bh;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+ submit_bio(bio);
+ }
+
+ return 0;
+}
+
int ext4_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
unsigned nr_pages)
@@ -286,7 +549,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
bio = NULL;
}
if (!PageUptodate(page))
- block_read_full_page(page, ext4_get_block);
+ ext4_block_read_full_page(page);
else
unlock_page(page);
next_page:
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 08b4b40..98c51eb 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -34,6 +34,7 @@ struct fscrypt_ctx {
} w;
struct {
struct bio *bio;
+ struct buffer_head *bh;
struct work_struct work;
} r;
struct list_head free_list; /* Free list */
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h
index 019ddce..7da0692 100644
--- a/include/linux/fscrypt_notsupp.h
+++ b/include/linux/fscrypt_notsupp.h
@@ -164,8 +164,8 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
}
/* bio.c */
-static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx,
- struct bio *bio)
+static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio,
+ void (* process_bio)(struct work_struct *))
{
return;
}
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
index 983d06f..1d4e8ae 100644
--- a/include/linux/fscrypt_supp.h
+++ b/include/linux/fscrypt_supp.h
@@ -144,7 +144,8 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
}
/* bio.c */
-extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
+extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio,
+ void (* process_bio)(struct work_struct *));
extern void fscrypt_pullback_bio_page(struct page **, bool);
extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
unsigned int);
--
2.9.5
Now that we have all the code to support encryption in the blocksize <
pagesize scenario, this commit removes the conditional check in the
filesystem mount code.
Signed-off-by: Chandan Rajendra <[email protected]>
---
fs/ext4/super.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ebb7edb..5a52c98 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4138,14 +4138,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
}
- if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
- (blocksize != PAGE_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Unsupported blocksize for fs encryption");
- goto failed_mount_wq;
- }
This commit adds code to encrypt all file blocks mapped by a page.
Signed-off-by: Chandan Rajendra <[email protected]>
---
fs/crypto/crypto.c | 80 ++++++++++++++++++++++++++---------------
fs/ext4/page-io.c | 58 ++++++++++++++++++++----------
include/linux/fscrypt_notsupp.h | 15 ++++----
include/linux/fscrypt_supp.h | 11 ++++--
4 files changed, 108 insertions(+), 56 deletions(-)
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 732a786..52ad5cf 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -226,15 +226,16 @@ struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
* Return: A page with the encrypted content on success. Else, an
* error value or NULL.
*/
-struct page *fscrypt_encrypt_page(const struct inode *inode,
- struct page *page,
- unsigned int len,
- unsigned int offs,
- u64 lblk_num, gfp_t gfp_flags)
-
+int fscrypt_encrypt_page(const struct inode *inode,
+ struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ u64 lblk_num,
+ struct fscrypt_ctx **ctx,
+ struct page **ciphertext_page,
+ gfp_t gfp_flags)
{
- struct fscrypt_ctx *ctx;
- struct page *ciphertext_page = page;
+ int mark_pg_private = 0;
int err;
BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0);
@@ -242,41 +243,64 @@ struct page *fscrypt_encrypt_page(const struct inode *inode,
if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) {
/* with inplace-encryption we just encrypt the page */
err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page,
- ciphertext_page, len, offs,
+ page, len, offs,
gfp_flags);
if (err)
- return ERR_PTR(err);
+ return err;
- return ciphertext_page;
+ *ciphertext_page = page;
+
+ return 0;
}
BUG_ON(!PageLocked(page));
- ctx = fscrypt_get_ctx(inode, gfp_flags);
- if (IS_ERR(ctx))
- return (struct page *)ctx;
+ if (!*ctx) {
+ BUG_ON(*ciphertext_page);
+ *ctx = fscrypt_get_ctx(inode, gfp_flags);
+ if (IS_ERR(*ctx))
+ return PTR_ERR(*ctx);
+
+ (*ctx)->w.control_page = page;
+ } else {
+ BUG_ON(!*ciphertext_page);
+ }
+
+ if (!*ciphertext_page) {
+ /* The encryption operation will require a bounce page. */
+ *ciphertext_page = fscrypt_alloc_bounce_page(*ctx, gfp_flags);
+ if (IS_ERR(*ciphertext_page)) {
+ err = PTR_ERR(*ciphertext_page);
+ *ciphertext_page = NULL;
+ goto errout;
+ }
+ mark_pg_private = 1;
+ }
- /* The encryption operation will require a bounce page. */
- ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags);
- if (IS_ERR(ciphertext_page))
- goto errout;
- ctx->w.control_page = page;
err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num,
- page, ciphertext_page, len, offs,
+ page, *ciphertext_page, len, offs,
gfp_flags);
- if (err) {
- ciphertext_page = ERR_PTR(err);
+ if (err)
goto errout;
+
+ if (mark_pg_private) {
+ SetPagePrivate(*ciphertext_page);
+ set_page_private(*ciphertext_page, (unsigned long)(*ctx));
+ lock_page(*ciphertext_page);
}
- SetPagePrivate(ciphertext_page);
- set_page_private(ciphertext_page, (unsigned long)ctx);
- lock_page(ciphertext_page);
- return ciphertext_page;
+
+ return 0;
errout:
- fscrypt_release_ctx(ctx);
- return ciphertext_page;
+ if (*ciphertext_page && PagePrivate(*ciphertext_page)) {
+ set_page_private(*ciphertext_page, (unsigned long)NULL);
+ ClearPagePrivate(*ciphertext_page);
+ unlock_page(*ciphertext_page);
+ }
+
+ fscrypt_release_ctx(*ctx);
+ return err;
}
EXPORT_SYMBOL(fscrypt_encrypt_page);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index db75901..9828d77 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -415,7 +415,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
struct writeback_control *wbc,
bool keep_towrite)
{
- struct page *data_page = NULL;
+ struct page *ciphertext_page = NULL;
struct inode *inode = page->mapping->host;
unsigned block_start;
struct buffer_head *bh, *head;
@@ -475,36 +475,56 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
nr_to_submit++;
} while ((bh = bh->b_this_page) != head);
- bh = head = page_buffers(page);
if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
nr_to_submit) {
+ struct fscrypt_ctx *ctx;
+ u64 blk_nr;
gfp_t gfp_flags = GFP_NOFS;
- retry_encrypt:
- data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
- page->index, gfp_flags);
- if (IS_ERR(data_page)) {
- ret = PTR_ERR(data_page);
- if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
- if (io->io_bio) {
- ext4_io_submit(io);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ bh = head = page_buffers(page);
+ blk_nr = page->index << (PAGE_SHIFT - inode->i_blkbits);
+ ctx = NULL;
+ ciphertext_page = NULL;
+
+ do {
+ if (!buffer_async_write(bh))
+ continue;
+ retry_encrypt:
+ ret = fscrypt_encrypt_page(inode, page, bh->b_size,
+ bh_offset(bh),
+ blk_nr, &ctx,
+ &ciphertext_page,
+ gfp_flags);
+ if (ret) {
+ if (ret == -ENOMEM
+ && wbc->sync_mode == WB_SYNC_ALL) {
+ if (io->io_bio) {
+ ext4_io_submit(io);
+ congestion_wait(BLK_RW_ASYNC,
+ HZ/50);
+ }
+ gfp_flags |= __GFP_NOFAIL;
+ bh = head = page_buffers(page);
+ blk_nr = page->index
+ << (PAGE_SHIFT - inode->i_blkbits);
+ ctx = NULL;
+ ciphertext_page = NULL;
+ goto retry_encrypt;
}
- gfp_flags |= __GFP_NOFAIL;
- goto retry_encrypt;
+ ciphertext_page = NULL;
+ goto out;
}
- data_page = NULL;
- goto out;
- }
+ } while (++blk_nr, (bh = bh->b_this_page) != head);
}
+ bh = head = page_buffers(page);
/* Now submit buffers to write */
do {
if (!buffer_async_write(bh))
continue;
ret = io_submit_add_bh(io, inode,
- data_page ? data_page : page, bh);
+ ciphertext_page ? ciphertext_page : page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
@@ -520,8 +540,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
/* Error stopped previous loop? Clean up buffers... */
if (ret) {
out:
- if (data_page)
- fscrypt_restore_control_page(data_page);
+ if (ciphertext_page && !nr_submitted)
+ fscrypt_restore_control_page(ciphertext_page);
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
redirty_page_for_writepage(wbc, page);
do {
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h
index 63e5880..019ddce 100644
--- a/include/linux/fscrypt_notsupp.h
+++ b/include/linux/fscrypt_notsupp.h
@@ -26,13 +26,16 @@ static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
return;
}
-static inline struct page *fscrypt_encrypt_page(const struct inode *inode,
- struct page *page,
- unsigned int len,
- unsigned int offs,
- u64 lblk_num, gfp_t gfp_flags)
+static inline int fscrypt_encrypt_page(const struct inode *inode,
+ struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ u64 lblk_num,
+ struct fscrypt_ctx **ctx,
+ struct page **ciphertext_page,
+ gfp_t gfp_flags)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
static inline int fscrypt_decrypt_page(const struct inode *inode,
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
index cf9e9fc..983d06f 100644
--- a/include/linux/fscrypt_supp.h
+++ b/include/linux/fscrypt_supp.h
@@ -15,9 +15,14 @@
extern struct kmem_cache *fscrypt_info_cachep;
extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
extern void fscrypt_release_ctx(struct fscrypt_ctx *);
-extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
- unsigned int, unsigned int,
- u64, gfp_t);
+extern int fscrypt_encrypt_page(const struct inode *inode,
+ struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ u64 lblk_num,
+ struct fscrypt_ctx **ctx,
+ struct page **ciphertext_page,
+ gfp_t gfp_flags);
extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
unsigned int, u64);
extern void fscrypt_restore_control_page(struct page *);
--
2.9.5
With block size < page size, ext4_block_write_begin() may have two
blocks to decrypt. Hence this commit invokes fscrypt_decrypt_page() for
those blocks.
Signed-off-by: Chandan Rajendra <[email protected]>
---
fs/ext4/inode.c | 29 +++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6f6589e..d3baa15 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1158,12 +1158,13 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
unsigned to = from + len;
struct inode *inode = page->mapping->host;
unsigned block_start, block_end;
- sector_t block;
+ sector_t block, page_blk_nr;
int err = 0;
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned bbits;
struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
bool decrypt = false;
+ int i;
BUG_ON(!PageLocked(page));
BUG_ON(from > PAGE_SIZE);
@@ -1224,16 +1225,28 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
/*
* If we issued read requests, let them complete.
*/
- while (wait_bh > wait) {
- wait_on_buffer(*--wait_bh);
- if (!buffer_uptodate(*wait_bh))
+ for (i = 0; (wait + i) < wait_bh; i++) {
+ wait_on_buffer(wait[i]);
+ if (!buffer_uptodate(wait[i]))
err = -EIO;
}
- if (unlikely(err))
+ if (unlikely(err)) {
page_zero_new_buffers(page, from, to);
- else if (decrypt)
- err = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
+ } else if (decrypt) {
+ page_blk_nr = (sector_t)page->index << (PAGE_SHIFT - bbits);
+
+ while (wait_bh > wait) {
+ --wait_bh;
+ block = page_blk_nr + (bh_offset(*wait_bh) >> bbits);
+ err = fscrypt_decrypt_page(page->mapping->host, page,
+ (*wait_bh)->b_size,
+ bh_offset(*wait_bh),
+ block);
+ if (err)
+ break;
+ }
+ }
+
return err;
}
#endif
--
2.9.5
On Fri, Jan 12, 2018 at 07:41:23PM +0530, Chandan Rajendra wrote:
> @@ -1642,12 +1642,12 @@ EXPORT_SYMBOL(clean_bdev_aliases);
> * constraints in mind (relevant mostly if some
> * architecture has a slow bit-scan instruction)
> */
> -static inline int block_size_bits(unsigned int blocksize)
> +int block_size_bits(unsigned int blocksize)
> {
> return ilog2(blocksize);
> }
Could you move this to buffer.h instead please?
On 01/12/18 06:11, Chandan Rajendra wrote:
> For supporting encryption in blocksize < pagesize scenario,
> bio->bi_private field will be needed to hold the address of the
> encryption context structure. Hence this commit uses
> ext4_encrypted_inode() to detect the encryption status of a file.
>
> Signed-off-by: Chandan Rajendra <[email protected]>
> ---
> fs/ext4/readpage.c | 8 +++++++-
> 1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
> index 9ffa6fa..0be590b 100644
> --- a/fs/ext4/readpage.c
> +++ b/fs/ext4/readpage.c
> @@ -50,7 +50,13 @@
> static inline bool ext4_bio_encrypted(struct bio *bio)
> {
> #ifdef CONFIG_EXT4_FS_ENCRYPTION
> - return unlikely(bio->bi_private != NULL);
> + if (bio->bi_vcnt) {
> + struct inode *inode = bio->bi_io_vec->bv_page->mapping->host;
> + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
> + return true;
There are lots of spaces instead of tabs above...
> + }
> +
> + return false;
> #else
> return false;
> #endif
>
--
~Randy
On 01/12/18 06:11, Chandan Rajendra wrote:
> This patchset implements code to support encryption of Ext4 filesystem
> instances that have blocksize less than pagesize. The patchset has
> been tested on both ppc64 and x86_64 machines.
strictly "less than" or "less than or equal to"?
thanks,
--
~Randy
On Saturday, January 13, 2018 12:34:04 AM IST Randy Dunlap wrote:
> On 01/12/18 06:11, Chandan Rajendra wrote:
> > For supporting encryption in blocksize < pagesize scenario,
> > bio->bi_private field will be needed to hold the address of the
> > encryption context structure. Hence this commit uses
> > ext4_encrypted_inode() to detect the encryption status of a file.
> >
> > Signed-off-by: Chandan Rajendra <[email protected]>
> > ---
> > fs/ext4/readpage.c | 8 +++++++-
> > 1 file changed, 7 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
> > index 9ffa6fa..0be590b 100644
> > --- a/fs/ext4/readpage.c
> > +++ b/fs/ext4/readpage.c
> > @@ -50,7 +50,13 @@
> > static inline bool ext4_bio_encrypted(struct bio *bio)
> > {
> > #ifdef CONFIG_EXT4_FS_ENCRYPTION
> > - return unlikely(bio->bi_private != NULL);
> > + if (bio->bi_vcnt) {
> > + struct inode *inode = bio->bi_io_vec->bv_page->mapping->host;
> > + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
> > + return true;
>
> There are lots of spaces instead of tabs above...
>
I will fix them up in the next version of the patchset.
Thanks for pointing it out.
--
chandan
On Friday, January 12, 2018 8:08:23 PM IST Matthew Wilcox wrote:
> On Fri, Jan 12, 2018 at 07:41:23PM +0530, Chandan Rajendra wrote:
> > @@ -1642,12 +1642,12 @@ EXPORT_SYMBOL(clean_bdev_aliases);
> > * constraints in mind (relevant mostly if some
> > * architecture has a slow bit-scan instruction)
> > */
> > -static inline int block_size_bits(unsigned int blocksize)
> > +int block_size_bits(unsigned int blocksize)
> > {
> > return ilog2(blocksize);
> > }
>
> Could you move this to buffer.h instead please?
>
>
It just occurred to me that I could use inode->i_blkbits instead of
block_size_bits() inside the new function ext4_block_read_full_page().
Hence I will drop the above change from the next version of the patchset.
Thanks for the review.
--
chandan
On Saturday, January 13, 2018 12:37:59 AM IST Randy Dunlap wrote:
> On 01/12/18 06:11, Chandan Rajendra wrote:
> > This patchset implements code to support encryption of Ext4 filesystem
> > instances that have blocksize less than pagesize. The patchset has
> > been tested on both ppc64 and x86_64 machines.
>
> strictly "less than" or "less than or equal to"?
I have tested the patchset for the use cases of "less than or equal to
page size" i.e. On ppc64, the patchset was tested with blocksizes of
4k and 64k. On x86_64, the patchset was tested with blocksizes of 2k
and 4k.
--
chandan
On Sat, Jan 13, 2018 at 10:58:00AM +0530, Chandan Rajendra wrote:
> On Saturday, January 13, 2018 12:37:59 AM IST Randy Dunlap wrote:
> > On 01/12/18 06:11, Chandan Rajendra wrote:
> > > This patchset implements code to support encryption of Ext4 filesystem
> > > instances that have blocksize less than pagesize. The patchset has
> > > been tested on both ppc64 and x86_64 machines.
> >
> > strictly "less than" or "less than or equal to"?
>
> I have tested the patchset for the use cases of "less than or equal to
> page size" i.e. On ppc64, the patchset was tested with blocksizes of
> 4k and 64k. On x86_64, the patchset was tested with blocksizes of 2k
> and 4k.
I think there's a miscommunication here. ext4 already supports encryption
of filesystems with block size == page size. Chandan is adding support
for encrypting filesystems with block size < page size. Without removing
support for block size == page size ;-)
On 01/13/2018 04:48 AM, Matthew Wilcox wrote:
> On Sat, Jan 13, 2018 at 10:58:00AM +0530, Chandan Rajendra wrote:
>> On Saturday, January 13, 2018 12:37:59 AM IST Randy Dunlap wrote:
>>> On 01/12/18 06:11, Chandan Rajendra wrote:
>>>> This patchset implements code to support encryption of Ext4 filesystem
>>>> instances that have blocksize less than pagesize. The patchset has
>>>> been tested on both ppc64 and x86_64 machines.
>>>
>>> strictly "less than" or "less than or equal to"?
>>
>> I have tested the patchset for the use cases of "less than or equal to
>> page size" i.e. On ppc64, the patchset was tested with blocksizes of
>> 4k and 64k. On x86_64, the patchset was tested with blocksizes of 2k
>> and 4k.
>
> I think there's a miscommunication here. ext4 already supports encryption
> of filesystems with block size == page size. Chandan is adding support
> for encrypting filesystems with block size < page size. Without removing
> support for block size == page size ;-)
I see. Thanks for the clarification.
--
~Randy
Hi Chandan,
On Fri, Jan 12, 2018 at 07:41:21PM +0530, Chandan Rajendra wrote:
> This patchset implements code to support encryption of Ext4 filesystem
> instances that have blocksize less than pagesize. The patchset has
> been tested on both ppc64 and x86_64 machines.
>
> This patchset changes the prototype of the function
> fscrypt_encrypt_page(). I will make the relevant changes to the rest
> of the filesystems (e.g. f2fs) and post them in the next version of
> the patchset.
>
> Chandan Rajendra (8):
> ext4: use EXT4_INODE_ENCRYPT flag to detect encrypted bio
> fs/buffer.c: make some functions non-static
> ext4: decrypt all contiguous blocks in a page
> ext4: decrypt all boundary blocks when doing buffered write
> ext4: decrypt the block that needs to be partially zeroed
> ext4: encrypt blocks whose size is less than page size
> ext4: decrypt blocks whose size is less than page size
> ext4: enable encryption for blocksize less than page size
>
Thanks for working on this! We've wanted this for a while (both so that it
works on PowerPC with a 64K PAGE_SIZE, and so that people can't screw up their
1K blocksize filesystems by enabling the 'encrypt' flag), but no one ever got
around to it. And it's not easy!
First, just a few notes that didn't fit into my comments for the individual
patches.
Updating fscrypt_zeroout_range() seems to have been missed. Currently it
assumes block_size == PAGE_SIZE so it will need to be updated too.
The file Documentation/filesystems/fscrypt.rst will also need to be updated, at
least to remove the following sentence: "Currently, only the case where the
filesystem block size is equal to the system's page size (usually 4096 bytes) is
supported.".
Also, on future versions of this patchset can you please also Cc
[email protected]?
Thanks,
Eric
Hi Chandan,
On Fri, Jan 12, 2018 at 07:41:23PM +0530, Chandan Rajendra wrote:
> The functions end_buffer_async_read(), block_size_bits() and
> create_page_buffers() will be needed by ext4 to implement encryption for
> blocksize < pagesize scenario.
>
These all need to have EXPORT_SYMBOL() added, otherwise they won't be usable
when ext4 is built as a module.
block_size_bits() isn't actually needed as you can just use ->i_blkbits.
For the remaining two functions being exposed you need to explain why they are
*really* needed -- to create a version of block_read_full_page() that decrypts
the data after reading it. Which is very ugly, and involves copy-and-pasting
quite a bit of code.
It's true that we've already effectively copy+pasted mpage_readpages() into ext4
(fs/ext4/readpage.c) for essentially the same reason, so your approach isn't
really any worse. But did you consider instead cleaning things up by making the
generic code support encryption? We now have the IS_ENCRYPTED() macro which
allows generic code to efficiently tell whether the file is encrypted or not. I
think the only problem is that the code in fs/crypto/ can be built as a module,
so currently it can't be called directly by core code. So we could either
change that and start requiring that fscrypt be built-in, or we could have the
filesystem pass an (optional) decryption callback to the generic code.
Or at the very least we could put the "encryption-aware" versions of
mpages_readpages() and block_read_full_page() into fs/crypto/ rather than in
fs/ext4/, so that they could be used by other filesystems in the future
(although currently f2fs and ubifs won't need them).
Eric
Hi Chandan,
On Fri, Jan 12, 2018 at 07:41:24PM +0530, Chandan Rajendra wrote:
> With blocksize < pagesize, a page can contain more than one block. Hence
> this commit changes completion_pages() to invoke fscrypt_decrypt_page()
> in order to decrypt all the contiguous blocks mapped by the page.
>
> Signed-off-by: Chandan Rajendra <[email protected]>
> ---
> fs/crypto/bio.c | 19 +++++++++++++++++--
> 1 file changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
> index 0d5e6a5..eb6e06a 100644
> --- a/fs/crypto/bio.c
> +++ b/fs/crypto/bio.c
> @@ -40,8 +40,23 @@ static void completion_pages(struct work_struct *work)
>
> bio_for_each_segment_all(bv, bio, i) {
> struct page *page = bv->bv_page;
> - int ret = fscrypt_decrypt_page(page->mapping->host, page,
> - PAGE_SIZE, 0, page->index);
> + struct inode *inode = page->mapping->host;
> + const unsigned long blocksize = inode->i_sb->s_blocksize;
> + const unsigned blkbits = inode->i_blkbits;
> + int page_blk = page->index << (PAGE_SHIFT - blkbits);
> + int blk = page_blk + (bv->bv_offset >> blkbits);
Use 'u64' for the block number:
u64 page_blk = (u64)page->index << (PAGE_SHIFT - blkbits);
u64 blk = page_blk + (bv->bv_offset >> blkbits);
> + int nr_blks = bv->bv_len >> blkbits;
> + int ret = 0;
> + int j;
> +
> + for (j = 0; j < nr_blks; j++, blk++) {
> + ret = fscrypt_decrypt_page(page->mapping->host,
> + page, blocksize,
> + bv->bv_offset + (j << blkbits),
> + blk);
> + if (ret)
> + break;
> + }
>
> if (ret) {
> WARN_ON_ONCE(1);
Since we'll now actually be operating on blocks rather than pages, some
renaming seems to be in order, otherwise things will get very confusing. e.g.:
fscrypt_decrypt_page() -> fscrypt_decrypt_block()
fscrypt_encrypt_page() -> fscrypt_encrypt_block()
completion_pages() -> completion_blocks()
fscrypt_decrypt_bio_pages() -> fscrypt_decrypt_bio_blocks()
Please also update the comment for completion_pages() / completion_blocks() to
clarify that it is decrypting *blocks*, not *pages*.
(Yes, we should have named all these functions as *_block() originally. But
this is a good time to fix it!)
Eric
On Fri, Jan 12, 2018 at 07:41:25PM +0530, Chandan Rajendra wrote:
> With block size < page size, ext4_block_write_begin() may have two
> blocks to decrypt. Hence this commit invokes fscrypt_decrypt_page() for
> those blocks.
>
> Signed-off-by: Chandan Rajendra <[email protected]>
> ---
> fs/ext4/inode.c | 29 +++++++++++++++++++++--------
> 1 file changed, 21 insertions(+), 8 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 6f6589e..d3baa15 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1158,12 +1158,13 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
> unsigned to = from + len;
> struct inode *inode = page->mapping->host;
> unsigned block_start, block_end;
> - sector_t block;
> + sector_t block, page_blk_nr;
> int err = 0;
> unsigned blocksize = inode->i_sb->s_blocksize;
> unsigned bbits;
> struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
> bool decrypt = false;
> + int i;
>
> BUG_ON(!PageLocked(page));
> BUG_ON(from > PAGE_SIZE);
> @@ -1224,16 +1225,28 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
> /*
> * If we issued read requests, let them complete.
> */
> - while (wait_bh > wait) {
> - wait_on_buffer(*--wait_bh);
> - if (!buffer_uptodate(*wait_bh))
> + for (i = 0; (wait + i) < wait_bh; i++) {
> + wait_on_buffer(wait[i]);
> + if (!buffer_uptodate(wait[i]))
> err = -EIO;
> }
> - if (unlikely(err))
> + if (unlikely(err)) {
> page_zero_new_buffers(page, from, to);
> - else if (decrypt)
> - err = fscrypt_decrypt_page(page->mapping->host, page,
> - PAGE_SIZE, 0, page->index);
> + } else if (decrypt) {
> + page_blk_nr = (sector_t)page->index << (PAGE_SHIFT - bbits);
> +
> + while (wait_bh > wait) {
> + --wait_bh;
> + block = page_blk_nr + (bh_offset(*wait_bh) >> bbits);
> + err = fscrypt_decrypt_page(page->mapping->host, page,
> + (*wait_bh)->b_size,
> + bh_offset(*wait_bh),
> + block);
> + if (err)
> + break;
> + }
> + }
> +
> return err;
Iterating through the 'wait' array once by index and once by pointer is
confusing. Why not do it just by index? e.g.
int nr_wait = 0;
int i;
...
wait[nr_wait++] = bh;
...
for (i = 0; i < nr_wait; i++) {
wait_on_buffer(wait[i]);
...
}
for (i = 0; i < nr_wait; i++) {
...
fscrypt_decrypt_block(...)
...
}
Also, you are leaving the buffers uptodate() if the decryption fails, which is
wrong. It is an existing bug even in the block_size == PAGE_SIZE case, but it
should be fixed if possible.
Eric
On Fri, Jan 12, 2018 at 07:41:26PM +0530, Chandan Rajendra wrote:
> __ext4_block_zero_page_range decrypts the entire page. This commit
> decrypts the block to be partially zeroed instead of the whole page.
>
> Signed-off-by: Chandan Rajendra <[email protected]>
> ---
> fs/ext4/inode.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index d3baa15..db47f6d 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4030,9 +4030,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
> ext4_encrypted_inode(inode)) {
> /* We expect the key to be set. */
> BUG_ON(!fscrypt_has_encryption_key(inode));
> - BUG_ON(blocksize != PAGE_SIZE);
> WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
> - page, PAGE_SIZE, 0, page->index));
> + page, blocksize, round_down(offset, blocksize), iblock));
> }
Please use proper line breaking here.
Eric
On Fri, Jan 12, 2018 at 07:41:27PM +0530, Chandan Rajendra wrote:
> This commit adds code to encrypt all file blocks mapped by page.
>
> Signed-off-by: Chandan Rajendra <[email protected]>
> ---
> fs/crypto/crypto.c | 80 ++++++++++++++++++++++++++---------------
> fs/ext4/page-io.c | 58 ++++++++++++++++++++----------
> include/linux/fscrypt_notsupp.h | 15 ++++----
> include/linux/fscrypt_supp.h | 11 ++++--
> 4 files changed, 108 insertions(+), 56 deletions(-)
>
> diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
> index 732a786..52ad5cf 100644
> --- a/fs/crypto/crypto.c
> +++ b/fs/crypto/crypto.c
> @@ -226,15 +226,16 @@ struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
> * Return: A page with the encrypted content on success. Else, an
> * error value or NULL.
> */
> -struct page *fscrypt_encrypt_page(const struct inode *inode,
> - struct page *page,
> - unsigned int len,
> - unsigned int offs,
> - u64 lblk_num, gfp_t gfp_flags)
> -
> +int fscrypt_encrypt_page(const struct inode *inode,
> + struct page *page,
> + unsigned int len,
> + unsigned int offs,
> + u64 lblk_num,
> + struct fscrypt_ctx **ctx,
> + struct page **ciphertext_page,
> + gfp_t gfp_flags)
> {
Note that f2fs and ubifs have to be updated to use the new prototype too.
As noted earlier this should be renamed to fscrypt_encrypt_block(). Also the
function comment needs to be updated to match any changes.
But more importantly, the new calling convention is really confusing, especially
how it sometimes allocates resources but sometimes doesn't, and also in the
error path, it will free resources that were *not* allocated by that same
invocation of fscrypt_encrypt_page(), but rather by previous ones. Can you
please switch it to a more standard convention? Really there should be a
separate function which just allocates the fscrypt_ctx and bounce buffer, and
then fscrypt_encrypt_block() would just encrypt into that existing bounce
buffer.
>
> diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
> index db75901..9828d77 100644
> --- a/fs/ext4/page-io.c
> +++ b/fs/ext4/page-io.c
> @@ -415,7 +415,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
> struct writeback_control *wbc,
> bool keep_towrite)
> {
> - struct page *data_page = NULL;
> + struct page *ciphertext_page = NULL;
> struct inode *inode = page->mapping->host;
> unsigned block_start;
> struct buffer_head *bh, *head;
> @@ -475,36 +475,56 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
> nr_to_submit++;
> } while ((bh = bh->b_this_page) != head);
>
> - bh = head = page_buffers(page);
>
> if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
> nr_to_submit) {
> + struct fscrypt_ctx *ctx;
> + u64 blk_nr;
> gfp_t gfp_flags = GFP_NOFS;
>
> - retry_encrypt:
> - data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
> - page->index, gfp_flags);
> - if (IS_ERR(data_page)) {
> - ret = PTR_ERR(data_page);
> - if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
> - if (io->io_bio) {
> - ext4_io_submit(io);
> - congestion_wait(BLK_RW_ASYNC, HZ/50);
> + bh = head = page_buffers(page);
> + blk_nr = page->index << (PAGE_SHIFT - inode->i_blkbits);
> + ctx = NULL;
> + ciphertext_page = NULL;
> +
> + do {
> + if (!buffer_async_write(bh))
> + continue;
> + retry_encrypt:
> + ret = fscrypt_encrypt_page(inode, page, bh->b_size,
> + bh_offset(bh),
> + blk_nr, &ctx,
> + &ciphertext_page,
> + gfp_flags);
> + if (ret) {
> + if (ret == -ENOMEM
> + && wbc->sync_mode == WB_SYNC_ALL) {
> + if (io->io_bio) {
> + ext4_io_submit(io);
> + congestion_wait(BLK_RW_ASYNC,
> + HZ/50);
> + }
> + gfp_flags |= __GFP_NOFAIL;
> + bh = head = page_buffers(page);
> + blk_nr = page->index
> + << (PAGE_SHIFT - inode->i_blkbits);
> + ctx = NULL;
> + ciphertext_page = NULL;
> + goto retry_encrypt;
> }
> - gfp_flags |= __GFP_NOFAIL;
> - goto retry_encrypt;
> + ciphertext_page = NULL;
> + goto out;
> }
> - data_page = NULL;
> - goto out;
> - }
> + } while (++blk_nr, (bh = bh->b_this_page) != head);
The error handling is broken in the block_size < PAGE_SIZE case, for a couple
reasons.
First, in the "non-retry" case where we do 'goto out', we have to clear the
BH_Async_Write flag from all the buffer_heads, since none have been submitted
yet. But it will start at 'bh' which will not necessarily be the first one,
since the encryption loop is also using the 'bh' variable.
Second, in the "retry" case where we do 'goto retry_encrypt', your new code will
leak the 'ctx' and 'ciphertext' page, then try to start at the beginning of the
buffer_head list again. But it doesn't check the BH_Async_Write flag, so it may
try to encrypt a block that doesn't actually need to be written.
Also, in the "retry" case, why not start at the same block again, rather than
discarding the encryptions that have been done and restarting at the first one?
In any case, now that you're adding more logic here, if possible can you please
refactor everything under the 'ext4_encrypted_inode(inode) &&
S_ISREG(inode->i_mode)' condition into its own function? That should make it
easier to clean up some of the above bugs.
Eric
On Fri, Jan 12, 2018 at 07:41:28PM +0530, Chandan Rajendra wrote:
> This commit adds code to decrypt all file blocks mapped by page.
>
[...]
> diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
> index 0be590b..e494e2d 100644
> --- a/fs/ext4/readpage.c
> +++ b/fs/ext4/readpage.c
> @@ -62,6 +62,143 @@ static inline bool ext4_bio_encrypted(struct bio *bio)
> #endif
> }
>
> +static void ext4_complete_block(struct work_struct *work)
> +{
> + struct fscrypt_ctx *ctx =
> + container_of(work, struct fscrypt_ctx, r.work);
> + struct buffer_head *first, *bh, *tmp;
> + struct bio *bio;
> + struct bio_vec *bv;
> + struct page *page;
> + struct inode *inode;
> + u64 blk_nr;
> + unsigned long flags;
> + int page_uptodate = 1;
> + int ret;
> +
> + bio = ctx->r.bio;
> + BUG_ON(bio->bi_vcnt != 1);
> +
> + bv = bio->bi_io_vec;
> + page = bv->bv_page;
> + inode = page->mapping->host;
> +
> + BUG_ON(bv->bv_len != i_blocksize(inode));
> +
> + blk_nr = page->index << (PAGE_SHIFT - inode->i_blkbits);
> + blk_nr += bv->bv_offset >> inode->i_blkbits;
> +
> + bh = ctx->r.bh;
> +
> + ret = fscrypt_decrypt_page(inode, page, bv->bv_len,
> + bv->bv_offset, blk_nr);
> + if (ret) {
> + WARN_ON_ONCE(1);
> + SetPageError(page);
> + } else {
> + set_buffer_uptodate(bh);
> + }
> +
> + fscrypt_release_ctx(ctx);
> + bio_put(bio);
> +
> + first = page_buffers(page);
> + local_irq_save(flags);
> + bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
> +
> + clear_buffer_async_read(bh);
> + unlock_buffer(bh);
> + tmp = bh;
> + do {
> + if (!buffer_uptodate(tmp))
> + page_uptodate = 0;
> + if (buffer_async_read(tmp)) {
> + BUG_ON(!buffer_locked(tmp));
> + goto still_busy;
> + }
> + tmp = tmp->b_this_page;
> + } while (tmp != bh);
> +
> + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
> + local_irq_restore(flags);
> +
> + if (page_uptodate && !PageError(page))
> + SetPageUptodate(page);
> + unlock_page(page);
> + return;
> +
> +still_busy:
> + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
> + local_irq_restore(flags);
> + return;
> +}
First, see my comments on Patch 2 regarding whether we should really be
copy+pasting all this code from fs/buffer.c into ext4.
If nevertheless we really do have to have this, why can't we call
end_buffer_async_read() from ext4_complete_block() and from block_end_io(),
rather than duplicating it?
> +
> +static void block_end_io(struct bio *bio)
> +{
[...]
>
> +int ext4_block_read_full_page(struct page *page)
> +{
> + struct inode *inode = page->mapping->host;
> + struct fscrypt_ctx *ctx;
> + struct bio *bio;
> + sector_t iblock, lblock;
> + struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
> + unsigned int blocksize, bbits;
> + int nr, i;
> + int fully_mapped = 1;
> + int ret;
> +
> + head = create_page_buffers(page, inode, 0);
> + blocksize = head->b_size;
> + bbits = block_size_bits(blocksize);
> +
> + iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
> + lblock = (i_size_read(inode)+blocksize-1) >> bbits;
> + bh = head;
> + nr = 0;
> + i = 0;
> +
> + do {
> + if (buffer_uptodate(bh))
> + continue;
> +
> + if (!buffer_mapped(bh)) {
> + int err = 0;
> +
> + fully_mapped = 0;
> + if (iblock < lblock) {
> + WARN_ON(bh->b_size != blocksize);
> + err = ext4_get_block(inode, iblock, bh, 0);
> + if (err)
> + SetPageError(page);
> + }
> + if (!buffer_mapped(bh)) {
> + zero_user(page, i << bbits, blocksize);
> + if (!err)
> + set_buffer_uptodate(bh);
> + continue;
> + }
> + /*
> + * get_block() might have updated the buffer
> + * synchronously
> + */
> + if (buffer_uptodate(bh))
> + continue;
> + }
> + arr[nr++] = bh;
> + } while (i++, iblock++, (bh = bh->b_this_page) != head);
> +
> + if (fully_mapped)
> + SetPageMappedToDisk(page);
> +
> + if (!nr) {
> + /*
> + * All buffers are uptodate - we can set the page uptodate
> + * as well. But not if ext4_get_block() returned an error.
> + */
> + if (!PageError(page))
> + SetPageUptodate(page);
> + unlock_page(page);
> + return 0;
> + }
> +
> + /* Stage two: lock the buffers */
> + for (i = 0; i < nr; i++) {
> + bh = arr[i];
> + lock_buffer(bh);
> + set_buffer_async_read(bh);
> + }
> +
> + /*
> + * Stage 3: start the IO. Check for uptodateness
> + * inside the buffer lock in case another process reading
> + * the underlying blockdev brought it uptodate (the sct fix).
> + */
> + for (i = 0; i < nr; i++) {
> + ctx = NULL;
> + bh = arr[i];
> +
> + if (buffer_uptodate(bh)) {
> + end_buffer_async_read(bh, 1);
> + continue;
> + }
> +
> + if (ext4_encrypted_inode(inode)
> + && S_ISREG(inode->i_mode)) {
> + ctx = fscrypt_get_ctx(inode, GFP_NOFS);
> + if (IS_ERR(ctx)) {
> + set_page_error:
> + SetPageError(page);
> + zero_user_segment(page, bh_offset(bh), blocksize);
> + continue;
This isn't cleaning up properly; the buffer_head is being left locked and with
BH_Async_Read set, and that means the page will never be unlocked either.
'end_buffer_async_read(bh, 0)' should do it.
> + }
> + ctx->r.bh = bh;
> + }
> +
> + bio = bio_alloc(GFP_KERNEL, 1);
GFP_NOIO
Eric
On Fri, Jan 12, 2018 at 07:41:29PM +0530, Chandan Rajendra wrote:
> Now that we have all the code to support encryption for block size less
> than page size scenario, this commit removes the conditional check in
> filesystem mount code.
>
> Signed-off-by: Chandan Rajendra <[email protected]>
> ---
> fs/ext4/super.c | 9 +--------
> 1 file changed, 1 insertion(+), 8 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index ebb7edb..5a52c98 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -4138,14 +4138,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> }
> }
>
> - if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
> - (blocksize != PAGE_SIZE)) {
> - ext4_msg(sb, KERN_ERR,
> - "Unsupported blocksize for fs encryption");
> - goto failed_mount_wq;
> - }
> -
> - if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
> + if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) &&
> !ext4_has_feature_encrypt(sb)) {
> ext4_set_feature_encrypt(sb);
> ext4_commit_super(sb, 1);
Why change sb_rdonly() to 'sb->s_flags & MS_RDONLY'?
Eric
On Wednesday, January 17, 2018 8:10:04 AM IST Eric Biggers wrote:
> On Fri, Jan 12, 2018 at 07:41:29PM +0530, Chandan Rajendra wrote:
> > Now that we have all the code to support encryption for block size less
> > than page size scenario, this commit removes the conditional check in
> > filesystem mount code.
> >
> > Signed-off-by: Chandan Rajendra <[email protected]>
> > ---
> > fs/ext4/super.c | 9 +--------
> > 1 file changed, 1 insertion(+), 8 deletions(-)
> >
> > diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> > index ebb7edb..5a52c98 100644
> > --- a/fs/ext4/super.c
> > +++ b/fs/ext4/super.c
> > @@ -4138,14 +4138,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> > }
> > }
> >
> > - if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
> > - (blocksize != PAGE_SIZE)) {
> > - ext4_msg(sb, KERN_ERR,
> > - "Unsupported blocksize for fs encryption");
> > - goto failed_mount_wq;
> > - }
> > -
> > - if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
> > + if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) &&
> > !ext4_has_feature_encrypt(sb)) {
> > ext4_set_feature_encrypt(sb);
> > ext4_commit_super(sb, 1);
>
> Why change sb_rdonly() to 'sb->s_flags & MS_RDONLY'?
>
I am sorry, I messed up this one when rebasing the patchset onto the
latest linux-next. The old code base had 'sb->s_flags & MS_RDONLY'.
I will fix this up in the next version of this patchset.
--
chandan
On Wednesday, January 17, 2018 7:40:20 AM IST Eric Biggers wrote:
> Hi Chandan,
>
> On Fri, Jan 12, 2018 at 07:41:21PM +0530, Chandan Rajendra wrote:
> > This patchset implements code to support encryption of Ext4 filesystem
> > instances that have blocksize less than pagesize. The patchset has
> > been tested on both ppc64 and x86_64 machines.
> >
> > This patchset changes the prototype of the function
> > fscrypt_encrypt_page(). I will make the relevant changes to the rest
> > of the filesystems (e.g. f2fs) and post them in the next version of
> > the patchset.
> >
> > Chandan Rajendra (8):
> > ext4: use EXT4_INODE_ENCRYPT flag to detect encrypted bio
> > fs/buffer.c: make some functions non-static
> > ext4: decrypt all contiguous blocks in a page
> > ext4: decrypt all boundary blocks when doing buffered write
> > ext4: decrypt the block that needs to be partially zeroed
> > ext4: encrypt blocks whose size is less than page size
> > ext4: decrypt blocks whose size is less than page size
> > ext4: enable encryption for blocksize less than page size
> >
>
> Thanks for working on this! We've wanted this for a while (both so that it
> works on PowerPC with a 64K PAGE_SIZE, and so that people can't screw up their
> 1K blocksize filesystems by enabling the 'encrypt' flag), but no one ever got
> around to it. And it's not easy!
>
> First, just a few notes that didn't fit into my comments for the individual
> patches.
>
> Updating fscrypt_zeroout_range() seems to have been missed. Currently it
> assumes block_size == PAGE_SIZE so it will need to be updated too.
>
> The file Documentation/filesystems/fscrypt.rst will also need to be updated, at
> least to remove the following sentence: "Currently, only the case where the
> filesystem block size is equal to the system's page size (usually 4096 bytes) is
> supported.".
Thanks a lot for the review comments. I will make sure to take care of them
in the next version of the patchset.
>
> Also, on future versions of this patchset can you please also Cc
> [email protected]?
>
Sure, I will do that.
--
chandan