Make alloc_page_buffers support a circular buffer list and initialise the
b_state field.
Optimize performance by removing the buffer list traversal that callers
needed in order to create the circular buffer list.
Signed-off-by: Sean Fu <[email protected]>
---
drivers/md/bitmap.c | 2 +-
fs/buffer.c | 37 +++++++++++++++----------------------
fs/ntfs/aops.c | 2 +-
fs/ntfs/mft.c | 2 +-
include/linux/buffer_head.h | 2 +-
5 files changed, 19 insertions(+), 26 deletions(-)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index f4eace5..615a46e 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -367,7 +367,7 @@ static int read_page(struct file *file, unsigned long index,
pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
(unsigned long long)index << PAGE_SHIFT);
- bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
+ bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0, 0, 0);
if (!bh) {
ret = -ENOMEM;
goto out;
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58..8111eca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -864,9 +864,9 @@ int remove_inode_buffers(struct inode *inode)
* which may not fail from ordinary buffer allocations.
*/
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
- int retry)
+ int retry, int circular, unsigned long b_state)
{
- struct buffer_head *bh, *head;
+ struct buffer_head *bh, *head, *tail;
long offset;
try_again:
@@ -879,13 +879,21 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bh->b_this_page = head;
bh->b_blocknr = -1;
- head = bh;
+ if (head == NULL)
+ tail = bh;
+
+ head = bh;
+ bh->b_state = b_state;
bh->b_size = size;
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
}
+
+ if (circular)
+ tail->b_this_page = head;
+
return head;
/*
* In case anything failed, we just free everything we got.
@@ -922,14 +930,6 @@ EXPORT_SYMBOL_GPL(alloc_page_buffers);
static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
{
- struct buffer_head *bh, *tail;
-
- bh = head;
- do {
- tail = bh;
- bh = bh->b_this_page;
- } while (bh);
- tail->b_this_page = head;
attach_page_buffers(page, head);
}
@@ -1024,7 +1024,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
/*
* Allocate some buffers for this page
*/
- bh = alloc_page_buffers(page, size, 0);
+ bh = alloc_page_buffers(page, size, 0, 1, 0);
if (!bh)
goto failed;
@@ -1578,16 +1578,9 @@ EXPORT_SYMBOL(block_invalidatepage);
void create_empty_buffers(struct page *page,
unsigned long blocksize, unsigned long b_state)
{
- struct buffer_head *bh, *head, *tail;
+ struct buffer_head *bh, *head;
- head = alloc_page_buffers(page, blocksize, 1);
- bh = head;
- do {
- bh->b_state |= b_state;
- tail = bh;
- bh = bh->b_this_page;
- } while (bh);
- tail->b_this_page = head;
+ head = alloc_page_buffers(page, blocksize, 1, 1, b_state);
spin_lock(&page->mapping->private_lock);
if (PageUptodate(page) || PageDirty(page)) {
@@ -2642,7 +2635,7 @@ int nobh_write_begin(struct address_space *mapping,
* Be careful: the buffer linked list is a NULL terminated one, rather
* than the circular one we're used to.
*/
- head = alloc_page_buffers(page, blocksize, 0);
+ head = alloc_page_buffers(page, blocksize, 0, 0, 0);
if (!head) {
ret = -ENOMEM;
goto out_release;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index cc91856..e692142 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1739,7 +1739,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
spin_lock(&mapping->private_lock);
if (unlikely(!page_has_buffers(page))) {
spin_unlock(&mapping->private_lock);
- bh = head = alloc_page_buffers(page, bh_size, 1);
+ bh = head = alloc_page_buffers(page, bh_size, 1, 0, 0);
spin_lock(&mapping->private_lock);
if (likely(!page_has_buffers(page))) {
struct buffer_head *tail;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b6f4021..175a02b 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -507,7 +507,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
if (unlikely(!page_has_buffers(page))) {
struct buffer_head *tail;
- bh = head = alloc_page_buffers(page, blocksize, 1);
+ bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
do {
set_buffer_uptodate(bh);
tail = bh;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index bd029e52..9a29826 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -155,7 +155,7 @@ void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset);
int try_to_free_buffers(struct page *);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
- int retry);
+ int retry, int circular, unsigned long b_state);
void create_empty_buffers(struct page *, unsigned long,
unsigned long b_state);
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
--
2.6.2
On Mon 19-06-17 21:01:36, Sean Fu wrote:
> Make alloc_page_buffers support circular buffer list and initialise
> b_state field.
> Optimize the performance by removing the buffer list traversal to create
> circular buffer list.
>
> Signed-off-by: Sean Fu <[email protected]>
IMHO this has unmeasurable performance gain and complicates the code. So I
don't think this is really worth it...
Honza
> ---
> drivers/md/bitmap.c | 2 +-
> fs/buffer.c | 37 +++++++++++++++----------------------
> fs/ntfs/aops.c | 2 +-
> fs/ntfs/mft.c | 2 +-
> include/linux/buffer_head.h | 2 +-
> 5 files changed, 19 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
> index f4eace5..615a46e 100644
> --- a/drivers/md/bitmap.c
> +++ b/drivers/md/bitmap.c
> @@ -367,7 +367,7 @@ static int read_page(struct file *file, unsigned long index,
> pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
> (unsigned long long)index << PAGE_SHIFT);
>
> - bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
> + bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0, 0, 0);
> if (!bh) {
> ret = -ENOMEM;
> goto out;
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 161be58..8111eca 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -864,9 +864,9 @@ int remove_inode_buffers(struct inode *inode)
> * which may not fail from ordinary buffer allocations.
> */
> struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
> - int retry)
> + int retry, int circular, unsigned long b_state)
> {
> - struct buffer_head *bh, *head;
> + struct buffer_head *bh, *head, *tail;
> long offset;
>
> try_again:
> @@ -879,13 +879,21 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
>
> bh->b_this_page = head;
> bh->b_blocknr = -1;
> - head = bh;
>
> + if (head == NULL)
> + tail = bh;
> +
> + head = bh;
> + bh->b_state = b_state;
> bh->b_size = size;
>
> /* Link the buffer to its page */
> set_bh_page(bh, page, offset);
> }
> +
> + if (circular)
> + tail->b_this_page = head;
> +
> return head;
> /*
> * In case anything failed, we just free everything we got.
> @@ -922,14 +930,6 @@ EXPORT_SYMBOL_GPL(alloc_page_buffers);
> static inline void
> link_dev_buffers(struct page *page, struct buffer_head *head)
> {
> - struct buffer_head *bh, *tail;
> -
> - bh = head;
> - do {
> - tail = bh;
> - bh = bh->b_this_page;
> - } while (bh);
> - tail->b_this_page = head;
> attach_page_buffers(page, head);
> }
>
> @@ -1024,7 +1024,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
> /*
> * Allocate some buffers for this page
> */
> - bh = alloc_page_buffers(page, size, 0);
> + bh = alloc_page_buffers(page, size, 0, 1, 0);
> if (!bh)
> goto failed;
>
> @@ -1578,16 +1578,9 @@ EXPORT_SYMBOL(block_invalidatepage);
> void create_empty_buffers(struct page *page,
> unsigned long blocksize, unsigned long b_state)
> {
> - struct buffer_head *bh, *head, *tail;
> + struct buffer_head *bh, *head;
>
> - head = alloc_page_buffers(page, blocksize, 1);
> - bh = head;
> - do {
> - bh->b_state |= b_state;
> - tail = bh;
> - bh = bh->b_this_page;
> - } while (bh);
> - tail->b_this_page = head;
> + head = alloc_page_buffers(page, blocksize, 1, 1, b_state);
>
> spin_lock(&page->mapping->private_lock);
> if (PageUptodate(page) || PageDirty(page)) {
> @@ -2642,7 +2635,7 @@ int nobh_write_begin(struct address_space *mapping,
> * Be careful: the buffer linked list is a NULL terminated one, rather
> * than the circular one we're used to.
> */
> - head = alloc_page_buffers(page, blocksize, 0);
> + head = alloc_page_buffers(page, blocksize, 0, 0, 0);
> if (!head) {
> ret = -ENOMEM;
> goto out_release;
> diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
> index cc91856..e692142 100644
> --- a/fs/ntfs/aops.c
> +++ b/fs/ntfs/aops.c
> @@ -1739,7 +1739,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
> spin_lock(&mapping->private_lock);
> if (unlikely(!page_has_buffers(page))) {
> spin_unlock(&mapping->private_lock);
> - bh = head = alloc_page_buffers(page, bh_size, 1);
> + bh = head = alloc_page_buffers(page, bh_size, 1, 0, 0);
> spin_lock(&mapping->private_lock);
> if (likely(!page_has_buffers(page))) {
> struct buffer_head *tail;
> diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
> index b6f4021..175a02b 100644
> --- a/fs/ntfs/mft.c
> +++ b/fs/ntfs/mft.c
> @@ -507,7 +507,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
> if (unlikely(!page_has_buffers(page))) {
> struct buffer_head *tail;
>
> - bh = head = alloc_page_buffers(page, blocksize, 1);
> + bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
> do {
> set_buffer_uptodate(bh);
> tail = bh;
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index bd029e52..9a29826 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -155,7 +155,7 @@ void set_bh_page(struct buffer_head *bh,
> struct page *page, unsigned long offset);
> int try_to_free_buffers(struct page *);
> struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
> - int retry);
> + int retry, int circular, unsigned long b_state);
> void create_empty_buffers(struct page *, unsigned long,
> unsigned long b_state);
> void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
> --
> 2.6.2
>
--
Jan Kara <[email protected]>
SUSE Labs, CR
On Mon, Jun 19, 2017 at 09:01:36PM +0800, Sean Fu wrote:
> Make alloc_page_buffers support circular buffer list and initialise
> b_state field.
> Optimize the performance by removing the buffer list traversal to create
> circular buffer list.
> - bh = head = alloc_page_buffers(page, blocksize, 1);
> + bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
Frankly, I don't like that change of calling conventions; it's very easy to
mess the order of arguments when using interfaces like that and it's hell
to find when trying to debug the resulting mess.
Do you really get an observable change in performance? What loads are
triggering it?
Hi Sean,
[auto build test WARNING on linus/master]
[also build test WARNING on v4.12-rc6 next-20170619]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Sean-Fu/fs-buffer-Modify-alloc_page_buffers/20170620-012328
config: x86_64-randconfig-x015-201725 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings
All warnings (new ones prefixed by >>):
fs/buffer.c: In function 'alloc_page_buffers':
>> fs/buffer.c:895:21: warning: 'tail' may be used uninitialized in this function [-Wmaybe-uninitialized]
tail->b_this_page = head;
~~~~~~~~~~~~~~~~~~^~~~~~
vim +/tail +895 fs/buffer.c
879
880 bh->b_this_page = head;
881 bh->b_blocknr = -1;
882
883 if (head == NULL)
884 tail = bh;
885
886 head = bh;
887 bh->b_state = b_state;
888 bh->b_size = size;
889
890 /* Link the buffer to its page */
891 set_bh_page(bh, page, offset);
892 }
893
894 if (circular)
> 895 tail->b_this_page = head;
896
897 return head;
898 /*
899 * In case anything failed, we just free everything we got.
900 */
901 no_grow:
902 if (head) {
903 do {
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
On Mon, Jun 19, 2017 at 05:03:16PM +0100, Al Viro wrote:
> On Mon, Jun 19, 2017 at 09:01:36PM +0800, Sean Fu wrote:
> > Make alloc_page_buffers support circular buffer list and initialise
> > b_state field.
> > Optimize the performance by removing the buffer list traversal to create
> > circular buffer list.
>
> > - bh = head = alloc_page_buffers(page, blocksize, 1);
> > + bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
>
> Frankly, I don't like that change of calling conventions; it's very easy to
> mess the order of arguments when using interfaces like that and it's hell
> to find when trying to debug the resulting mess.
>
> Do you really get an observable change in performance? What loads are
> triggering it?
Yes, I have observed a performance change with an ext3 file system whose
block size is 1024 bytes. It shows at least a 5% performance improvement.
I have found performance improvements when writing/reading an 800M file
on an ext3 file system with a 1024-byte block size.
In this case, each page has four buffers. In other words, the buffer list
has 4 elements.
I have compared the time that the process spent in kernel mode.
Improvements via this patch
Before After
Write:
0m5.604s 0m4.116s
0m4.408s 0m3.924s
0m4.184s 0m3.708s
0m4.352s 0m3.656s
0m4.380s 0m3.608s
0m4.240s 0m3.612s
0m4.460s 0m3.552s
0m4.072s 0m3.832s
0m4.300s 0m3.736s
0m4.400s 0m3.480s
Read:
0m3.128s 0m3.036s
0m2.976s 0m2.568s
0m3.384s 0m2.544s
0m3.112s 0m2.752s
0m2.924s 0m2.684s
0m3.084s 0m2.856s
0m3.348s 0m2.576s
0m3.000s 0m2.968s
0m3.012s 0m2.560s
0m2.768s 0m2.752s
Reproduce steps:
1 mkfs.ext3 -b 1024 /dev/sdb1
2 ./test_write.sh ./writetest 10
Test shell script:
#!/bin/bash
i=$2;
while test $((i)) -ge 1; do
mount /dev/sdb1 /mnt/sdb1/
time $1 -b 800 -o /mnt/sdb1/fileout
rm /mnt/sdb1/fileout
sync
sleep 1
umount /mnt/sdb1/
echo "aaa"
i=$i-1
done
The attachment are the code for writetest and test result.
On Mon, Jun 19, 2017 at 05:03:16PM +0100, Al Viro wrote:
> On Mon, Jun 19, 2017 at 09:01:36PM +0800, Sean Fu wrote:
> > Make alloc_page_buffers support circular buffer list and initialise
> > b_state field.
> > Optimize the performance by removing the buffer list traversal to create
> > circular buffer list.
>
> > - bh = head = alloc_page_buffers(page, blocksize, 1);
> > + bh = head = alloc_page_buffers(page, blocksize, 1, 0, 0);
>
> Frankly, I don't like that change of calling conventions; it's very easy to
> mess the order of arguments when using interfaces like that and it's hell
> to find when trying to debug the resulting mess.
>
> Do you really get an observable change in performance? What loads are
> triggering it?
Sorry for my mistake.
In fact, the time taken to write a large file depends on several other
factors, e.g. system workload.
Yesterday, I tried to measure the time of a single call to create_empty_buffers using kretprobe
and found that the performance difference is too small to measure.
before:
[ 944.632027] create_empty_buffers returned 878736736 and took 2160 ns
to execute
[ 944.632286] create_empty_buffers returned 878962832 and took 451 ns
to execute
[ 944.632302] create_empty_buffers returned 878962016 and took 226 ns
to execute
[ 944.632728] create_empty_buffers returned 878962832 and took 235 ns
to execute
[ 944.633105] create_empty_buffers returned 878962832 and took 167 ns
to execute
[ 944.633421] create_empty_buffers returned 878962832 and took 160 ns
to execute
after:
[39209.076519] create_empty_buffers returned 383804768 and took 1666 ns
to execute
[39209.077032] create_empty_buffers returned 383804768 and took 366 ns
to execute
[39209.077120] create_empty_buffers returned 558412336 and took 179 ns
to execute
[39209.077127] create_empty_buffers returned 558413152 and took 148 ns
to execute
[39209.077525] create_empty_buffers returned 558412336 and took 201 ns
to execute
[39209.078255] create_empty_buffers returned 814328768 and took 880 ns
to execute
[39209.078498] create_empty_buffers returned 558412336 and took 564 ns
to execute
[39209.078737] create_empty_buffers returned 558413152 and took 196 ns
to execute
This patch also complicates the code.
Please ignore it.
Thanks all of you.