Currently cgroup_writeback_by_id() calls mem_cgroup_wb_stats() to get
the number of dirty pages for a memcg. However, mem_cgroup_wb_stats()
does a lot more than just get the number of dirty pages. Read the number
of dirty pages directly instead of calling mem_cgroup_wb_stats(). Also,
cgroup_writeback_by_id() is only called for best-effort dirty flushing,
so remove the unused 'nr' parameter; for the same reason, there is no
need to explicitly flush the memcg stats.
Signed-off-by: Shakeel Butt <[email protected]>
---
fs/fs-writeback.c | 20 +++++++++-----------
include/linux/memcontrol.h | 15 +++++++++++++++
include/linux/writeback.h | 2 +-
mm/memcontrol.c | 13 +------------
4 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 867984e778c3..35894a2dba75 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1039,20 +1039,20 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
* cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
* @bdi_id: target bdi id
* @memcg_id: target memcg css id
- * @nr: number of pages to write, 0 for best-effort dirty flushing
* @reason: reason why some writeback work initiated
* @done: target wb_completion
*
* Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
* with the specified parameters.
*/
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
enum wb_reason reason, struct wb_completion *done)
{
struct backing_dev_info *bdi;
struct cgroup_subsys_state *memcg_css;
struct bdi_writeback *wb;
struct wb_writeback_work *work;
+ unsigned long dirty;
int ret;
/* lookup bdi and memcg */
@@ -1081,24 +1081,22 @@ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
}
/*
- * If @nr is zero, the caller is attempting to write out most of
+ * The caller is attempting to write out most of
* the currently dirty pages. Let's take the current dirty page
* count and inflate it by 25% which should be large enough to
* flush out most dirty pages while avoiding getting livelocked by
* concurrent dirtiers.
+ *
+ * BTW the memcg stats are flushed periodically and this is best-effort
+ * estimation, so some potential error is ok.
*/
- if (!nr) {
- unsigned long filepages, headroom, dirty, writeback;
-
- mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
- &writeback);
- nr = dirty * 10 / 8;
- }
+ dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY);
+ dirty = dirty * 10 / 8;
/* issue the writeback work */
work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
if (work) {
- work->nr_pages = nr;
+ work->nr_pages = dirty;
work->sync_mode = WB_SYNC_NONE;
work->range_cyclic = 1;
work->reason = reason;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b4c6b613e162..7028d8e4a3d7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -989,6 +989,16 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
local_irq_restore(flags);
}
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+ long x = READ_ONCE(memcg->vmstats.state[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
+}
+
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
@@ -1444,6 +1454,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
{
}
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+ return 0;
+}
+
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 1f34ddf284dc..109e0dcd1d21 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -218,7 +218,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
void wbc_detach_inode(struct writeback_control *wbc);
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
size_t bytes);
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
enum wb_reason reason, struct wb_completion *done);
void cgroup_writeback_umount(void);
bool cleanup_offline_cgwb(struct bdi_writeback *wb);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 35bb5f8f9ea8..6580c2381a3e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -631,17 +631,6 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
}
-/* idx can be of type enum memcg_stat_item or node_stat_item. */
-static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
- long x = READ_ONCE(memcg->vmstats.state[idx]);
-#ifdef CONFIG_SMP
- if (x < 0)
- x = 0;
-#endif
- return x;
-}
-
/* idx can be of type enum memcg_stat_item or node_stat_item. */
static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
{
@@ -4609,7 +4598,7 @@ void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
atomic_read(&frn->done.cnt) == 1) {
frn->at = 0;
trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
- cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
+ cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
WB_REASON_FOREIGN_FLUSH,
&frn->done);
}
--
2.32.0.432.gabb21c7263-goog
On Thu 22-07-21 11:26:27, Shakeel Butt wrote:
> Currently cgroup_writeback_by_id calls mem_cgroup_wb_stats() to get
> dirty pages for a memcg. However mem_cgroup_wb_stats() does a lot more
> than just get the number of dirty pages. Just directly get the number of
> dirty pages instead of calling mem_cgroup_wb_stats(). Also
> cgroup_writeback_by_id() is only called for best-effort dirty flushing,
> so remove the unused 'nr' parameter and no need to explicitly flush
> memcg stats.
>
> Signed-off-by: Shakeel Butt <[email protected]>
Looks good to me. Feel free to add:
Reviewed-by: Jan Kara <[email protected]>
Honza
> ---
> fs/fs-writeback.c | 20 +++++++++-----------
> include/linux/memcontrol.h | 15 +++++++++++++++
> include/linux/writeback.h | 2 +-
> mm/memcontrol.c | 13 +------------
> 4 files changed, 26 insertions(+), 24 deletions(-)
>
> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index 867984e778c3..35894a2dba75 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -1039,20 +1039,20 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
> * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
> * @bdi_id: target bdi id
> * @memcg_id: target memcg css id
> - * @nr: number of pages to write, 0 for best-effort dirty flushing
> * @reason: reason why some writeback work initiated
> * @done: target wb_completion
> *
> * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
> * with the specified parameters.
> */
> -int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
> +int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
> enum wb_reason reason, struct wb_completion *done)
> {
> struct backing_dev_info *bdi;
> struct cgroup_subsys_state *memcg_css;
> struct bdi_writeback *wb;
> struct wb_writeback_work *work;
> + unsigned long dirty;
> int ret;
>
> /* lookup bdi and memcg */
> @@ -1081,24 +1081,22 @@ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
> }
>
> /*
> - * If @nr is zero, the caller is attempting to write out most of
> + * The caller is attempting to write out most of
> * the currently dirty pages. Let's take the current dirty page
> * count and inflate it by 25% which should be large enough to
> * flush out most dirty pages while avoiding getting livelocked by
> * concurrent dirtiers.
> + *
> + * BTW the memcg stats are flushed periodically and this is best-effort
> + * estimation, so some potential error is ok.
> */
> - if (!nr) {
> - unsigned long filepages, headroom, dirty, writeback;
> -
> - mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
> - &writeback);
> - nr = dirty * 10 / 8;
> - }
> + dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY);
> + dirty = dirty * 10 / 8;
>
> /* issue the writeback work */
> work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
> if (work) {
> - work->nr_pages = nr;
> + work->nr_pages = dirty;
> work->sync_mode = WB_SYNC_NONE;
> work->range_cyclic = 1;
> work->reason = reason;
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index b4c6b613e162..7028d8e4a3d7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -989,6 +989,16 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
> local_irq_restore(flags);
> }
>
> +static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
> +{
> + long x = READ_ONCE(memcg->vmstats.state[idx]);
> +#ifdef CONFIG_SMP
> + if (x < 0)
> + x = 0;
> +#endif
> + return x;
> +}
> +
> static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
> enum node_stat_item idx)
> {
> @@ -1444,6 +1454,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
> {
> }
>
> +static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
> +{
> + return 0;
> +}
> +
> static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
> enum node_stat_item idx)
> {
> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
> index 1f34ddf284dc..109e0dcd1d21 100644
> --- a/include/linux/writeback.h
> +++ b/include/linux/writeback.h
> @@ -218,7 +218,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
> void wbc_detach_inode(struct writeback_control *wbc);
> void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
> size_t bytes);
> -int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
> +int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
> enum wb_reason reason, struct wb_completion *done);
> void cgroup_writeback_umount(void);
> bool cleanup_offline_cgwb(struct bdi_writeback *wb);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 35bb5f8f9ea8..6580c2381a3e 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -631,17 +631,6 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
> cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
> }
>
> -/* idx can be of type enum memcg_stat_item or node_stat_item. */
> -static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
> -{
> - long x = READ_ONCE(memcg->vmstats.state[idx]);
> -#ifdef CONFIG_SMP
> - if (x < 0)
> - x = 0;
> -#endif
> - return x;
> -}
> -
> /* idx can be of type enum memcg_stat_item or node_stat_item. */
> static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
> {
> @@ -4609,7 +4598,7 @@ void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
> atomic_read(&frn->done.cnt) == 1) {
> frn->at = 0;
> trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
> - cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
> + cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
> WB_REASON_FOREIGN_FLUSH,
> &frn->done);
> }
> --
> 2.32.0.432.gabb21c7263-goog
>
--
Jan Kara <[email protected]>
SUSE Labs, CR