1 From 93147736b5b3a21bea24313bfc7a696829932009 Mon Sep 17 00:00:00 2001
2 From: Yu Zhao <yuzhao@google.com>
3 Date: Wed, 21 Dec 2022 21:19:05 -0700
4 Subject: [PATCH 27/29] mm: multi-gen LRU: clarify scan_control flags
6 Among the flags in scan_control:
7 1. sc->may_swap, which indicates swap constraint due to memsw.max, is
8 supported as usual.
9 2. sc->proactive, which indicates reclaim by memory.reclaim, may not
10 opportunistically skip the aging path, since it is considered less
11 latency sensitive.
12 3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
13 swappiness to prioritize file LRU, since clean file pages are more
14 likely to exist.
15 4. sc->may_writepage and sc->may_unmap, which indicates opportunistic
16 reclaim, are rejected, since unmapped clean pages are already
17 prioritized. Scanning for more of them is likely futile and can
18 cause high reclaim latency when there is a large number of memcgs.
20 The rest are handled by the existing code.
22 Link: https://lkml.kernel.org/r/20221222041905.2431096-8-yuzhao@google.com
23 Signed-off-by: Yu Zhao <yuzhao@google.com>
24 Cc: Johannes Weiner <hannes@cmpxchg.org>
25 Cc: Jonathan Corbet <corbet@lwn.net>
26 Cc: Michael Larabel <Michael@MichaelLarabel.com>
27 Cc: Michal Hocko <mhocko@kernel.org>
28 Cc: Mike Rapoport <rppt@kernel.org>
29 Cc: Roman Gushchin <roman.gushchin@linux.dev>
30 Cc: Suren Baghdasaryan <surenb@google.com>
31 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
33 mm/vmscan.c | 55 +++++++++++++++++++++++++++--------------------------
34 1 file changed, 28 insertions(+), 27 deletions(-)
36 diff --git a/mm/vmscan.c b/mm/vmscan.c
37 index 3d8e0665186c..4bcb93df316c 100644
40 @@ -2905,6 +2905,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
41 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
42 struct pglist_data *pgdat = lruvec_pgdat(lruvec);
47 if (!can_demote(pgdat->node_id, sc) &&
48 mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
50 @@ -3952,7 +3955,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
51 } while (err == -EAGAIN);
54 -static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
55 +static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
57 struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
59 @@ -3960,7 +3963,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
60 VM_WARN_ON_ONCE(walk);
62 walk = &pgdat->mm_walk;
63 - } else if (!pgdat && !walk) {
64 + } else if (!walk && force_alloc) {
65 VM_WARN_ON_ONCE(current_is_kswapd());
67 walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
68 @@ -4146,7 +4149,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
72 - walk = set_mm_walk(NULL);
73 + walk = set_mm_walk(NULL, true);
75 success = iterate_mm_list_nowalk(lruvec, max_seq);
77 @@ -4215,8 +4218,6 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
78 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
79 DEFINE_MIN_SEQ(lruvec);
81 - VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
83 /* see the comment on lru_gen_page */
84 gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
85 birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
86 @@ -4472,12 +4473,8 @@ static bool isolate_page(struct lruvec *lruvec, struct page *page, struct scan_c
90 - /* unmapping inhibited */
91 - if (!sc->may_unmap && page_mapped(page))
94 /* swapping inhibited */
95 - if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
96 + if (!(sc->gfp_mask & __GFP_IO) &&
98 (PageAnon(page) && !PageSwapCache(page))))
100 @@ -4574,9 +4571,8 @@ static int scan_pages(struct lruvec *lruvec, struct scan_control *sc,
101 __count_vm_events(PGSCAN_ANON + type, isolated);
104 - * There might not be eligible pages due to reclaim_idx, may_unmap and
105 - * may_writepage. Check the remaining to prevent livelock if it's not
107 + * There might not be eligible pages due to reclaim_idx. Check the
108 + * remaining to prevent livelock if it's not making progress.
110 return isolated || !remaining ? scanned : 0;
112 @@ -4836,8 +4832,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
113 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
114 DEFINE_MAX_SEQ(lruvec);
116 - if (mem_cgroup_below_min(memcg) ||
117 - (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
118 + if (mem_cgroup_below_min(memcg))
121 if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
122 @@ -4865,17 +4860,14 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
124 unsigned long scanned = 0;
125 unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
126 + int swappiness = get_swappiness(lruvec, sc);
128 + /* clean file pages are more likely to exist */
129 + if (swappiness && !(sc->gfp_mask & __GFP_IO))
137 - swappiness = get_swappiness(lruvec, sc);
138 - else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
143 nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
145 @@ -5005,12 +4997,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
146 struct blk_plug plug;
148 VM_WARN_ON_ONCE(global_reclaim(sc));
149 + VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);
153 blk_start_plug(&plug);
155 - set_mm_walk(lruvec_pgdat(lruvec));
156 + set_mm_walk(NULL, false);
158 if (try_to_shrink_lruvec(lruvec, sc))
159 lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
160 @@ -5066,11 +5059,19 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
162 VM_WARN_ON_ONCE(!global_reclaim(sc));
165 + * Unmapped clean pages are already prioritized. Scanning for more of
166 + * them is likely futile and can cause high reclaim latency when there
167 + * is a large number of memcgs.
169 + if (!sc->may_writepage || !sc->may_unmap)
174 blk_start_plug(&plug);
176 - set_mm_walk(pgdat);
177 + set_mm_walk(pgdat, false);
179 set_initial_priority(pgdat, sc);
181 @@ -5088,7 +5089,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
184 blk_finish_plug(&plug);
187 /* kswapd should never fail */
188 pgdat->kswapd_failures = 0;
190 @@ -5656,7 +5657,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
191 set_task_reclaim_state(current, &sc.reclaim_state);
192 flags = memalloc_noreclaim_save();
193 blk_start_plug(&plug);
194 - if (!set_mm_walk(NULL)) {
195 + if (!set_mm_walk(NULL, true)) {