From 70d216c71ff5c5b17dd1da6294f97b91fb6aba7a Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Fri, 30 Dec 2022 14:52:51 -0700
Subject: [PATCH 10/19] UPSTREAM: mm: add vma_has_recency()

Add vma_has_recency() to indicate whether a VMA may exhibit temporal
locality that the LRU algorithm relies on.

This function returns false for VMAs marked by VM_SEQ_READ or
VM_RAND_READ. While the former flag indicates linear access, i.e., a
special case of spatial locality, both flags indicate a lack of temporal
locality, i.e., the reuse of an area within a relatively small duration.

"Recency" is chosen over "locality" to avoid confusion between temporal
and spatial localities.

Before this patch, the active/inactive LRU only ignored the accessed bit
from VMAs marked by VM_SEQ_READ. After this patch, the active/inactive
LRU and MGLRU share the same logic: they both ignore the accessed bit if
vma_has_recency() returns false.

For the active/inactive LRU, the following fio test showed a [6, 8]%
increase in IOPS when randomly accessing mapped files under memory
pressure.

  kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo)
  kb=$((kb - 8*1024*1024))

  modprobe brd rd_nr=1 rd_size=$kb
  dd if=/dev/zero of=/dev/ram0 bs=1M

  mkfs.ext4 /dev/ram0
  mount /dev/ram0 /mnt/
  swapoff -a

  fio --name=test --directory=/mnt/ --ioengine=mmap --numjobs=8 \
      --size=8G --rw=randrw --time_based --runtime=10m \
      --group_reporting

The discussion that led to this patch is here [1]. Additional test
results are available in that thread.

[1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/

Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com
Change-Id: I291dcb795197659e40e46539cd32b857677c34ad
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Righi <andrea.righi@canonical.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 8788f6781486769d9598dcaedc3fe0eb12fc3e59)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
 include/linux/mm_inline.h |  8 ++++++++
 mm/memory.c               |  7 +++----
 mm/rmap.c                 | 42 +++++++++++++++++----------------------
 mm/vmscan.c               |  5 ++++-
 4 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c1fd3922dc5dd..7bb2e5f94734c 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -595,4 +595,12 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
 #endif
 }
 
+static inline bool vma_has_recency(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
+		return false;
+
+	return true;
+}
+
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 747b7ea30f890..c2f48f8003c2e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1435,8 +1435,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					force_flush = 1;
 					set_page_dirty(page);
 				}
-				if (pte_young(ptent) &&
-				    likely(!(vma->vm_flags & VM_SEQ_READ)))
+				if (pte_young(ptent) && likely(vma_has_recency(vma)))
 					mark_page_accessed(page);
 			}
 			rss[mm_counter(page)]--;
@@ -5170,8 +5169,8 @@ static inline void mm_account_fault(struct pt_regs *regs,
 #ifdef CONFIG_LRU_GEN
 static void lru_gen_enter_fault(struct vm_area_struct *vma)
 {
-	/* the LRU algorithm doesn't apply to sequential or random reads */
-	current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
+	/* the LRU algorithm only applies to accesses with recency */
+	current->in_lru_fault = vma_has_recency(vma);
 }
 
 static void lru_gen_exit_fault(void)
diff --git a/mm/rmap.c b/mm/rmap.c
index 7da2d8d097d9b..825dac3caa1e5 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -823,25 +823,14 @@ static bool folio_referenced_one(struct folio *folio,
 		}
 
 		if (pvmw.pte) {
-			if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
-			    !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
+			if (lru_gen_enabled() && pte_young(*pvmw.pte)) {
 				lru_gen_look_around(&pvmw);
 				referenced++;
 			}
 
 			if (ptep_clear_flush_young_notify(vma, address,
-						pvmw.pte)) {
-				/*
-				 * Don't treat a reference through
-				 * a sequentially read mapping as such.
-				 * If the folio has been used in another mapping,
-				 * we will catch it; if this other mapping is
-				 * already gone, the unmap path will have set
-				 * the referenced flag or activated the folio.
-				 */
-				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-					referenced++;
-			}
+						pvmw.pte))
+				referenced++;
 		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 			if (pmdp_clear_flush_young_notify(vma, address,
 						pvmw.pmd))
@@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
 	struct folio_referenced_arg *pra = arg;
 	struct mem_cgroup *memcg = pra->memcg;
 
-	if (!mm_match_cgroup(vma->vm_mm, memcg))
+	/*
+	 * Ignore references from this mapping if it has no recency. If the
+	 * folio has been used in another mapping, we will catch it; if this
+	 * other mapping is already gone, the unmap path will have set the
+	 * referenced flag or activated the folio in zap_pte_range().
+	 */
+	if (!vma_has_recency(vma))
+		return true;
+
+	/*
+	 * If we are reclaiming on behalf of a cgroup, skip counting on behalf
+	 * of references from different cgroups.
+	 */
+	if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 		return true;
 
 	return false;
@@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio, int is_locked,
 		.arg = (void *)&pra,
 		.anon_lock = folio_lock_anon_vma_read,
 		.try_lock = true,
+		.invalid_vma = invalid_folio_referenced_vma,
 	};
 
 	*vm_flags = 0;
@@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio, int is_locked,
 		return 1;
 	}
 
-	/*
-	 * If we are reclaiming on behalf of a cgroup, skip
-	 * counting on behalf of references from different
-	 * cgroups
-	 */
-	if (memcg) {
-		rwc.invalid_vma = invalid_folio_referenced_vma;
-	}
-
 	rmap_walk(folio, &rwc);
 	*vm_flags = pra.vm_flags;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 49da02f841c81..596fed6ae0439 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3778,7 +3778,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
 	if (is_vm_hugetlb_page(vma))
 		return true;
 
-	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ))
+	if (!vma_has_recency(vma))
+		return true;
+
+	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))
 		return true;
 
 	if (vma == get_gate_vma(vma->vm_mm))
-- 
2.40.1