Merge branch 'akpm' (patches from Andrew)

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 2 Jun 2020 19:21:36 +0000 (12:21 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 2 Jun 2020 19:21:36 +0000 (12:21 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 Jun 2020 19:21:36 +0000 (12:21 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 Jun 2020 19:21:36 +0000 (12:21 -0700)
diff --combined Documentation/filesystems/proc.rst

index 06d41c0b91cc032e5c6b4dc79475f5ab5875ad89,092b7b44d158e78c98f34e3afaff037352f65594..430963e0e8c34b6bd441629b45104b36860260a4
--- 1/Documentation/filesystems/proc.rst
--- 2/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@@ -543,7 -543,6 +543,7 @@@ encoded manner. The codes are the follo
       hg    huge page advise flag
       nh    no huge page advise flag
       mg    mergable advise flag
+ +    bt    arm64 BTI guarded page
       ==    =======================================
   
   Note that there is no guarantee that every flag and associated mnemonic will
@@@ -1043,8 -1042,8 +1043,8 @@@ PageTable
                 amount of memory dedicated to the lowest level of page
                 tables.
   NFS_Unstable
-               NFS pages sent to the server, but not yet committed to stable
-             storage
+               Always zero. Previously counted pages which had been written to
+               the server, but had not been committed to stable storage.
   Bounce
                 Memory used for block device "bounce buffers"
   WritebackTmp
@@@ -1871,7 -1870,7 +1871,7 @@@ unbindable        mount is unbindabl
   
   For more information on mount propagation see:
   
- -  Documentation/filesystems/sharedsubtree.txt
+ +  Documentation/filesystems/sharedsubtree.rst
   
   
   3.6   /proc/<pid>/comm  & /proc/<pid>/task/<tid>/comm
diff --combined arch/arm64/include/asm/pgtable.h

index e50e4dda90c2b0fd62665f5474f37e6b340092fc,47095216d6a859627faba49df38de7d5ca91fc33..dae0466d19d6ebd474160c6a4958c438e07e4616
--- 1/arch/arm64/include/asm/pgtable.h
--- 2/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@@ -407,6 -407,9 +407,9 @@@ static inline pmd_t pmd_mkdevmap(pmd_t 
   #define __pgprot_modify(prot,mask,bits) \
         __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
   
+ #define pgprot_nx(prot) \
+       __pgprot_modify(prot, 0, PTE_PXN)
+ 
   /*
    * Mark the prot value as uncacheable and unbufferable.
    */
@@@ -457,7 -460,6 +460,7 @@@ extern pgd_t init_pg_dir[PTRS_PER_PGD]
   extern pgd_t init_pg_end[];
   extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
   extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+ +extern pgd_t idmap_pg_end[];
   extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
   
   extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
@@@ -509,7 -511,7 +512,7 @@@ static inline void pte_unmap(pte_t *pte
   #define pte_set_fixmap_offset(pmd, addr)      pte_set_fixmap(pte_offset_phys(pmd, addr))
   #define pte_clear_fixmap()            clear_fixmap(FIX_PTE)
   
- -#define pmd_page(pmd)         pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd)))
+ +#define pmd_page(pmd)                 phys_to_page(__pmd_to_phys(pmd))
   
   /* use ONLY for statically allocated translation tables */
   #define pte_offset_kimg(dir,addr)     ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
@@@ -567,7 -569,7 +570,7 @@@ static inline phys_addr_t pud_page_padd
   #define pmd_set_fixmap_offset(pud, addr)      pmd_set_fixmap(pmd_offset_phys(pud, addr))
   #define pmd_clear_fixmap()            clear_fixmap(FIX_PMD)
   
- -#define pud_page(pud)         pfn_to_page(__phys_to_pfn(__pud_to_phys(pud)))
+ +#define pud_page(pud)                 phys_to_page(__pud_to_phys(pud))
   
   /* use ONLY for statically allocated translation tables */
   #define pmd_offset_kimg(dir,addr)     ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
@@@ -625,7 -627,7 +628,7 @@@ static inline phys_addr_t pgd_page_padd
   #define pud_set_fixmap_offset(pgd, addr)      pud_set_fixmap(pud_offset_phys(pgd, addr))
   #define pud_clear_fixmap()            clear_fixmap(FIX_PUD)
   
- -#define pgd_page(pgd)         pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
+ +#define pgd_page(pgd)                 phys_to_page(__pgd_to_phys(pgd))
   
   /* use ONLY for statically allocated translation tables */
   #define pud_offset_kimg(dir,addr)     ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
@@@ -661,7 -663,7 +664,7 @@@
   static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
   {
         const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
- -                            PTE_PROT_NONE | PTE_VALID | PTE_WRITE;
+ +                            PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP;
         /* preserve the hardware dirty information */
         if (pte_hw_dirty(pte))
                 pte = pte_mkdirty(pte);
diff --combined arch/arm64/mm/dump.c

index 78163b7a7dde4271a0dc670c5d4bd5abe5761e1d,d4313bc0c4c1aa1c68dbd764fca5477b6c191f31..0da020c563e6b4e17bf6022dc4f6b9a3cf810723
--- 1/arch/arm64/mm/dump.c
--- 2/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@@ -145,11 -145,6 +145,11 @@@ static const struct prot_bits pte_bits[
                 .val    = PTE_UXN,
                 .set    = "UXN",
                 .clear  = "   ",
+ +      }, {
+ +              .mask   = PTE_GP,
+ +              .val    = PTE_GP,
+ +              .set    = "GP",
+ +              .clear  = "  ",
         }, {
                 .mask   = PTE_ATTRINDX_MASK,
                 .val    = PTE_ATTRINDX(MT_DEVICE_nGnRnE),
@@@ -252,7 -247,7 +252,7 @@@ static void note_prot_wx(struct pg_stat
   }
   
   static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
-                     unsigned long val)
+                     u64 val)
   {
         struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
         static const char units[] = "KMGTPE";
diff --combined drivers/base/node.c

index 50b8c0d43859877621c17c532a51c31f02d31723,6012574913f79620980b51ccd93c967ec30b066f..5b02f69769e86f67a8d73835bfc28c6ad4d79ca1
--- 1/drivers/base/node.c
--- 2/drivers/base/node.c
+++ b/drivers/base/node.c
@@@ -415,9 -415,6 +415,9 @@@ static ssize_t node_read_meminfo(struc
                        "Node %d AnonPages:      %8lu kB\n"
                        "Node %d Shmem:          %8lu kB\n"
                        "Node %d KernelStack:    %8lu kB\n"
+ +#ifdef CONFIG_SHADOW_CALL_STACK
+ +                     "Node %d ShadowCallStack:%8lu kB\n"
+ +#endif
                        "Node %d PageTables:     %8lu kB\n"
                        "Node %d NFS_Unstable:   %8lu kB\n"
                        "Node %d Bounce:         %8lu kB\n"
@@@ -441,11 -438,8 +441,11 @@@
                        nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
                        nid, K(i.sharedram),
                        nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+ +#ifdef CONFIG_SHADOW_CALL_STACK
+ +                     nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
+ +#endif
                        nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
-                      nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
+                      nid, 0UL,
                        nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
                        nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
                        nid, K(sreclaimable +
diff --combined fs/open.c

index e62b1db06638910d2401c9d685400cfbd506ae99,d9467a8a7f6a9f31c85b893de4f9d785fe0a4760..6cd48a61cda3b969c0717c30c8bed879efa3d8cc
--- 1/fs/open.c
--- 2/fs/open.c
+++ b/fs/open.c
@@@ -345,14 -345,21 +345,14 @@@ SYSCALL_DEFINE4(fallocate, int, fd, int
    * We do this by temporarily clearing all FS-related capabilities and
    * switching the fsuid/fsgid around to the real ones.
    */
- -long do_faccessat(int dfd, const char __user *filename, int mode)
+ +static const struct cred *access_override_creds(void)
   {
         const struct cred *old_cred;
         struct cred *override_cred;
- -      struct path path;
- -      struct inode *inode;
- -      int res;
- -      unsigned int lookup_flags = LOOKUP_FOLLOW;
- -
- -      if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
- -              return -EINVAL;
   
         override_cred = prepare_creds();
         if (!override_cred)
- -              return -ENOMEM;
+ +              return NULL;
   
         override_cred->fsuid = override_cred->uid;
         override_cred->fsgid = override_cred->gid;
@@@ -387,38 -394,6 +387,38 @@@
         override_cred->non_rcu = 1;
   
         old_cred = override_creds(override_cred);
+ +
+ +      /* override_creds() gets its own ref */
+ +      put_cred(override_cred);
+ +
+ +      return old_cred;
+ +}
+ +
+ +long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
+ +{
+ +      struct path path;
+ +      struct inode *inode;
+ +      int res;
+ +      unsigned int lookup_flags = LOOKUP_FOLLOW;
+ +      const struct cred *old_cred = NULL;
+ +
+ +      if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
+ +              return -EINVAL;
+ +
+ +      if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
+ +              return -EINVAL;
+ +
+ +      if (flags & AT_SYMLINK_NOFOLLOW)
+ +              lookup_flags &= ~LOOKUP_FOLLOW;
+ +      if (flags & AT_EMPTY_PATH)
+ +              lookup_flags |= LOOKUP_EMPTY;
+ +
+ +      if (!(flags & AT_EACCESS)) {
+ +              old_cred = access_override_creds();
+ +              if (!old_cred)
+ +                      return -ENOMEM;
+ +      }
+ +
   retry:
         res = user_path_at(dfd, filename, lookup_flags, &path);
         if (res)
@@@ -460,26 -435,19 +460,26 @@@ out_path_release
                 goto retry;
         }
   out:
- -      revert_creds(old_cred);
- -      put_cred(override_cred);
+ +      if (old_cred)
+ +              revert_creds(old_cred);
+ +
         return res;
   }
   
   SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
   {
- -      return do_faccessat(dfd, filename, mode);
+ +      return do_faccessat(dfd, filename, mode, 0);
+ +}
+ +
+ +SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
+ +              int, flags)
+ +{
+ +      return do_faccessat(dfd, filename, mode, flags);
   }
   
   SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
   {
- -      return do_faccessat(AT_FDCWD, filename, mode);
+ +      return do_faccessat(AT_FDCWD, filename, mode, 0);
   }
   
   int ksys_chdir(const char __user *filename)
@@@ -775,9 -743,8 +775,8 @@@ static int do_dentry_open(struct file *
         path_get(&f->f_path);
         f->f_inode = inode;
         f->f_mapping = inode->i_mapping;
- 
-       /* Ensure that we skip any errors that predate opening of the file */
         f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
+       f->f_sb_err = file_sample_sb_err(f);
   
         if (unlikely(f->f_flags & O_PATH)) {
                 f->f_mode = FMODE_PATH | FMODE_OPENED;
diff --combined fs/proc/meminfo.c

index 09cd51c8d23de4e90a2d0c56758b95e52fed1b1a,9bd94b5a96582ef77d462f00eaabbd2b0c77e2a7..ecc63ce01be7b51c68975b1f681f002aaf1a26f5
--- 1/fs/proc/meminfo.c
--- 2/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@@ -103,15 -103,10 +103,14 @@@ static int meminfo_proc_show(struct seq
         show_val_kb(m, "SUnreclaim:     ", sunreclaim);
         seq_printf(m, "KernelStack:    %8lu kB\n",
                    global_zone_page_state(NR_KERNEL_STACK_KB));
+ +#ifdef CONFIG_SHADOW_CALL_STACK
+ +      seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ +                 global_zone_page_state(NR_KERNEL_SCS_KB));
+ +#endif
         show_val_kb(m, "PageTables:     ",
                     global_zone_page_state(NR_PAGETABLE));
   
-       show_val_kb(m, "NFS_Unstable:   ",
-                   global_node_page_state(NR_UNSTABLE_NFS));
+       show_val_kb(m, "NFS_Unstable:   ", 0);
         show_val_kb(m, "Bounce:         ",
                     global_zone_page_state(NR_BOUNCE));
         show_val_kb(m, "WritebackTmp:   ",
diff --combined fs/proc/task_mmu.c

index 10a6d472397fd72693b788ec75c3f365b35dc8eb,36dc7417c0df73246dd1d308458ee0b7ee5e9901..6ad407d5efe2ff4bc8f854a682824232f74a007d
--- 1/fs/proc/task_mmu.c
--- 2/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@@ -546,10 -546,17 +546,17 @@@ static void smaps_pmd_entry(pmd_t *pmd
         struct mem_size_stats *mss = walk->private;
         struct vm_area_struct *vma = walk->vma;
         bool locked = !!(vma->vm_flags & VM_LOCKED);
-       struct page *page;
+       struct page *page = NULL;
+ 
+       if (pmd_present(*pmd)) {
+               /* FOLL_DUMP will return -EFAULT on huge zero page */
+               page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+       } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
+               swp_entry_t entry = pmd_to_swp_entry(*pmd);
   
-       /* FOLL_DUMP will return -EFAULT on huge zero page */
-       page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+               if (is_migration_entry(entry))
+                       page = migration_entry_to_page(entry);
+       }
         if (IS_ERR_OR_NULL(page))
                 return;
         if (PageAnon(page))
@@@ -578,8 -585,7 +585,7 @@@ static int smaps_pte_range(pmd_t *pmd, 
   
         ptl = pmd_trans_huge_lock(pmd, vma);
         if (ptl) {
-               if (pmd_present(*pmd))
-                       smaps_pmd_entry(pmd, addr, walk);
+               smaps_pmd_entry(pmd, addr, walk);
                 spin_unlock(ptl);
                 goto out;
         }
@@@ -622,6 -628,9 +628,6 @@@ static void show_smap_vma_flags(struct 
                 [ilog2(VM_GROWSDOWN)]   = "gd",
                 [ilog2(VM_PFNMAP)]      = "pf",
                 [ilog2(VM_DENYWRITE)]   = "dw",
- -#ifdef CONFIG_X86_INTEL_MPX
- -              [ilog2(VM_MPX)]         = "mp",
- -#endif
                 [ilog2(VM_LOCKED)]      = "lo",
                 [ilog2(VM_IO)]          = "io",
                 [ilog2(VM_SEQ_READ)]    = "sr",
@@@ -635,9 -644,6 +641,9 @@@
                 [ilog2(VM_ARCH_1)]      = "ar",
                 [ilog2(VM_WIPEONFORK)]  = "wf",
                 [ilog2(VM_DONTDUMP)]    = "dd",
+ +#ifdef CONFIG_ARM64_BTI
+ +              [ilog2(VM_ARM64_BTI)]   = "bt",
+ +#endif
   #ifdef CONFIG_MEM_SOFT_DIRTY
                 [ilog2(VM_SOFTDIRTY)]   = "sd",
   #endif
diff --combined fs/squashfs/decompressor_multi_percpu.c

index e206ebfe003caad05b4fffc5f436164524217937,d93e12d9b71282639232ff13575cdfa494f3aa1c..b881b9283b7ffd8be2c447a86eafcf003910b270
--- 1/fs/squashfs/decompressor_multi_percpu.c
--- 2/fs/squashfs/decompressor_multi_percpu.c
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@@ -8,7 -8,6 +8,7 @@@
   #include <linux/slab.h>
   #include <linux/percpu.h>
   #include <linux/buffer_head.h>
+ +#include <linux/local_lock.h>
   
   #include "squashfs_fs.h"
   #include "squashfs_fs_sb.h"
@@@ -21,8 -20,7 +21,8 @@@
    */
   
   struct squashfs_stream {
- -      void            *stream;
+ +      void                    *stream;
+ +      local_lock_t    lock;
   };
   
   void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
@@@ -43,7 -41,6 +43,7 @@@
                         err = PTR_ERR(stream->stream);
                         goto out;
                 }
+ +              local_lock_init(&stream->lock);
         }
   
         kfree(comp_opts);
@@@ -75,19 -72,18 +75,19 @@@ void squashfs_decompressor_destroy(stru
         }
   }
   
- int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
-       int b, int offset, int length, struct squashfs_page_actor *output)
+ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+       int offset, int length, struct squashfs_page_actor *output)
   {
- -      struct squashfs_stream __percpu *percpu;
         struct squashfs_stream *stream;
         int res;
   
- -      percpu = (struct squashfs_stream __percpu *)msblk->stream;
- -      stream = get_cpu_ptr(percpu);
+ +      local_lock(&msblk->stream->lock);
+ +      stream = this_cpu_ptr(msblk->stream);
+ +
-       res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
-                       offset, length, output);
+       res = msblk->decompressor->decompress(msblk, stream->stream, bio,
+                                             offset, length, output);
- -      put_cpu_ptr(stream);
+ +
+ +      local_unlock(&msblk->stream->lock);
   
         if (res < 0)
                 ERROR("%s decompression failed, data probably corrupt\n",
diff --combined include/linux/fs.h

index 109b5d9dbdc791d978922dd77331bb651131f6e8,1434ed801b809a3b2d5ab7801b8a8ecc31e50717..ef6acd2062eb272cbdf963aa136e401f82a9021c
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -292,6 -292,7 +292,7 @@@ enum positive_aop_returns 
   struct page;
   struct address_space;
   struct writeback_control;
+ struct readahead_control;
   
   /*
    * Write life time hint values.
@@@ -375,6 -376,7 +376,7 @@@ struct address_space_operations 
          */
         int (*readpages)(struct file *filp, struct address_space *mapping,
                         struct list_head *pages, unsigned nr_pages);
+       void (*readahead)(struct readahead_control *);
   
         int (*write_begin)(struct file *, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
@@@ -976,6 -978,7 +978,7 @@@ struct file 
   #endif /* #ifdef CONFIG_EPOLL */
         struct address_space    *f_mapping;
         errseq_t                f_wb_err;
+       errseq_t                f_sb_err; /* for syncfs */
   } __randomize_layout
     __attribute__((aligned(4)));        /* lest something weird decides that 2 is OK */
   
@@@ -1520,6 -1523,9 +1523,9 @@@ struct super_block 
         /* Being remounted read-only */
         int s_readonly_remount;
   
+       /* per-sb errseq_t for reporting writeback errors via syncfs */
+       errseq_t s_wb_err;
+ 
         /* AIO completions deferred from interrupt context */
         struct workqueue_struct *s_dio_done_wq;
         struct hlist_head s_pins;
@@@ -1721,11 -1727,7 +1727,11 @@@ extern int vfs_link(struct dentry *, st
   extern int vfs_rmdir(struct inode *, struct dentry *);
   extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
   extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
- -extern int vfs_whiteout(struct inode *, struct dentry *);
+ +
+ +static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+ +{
+ +      return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+ +}
   
   extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
                                   int open_flag);
@@@ -2831,6 -2833,18 +2837,18 @@@ static inline errseq_t filemap_sample_w
         return errseq_sample(&mapping->wb_err);
   }
   
+ /**
+  * file_sample_sb_err - sample the current errseq_t to test for later errors
+  * @file: file pointer to be sampled
+  *
+  * Grab the most current superblock-level errseq_t value for the given
+  * struct file.
+  */
+ static inline errseq_t file_sample_sb_err(struct file *file)
+ {
+       return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
+ }
+ 
   static inline int filemap_nr_thps(struct address_space *mapping)
   {
   #ifdef CONFIG_READ_ONLY_THP_FOR_FS
diff --combined include/linux/mm.h

index 3468dbb2cad1ed3b5f2cdc140d0438f834218250,fda41eb7f1c8bcd2513a3943e2dbf546fa7be946..6e6c71cdfa13440d1cc8584a0a865fe427cdfcab
--- 1/include/linux/mm.h
--- 2/include/linux/mm.h
+++ b/include/linux/mm.h
@@@ -325,13 -325,17 +325,13 @@@ extern unsigned int kobjsize(const voi
   #elif defined(CONFIG_SPARC64)
   # define VM_SPARC_ADI VM_ARCH_1       /* Uses ADI tag for access control */
   # define VM_ARCH_CLEAR        VM_SPARC_ADI
+ +#elif defined(CONFIG_ARM64)
+ +# define VM_ARM64_BTI VM_ARCH_1       /* BTI guarded page, a.k.a. GP bit */
+ +# define VM_ARCH_CLEAR        VM_ARM64_BTI
   #elif !defined(CONFIG_MMU)
   # define VM_MAPPED_COPY       VM_ARCH_1       /* T if mapped copy of data (nommu mmap) */
   #endif
   
- -#if defined(CONFIG_X86_INTEL_MPX)
- -/* MPX specific bounds table or bounds directory */
- -# define VM_MPX               VM_HIGH_ARCH_4
- -#else
- -# define VM_MPX               VM_NONE
- -#endif
- -
   #ifndef VM_GROWSUP
   # define VM_GROWSUP   VM_NONE
   #endif
@@@ -1226,7 -1230,7 +1226,7 @@@ void unpin_user_pages(struct page **pag
    * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS
    * scheme).
    *
- - * For more information, please see Documentation/vm/pin_user_pages.rst.
+ + * For more information, please see Documentation/core-api/pin_user_pages.rst.
    *
    * @page:     pointer to page to be queried.
    * @Return:   True, if it is likely that the page has been "dma-pinned".
@@@ -1709,6 -1713,8 +1709,8 @@@ long get_user_pages_locked(unsigned lon
                     unsigned int gup_flags, struct page **pages, int *locked);
   long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
                     struct page **pages, unsigned int gup_flags);
+ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
+                   struct page **pages, unsigned int gup_flags);
   
   int get_user_pages_fast(unsigned long start, int nr_pages,
                         unsigned int gup_flags, struct page **pages);
@@@ -2085,13 -2091,54 +2087,54 @@@ static inline pud_t *pud_alloc(struct m
         return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
                 NULL : pud_offset(p4d, address);
   }
+ 
+ static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+                                    unsigned long address,
+                                    pgtbl_mod_mask *mod_mask)
+ 
+ {
+       if (unlikely(pgd_none(*pgd))) {
+               if (__p4d_alloc(mm, pgd, address))
+                       return NULL;
+               *mod_mask |= PGTBL_PGD_MODIFIED;
+       }
+ 
+       return p4d_offset(pgd, address);
+ }
+ 
   #endif /* !__ARCH_HAS_5LEVEL_HACK */
   
+ static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+                                    unsigned long address,
+                                    pgtbl_mod_mask *mod_mask)
+ {
+       if (unlikely(p4d_none(*p4d))) {
+               if (__pud_alloc(mm, p4d, address))
+                       return NULL;
+               *mod_mask |= PGTBL_P4D_MODIFIED;
+       }
+ 
+       return pud_offset(p4d, address);
+ }
+ 
   static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
   {
         return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
                 NULL: pmd_offset(pud, address);
   }
+ 
+ static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
+                                    unsigned long address,
+                                    pgtbl_mod_mask *mod_mask)
+ {
+       if (unlikely(pud_none(*pud))) {
+               if (__pmd_alloc(mm, pud, address))
+                       return NULL;
+               *mod_mask |= PGTBL_PUD_MODIFIED;
+       }
+ 
+       return pmd_offset(pud, address);
+ }
   #endif /* CONFIG_MMU */
   
   #if USE_SPLIT_PTE_PTLOCKS
@@@ -2207,6 -2254,11 +2250,11 @@@ static inline void pgtable_pte_page_dto
         ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
                 NULL: pte_offset_kernel(pmd, address))
   
+ #define pte_alloc_kernel_track(pmd, address, mask)                    \
+       ((unlikely(pmd_none(*(pmd))) &&                                 \
+         (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
+               NULL: pte_offset_kernel(pmd, address))
+ 
   #if USE_SPLIT_PMD_PTLOCKS
   
   static struct page *pmd_to_page(pmd_t *pmd)
@@@ -2608,25 -2660,6 +2656,6 @@@ extern vm_fault_t filemap_page_mkwrite(
   int __must_check write_one_page(struct page *page);
   void task_dirty_inc(struct task_struct *tsk);
   
- /* readahead.c */
- #define VM_READAHEAD_PAGES    (SZ_128K / PAGE_SIZE)
- 
- int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
-                       pgoff_t offset, unsigned long nr_to_read);
- 
- void page_cache_sync_readahead(struct address_space *mapping,
-                              struct file_ra_state *ra,
-                              struct file *filp,
-                              pgoff_t offset,
-                              unsigned long size);
- 
- void page_cache_async_readahead(struct address_space *mapping,
-                               struct file_ra_state *ra,
-                               struct file *filp,
-                               struct page *pg,
-                               pgoff_t offset,
-                               unsigned long size);
- 
   extern unsigned long stack_guard_gap;
   /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
   extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
@@@ -2841,7 -2874,7 +2870,7 @@@ struct page *follow_page(struct vm_area
    * releasing pages: get_user_pages*() pages must be released via put_page(),
    * while pin_user_pages*() pages must be released via unpin_user_page().
    *
- - * Please see Documentation/vm/pin_user_pages.rst for more information.
+ + * Please see Documentation/core-api/pin_user_pages.rst for more information.
    */
   
   static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
diff --combined include/linux/mmzone.h

index acffc3bc61785f420a47591b88cfd034b2bd036c,a89f47515eb17c047a3945783dfc3d37e61f0679..fdd9beb5efedd5c95a5d9f8ff6d13e6eb984b354
--- 1/include/linux/mmzone.h
--- 2/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@@ -156,9 -156,6 +156,9 @@@ enum zone_stat_item 
         NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
         NR_PAGETABLE,           /* used for pagetables */
         NR_KERNEL_STACK_KB,     /* measured in KiB */
+ +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ +      NR_KERNEL_SCS_KB,       /* measured in KiB */
+ +#endif
         /* Second 128 byte cacheline */
         NR_BOUNCE,
   #if IS_ENABLED(CONFIG_ZSMALLOC)
@@@ -196,7 -193,6 +196,6 @@@ enum node_stat_item 
         NR_FILE_THPS,
         NR_FILE_PMDMAPPED,
         NR_ANON_THPS,
-       NR_UNSTABLE_NFS,        /* NFS unstable pages */
         NR_VMSCAN_WRITE,
         NR_VMSCAN_IMMEDIATE,    /* Prioritise for reclaim when writeback ends */
         NR_DIRTIED,             /* page dirtyings since bootup */
diff --combined include/linux/sched.h

index 57d0ed061ae40a952a0f5aa193ff76840f062c83,12ef0c7532842026668ec08a420c26f2ffcd1118..33bb7c539246cc0dac3b93f60fbc81e9bde37b28
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -613,7 -613,7 +613,7 @@@ union rcu_special 
                 u8                      blocked;
                 u8                      need_qs;
                 u8                      exp_hint; /* Hint for performance. */
- -              u8                      deferred_qs;
+ +              u8                      need_mb; /* Readers need smp_mb(). */
         } b; /* Bits. */
         u32 s; /* Set of bits. */
   };
@@@ -724,14 -724,6 +724,14 @@@ struct task_struct 
         struct list_head                rcu_tasks_holdout_list;
   #endif /* #ifdef CONFIG_TASKS_RCU */
   
+ +#ifdef CONFIG_TASKS_TRACE_RCU
+ +      int                             trc_reader_nesting;
+ +      int                             trc_ipi_to_cpu;
+ +      union rcu_special               trc_reader_special;
+ +      bool                            trc_reader_checked;
+ +      struct list_head                trc_holdout_list;
+ +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+ +
         struct sched_info               sched_info;
   
         struct list_head                tasks;
@@@ -1297,12 -1289,6 +1297,12 @@@
         unsigned long                   prev_lowest_stack;
   #endif
   
+ +#ifdef CONFIG_X86_MCE
+ +      u64                             mce_addr;
+ +      u64                             mce_status;
+ +      struct callback_head            mce_kill_me;
+ +#endif
+ +
         /*
          * New fields for task_struct should be added above here, so that
          * they are included in the randomized portion of task_struct.
@@@ -1495,7 -1481,8 +1495,8 @@@ extern struct pid *cad_pid
   #define PF_KSWAPD             0x00020000      /* I am kswapd */
   #define PF_MEMALLOC_NOFS      0x00040000      /* All allocation requests will inherit GFP_NOFS */
   #define PF_MEMALLOC_NOIO      0x00080000      /* All allocation requests will inherit GFP_NOIO */
- #define PF_LESS_THROTTLE      0x00100000      /* Throttle me less: I clean memory */
+ #define PF_LOCAL_THROTTLE     0x00100000      /* Throttle writes only against the bdi I write to,
+                                                * I am cleaning dirty pages from some other bdi. */
   #define PF_KTHREAD            0x00200000      /* I am a kernel thread */
   #define PF_RANDOMIZE          0x00400000      /* Randomize virtual address space */
   #define PF_SWAPWRITE          0x00800000      /* Allowed to write to swap */
diff --combined include/linux/swap.h

index 25181d2dd0b9f2272c30dd5d2af026eaad07dfbe,68ef7638311f3f2e037a264c74a20020900ed213..e92176fc882427d215b17ae6af0e77708a2cd9f9
--- 1/include/linux/swap.h
--- 2/include/linux/swap.h
+++ b/include/linux/swap.h
@@@ -183,12 -183,17 +183,17 @@@ enum 
   #define SWAP_CLUSTER_MAX 32UL
   #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
   
- #define SWAP_MAP_MAX  0x3e    /* Max duplication count, in first swap_map */
- #define SWAP_MAP_BAD  0x3f    /* Note pageblock is bad, in first swap_map */
+ /* Bit flag in swap_map */
   #define SWAP_HAS_CACHE        0x40    /* Flag page is cached, in first swap_map */
- #define SWAP_CONT_MAX 0x7f    /* Max count, in each swap_map continuation */
- #define COUNT_CONTINUED       0x80    /* See swap_map continuation for full count */
- #define SWAP_MAP_SHMEM        0xbf    /* Owned by shmem/tmpfs, in first swap_map */
+ #define COUNT_CONTINUED       0x80    /* Flag swap_map continuation for full count */
+ 
+ /* Special value in first swap_map */
+ #define SWAP_MAP_MAX  0x3e    /* Max count */
+ #define SWAP_MAP_BAD  0x3f    /* Note page is bad */
+ #define SWAP_MAP_SHMEM        0xbf    /* Owned by shmem/tmpfs */
+ 
+ /* Special value in each swap_map continuation */
+ #define SWAP_CONT_MAX 0x7f    /* Max count */
   
   /*
    * We use this to track usage of a cluster. A cluster is a block of swap disk
@@@ -247,6 -252,7 +252,7 @@@ struct swap_info_struct 
         unsigned int inuse_pages;       /* number of those currently in use */
         unsigned int cluster_next;      /* likely index for next allocation */
         unsigned int cluster_nr;        /* countdown to next cluster search */
+       unsigned int __percpu *cluster_next_cpu; /* percpu index for next allocation */
         struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
         struct rb_root swap_extent_root;/* root of the swap extent rbtree */
         struct block_device *bdev;      /* swap device or bdev of swap file */
@@@ -337,7 -343,6 +343,7 @@@ extern void activate_page(struct page *
   extern void mark_page_accessed(struct page *);
   extern void lru_add_drain(void);
   extern void lru_add_drain_cpu(int cpu);
+ +extern void lru_add_drain_cpu_zone(struct zone *zone);
   extern void lru_add_drain_all(void);
   extern void rotate_reclaimable_page(struct page *page);
   extern void deactivate_file_page(struct page *page);
@@@ -409,7 -414,6 +415,6 @@@ extern unsigned long total_swapcache_pa
   extern void show_swap_cache_info(void);
   extern int add_to_swap(struct page *page);
   extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
- extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
   extern void __delete_from_swap_cache(struct page *, swp_entry_t entry);
   extern void delete_from_swap_cache(struct page *);
   extern void free_page_and_swap_cache(struct page *);
diff --combined kernel/module.c

index 64a2b4daaaa5285b802d994664536d41564779cb,086618a0058f4d769c21a75e85024ff071ae2cee..a0f201d2e18480d2af25ccf67a3e1a7374b8cefe
--- 1/kernel/module.c
--- 2/kernel/module.c
+++ b/kernel/module.c
@@@ -2400,7 -2400,7 +2400,7 @@@ static void layout_sections(struct modu
                         if ((s->sh_flags & masks[m][0]) != masks[m][0]
                             || (s->sh_flags & masks[m][1])
                             || s->sh_entsize != ~0UL
- -                          || strstarts(sname, ".init"))
+ +                          || module_init_section(sname))
                                 continue;
                         s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
                         pr_debug("\t%s\n", sname);
@@@ -2433,7 -2433,7 +2433,7 @@@
                         if ((s->sh_flags & masks[m][0]) != masks[m][0]
                             || (s->sh_flags & masks[m][1])
                             || s->sh_entsize != ~0UL
- -                          || !strstarts(sname, ".init"))
+ +                          || !module_init_section(sname))
                                 continue;
                         s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i)
                                          | INIT_OFFSET_MASK);
@@@ -2768,11 -2768,6 +2768,11 @@@ void * __weak module_alloc(unsigned lon
         return vmalloc_exec(size);
   }
   
+ +bool __weak module_init_section(const char *name)
+ +{
+ +      return strstarts(name, ".init");
+ +}
+ +
   bool __weak module_exit_section(const char *name)
   {
         return strstarts(name, ".exit");
@@@ -2951,8 -2946,7 +2951,7 @@@ static int copy_module_from_user(const 
                 return err;
   
         /* Suck in entire file: we'll want most of it. */
-       info->hdr = __vmalloc(info->len,
-                       GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL);
+       info->hdr = __vmalloc(info->len, GFP_KERNEL | __GFP_NOWARN);
         if (!info->hdr)
                 return -ENOMEM;
   
@@@ -3155,9 -3149,6 +3154,9 @@@ static int find_module_sections(struct 
         }
   #endif
   
+ +      mod->noinstr_text_start = section_objs(info, ".noinstr.text", 1,
+ +                                              &mod->noinstr_text_size);
+ +
   #ifdef CONFIG_TRACEPOINTS
         mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
                                              sizeof(*mod->tracepoints_ptrs),
@@@ -3201,13 -3192,6 +3200,13 @@@
         mod->ei_funcs = section_objs(info, "_error_injection_whitelist",
                                             sizeof(*mod->ei_funcs),
                                             &mod->num_ei_funcs);
+ +#endif
+ +#ifdef CONFIG_KPROBES
+ +      mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1,
+ +                                              &mod->kprobes_text_size);
+ +      mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist",
+ +                                              sizeof(unsigned long),
+ +                                              &mod->num_kprobe_blacklist);
   #endif
         mod->extable = section_objs(info, "__ex_table",
                                     sizeof(*mod->extable), &mod->num_exentries);
diff --combined kernel/sys.c

index b4a0324a0699d414a8064ca6dd33fc67187e7bc4,180a2fa33f7fe4210bd6a34c11e3aa83d7a0224a..891667a49bb78496f18f5d9256ef9dfe23604895
--- 1/kernel/sys.c
--- 2/kernel/sys.c
+++ b/kernel/sys.c
@@@ -2262,7 -2262,7 +2262,7 @@@ int __weak arch_prctl_spec_ctrl_set(str
         return -EINVAL;
   }
   
- #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LESS_THROTTLE)
+ #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
   
   SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                 unsigned long, arg4, unsigned long, arg5)
@@@ -2634,7 -2634,6 +2634,7 @@@ struct compat_sysinfo 
   COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
   {
         struct sysinfo s;
+ +      struct compat_sysinfo s_32;
   
         do_sysinfo(&s);
   
@@@ -2659,23 -2658,23 +2659,23 @@@
                 s.freehigh >>= bitcount;
         }
   
- -      if (!access_ok(info, sizeof(struct compat_sysinfo)) ||
- -          __put_user(s.uptime, &info->uptime) ||
- -          __put_user(s.loads[0], &info->loads[0]) ||
- -          __put_user(s.loads[1], &info->loads[1]) ||
- -          __put_user(s.loads[2], &info->loads[2]) ||
- -          __put_user(s.totalram, &info->totalram) ||
- -          __put_user(s.freeram, &info->freeram) ||
- -          __put_user(s.sharedram, &info->sharedram) ||
- -          __put_user(s.bufferram, &info->bufferram) ||
- -          __put_user(s.totalswap, &info->totalswap) ||
- -          __put_user(s.freeswap, &info->freeswap) ||
- -          __put_user(s.procs, &info->procs) ||
- -          __put_user(s.totalhigh, &info->totalhigh) ||
- -          __put_user(s.freehigh, &info->freehigh) ||
- -          __put_user(s.mem_unit, &info->mem_unit))
+ +      memset(&s_32, 0, sizeof(s_32));
+ +      s_32.uptime = s.uptime;
+ +      s_32.loads[0] = s.loads[0];
+ +      s_32.loads[1] = s.loads[1];
+ +      s_32.loads[2] = s.loads[2];
+ +      s_32.totalram = s.totalram;
+ +      s_32.freeram = s.freeram;
+ +      s_32.sharedram = s.sharedram;
+ +      s_32.bufferram = s.bufferram;
+ +      s_32.totalswap = s.totalswap;
+ +      s_32.freeswap = s.freeswap;
+ +      s_32.procs = s.procs;
+ +      s_32.totalhigh = s.totalhigh;
+ +      s_32.freehigh = s.freehigh;
+ +      s_32.mem_unit = s.mem_unit;
+ +      if (copy_to_user(info, &s_32, sizeof(s_32)))
                 return -EFAULT;
- -
         return 0;
   }
   #endif /* CONFIG_COMPAT */
diff --combined mm/gup.c

index 4aa2f5ab6e1f7d7a286fc9316324092f5cce1fca,1d84291543fd39a8c0d55d713108c66fda8565c7..3edf740a3897c60ba8644699685bc9d06a5416d4
--- 1/mm/gup.c
--- 2/mm/gup.c
+++ b/mm/gup.c
@@@ -382,22 -382,13 +382,22 @@@ static int follow_pfn_pte(struct vm_are
   }
   
   /*
- - * FOLL_FORCE can write to even unwritable pte's, but only
- - * after we've gone through a COW cycle and they are dirty.
+ + * FOLL_FORCE or a forced COW break can write even to unwritable pte's,
+ + * but only after we've gone through a COW cycle and they are dirty.
    */
   static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
   {
- -      return pte_write(pte) ||
- -              ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+ +      return pte_write(pte) || ((flags & FOLL_COW) && pte_dirty(pte));
+ +}
+ +
+ +/*
+ + * A (separate) COW fault might break the page the other way and
+ + * get_user_pages() would return the page from what is now the wrong
+ + * VM. So we need to force a COW break at GUP time even for reads.
+ + */
+ +static inline bool should_force_cow_break(struct vm_area_struct *vma, unsigned int flags)
+ +{
+ +      return is_cow_mapping(vma->vm_flags) && (flags & (FOLL_GET | FOLL_PIN));
   }
   
   static struct page *follow_page_pte(struct vm_area_struct *vma,
@@@ -1075,11 -1066,9 +1075,11 @@@ static long __get_user_pages(struct tas
                                 goto out;
                         }
                         if (is_vm_hugetlb_page(vma)) {
+ +                              if (should_force_cow_break(vma, foll_flags))
+ +                                      foll_flags |= FOLL_WRITE;
                                 i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                 &start, &nr_pages, i,
- -                                              gup_flags, locked);
+ +                                              foll_flags, locked);
                                 if (locked && *locked == 0) {
                                         /*
                                          * We've got a VM_FAULT_RETRY
@@@ -1093,10 -1082,6 +1093,10 @@@
                                 continue;
                         }
                 }
+ +
+ +              if (should_force_cow_break(vma, foll_flags))
+ +                      foll_flags |= FOLL_WRITE;
+ +
   retry:
                 /*
                  * If we have a pending SIGKILL, don't keep faulting pages and
@@@ -1183,7 -1168,7 +1183,7 @@@ static bool vma_permits_fault(struct vm
         return true;
   }
   
- /*
+ /**
    * fixup_user_fault() - manually resolve a user page fault
    * @tsk:      the task_struct to use for page fault accounting, or
    *            NULL if faults are not to be recorded.
@@@ -1191,7 -1176,8 +1191,8 @@@
    * @address:  user address
    * @fault_flags:flags to pass down to handle_mm_fault()
    * @unlocked: did we unlock the mmap_sem while retrying, maybe NULL if caller
-  *            does not allow retry
+  *            does not allow retry. If NULL, the caller must guarantee
+  *            that fault_flags does not contain FAULT_FLAG_ALLOW_RETRY.
    *
    * This is meant to be called in the specific scenario where for locking reasons
    * we try to access user memory in atomic context (within a pagefault_disable()
@@@ -1854,7 -1840,7 +1855,7 @@@ static long __get_user_pages_remote(str
                                        gup_flags | FOLL_TOUCH | FOLL_REMOTE);
   }
   
- /*
+ /**
    * get_user_pages_remote() - pin user pages in memory
    * @tsk:      the task_struct to use for page fault accounting, or
    *            NULL if faults are not to be recorded.
@@@ -1885,13 -1871,13 +1886,13 @@@
    *
    * Must be called with mmap_sem held for read or write.
    *
-  * get_user_pages walks a process's page tables and takes a reference to
-  * each struct page that each user address corresponds to at a given
+  * get_user_pages_remote walks a process's page tables and takes a reference
+  * to each struct page that each user address corresponds to at a given
    * instant. That is, it takes the page that would be accessed if a user
    * thread accesses the given user virtual address at that instant.
    *
    * This does not guarantee that the page exists in the user mappings when
-  * get_user_pages returns, and there may even be a completely different
+  * get_user_pages_remote returns, and there may even be a completely different
    * page there in some cases (eg. if mmapped pagecache has been invalidated
    * and subsequently re faulted). However it does guarantee that the page
    * won't be freed completely. And mostly callers simply care that the page
@@@ -1903,17 -1889,17 +1904,17 @@@
    * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
    * be called after the page is finished with, and before put_page is called.
    *
-  * get_user_pages is typically used for fewer-copy IO operations, to get a
-  * handle on the memory by some means other than accesses via the user virtual
-  * addresses. The pages may be submitted for DMA to devices or accessed via
-  * their kernel linear mapping (via the kmap APIs). Care should be taken to
-  * use the correct cache flushing APIs.
+  * get_user_pages_remote is typically used for fewer-copy IO operations,
+  * to get a handle on the memory by some means other than accesses
+  * via the user virtual addresses. The pages may be submitted for
+  * DMA to devices or accessed via their kernel linear mapping (via the
+  * kmap APIs). Care should be taken to use the correct cache flushing APIs.
    *
    * See also get_user_pages_fast, for performance critical applications.
    *
-  * get_user_pages should be phased out in favor of
+  * get_user_pages_remote should be phased out in favor of
    * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing
-  * should use get_user_pages because it cannot pass
+  * should use get_user_pages_remote because it cannot pass
    * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
    */
   long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
@@@ -1952,7 -1938,17 +1953,17 @@@ static long __get_user_pages_remote(str
   }
   #endif /* !CONFIG_MMU */
   
- /*
+ /**
+  * get_user_pages() - pin user pages in memory
+  * @start:      starting user address
+  * @nr_pages:   number of pages from start to pin
+  * @gup_flags:  flags modifying lookup behaviour
+  * @pages:      array that receives pointers to the pages pinned.
+  *              Should be at least nr_pages long. Or NULL, if caller
+  *              only intends to ensure the pages are faulted in.
+  * @vmas:       array of pointers to vmas corresponding to each page.
+  *              Or NULL if the caller does not require them.
+  *
    * This is the same as get_user_pages_remote(), just with a
    * less-flexible calling convention where we assume that the task
    * and mm being operated on are the current task's and don't allow
@@@ -1975,11 -1971,7 +1986,7 @@@ long get_user_pages(unsigned long start
   }
   EXPORT_SYMBOL(get_user_pages);
   
- /*
-  * We can leverage the VM_FAULT_RETRY functionality in the page fault
-  * paths better by using either get_user_pages_locked() or
-  * get_user_pages_unlocked().
-  *
+ /**
    * get_user_pages_locked() is suitable to replace the form:
    *
    *      down_read(&mm->mmap_sem);
@@@ -1995,6 -1987,21 +2002,21 @@@
    *      get_user_pages_locked(tsk, mm, ..., pages, &locked);
    *      if (locked)
    *          up_read(&mm->mmap_sem);
+  *
+  * @start:      starting user address
+  * @nr_pages:   number of pages from start to pin
+  * @gup_flags:  flags modifying lookup behaviour
+  * @pages:      array that receives pointers to the pages pinned.
+  *              Should be at least nr_pages long. Or NULL, if caller
+  *              only intends to ensure the pages are faulted in.
+  * @locked:     pointer to lock flag indicating whether lock is held and
+  *              subsequently whether VM_FAULT_RETRY functionality can be
+  *              utilised. Lock must initially be held.
+  *
+  * We can leverage the VM_FAULT_RETRY functionality in the page fault
+  * paths better by using either get_user_pages_locked() or
+  * get_user_pages_unlocked().
+  *
    */
   long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
                            unsigned int gup_flags, struct page **pages,
@@@ -2689,10 -2696,6 +2711,10 @@@ static bool gup_fast_permitted(unsigne
    *
    * If the architecture does not support this function, simply return with no
    * pages pinned.
+ + *
+ + * Careful, careful! COW breaking can go either way, so a non-write
+ + * access can get ambiguous page results. If you call this function without
+ + * 'write' set, you'd better be sure that you're ok with that ambiguity.
    */
   int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                           struct page **pages)
@@@ -2728,12 -2731,6 +2750,12 @@@
          *
          * We do not adopt an rcu_read_lock(.) here as we also want to
          * block IPIs that come from THPs splitting.
+ +       *
+ +       * NOTE! We allow read-only gup_fast() here, but you'd better be
+ +       * careful about possible COW pages. You'll get _a_ COW page, but
+ +       * not necessarily the one you intended to get depending on what
+ +       * COW event happens after this. COW may break the page copy in a
+ +       * random direction.
          */
   
         if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
@@@ -2791,17 -2788,10 +2813,17 @@@ static int internal_get_user_pages_fast
         if (unlikely(!access_ok((void __user *)start, len)))
                 return -EFAULT;
   
+ +      /*
+ +       * The FAST_GUP case requires FOLL_WRITE even for pure reads,
+ +       * because get_user_pages() may need to cause an early COW in
+ +       * order to avoid confusing the normal COW routines. So only
+ +       * targets that are already writable are safe to do by just
+ +       * looking at the page tables.
+ +       */
         if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
             gup_fast_permitted(start, end)) {
                 local_irq_disable();
- -              gup_pgd_range(addr, end, gup_flags, pages, &nr_pinned);
+ +              gup_pgd_range(addr, end, gup_flags | FOLL_WRITE, pages, &nr_pinned);
                 local_irq_enable();
                 ret = nr_pinned;
         }
@@@ -2877,9 -2867,9 +2899,9 @@@ EXPORT_SYMBOL_GPL(get_user_pages_fast)
    * the arguments here are identical.
    *
    * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
- - * see Documentation/vm/pin_user_pages.rst for further details.
+ + * see Documentation/core-api/pin_user_pages.rst for further details.
    *
- - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It
+ + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
    * is NOT intended for Case 2 (RDMA: long-term pins).
    */
   int pin_user_pages_fast(unsigned long start, int nr_pages,
@@@ -2917,9 -2907,9 +2939,9 @@@ EXPORT_SYMBOL_GPL(pin_user_pages_fast)
    * the arguments here are identical.
    *
    * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
- - * see Documentation/vm/pin_user_pages.rst for details.
+ + * see Documentation/core-api/pin_user_pages.rst for details.
    *
- - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It
+ + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
    * is NOT intended for Case 2 (RDMA: long-term pins).
    */
   long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
@@@ -2953,9 -2943,9 +2975,9 @@@ EXPORT_SYMBOL(pin_user_pages_remote)
    * FOLL_PIN is set.
    *
    * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
- - * see Documentation/vm/pin_user_pages.rst for details.
+ + * see Documentation/core-api/pin_user_pages.rst for details.
    *
- - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It
+ + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
    * is NOT intended for Case 2 (RDMA: long-term pins).
    */
   long pin_user_pages(unsigned long start, unsigned long nr_pages,
@@@ -2971,3 -2961,20 +2993,20 @@@
                                      pages, vmas, gup_flags);
   }
   EXPORT_SYMBOL(pin_user_pages);
+ 
+ /*
+  * pin_user_pages_unlocked() is the FOLL_PIN variant of
+  * get_user_pages_unlocked(). Behavior is the same, except that this one sets
+  * FOLL_PIN and rejects FOLL_GET.
+  */
+ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
+                            struct page **pages, unsigned int gup_flags)
+ {
+       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
+       if (WARN_ON_ONCE(gup_flags & FOLL_GET))
+               return -EINVAL;
+ 
+       gup_flags |= FOLL_PIN;
+       return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
+ }
+ EXPORT_SYMBOL(pin_user_pages_unlocked);
diff --combined mm/page_alloc.c

index cbf0301602053666220d20fc90ff83e36d5534c0,45ad73122e826f77c801f1365000bc25c2d530d2..ca864102bebe969890a0a16732f2a1abb97864c6
--- 1/mm/page_alloc.c
--- 2/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@@ -5319,7 -5319,7 +5319,7 @@@ void show_free_areas(unsigned int filte
   
         printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
                 " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
-               " unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+               " unevictable:%lu dirty:%lu writeback:%lu\n"
                 " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
                 " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
                 " free:%lu free_pcp:%lu free_cma:%lu\n",
@@@ -5332,7 -5332,6 +5332,6 @@@
                 global_node_page_state(NR_UNEVICTABLE),
                 global_node_page_state(NR_FILE_DIRTY),
                 global_node_page_state(NR_WRITEBACK),
-               global_node_page_state(NR_UNSTABLE_NFS),
                 global_node_page_state(NR_SLAB_RECLAIMABLE),
                 global_node_page_state(NR_SLAB_UNRECLAIMABLE),
                 global_node_page_state(NR_FILE_MAPPED),
@@@ -5365,7 -5364,6 +5364,6 @@@
                         " anon_thp: %lukB"
   #endif
                         " writeback_tmp:%lukB"
-                       " unstable:%lukB"
                         " all_unreclaimable? %s"
                         "\n",
                         pgdat->node_id,
@@@ -5387,7 -5385,6 +5385,6 @@@
                         K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
   #endif
                         K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
-                       K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
                         pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
                                 "yes" : "no");
         }
@@@ -5420,9 -5417,6 +5417,9 @@@
                         " managed:%lukB"
                         " mlocked:%lukB"
                         " kernel_stack:%lukB"
+ +#ifdef CONFIG_SHADOW_CALL_STACK
+ +                      " shadow_call_stack:%lukB"
+ +#endif
                         " pagetables:%lukB"
                         " bounce:%lukB"
                         " free_pcp:%lukB"
@@@ -5445,9 -5439,6 +5442,9 @@@
                         K(zone_managed_pages(zone)),
                         K(zone_page_state(zone, NR_MLOCK)),
                         zone_page_state(zone, NR_KERNEL_STACK_KB),
+ +#ifdef CONFIG_SHADOW_CALL_STACK
+ +                      zone_page_state(zone, NR_KERNEL_SCS_KB),
+ +#endif
                         K(zone_page_state(zone, NR_PAGETABLE)),
                         K(zone_page_state(zone, NR_BOUNCE)),
                         K(free_pcp),
@@@ -8253,7 -8244,7 +8250,7 @@@ void *__init alloc_large_system_hash(co
                                 table = memblock_alloc_raw(size,
                                                            SMP_CACHE_BYTES);
                 } else if (get_order(size) >= MAX_ORDER || hashdist) {
-                       table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+                       table = __vmalloc(size, gfp_flags);
                         virt = true;
                 } else {
                         /*
diff --combined mm/vmstat.c

index 2435d2c246570f7b72d1aa9a9babb8f01f934d5f,b1582fdf757ce7f8a6d21462a23e8688cc1034aa..5e241434cab28887d55f4f66a65a13656848c234
--- 1/mm/vmstat.c
--- 2/mm/vmstat.c
+++ b/mm/vmstat.c
@@@ -1108,7 -1108,7 +1108,7 @@@ int fragmentation_index(struct zone *zo
                                         TEXT_FOR_HIGHMEM(xx) xx "_movable",
   
   const char * const vmstat_text[] = {
-       /* enum zone_stat_item countes */
+       /* enum zone_stat_item counters */
         "nr_free_pages",
         "nr_zone_inactive_anon",
         "nr_zone_active_anon",
@@@ -1119,9 -1119,6 +1119,9 @@@
         "nr_mlock",
         "nr_page_table_pages",
         "nr_kernel_stack",
+ +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ +      "nr_shadow_call_stack",
+ +#endif
         "nr_bounce",
   #if IS_ENABLED(CONFIG_ZSMALLOC)
         "nr_zspages",
@@@ -1165,7 -1162,6 +1165,6 @@@
         "nr_file_hugepages",
         "nr_file_pmdmapped",
         "nr_anon_transparent_hugepages",
-       "nr_unstable",
         "nr_vmscan_write",
         "nr_vmscan_immediate_reclaim",
         "nr_dirtied",
@@@ -1726,6 -1722,14 +1725,14 @@@ static int vmstat_show(struct seq_file 
         seq_puts(m, vmstat_text[off]);
         seq_put_decimal_ull(m, " ", *l);
         seq_putc(m, '\n');
+ 
+       if (off == NR_VMSTAT_ITEMS - 1) {
+               /*
+                * We've come to the end - add any deprecated counters to avoid
+                * breaking userspace which might depend on them being present.
+                */
+               seq_puts(m, "nr_unstable 0\n");
+       }
         return 0;
   }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 2 Jun 2020 19:21:36 +0000 (12:21 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 2 Jun 2020 19:21:36 +0000 (12:21 -0700)
		1	2
Documentation/filesystems/proc.rst	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm64/include/asm/pgtable.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm64/mm/dump.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/base/node.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/open.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/proc/meminfo.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/proc/task_mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/squashfs/decompressor_multi_percpu.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mmzone.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/swap.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/module.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sys.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/gup.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/page_alloc.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/vmstat.c	patch \|	diff1 \|	diff2 \|	blob \| history