aboutsummaryrefslogtreecommitdiff
path: root/www/firefox/files/patch-bug1440203
diff options
context:
space:
mode:
Diffstat (limited to 'www/firefox/files/patch-bug1440203')
-rw-r--r--www/firefox/files/patch-bug1440203788
1 files changed, 788 insertions, 0 deletions
diff --git a/www/firefox/files/patch-bug1440203 b/www/firefox/files/patch-bug1440203
new file mode 100644
index 000000000000..d66aebf5ff72
--- /dev/null
+++ b/www/firefox/files/patch-bug1440203
@@ -0,0 +1,788 @@
+commit cc6e7ab13380
+Author: Jed Davis <jld@mozilla.com>
+Date: Thu Oct 22 21:23:32 2020 +0000
+
+ Bug 1440203 - Support memfd_create in IPC shared memory. r=glandium
+
+ This commit also allows `memfd_create` in the seccomp-bpf policy for all
+ process types.
+
+ `memfd_create` is an API added in Linux 3.17 (and adopted by FreeBSD
+ for the upcoming version 13) for creating anonymous shared memory
+ not connected to any filesystem. Supporting it means that sandboxed
+ child processes on Linux can create shared memory directly instead of
+ messaging a broker, which is unavoidably slower, and it should avoid
+ the problems we'd been seeing with overly small `/dev/shm` in container
+ environments (which were causing serious problems for using Firefox for
+ automated testing of frontend projects).
+
+ `memfd_create` also introduces the related operation of file seals:
+ irrevocably preventing types of modifications to a file. Unfortunately,
+ the most useful one, `F_SEAL_WRITE`, can't be relied on; see the large
+ comment in `SharedMemory:ReadOnlyCopy` for details. So we still use
+ the applicable seals as defense in depth, but read-only copies are
+ implemented on Linux by using procfs (and see the comments on the
+ `ReadOnlyCopy` function in `shared_memory_posix.cc` for the subtleties
+ there).
+
+ There's also a FreeBSD implementation, using `cap_rights_limit` for
+ read-only copies, if the build host is new enough to have the
+ `memfd_create` function.
+
+ The support code for Android, which doesn't support shm_open and can't
+ use the memfd backend because of issues with its SELinux policy (see bug
+ 1670277), has been reorganized to reflect that we'll always use its own
+ API, ashmem, in that case.
+
+ Differential Revision: https://phabricator.services.mozilla.com/D90605
+---
+ build/moz.configure/headers.configure | 8 +
+ config/system-headers.mozbuild | 5 +
+ ipc/chromium/src/base/linux_memfd_defs.h | 69 +++++
+ ipc/chromium/src/base/shared_memory.h | 3 +-
+ ipc/chromium/src/base/shared_memory_posix.cc | 428 +++++++++++++++++++++------
+ ipc/gtest/TestSharedMemory.cpp | 46 +++
+ security/sandbox/linux/SandboxFilter.cpp | 9 +-
+ 7 files changed, 472 insertions(+), 96 deletions(-)
+
+diff --git build/moz.configure/headers.configure build/moz.configure/headers.configure
+index 9445f10a4d53..0f4455d8eeea 100644
+--- build/moz.configure/headers.configure
++++ build/moz.configure/headers.configure
+@@ -65,6 +65,14 @@ check_headers(
+ 'byteswap.h',
+ )
+
++# memfd_create(2) -- Note that older versions of the Linux man-pages
++# project incorrectly cite <sys/memfd.h>, which doesn't exist; this
++# was fixed in the man-pages-5.00 release.
++set_define('HAVE_MEMFD_CREATE',
++ try_compile(includes=['sys/mman.h'],
++ body='memfd_create("", 0);',
++ check_msg='for memfd_create in sys/mman.h'))
++
+ # TODO: Move these checks to file specific to --enable-project=js.
+ have_perf_event_h = check_header('linux/perf_event.h',
+ when=building_linux)
+diff --git config/system-headers.mozbuild config/system-headers.mozbuild
+index 1dfd3f0a48b8..3c355e3f1d71 100644
+--- config/system-headers.mozbuild
++++ config/system-headers.mozbuild
+@@ -1352,5 +1352,10 @@ if CONFIG['OS_TARGET'] == 'Linux' and CONFIG['CPU_ARCH'].startswith('mips'):
+ 'sys/cachectl.h',
+ ]
+
++if CONFIG['OS_TARGET'] == 'FreeBSD':
++ system_headers += [
++ 'sys/capsicum.h',
++ ]
++
+ if CONFIG['MOZ_APP_SYSTEM_HEADERS']:
+ include("../" + CONFIG['MOZ_BUILD_APP'] + "/app-system-headers.mozbuild")
+diff --git ipc/chromium/src/base/linux_memfd_defs.h ipc/chromium/src/base/linux_memfd_defs.h
+new file mode 100644
+index 000000000000..f5b0de1de853
+--- /dev/null
++++ ipc/chromium/src/base/linux_memfd_defs.h
+@@ -0,0 +1,69 @@
++/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
++/* vim: set ts=8 sts=2 et sw=2 tw=80: */
++/* This Source Code Form is subject to the terms of the Mozilla Public
++ * License, v. 2.0. If a copy of the MPL was not distributed with this
++ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
++
++#ifndef BASE_LINUX_MEMFD_DEFS_H
++#define BASE_LINUX_MEMFD_DEFS_H
++
++#include <sys/syscall.h>
++
++// glibc before 2.27 didn't have a memfd_create wrapper, and if the
++// build system is old enough then it won't have the syscall number
++// and various related constants either.
++
++#if defined(__x86_64__)
++# define MEMFD_CREATE_NR 319
++#elif defined(__i386__)
++# define MEMFD_CREATE_NR 356
++#elif defined(__aarch64__)
++# define MEMFD_CREATE_NR 279
++#elif defined(__arm__)
++# define MEMFD_CREATE_NR 385
++#elif defined(__powerpc__)
++# define MEMFD_CREATE_NR 360
++#elif defined(__s390__)
++# define MEMFD_CREATE_NR 350
++#elif defined(__mips__)
++# include <sgidefs.h>
++# if _MIPS_SIM == _MIPS_SIM_ABI32
++# define MEMFD_CREATE_NR 4354
++# elif _MIPS_SIM == _MIPS_SIM_ABI64
++# define MEMFD_CREATE_NR 5314
++# elif _MIPS_SIM == _MIPS_SIM_NABI32
++# define MEMFD_CREATE_NR 6318
++# endif // mips subarch
++#endif // arch
++
++#ifdef MEMFD_CREATE_NR
++# ifdef SYS_memfd_create
++static_assert(MEMFD_CREATE_NR == SYS_memfd_create,
++ "MEMFD_CREATE_NR should match the actual SYS_memfd_create value");
++# else // defined here but not in system headers
++# define SYS_memfd_create MEMFD_CREATE_NR
++# endif
++#endif
++
++#ifndef MFD_CLOEXEC
++# define MFD_CLOEXEC 0x0001U
++# define MFD_ALLOW_SEALING 0x0002U
++#endif
++
++#ifndef F_ADD_SEALS
++# ifndef F_LINUX_SPECIFIC_BASE
++# define F_LINUX_SPECIFIC_BASE 1024
++# endif
++# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
++# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
++# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
++# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
++# define F_SEAL_GROW 0x0004 /* prevent file from growing */
++# define F_SEAL_WRITE 0x0008 /* prevent writes */
++#endif
++
++#ifndef F_SEAL_FUTURE_WRITE
++# define F_SEAL_FUTURE_WRITE 0x0010
++#endif
++
++#endif // BASE_LINUX_MEMFD_DEFS_H
+diff --git ipc/chromium/src/base/shared_memory.h ipc/chromium/src/base/shared_memory.h
+index 93372f4ab333..49d614164a0b 100644
+--- ipc/chromium/src/base/shared_memory.h
++++ ipc/chromium/src/base/shared_memory.h
+@@ -216,8 +216,9 @@ class SharedMemory {
+ // If true indicates this came from an external source so needs extra checks
+ // before being mapped.
+ bool external_section_ = false;
+-#elif defined(OS_POSIX)
++#elif defined(OS_POSIX) && !defined(ANDROID)
+ mozilla::UniqueFileHandle frozen_file_;
++ bool is_memfd_ = false;
+ #endif
+ bool read_only_ = false;
+ bool freezeable_ = false;
+diff --git ipc/chromium/src/base/shared_memory_posix.cc ipc/chromium/src/base/shared_memory_posix.cc
+index d5f734ac91b8..e36805aedb89 100644
+--- ipc/chromium/src/base/shared_memory_posix.cc
++++ ipc/chromium/src/base/shared_memory_posix.cc
+@@ -16,6 +16,18 @@
+ # include "mozilla/Ashmem.h"
+ #endif
+
++#ifdef OS_LINUX
++# include "linux_memfd_defs.h"
++#endif
++
++#ifdef __FreeBSD__
++# include <sys/capsicum.h>
++#endif
++
++#ifdef MOZ_VALGRIND
++# include <valgrind/valgrind.h>
++#endif
++
+ #include "base/eintr_wrapper.h"
+ #include "base/logging.h"
+ #include "base/string_util.h"
+@@ -46,11 +58,14 @@ SharedMemory::~SharedMemory() {
+
+ bool SharedMemory::SetHandle(SharedMemoryHandle handle, bool read_only) {
+ DCHECK(!mapped_file_);
++#ifndef ANDROID
+ DCHECK(!frozen_file_);
++#endif
+
+ freezeable_ = false;
+ mapped_file_.reset(handle.fd);
+ read_only_ = read_only;
++ // is_memfd_ only matters for freezing, which isn't possible
+ return true;
+ }
+
+@@ -62,11 +77,187 @@ bool SharedMemory::IsHandleValid(const SharedMemoryHandle& handle) {
+ // static
+ SharedMemoryHandle SharedMemory::NULLHandle() { return SharedMemoryHandle(); }
+
+-// static
++#ifdef ANDROID
++
++// Android has its own shared memory API, ashmem. It doesn't support
++// POSIX shm_open, and the memfd support (see below) also doesn't work
++// because its SELinux policy prevents the procfs operations we'd use
++// (see bug 1670277 for more details).
++
+ bool SharedMemory::AppendPosixShmPrefix(std::string* str, pid_t pid) {
+-#if defined(ANDROID)
+ return false;
+-#else
++}
++
++bool SharedMemory::CreateInternal(size_t size, bool freezeable) {
++ read_only_ = false;
++
++ DCHECK(size > 0);
++ DCHECK(!mapped_file_);
++
++ int fd = mozilla::android::ashmem_create(nullptr, size);
++ if (fd < 0) {
++ CHROMIUM_LOG(WARNING) << "failed to open shm: " << strerror(errno);
++ return false;
++ }
++
++ mapped_file_.reset(fd);
++ max_size_ = size;
++ freezeable_ = freezeable;
++ return true;
++}
++
++bool SharedMemory::ReadOnlyCopy(SharedMemory* ro_out) {
++ DCHECK(mapped_file_);
++ DCHECK(!read_only_);
++ CHECK(freezeable_);
++
++ if (ro_out == this) {
++ DCHECK(!memory_);
++ }
++
++ if (mozilla::android::ashmem_setProt(mapped_file_.get(), PROT_READ) != 0) {
++ CHROMIUM_LOG(WARNING) << "failed to set ashmem read-only: "
++ << strerror(errno);
++ return false;
++ }
++
++ mozilla::UniqueFileHandle ro_file = std::move(mapped_file_);
++
++ freezeable_ = false;
++ ro_out->Close();
++ ro_out->mapped_file_ = std::move(ro_file);
++ ro_out->max_size_ = max_size_;
++ ro_out->read_only_ = true;
++ ro_out->freezeable_ = false;
++
++ return true;
++}
++
++#else // not Android
++
++// memfd_create is a nonstandard interface for creating anonymous
++// shared memory accessible as a file descriptor but not tied to any
++// filesystem. It first appeared in Linux 3.17, and was adopted by
++// FreeBSD in version 13.
++
++# if !defined(HAVE_MEMFD_CREATE) && defined(OS_LINUX) && \
++ defined(SYS_memfd_create)
++
++// Older libc versions (e.g., glibc before 2.27) don't have the
++// wrapper, but we can supply our own; see `linux_memfd_defs.h`.
++
++static int memfd_create(const char* name, unsigned int flags) {
++ return syscall(SYS_memfd_create, name, flags);
++}
++
++# define HAVE_MEMFD_CREATE 1
++# endif
++
++// memfd supports having "seals" applied to the file, to prevent
++// various types of changes (which apply to all fds referencing the
++// file). Unfortunately, we can't rely on F_SEAL_WRITE to implement
++// Freeze(); see the comments in ReadOnlyCopy() below.
++//
++// Instead, to prevent a child process from regaining write access to
++// a read-only copy, the OS must also provide a way to remove write
++// permissions at the file descriptor level. This next section
++// attempts to accomplish that.
++
++# ifdef HAVE_MEMFD_CREATE
++# ifdef XP_LINUX
++# define USE_MEMFD_CREATE 1
++
++// To create a read-only duplicate of an fd, we can use procfs; the
++// same operation could restore write access, but sandboxing prevents
++// child processes from accessing /proc.
++//
++// (Note: if this ever changes to not use /proc, also reconsider how
++// and if HaveMemfd should check whether this works.)
++
++static int DupReadOnly(int fd) {
++ std::string path = StringPrintf("/proc/self/fd/%d", fd);
++ // procfs opens probably won't EINTR, but checking for it can't hurt
++ return HANDLE_EINTR(open(path.c_str(), O_RDONLY | O_CLOEXEC));
++}
++
++# elif defined(__FreeBSD__)
++# define USE_MEMFD_CREATE 1
++
++// FreeBSD's Capsicum framework allows irrevocably restricting the
++// operations permitted on a file descriptor.
++
++static int DupReadOnly(int fd) {
++ int rofd = dup(fd);
++ if (rofd < 0) {
++ return -1;
++ }
++
++ cap_rights_t rights;
++ cap_rights_init(&rights, CAP_FSTAT, CAP_MMAP_R);
++ if (cap_rights_limit(rofd, &rights) < 0) {
++ int err = errno;
++ close(rofd);
++ errno = err;
++ return -1;
++ }
++
++ return rofd;
++}
++
++# else // unhandled OS
++# warning "OS has memfd_create but no DupReadOnly implementation"
++# endif // OS selection
++# endif // HAVE_MEMFD_CREATE
++
++// Runtime detection for memfd support.
++static bool HaveMemfd() {
++# ifdef USE_MEMFD_CREATE
++ static const bool kHave = [] {
++ mozilla::UniqueFileHandle fd(
++ memfd_create("mozilla-ipc-test", MFD_CLOEXEC | MFD_ALLOW_SEALING));
++ if (!fd) {
++ DCHECK_EQ(errno, ENOSYS);
++ return false;
++ }
++
++ // Verify that DupReadOnly works; on Linux it's known to fail if:
++ //
++ // * SELinux assigns the memfd a type for which this process's
++ // domain doesn't have "open" permission; this is always the
++ // case on Android but could occur on desktop as well
++ //
++ // * /proc (used by the DupReadOnly implementation) isn't mounted,
++ // which is a configuration that the Tor Browser project is
++ // interested in as a way to reduce fingerprinting risk
++ //
++ // Sandboxed processes on Linux also can't use it if sandboxing
++ // has already been started, but that's expected. It should be
++ // safe for sandboxed child processes to use memfd even if an
++ // unsandboxed process couldn't freeze them, because freezing
++ // isn't allowed (or meaningful) for memory created by another
++ // process.
++
++ if (!PR_GetEnv("MOZ_SANDBOXED")) {
++ mozilla::UniqueFileHandle rofd(DupReadOnly(fd.get()));
++ if (!rofd) {
++ CHROMIUM_LOG(WARNING) << "read-only dup failed (" << strerror(errno)
++ << "); not using memfd";
++ return false;
++ }
++ }
++ return true;
++ }();
++ return kHave;
++# else
++ return false;
++# endif // USE_MEMFD_CREATE
++}
++
++// static
++bool SharedMemory::AppendPosixShmPrefix(std::string* str, pid_t pid) {
++ if (HaveMemfd()) {
++ return false;
++ }
+ *str += '/';
+ # ifdef OS_LINUX
+ // The Snap package environment doesn't provide a private /dev/shm
+@@ -90,7 +281,6 @@ bool SharedMemory::AppendPosixShmPrefix(std::string* str, pid_t pid) {
+ // enough for this.
+ StringAppendF(str, "org.mozilla.ipc.%d.", static_cast<int>(pid));
+ return true;
+-#endif // !ANDROID
+ }
+
+ bool SharedMemory::CreateInternal(size_t size, bool freezeable) {
+@@ -102,104 +292,118 @@ bool SharedMemory::CreateInternal(size_t size, bool freezeable) {
+
+ mozilla::UniqueFileHandle fd;
+ mozilla::UniqueFileHandle frozen_fd;
+- bool needs_truncate = true;
++ bool is_memfd = false;
++
++# ifdef USE_MEMFD_CREATE
++ if (HaveMemfd()) {
++ const unsigned flags = MFD_CLOEXEC | (freezeable ? MFD_ALLOW_SEALING : 0);
++ fd.reset(memfd_create("mozilla-ipc", flags));
++ if (!fd) {
++ // In general it's too late to fall back here -- in a sandboxed
++ // child process, shm_open is already blocked. And it shouldn't
++ // be necessary.
++ CHROMIUM_LOG(WARNING) << "failed to create memfd: " << strerror(errno);
++ return false;
++ }
++ is_memfd = true;
++ if (freezeable) {
++ frozen_fd.reset(DupReadOnly(fd.get()));
++ if (!frozen_fd) {
++ CHROMIUM_LOG(WARNING)
++ << "failed to create read-only memfd: " << strerror(errno);
++ return false;
++ }
++ }
++ }
++# endif
+
+-#ifdef ANDROID
+- // Android has its own shared memory facility:
+- fd.reset(mozilla::android::ashmem_create(nullptr, size));
+ if (!fd) {
+- CHROMIUM_LOG(WARNING) << "failed to open shm: " << strerror(errno);
+- return false;
+- }
+- needs_truncate = false;
+-#else
+- // Generic Unix: shm_open + shm_unlink
+- do {
+- // The names don't need to be unique, but it saves time if they
+- // usually are.
+- static mozilla::Atomic<size_t> sNameCounter;
+- std::string name;
+- CHECK(AppendPosixShmPrefix(&name, getpid()));
+- StringAppendF(&name, "%zu", sNameCounter++);
+- // O_EXCL means the names being predictable shouldn't be a problem.
+- fd.reset(
+- HANDLE_EINTR(shm_open(name.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600)));
+- if (fd) {
+- if (freezeable) {
+- frozen_fd.reset(HANDLE_EINTR(shm_open(name.c_str(), O_RDONLY, 0400)));
+- if (!frozen_fd) {
+- int open_err = errno;
+- shm_unlink(name.c_str());
+- DLOG(FATAL) << "failed to re-open freezeable shm: "
+- << strerror(open_err);
++ // Generic Unix: shm_open + shm_unlink
++ do {
++ // The names don't need to be unique, but it saves time if they
++ // usually are.
++ static mozilla::Atomic<size_t> sNameCounter;
++ std::string name;
++ CHECK(AppendPosixShmPrefix(&name, getpid()));
++ StringAppendF(&name, "%zu", sNameCounter++);
++ // O_EXCL means the names being predictable shouldn't be a problem.
++ fd.reset(HANDLE_EINTR(
++ shm_open(name.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600)));
++ if (fd) {
++ if (freezeable) {
++ frozen_fd.reset(HANDLE_EINTR(shm_open(name.c_str(), O_RDONLY, 0400)));
++ if (!frozen_fd) {
++ int open_err = errno;
++ shm_unlink(name.c_str());
++ DLOG(FATAL) << "failed to re-open freezeable shm: "
++ << strerror(open_err);
++ return false;
++ }
++ }
++ if (shm_unlink(name.c_str()) != 0) {
++ // This shouldn't happen, but if it does: assume the file is
++ // in fact leaked, and bail out now while it's still 0-length.
++ DLOG(FATAL) << "failed to unlink shm: " << strerror(errno);
+ return false;
+ }
+ }
+- if (shm_unlink(name.c_str()) != 0) {
+- // This shouldn't happen, but if it does: assume the file is
+- // in fact leaked, and bail out now while it's still 0-length.
+- DLOG(FATAL) << "failed to unlink shm: " << strerror(errno);
+- return false;
+- }
+- }
+- } while (!fd && errno == EEXIST);
+-#endif
++ } while (!fd && errno == EEXIST);
++ }
+
+ if (!fd) {
+ CHROMIUM_LOG(WARNING) << "failed to open shm: " << strerror(errno);
+ return false;
+ }
+
+- if (needs_truncate) {
+-#if defined(HAVE_POSIX_FALLOCATE)
+- // Using posix_fallocate will ensure that there's actually space for this
+- // file. Otherwise we end up with a sparse file that can give SIGBUS if we
+- // run out of space while writing to it.
+- int rv;
+- {
+- // Avoid repeated interruptions of posix_fallocate by the profiler's
+- // SIGPROF sampling signal. Indicating "thread sleep" here means we'll
+- // get up to one interruption but not more. See bug 1658847 for more.
+- // This has to be scoped outside the HANDLE_RV_EINTR retry loop.
+- AUTO_PROFILER_THREAD_SLEEP;
+- rv = HANDLE_RV_EINTR(
+- posix_fallocate(fd.get(), 0, static_cast<off_t>(size)));
+- }
+- if (rv != 0) {
+- if (rv == EOPNOTSUPP || rv == EINVAL || rv == ENODEV) {
+- // Some filesystems have trouble with posix_fallocate. For now, we must
+- // fallback ftruncate and accept the allocation failures like we do
+- // without posix_fallocate.
+- // See https://bugzilla.mozilla.org/show_bug.cgi?id=1618914
+- int fallocate_errno = rv;
+- rv = HANDLE_EINTR(ftruncate(fd.get(), static_cast<off_t>(size)));
+- if (rv != 0) {
+- CHROMIUM_LOG(WARNING) << "fallocate failed to set shm size: "
+- << strerror(fallocate_errno);
+- CHROMIUM_LOG(WARNING)
+- << "ftruncate failed to set shm size: " << strerror(errno);
+- return false;
+- }
+- } else {
++# if defined(HAVE_POSIX_FALLOCATE)
++ // Using posix_fallocate will ensure that there's actually space for this
++ // file. Otherwise we end up with a sparse file that can give SIGBUS if we
++ // run out of space while writing to it.
++ int rv;
++ {
++ // Avoid repeated interruptions of posix_fallocate by the profiler's
++ // SIGPROF sampling signal. Indicating "thread sleep" here means we'll
++ // get up to one interruption but not more. See bug 1658847 for more.
++ // This has to be scoped outside the HANDLE_RV_EINTR retry loop.
++ AUTO_PROFILER_THREAD_SLEEP;
++ rv =
++ HANDLE_RV_EINTR(posix_fallocate(fd.get(), 0, static_cast<off_t>(size)));
++ }
++ if (rv != 0) {
++ if (rv == EOPNOTSUPP || rv == EINVAL || rv == ENODEV) {
++ // Some filesystems have trouble with posix_fallocate. For now, we must
++ // fallback ftruncate and accept the allocation failures like we do
++ // without posix_fallocate.
++ // See https://bugzilla.mozilla.org/show_bug.cgi?id=1618914
++ int fallocate_errno = rv;
++ rv = HANDLE_EINTR(ftruncate(fd.get(), static_cast<off_t>(size)));
++ if (rv != 0) {
++ CHROMIUM_LOG(WARNING) << "fallocate failed to set shm size: "
++ << strerror(fallocate_errno);
+ CHROMIUM_LOG(WARNING)
+- << "fallocate failed to set shm size: " << strerror(rv);
++ << "ftruncate failed to set shm size: " << strerror(errno);
+ return false;
+ }
+- }
+-#else
+- int rv = HANDLE_EINTR(ftruncate(fd.get(), static_cast<off_t>(size)));
+- if (rv != 0) {
++ } else {
+ CHROMIUM_LOG(WARNING)
+- << "ftruncate failed to set shm size: " << strerror(errno);
++ << "fallocate failed to set shm size: " << strerror(rv);
+ return false;
+ }
+-#endif
+ }
++# else
++ int rv = HANDLE_EINTR(ftruncate(fd.get(), static_cast<off_t>(size)));
++ if (rv != 0) {
++ CHROMIUM_LOG(WARNING) << "ftruncate failed to set shm size: "
++ << strerror(errno);
++ return false;
++ }
++# endif
+
+ mapped_file_ = std::move(fd);
+ frozen_file_ = std::move(frozen_fd);
+ max_size_ = size;
+ freezeable_ = freezeable;
++ is_memfd_ = is_memfd;
+ return true;
+ }
+
+@@ -212,23 +416,63 @@ bool SharedMemory::ReadOnlyCopy(SharedMemory* ro_out) {
+ DCHECK(!memory_);
+ }
+
+- mozilla::UniqueFileHandle ro_file;
+-#ifdef ANDROID
+- ro_file = std::move(mapped_file_);
+- if (mozilla::android::ashmem_setProt(ro_file.get(), PROT_READ) != 0) {
+- CHROMIUM_LOG(WARNING) << "failed to set ashmem read-only: "
+- << strerror(errno);
+- return false;
++# ifdef USE_MEMFD_CREATE
++# ifdef MOZ_VALGRIND
++ // Valgrind allows memfd_create but doesn't understand F_ADD_SEALS.
++ static const bool haveSeals = RUNNING_ON_VALGRIND == 0;
++# else
++ static const bool haveSeals = true;
++# endif
++ static const bool useSeals = !PR_GetEnv("MOZ_SHM_NO_SEALS");
++ if (is_memfd_ && haveSeals && useSeals) {
++ // Seals are added to the file as defense-in-depth. The primary
++ // method of access control is creating a read-only fd (using
++ // procfs in this case) and requiring that sandboxes processes not
++ // have access to /proc/self/fd to regain write permission; this
++ // is the same as with shm_open.
++ //
++ // Unfortunately, F_SEAL_WRITE is unreliable: if the process
++ // forked while there was a writeable mapping, it will inherit a
++ // copy of the mapping, which causes the seal to fail.
++ //
++ // (Also, in the future we may want to split this into separate
++ // classes for mappings and shared memory handles, which would
++ // complicate identifying the case where `F_SEAL_WRITE` would be
++ // possible even in the absence of races with fork.)
++ //
++ // However, Linux 5.1 added F_SEAL_FUTURE_WRITE, which prevents
++ // write operations afterwards, but existing writeable mappings
++ // are unaffected (similar to ashmem protection semantics).
++
++ const int seals = F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL;
++ int sealError = EINVAL;
++
++# ifdef F_SEAL_FUTURE_WRITE
++ sealError =
++ fcntl(mapped_file_.get(), F_ADD_SEALS, seals | F_SEAL_FUTURE_WRITE) == 0
++ ? 0
++ : errno;
++# endif // F_SEAL_FUTURE_WRITE
++ if (sealError == EINVAL) {
++ sealError =
++ fcntl(mapped_file_.get(), F_ADD_SEALS, seals) == 0 ? 0 : errno;
++ }
++ if (sealError != 0) {
++ CHROMIUM_LOG(WARNING) << "failed to seal memfd: " << strerror(errno);
++ return false;
++ }
+ }
+-#else
++# else // !USE_MEMFD_CREATE
++ DCHECK(!is_memfd_);
++# endif
++
+ DCHECK(frozen_file_);
++ DCHECK(mapped_file_);
+ mapped_file_ = nullptr;
+- ro_file = std::move(frozen_file_);
+-#endif
++ mozilla::UniqueFileHandle ro_file = std::move(frozen_file_);
+
+ DCHECK(ro_file);
+ freezeable_ = false;
+-
+ ro_out->Close();
+ ro_out->mapped_file_ = std::move(ro_file);
+ ro_out->max_size_ = max_size_;
+@@ -238,6 +482,8 @@ bool SharedMemory::ReadOnlyCopy(SharedMemory* ro_out) {
+ return true;
+ }
+
++#endif // not Android
++
+ bool SharedMemory::Map(size_t bytes, void* fixed_address) {
+ if (!mapped_file_) {
+ return false;
+@@ -292,10 +538,12 @@ void SharedMemory::Close(bool unmap_view) {
+ }
+
+ mapped_file_ = nullptr;
++#ifndef ANDROID
+ if (frozen_file_) {
+ CHROMIUM_LOG(WARNING) << "freezeable shared memory was never frozen";
+ frozen_file_ = nullptr;
+ }
++#endif
+ }
+
+ } // namespace base
+diff --git ipc/gtest/TestSharedMemory.cpp ipc/gtest/TestSharedMemory.cpp
+index 4c8ab0c77592..1474be0ea0d9 100644
+--- ipc/gtest/TestSharedMemory.cpp
++++ ipc/gtest/TestSharedMemory.cpp
+@@ -13,6 +13,15 @@
+ #include "mozilla/ipc/SharedMemory.h"
+ #include "mozilla/ipc/SharedMemoryBasic.h"
+
++#ifdef XP_LINUX
++# include <errno.h>
++# include <linux/magic.h>
++# include <stdio.h>
++# include <string.h>
++# include <sys/statfs.h>
++# include <sys/utsname.h>
++#endif
++
+ #ifdef XP_WIN
+ # include <windows.h>
+ #endif
+@@ -293,4 +302,41 @@ TEST(IPCSharedMemory, BasicIsZero)
+ }
+ #endif
+
++#if defined(XP_LINUX) && !defined(ANDROID)
++// Test that memfd_create is used where expected.
++//
++// More precisely: if memfd_create support is expected, verify that
++// shared memory isn't subject to a filesystem size limit.
++TEST(IPCSharedMemory, IsMemfd)
++{
++ static constexpr int kMajor = 3;
++ static constexpr int kMinor = 17;
++
++ struct utsname uts;
++ ASSERT_EQ(uname(&uts), 0) << strerror(errno);
++ ASSERT_STREQ(uts.sysname, "Linux");
++ int major, minor;
++ ASSERT_EQ(sscanf(uts.release, "%d.%d", &major, &minor), 2);
++ bool expectMemfd = major > kMajor || (major == kMajor && minor >= kMinor);
++
++ base::SharedMemory shm;
++ ASSERT_TRUE(shm.Create(1));
++ UniqueFileHandle fd = shm.TakeHandle();
++ ASSERT_TRUE(fd);
++
++ struct statfs fs;
++ ASSERT_EQ(fstatfs(fd.get(), &fs), 0) << strerror(errno);
++ EXPECT_EQ(fs.f_type, TMPFS_MAGIC);
++ static constexpr decltype(fs.f_blocks) kNoLimit = 0;
++ if (expectMemfd) {
++ EXPECT_EQ(fs.f_blocks, kNoLimit);
++ } else {
++ // On older kernels, we expect the memfd / no-limit test to fail.
++ // (In theory it could succeed if backported memfd support exists;
++ // if that ever happens, this check can be removed.)
++ EXPECT_NE(fs.f_blocks, kNoLimit);
++ }
++}
++#endif
++
+ } // namespace mozilla
+diff --git security/sandbox/linux/SandboxFilter.cpp security/sandbox/linux/SandboxFilter.cpp
+index b337a513791e..3017cd967766 100644
+--- security/sandbox/linux/SandboxFilter.cpp
++++ security/sandbox/linux/SandboxFilter.cpp
+@@ -704,6 +704,10 @@ class SandboxPolicyCommon : public SandboxPolicyBase {
+ case __NR_munmap:
+ return Allow();
+
++ // Shared memory
++ case __NR_memfd_create:
++ return Allow();
++
+ // ipc::Shmem; also, glibc when creating threads:
+ case __NR_mprotect:
+ return Allow();
+@@ -1395,11 +1399,6 @@ class ContentSandboxPolicy : public SandboxPolicyCommon {
+ case __NR_eventfd2:
+ return Allow();
+
+-# ifdef __NR_memfd_create
+- case __NR_memfd_create:
+- return Allow();
+-# endif
+-
+ # ifdef __NR_rt_tgsigqueueinfo
+ // Only allow to send signals within the process.
+ case __NR_rt_tgsigqueueinfo: {