summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Roberson <jeff@FreeBSD.org>2018-03-24 23:58:44 +0000
committerJeff Roberson <jeff@FreeBSD.org>2018-03-24 23:58:44 +0000
commit93f31533dfb3edff8b0d0ee96a8e4d28cd2b79fc (patch)
treeb3679501e829413fe4f2f55cbe40d8e6045ef377
parentc2f5940db3c7f6b7e584b93edba6d057c0dfc51d (diff)
Notes
-rw-r--r--lib/libc/sys/Makefile.inc2
-rw-r--r--lib/libc/sys/cpuset.224
-rw-r--r--lib/libc/sys/cpuset_getaffinity.22
-rw-r--r--share/man/man9/Makefile4
-rw-r--r--share/man/man9/malloc.912
-rw-r--r--share/man/man9/zone.929
-rw-r--r--usr.bin/cpuset/cpuset.128
7 files changed, 85 insertions, 16 deletions
diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
index 39e1974375c7f..dde8dc8fe7c71 100644
--- a/lib/libc/sys/Makefile.inc
+++ b/lib/libc/sys/Makefile.inc
@@ -174,6 +174,7 @@ MAN+= abort2.2 \
connectat.2 \
cpuset.2 \
cpuset_getaffinity.2 \
+ cpuset_getdomain.2 \
dup.2 \
execve.2 \
_exit.2 \
@@ -371,6 +372,7 @@ MLINKS+=nanosleep.2 clock_nanosleep.2
MLINKS+=cpuset.2 cpuset_getid.2 \
cpuset.2 cpuset_setid.2
MLINKS+=cpuset_getaffinity.2 cpuset_setaffinity.2
+MLINKS+=cpuset_getdomain.2 cpuset_setdomain.2
MLINKS+=dup.2 dup2.2
MLINKS+=execve.2 fexecve.2
MLINKS+=extattr_get_file.2 extattr.2 \
diff --git a/lib/libc/sys/cpuset.2 b/lib/libc/sys/cpuset.2
index d39a5c5eaeac4..664d48eeb9b4e 100644
--- a/lib/libc/sys/cpuset.2
+++ b/lib/libc/sys/cpuset.2
@@ -48,21 +48,21 @@
The
.Nm
family of system calls allow applications to control sets of processors and
-assign processes and threads to these sets.
-Processor sets contain lists of CPUs that members may run on and exist only
-as long as some process is a member of the set.
+memory domains and assign processes and threads to these sets.
+Processor sets contain lists of CPUs and domains that members may run on
+and exist only as long as some process is a member of the set.
All processes in the system have an assigned set.
The default set for all processes in the system is the set numbered 1.
Threads belong to the same set as the process which contains them,
however, they may further restrict their set with the anonymous
-per-thread mask.
+per-thread mask to bind to a specific CPU or subset of CPUs and memory domains.
.Pp
Sets are referenced by a number of type
.Ft cpuset_id_t .
Each thread has a root set, an assigned set, and an anonymous mask.
Only the root and assigned sets are numbered.
-The root set is the set of all CPUs available in the system or in the
-system partition the thread is running in.
+The root set is the set of all CPUs and memory domains available in the system
+or in the system partition the thread is running in.
The assigned set is a subset of the root set and is administratively
assignable on a per-process basis.
Many processes and threads may be members of a numbered set.
@@ -72,7 +72,8 @@ set.
It is intended that administrators will manipulate numbered sets using
.Xr cpuset 1
while application developers will manipulate anonymous sets using
-.Xr cpuset_setaffinity 2 .
+.Xr cpuset_setaffinity 2 and
+.Xr cpuset_setdomain 2 .
.Pp
To select the correct set a value of type
.Ft cpulevel_t
@@ -175,9 +176,10 @@ with a process or thread is unsupported since
this references the unnumbered anonymous mask.
.Pp
The actual contents of the sets may be retrieved or manipulated using
-.Xr cpuset_getaffinity 2
-and
-.Xr cpuset_setaffinity 2 .
+.Xr cpuset_getaffinity 2 ,
+.Xr cpuset_setaffinity 2 ,
+.Xr cpuset_getdomain 2 , and
+.Xr cpuset_setdomain 2 .
See those manual pages for more detail.
.Sh RETURN VALUES
.Rv -std
@@ -220,6 +222,8 @@ for allocation.
.Xr cpuset 1 ,
.Xr cpuset_getaffinity 2 ,
.Xr cpuset_setaffinity 2 ,
+.Xr cpuset_getdomain 2 ,
+.Xr cpuset_setdomain 2 ,
.Xr pthread_affinity_np 3 ,
.Xr pthread_attr_affinity_np 3 ,
.Xr cpuset 9
diff --git a/lib/libc/sys/cpuset_getaffinity.2 b/lib/libc/sys/cpuset_getaffinity.2
index dbf27ad9c2952..ebc72e391ed28 100644
--- a/lib/libc/sys/cpuset_getaffinity.2
+++ b/lib/libc/sys/cpuset_getaffinity.2
@@ -160,6 +160,8 @@ See
.Xr cpuset 2 ,
.Xr cpuset_getid 2 ,
.Xr cpuset_setid 2 ,
+.Xr cpuset_getdomain 2 ,
+.Xr cpuset_setdomain 2 ,
.Xr pthread_affinity_np 3 ,
.Xr pthread_attr_affinity_np 3 ,
.Xr cpuset 9
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index 630e5094a78c6..6937be37496aa 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -1271,6 +1271,8 @@ MLINKS+=make_dev.9 destroy_dev.9 \
make_dev.9 make_dev_p.9 \
make_dev.9 make_dev_s.9
MLINKS+=malloc.9 free.9 \
+ malloc.9 malloc_domain.9 \
+ malloc.9 free_domain.9 \
malloc.9 mallocarray.9 \
malloc.9 MALLOC_DECLARE.9 \
malloc.9 MALLOC_DEFINE.9 \
@@ -2213,10 +2215,12 @@ MLINKS+=vslock.9 vsunlock.9
MLINKS+=zone.9 uma.9 \
zone.9 uma_zalloc.9 \
zone.9 uma_zalloc_arg.9 \
+ zone.9 uma_zalloc_domain.9 \
zone.9 uma_zcreate.9 \
zone.9 uma_zdestroy.9 \
zone.9 uma_zfree.9 \
zone.9 uma_zfree_arg.9 \
+ zone.9 uma_zfree_domain.9 \
zone.9 uma_zone_get_cur.9 \
zone.9 uma_zone_get_max.9 \
zone.9 uma_zone_set_max.9 \
diff --git a/share/man/man9/malloc.9 b/share/man/man9/malloc.9
index 53fc21e4f9e54..82294a6897f57 100644
--- a/share/man/man9/malloc.9
+++ b/share/man/man9/malloc.9
@@ -46,9 +46,13 @@
.Ft void *
.Fn malloc "size_t size" "struct malloc_type *type" "int flags"
.Ft void *
+.Fn malloc_domain "size_t size" "struct malloc_type *type" "int domain" "int flags"
+.Ft void *
.Fn mallocarray "size_t nmemb" "size_t size" "struct malloc_type *type" "int flags"
.Ft void
.Fn free "void *addr" "struct malloc_type *type"
+.Ft void
+.Fn free_domain "void *addr" "struct malloc_type *type"
.Ft void *
.Fn realloc "void *addr" "size_t size" "struct malloc_type *type" "int flags"
.Ft void *
@@ -66,6 +70,14 @@ object whose size is specified by
.Fa size .
.Pp
The
+.Fn malloc_domain
+variant allocates the object from the specified memory domain. Memory allocated
+with this function should be returned with
+.Fn free_domain .
+See
+.Xr numa 9 for more details.
+.Pp
+The
.Fn mallocarray
function allocates uninitialized memory in kernel address space for an
array of
diff --git a/share/man/man9/zone.9 b/share/man/man9/zone.9
index 9dda67eb2c68f..0e8d7838e193e 100644
--- a/share/man/man9/zone.9
+++ b/share/man/man9/zone.9
@@ -32,8 +32,10 @@
.Nm uma_zcreate ,
.Nm uma_zalloc ,
.Nm uma_zalloc_arg ,
+.Nm uma_zalloc_domain ,
.Nm uma_zfree ,
.Nm uma_zfree_arg ,
+.Nm uma_zfree_domain ,
.Nm uma_zdestroy ,
.Nm uma_zone_set_max ,
.Nm uma_zone_get_max ,
@@ -55,11 +57,15 @@
.Fn uma_zalloc "uma_zone_t zone" "int flags"
.Ft "void *"
.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
+.Ft "void *"
+.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
.Ft void
.Fn uma_zfree "uma_zone_t zone" "void *item"
.Ft void
.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
.Ft void
+.Fn uma_zfree_domain "uma_zone_t zone" "void *item" "void *arg"
+.Ft void
.Fn uma_zdestroy "uma_zone_t zone"
.Ft int
.Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
@@ -78,10 +84,13 @@
.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
.Sh DESCRIPTION
The zone allocator provides an efficient interface for managing
-dynamically-sized collections of items of similar size.
+dynamically-sized collections of items of identical size.
The zone allocator can work with preallocated zones as well as with
runtime-allocated ones, and is therefore available much earlier in the
-boot process than other memory management routines.
+boot process than other memory management routines. The zone allocator
+provides per-cpu allocation caches with linear scalability on SMP
+systems as well as round-robin and first-touch policies for NUMA
+systems.
.Pp
A zone is an extensible collection of items of identical size.
The zone allocator keeps track of which items are in use and which
@@ -209,6 +218,11 @@ The zone is for the
subsystem.
.It Dv UMA_ZONE_VM
The zone is for the VM subsystem.
+.It Dv UMA_ZONE_NUMA
+The zone should use a first-touch NUMA policy rather than the round-robin
+default. Callers that do not free memory on the same domain it is allocated
+from will cause mixing in per-cpu caches. See
+.Xr numa 9 for more details.
.El
.Pp
To allocate an item from a zone, simply call
@@ -243,12 +257,21 @@ The variations
.Fn uma_zalloc_arg
and
.Fn uma_zfree_arg
-allow to
+allow callers to
specify an argument for the
.Dv ctor
and
.Dv dtor
functions, respectively.
+The
+.Fn uma_zalloc_domain
+function allows callers to specify a fixed
+.Xr numa 9 domain to allocate from. This uses a guaranteed but slow path in
+the allocator which reduces concurrency. The
+.Fn uma_zfree_domain
+function should be used to return memory allocated in this fashion. This
+function infers the domain from the pointer and does not require it as an
+argument.
.Pp
Created zones,
which are empty,
diff --git a/usr.bin/cpuset/cpuset.1 b/usr.bin/cpuset/cpuset.1
index 66abc67a93690..4fe47f5bac9ed 100644
--- a/usr.bin/cpuset/cpuset.1
+++ b/usr.bin/cpuset/cpuset.1
@@ -34,20 +34,24 @@
.Sh SYNOPSIS
.Nm
.Op Fl l Ar cpu-list
+.Op Fl n Ar policy:domain-list
.Op Fl s Ar setid
.Ar cmd ...
.Nm
.Op Fl l Ar cpu-list
+.Op Fl n Ar policy:domain-list
.Op Fl s Ar setid
.Fl p Ar pid
.Nm
.Op Fl c
.Op Fl l Ar cpu-list
+.Op Fl n Ar policy:domain-list
.Fl C
.Fl p Ar pid
.Nm
.Op Fl c
.Op Fl l Ar cpu-list
+.Op Fl n Ar policy:domain-list
.Op Fl j Ar jailid | Fl p Ar pid | Fl t Ar tid | Fl s Ar setid | Fl x Ar irq
.Nm
.Fl g
@@ -57,8 +61,9 @@
The
.Nm
command can be used to assign processor sets to processes, run commands
-constrained to a given set or list of processors, and query information
-about processor binding, sets, and available processors in the system.
+constrained to a given set or list of processors and memory domains, and query
+information about processor binding, memory binding and policy, sets, and
+available processors and memory domains in the system.
.Pp
.Nm
requires a target to modify or query.
@@ -92,6 +97,15 @@ This last set is the list of all possible CPUs in the system and is
queried using
.Fl r .
.Pp
+Most sets include NUMA memory domain and policy information. This can be
+inspected with
+.Fl g
+and set with
+.Fl n .
+This will specify which NUMA domains are visible to the process and
+affect where anonymous memory and file pages will be stored on first access.
+Files accessed first by other processes may specify conflicting policy.
+.Pp
When running a command it may join a set specified with
.Fl s
otherwise a new set is created.
@@ -110,7 +124,8 @@ Create a new cpuset and assign the target process to that set.
The requested operation should reference the cpuset available via the
target specifier.
.It Fl d Ar domain
-Specifies a NUMA domain id as the target of the operation.
+Specifies a NUMA domain id as the target of the operation. This can only
+be used to query the CPUs visible in each numbered domain.
.It Fl g
Causes
.Nm
@@ -130,6 +145,13 @@ numbers separated by '-' for ranges and commas separating individual numbers.
A special list of
.Dq all
may be specified in which case the list includes all CPUs from the root set.
+.It Fl n Ar policy:domain-list
+Specifies a list of domains and allocation policy to apply to a target. Ranges
+may be specified as in
+.Fl l .
+Valid policies include first-touch, ft, round-robin, rr, and prefer. The prefer
+policy accepts only a single domain in the set. The parent of the set is
+consulted if the preferred domain is unavailable.
.It Fl p Ar pid
Specifies a pid as the target of the operation.
.It Fl s Ar setid