diff options
Diffstat (limited to 'sys/net/vnet.c')
-rw-r--r-- | sys/net/vnet.c | 341 |
1 files changed, 341 insertions, 0 deletions
diff --git a/sys/net/vnet.c b/sys/net/vnet.c new file mode 100644 index 000000000000..a3148868bf98 --- /dev/null +++ b/sys/net/vnet.c @@ -0,0 +1,341 @@ +/*- + * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org> + * Copyright (c) 2009 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/sysctl.h> +#include <sys/linker_set.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/proc.h> +#include <sys/sx.h> +#include <sys/sysctl.h> +#include <sys/vimage.h> + +#include <net/vnet.h> + +/*- + * This is the virtual network stack allocator, which provides storage for + * virtualized global variables. These variables are defined/declared using + * the VNET_DEFINE()/VNET_DECLARE() macros, which place them in the + * 'set_vnet' linker set. The details of the implementation are somewhat + * subtle, but allow most network subsystems to remain + * virtualization-agnostic. + * + * The virtual network stack allocator handles variables in the base kernel + * vs. modules in similar but different ways. In both cases, virtualized + * global variables are marked as such by being declared to be part of the + * vnet linker set. These "master" copies of global variables serve two + * functions: + * + * (1) They contain static initialization or "default" values for global + * variables which will be propagated to each virtual network stack + * instance when created. As with normal global variables, they default + * to zero-filled. + * + * (2) They act as unique global names by which the variable can be referred + * to, regardless of network stack instance. The single global symbol + * will be used to calculate the location of a per-virtual instance + * variable at run-time. + * + * Each virtual network stack instance has a complete copy of each + * virtualized global variable, stored in a malloc'd block of memory + * referred to by vnet->vnet_data_mem. Critical to the design is that each + * per-instance memory block is laid out identically to the master block so + * that the offset of each global variable is the same across all blocks. 
To + * optimize run-time access, a precalculated 'base' address, + * vnet->vnet_data_base, is stored in each vnet, and is the amount that can + * be added to the address of a 'master' instance of a variable to get to the + * per-vnet instance. + * + * Virtualized global variables in modules are handled in a similar manner, but as each + * module has its own 'set_vnet' linker set, and we want to keep all + * virtualized globals together, we reserve space in the kernel's linker set + * for potential module variables using a per-vnet character array, + * 'modspace'. The virtual network stack allocator maintains a free list to + * track what space in the array is free (all, initially) and as modules are + * linked, allocates portions of the space to specific globals. The kernel + * module linker queries the virtual network stack allocator and will + * bind references of the global to the location during linking. It also + * calls into the virtual network stack allocator, once the memory is + * initialized, in order to propagate the new static initializations to all + * existing virtual network stack instances so that the soon-to-be executing + * module will find every network stack instance with proper default values. + */ + +/* + * Location of the kernel's 'set_vnet' linker set. + */ +extern uintptr_t *__start_set_vnet; +extern uintptr_t *__stop_set_vnet; + +#define VNET_START (uintptr_t)&__start_set_vnet +#define VNET_STOP (uintptr_t)&__stop_set_vnet + +/* + * Number of bytes of data in the 'set_vnet' linker set, and hence the total + * size of all kernel virtualized global variables, and the malloc(9) type + * that will be used to allocate it. + */ +#define VNET_BYTES (VNET_STOP - VNET_START) + +MALLOC_DEFINE(M_VNET_DATA, "vnet_data", "VNET data"); + +/* + * VNET_MODMIN is the minimum number of bytes we will reserve for the sum of + * global variables across all loaded modules. 
As this actually sizes an + * array declared as a virtualized global variable in the kernel itself, and + * we want the virtualized global variable space to be page-sized, we may + * have more space than that in practice. + */ +#define VNET_MODMIN 8192 +#define VNET_SIZE roundup2(VNET_BYTES, PAGE_SIZE) +#define VNET_MODSIZE (VNET_SIZE - (VNET_BYTES - VNET_MODMIN)) + +/* + * Space to store virtualized global variables from loadable kernel modules, + * and the free list to manage it. + */ +static VNET_DEFINE(char, modspace[VNET_MODMIN]); + +struct vnet_data_free { + uintptr_t vnd_start; + int vnd_len; + TAILQ_ENTRY(vnet_data_free) vnd_link; +}; + +MALLOC_DEFINE(M_VNET_DATA_FREE, "vnet_data_free", "VNET resource accounting"); +static TAILQ_HEAD(, vnet_data_free) vnet_data_free_head = + TAILQ_HEAD_INITIALIZER(vnet_data_free_head); +static struct sx vnet_data_free_lock; + +/* + * Allocate storage for virtualized global variables in a new virtual network + * stack instance, and copy in initial values from our 'master' copy. + */ +void +vnet_data_init(struct vnet *vnet) +{ + + vnet->vnet_data_mem = malloc(VNET_SIZE, M_VNET_DATA, M_WAITOK); + memcpy(vnet->vnet_data_mem, (void *)VNET_START, VNET_BYTES); + + /* + * All use of vnet-specific data will immediately subtract VNET_START + * from the base memory pointer, so pre-calculate that now to avoid + * it on each use. + */ + vnet->vnet_data_base = (uintptr_t)vnet->vnet_data_mem - VNET_START; +} + +/* + * Release storage for a virtual network stack instance. + */ +void +vnet_data_destroy(struct vnet *vnet) +{ + + free(vnet->vnet_data_mem, M_VNET_DATA); + vnet->vnet_data_mem = NULL; + vnet->vnet_data_base = 0; +} + +/* + * Once on boot, initialize the modspace freelist to entirely cover modspace. 
+ */ +static void +vnet_data_startup(void *dummy __unused) +{ + struct vnet_data_free *df; + + df = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); + df->vnd_start = (uintptr_t)&VNET_NAME(modspace); + df->vnd_len = VNET_MODSIZE; + TAILQ_INSERT_HEAD(&vnet_data_free_head, df, vnd_link); + sx_init(&vnet_data_free_lock, "vnet_data alloc lock"); +} +SYSINIT(vnet_data, SI_SUB_KLD, SI_ORDER_FIRST, vnet_data_startup, 0); + +/* + * When a module is loaded and requires storage for a virtualized global + * variable, allocate space from the modspace free list. This interface + * should be used only by the kernel linker. + */ +void * +vnet_data_alloc(int size) +{ + struct vnet_data_free *df; + void *s; + + s = NULL; + size = roundup2(size, sizeof(void *)); + sx_xlock(&vnet_data_free_lock); + TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { + if (df->vnd_len < size) + continue; + if (df->vnd_len == size) { + s = (void *)df->vnd_start; + TAILQ_REMOVE(&vnet_data_free_head, df, vnd_link); + free(df, M_VNET_DATA_FREE); + break; + } + s = (void *)df->vnd_start; + df->vnd_len -= size; + df->vnd_start = df->vnd_start + size; + break; + } + sx_xunlock(&vnet_data_free_lock); + + return (s); +} + +/* + * Free space for a virtualized global variable on module unload. + */ +void +vnet_data_free(void *start_arg, int size) +{ + struct vnet_data_free *df; + struct vnet_data_free *dn; + uintptr_t start; + uintptr_t end; + + size = roundup2(size, sizeof(void *)); + start = (uintptr_t)start_arg; + end = start + size; + /* + * Free a region of space and merge it with as many neighbors as + * possible. Keeping the list sorted simplifies this operation. + */ + sx_xlock(&vnet_data_free_lock); + TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { + if (df->vnd_start > end) + break; + /* + * If we expand at the end of an entry we may have to + * merge it with the one following it as well. 
+ */ + if (df->vnd_start + df->vnd_len == start) { + df->vnd_len += size; + dn = TAILQ_NEXT(df, vnd_link); + if (df->vnd_start + df->vnd_len == dn->vnd_start) { + df->vnd_len += dn->vnd_len; + TAILQ_REMOVE(&vnet_data_free_head, dn, vnd_link); + free(dn, M_VNET_DATA_FREE); + } + sx_xunlock(&vnet_data_free_lock); + return; + } + if (df->vnd_start == end) { + df->vnd_start = start; + df->vnd_len += size; + sx_xunlock(&vnet_data_free_lock); + return; + } + } + dn = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); + dn->vnd_start = start; + dn->vnd_len = size; + if (df) + TAILQ_INSERT_BEFORE(df, dn, vnd_link); + else + TAILQ_INSERT_TAIL(&vnet_data_free_head, dn, vnd_link); + sx_xunlock(&vnet_data_free_lock); +} + +struct vnet_data_copy_fn_arg { + void *start; + int size; +}; + +static void +vnet_data_copy_fn(struct vnet *vnet, void *arg) +{ + struct vnet_data_copy_fn_arg *varg = arg; + + memcpy((void *)((uintptr_t)vnet->vnet_data_base + + (uintptr_t)varg->start), varg->start, varg->size); +} + +/* + * When a new virtualized global variable has been allocated, propagate its + * initial value to each already-allocated virtual network stack instance. + */ +void +vnet_data_copy(void *start, int size) +{ + struct vnet_data_copy_fn_arg varg; + + varg.start = start; + varg.size = size; + vnet_foreach(vnet_data_copy_fn, &varg); +} + +/* + * Variants on sysctl_handle_foo that know how to handle virtualized global + * variables: if 'arg1' is a pointer, then we transform it to the local vnet + * offset. 
+ */ +int +vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS) +{ + + if (arg1 != NULL) + arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); + return (sysctl_handle_int(oidp, arg1, arg2, req)); +} + +int +vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) +{ + + if (arg1 != NULL) + arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); + return (sysctl_handle_opaque(oidp, arg1, arg2, req)); +} + +int +vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS) +{ + + if (arg1 != NULL) + arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); + return (sysctl_handle_string(oidp, arg1, arg2, req)); +} + +int +vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS) +{ + + if (arg1 != NULL) + arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); + return (sysctl_handle_int(oidp, arg1, arg2, req)); +} |