diff options
Diffstat (limited to 'sys/netinet/tcp_hpts_internal.h')
-rw-r--r-- | sys/netinet/tcp_hpts_internal.h | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/sys/netinet/tcp_hpts_internal.h b/sys/netinet/tcp_hpts_internal.h new file mode 100644 index 000000000000..8b33e03a6981 --- /dev/null +++ b/sys/netinet/tcp_hpts_internal.h @@ -0,0 +1,184 @@ +/*- + * Copyright (c) 2025 Netflix, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __tcp_hpts_internal_h__ +#define __tcp_hpts_internal_h__ + +/* + * TCP High Precision Timer System (HPTS) - Internal Definitions + * + * This header contains internal structures, constants, and interfaces that are + * implemented in tcp_hpts.c but exposed to enable comprehensive unit testing of + * the HPTS subsystem. + */ + +#if defined(_KERNEL) + +/* + * The hpts uses a 102400 wheel. The wheel + * defines the time in 10 usec increments (102400 x 10). + * This gives a range of 10usec - 1024ms to place + * an entry within. If the user requests more than + * 1.024 second, a remaineder is attached and the hpts + * when seeing the remainder will re-insert the + * inpcb forward in time from where it is until + * the remainder is zero. + */ + +#define NUM_OF_HPTSI_SLOTS 102400 + +/* The number of connections after which the dynamic sleep logic kicks in. */ +#define DEFAULT_CONNECTION_THRESHOLD 100 + +/* + * The hpts uses a 102400 wheel. The wheel + * defines the time in 10 usec increments (102400 x 10). + * This gives a range of 10usec - 1024ms to place + * an entry within. If the user requests more than + * 1.024 second, a remaineder is attached and the hpts + * when seeing the remainder will re-insert the + * inpcb forward in time from where it is until + * the remainder is zero. + */ + +#define NUM_OF_HPTSI_SLOTS 102400 + +/* Convert microseconds to HPTS slots */ +#define HPTS_USEC_TO_SLOTS(x) ((x+9) /10) + +/* The number of connections after which the dynamic sleep logic kicks in. */ +#define DEFAULT_CONNECTION_THRESHOLD 100 + +extern int tcp_bind_threads; /* Thread binding configuration + * (0=none, 1=cpu, 2=numa) */ + +/* + * Abstraction layer controlling time, interrupts and callouts. + */ +struct tcp_hptsi_funcs { + void (*microuptime)(struct timeval *tv); + int (*swi_add)(struct intr_event **eventp, const char *name, + driver_intr_t handler, void *arg, int pri, enum intr_type flags, + void **cookiep); + int (*swi_remove)(void *cookie); + void (*swi_sched)(void *cookie, int flags); + int (*intr_event_bind)(struct intr_event *ie, int cpu); + int (*intr_event_bind_ithread_cpuset)(struct intr_event *ie, + struct _cpuset *mask); + void (*callout_init)(struct callout *c, int mpsafe); + int (*callout_reset_sbt_on)(struct callout *c, sbintime_t sbt, + sbintime_t precision, void (*func)(void *), void *arg, int cpu, + int flags); + int (*_callout_stop_safe)(struct callout *c, int flags); +}; + +/* Default function table for system operation */ +extern const struct tcp_hptsi_funcs tcp_hptsi_default_funcs; + +/* Each hpts has its own p_mtx which is used for locking */ +#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED) +#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx) +#define HPTS_TRYLOCK(hpts) mtx_trylock(&(hpts)->p_mtx) +#define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx) + +struct tcp_hpts_entry { + /* Cache line 0x00 */ + struct mtx p_mtx; /* Mutex for hpts */ + struct timeval p_mysleep; /* Our min sleep time */ + uint64_t syscall_cnt; + uint64_t sleeping; /* What the actual sleep was (if sleeping) */ + uint16_t p_hpts_active; /* Flag that says hpts is awake */ + uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */ + uint32_t p_runningslot; /* Current slot we are at if we are running */ + uint32_t p_prev_slot; /* Previous slot we were on */ + uint32_t p_cur_slot; /* Current slot in wheel hpts is draining */ + uint32_t p_nxt_slot; /* The next slot outside the current range + * of slots that the hpts is running on. */ + int32_t p_on_queue_cnt; /* Count on queue in this hpts */ + uint8_t p_direct_wake :1, /* boolean */ + p_on_min_sleep:1, /* boolean */ + p_hpts_wake_scheduled:1,/* boolean */ + hit_callout_thresh:1, + p_avail:4; + uint8_t p_fill[3]; /* Fill to 32 bits */ + /* Cache line 0x40 */ + struct hptsh { + TAILQ_HEAD(, tcpcb) head; + uint32_t count; + uint32_t gencnt; + } *p_hptss; /* Hptsi wheel */ + uint32_t p_hpts_sleep_time; /* Current sleep interval having a max + * of 255ms */ + uint32_t overidden_sleep; /* what was overrided by min-sleep for logging */ + uint32_t saved_curslot; /* for logging */ + uint32_t saved_prev_slot; /* for logging */ + uint32_t p_delayed_by; /* How much were we delayed by */ + /* Cache line 0x80 */ + struct sysctl_ctx_list hpts_ctx; + struct sysctl_oid *hpts_root; + struct intr_event *ie; + void *ie_cookie; + uint16_t p_cpu; /* The hpts CPU */ + struct tcp_hptsi *p_hptsi; /* Back pointer to parent hptsi structure */ + /* There is extra space in here */ + /* Cache line 0x100 */ + struct callout co __aligned(CACHE_LINE_SIZE); +} __aligned(CACHE_LINE_SIZE); + +struct tcp_hptsi { + struct cpu_group **grps; + struct tcp_hpts_entry **rp_ent; /* Array of hptss */ + uint32_t *cts_last_ran; + uint32_t grp_cnt; + uint32_t rp_num_hptss; /* Number of hpts threads */ + struct hpts_domain_info { + int count; + int cpu[MAXCPU]; + } domains[MAXMEMDOM]; /* Per-NUMA domain CPU assignments */ + const struct tcp_hptsi_funcs *funcs; /* Function table for testability */ +}; + +/* + * Core tcp_hptsi structure manipulation functions. + */ +struct tcp_hptsi* tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs, + bool enable_sysctl); +void tcp_hptsi_destroy(struct tcp_hptsi *pace); +void tcp_hptsi_start(struct tcp_hptsi *pace); +void tcp_hptsi_stop(struct tcp_hptsi *pace); +uint16_t tcp_hptsi_random_cpu(struct tcp_hptsi *pace); +int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout); + +void tcp_hpts_wake(struct tcp_hpts_entry *hpts); + +/* + * LRO HPTS initialization and uninitialization, only for internal use by the + * HPTS code. + */ +void tcp_lro_hpts_init(void); +void tcp_lro_hpts_uninit(void); + +#endif /* defined(_KERNEL) */ +#endif /* __tcp_hpts_internal_h__ */ |