diff options
Diffstat (limited to 'sys/netinet/tcp_hpts_test.c')
-rw-r--r-- | sys/netinet/tcp_hpts_test.c | 1662 |
1 files changed, 1662 insertions, 0 deletions
diff --git a/sys/netinet/tcp_hpts_test.c b/sys/netinet/tcp_hpts_test.c new file mode 100644 index 000000000000..bab5827e0572 --- /dev/null +++ b/sys/netinet/tcp_hpts_test.c @@ -0,0 +1,1662 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Netflix, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <tests/ktest.h> +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/interrupt.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <netinet/in_pcb.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_var.h> +#include <netinet/tcp_hpts.h> +#include <netinet/tcp_hpts_internal.h> +#include <dev/tcp_log/tcp_log_dev.h> +#include <netinet/tcp_log_buf.h> + +#undef tcp_hpts_init +#undef tcp_hpts_remove +#undef tcp_hpts_insert +#undef tcp_set_hpts + +/* Custom definitions that take the tcp_hptsi */ +#define tcp_hpts_init(pace, tp) __tcp_hpts_init((pace), (tp)) +#define tcp_hpts_remove(pace, tp) __tcp_hpts_remove((pace), (tp)) +#define tcp_hpts_insert(pace, tp, usecs, diag) \ + __tcp_hpts_insert((pace), (tp), (usecs), (diag)) +#define tcp_set_hpts(pace, tp) __tcp_set_hpts((pace), (tp)) + +static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts_test", "TCP hpts test"); + +static int test_exit_on_failure = true; +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts_test, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "TCP HPTS test controls"); +SYSCTL_INT(_net_inet_tcp_hpts_test, OID_AUTO, exit_on_failure, CTLFLAG_RW, + &test_exit_on_failure, 0, + "Exit HPTS test immediately on first failure (1) or continue running all tests (0)"); + +#define KTEST_VERIFY(x) do { \ + if (!(x)) { \ + KTEST_ERR(ctx, "FAIL: %s", #x); \ + if (test_exit_on_failure) \ + return (EINVAL); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s", #x); \ + } \ +} while (0) + +#define KTEST_EQUAL(x, y) do { \ + if ((x) != (y)) { \ + KTEST_ERR(ctx, "FAIL: %s != %s (%d != %d)", #x, #y, (x), (y)); \ + if (test_exit_on_failure) \ + return (EINVAL); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s == %s", #x, #y); \ + } \ +} while (0) + +#define KTEST_NEQUAL(x, y) do { \ + if ((x) == (y)) { \ + KTEST_ERR(ctx, "FAIL: %s == %s (%d == %d)", #x, #y, (x), (y)); \ + if (test_exit_on_failure) \ + return (EINVAL); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s != %s", #x, #y); \ + } \ +} while (0) + +#define KTEST_GREATER_THAN(x, y) do { \ + if ((x) <= (y)) { \ + KTEST_ERR(ctx, "FAIL: %s <= %s (%d <= %d)", #x, #y, (x), (y)); \ + if (test_exit_on_failure) \ + return (EINVAL); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s > %s", #x, #y); \ + } \ +} while (0) + +#define KTEST_VERIFY_RET(x, y) do { \ + if (!(x)) { \ + KTEST_ERR(ctx, "FAIL: %s", #x); \ + if (test_exit_on_failure) \ + return (y); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s", #x); \ + } \ +} while (0) + +static void +dump_hpts_entry(struct ktest_test_context *ctx, struct tcp_hpts_entry *hpts) +{ + KTEST_LOG(ctx, "tcp_hpts_entry(%p)", hpts); + KTEST_LOG(ctx, " p_cur_slot: %u", hpts->p_cur_slot); + KTEST_LOG(ctx, " p_prev_slot: %u", hpts->p_prev_slot); + KTEST_LOG(ctx, " p_nxt_slot: %u", hpts->p_nxt_slot); + KTEST_LOG(ctx, " p_runningslot: %u", hpts->p_runningslot); + KTEST_LOG(ctx, " p_on_queue_cnt: %d", hpts->p_on_queue_cnt); + KTEST_LOG(ctx, " p_hpts_active: %u", hpts->p_hpts_active); + KTEST_LOG(ctx, " p_wheel_complete: %u", hpts->p_wheel_complete); + KTEST_LOG(ctx, " p_direct_wake: %u", hpts->p_direct_wake); + KTEST_LOG(ctx, " p_on_min_sleep: %u", hpts->p_on_min_sleep); + KTEST_LOG(ctx, " p_hpts_wake_scheduled: %u", hpts->p_hpts_wake_scheduled); + KTEST_LOG(ctx, " hit_callout_thresh: %u", hpts->hit_callout_thresh); + KTEST_LOG(ctx, " p_hpts_sleep_time: %u", hpts->p_hpts_sleep_time); + KTEST_LOG(ctx, " p_delayed_by: %u", hpts->p_delayed_by); + KTEST_LOG(ctx, " overidden_sleep: %u", hpts->overidden_sleep); + KTEST_LOG(ctx, " saved_curslot: %u", hpts->saved_curslot); + KTEST_LOG(ctx, " saved_prev_slot: %u", hpts->saved_prev_slot); + KTEST_LOG(ctx, " syscall_cnt: %lu", hpts->syscall_cnt); + KTEST_LOG(ctx, " sleeping: %lu", hpts->sleeping); + KTEST_LOG(ctx, " p_cpu: %u", hpts->p_cpu); + KTEST_LOG(ctx, " ie_cookie: %p", hpts->ie_cookie); + KTEST_LOG(ctx, " p_hptsi: %p", hpts->p_hptsi); + KTEST_LOG(ctx, " p_mysleep: %ld.%06ld", hpts->p_mysleep.tv_sec, hpts->p_mysleep.tv_usec); +} + +static void +dump_tcpcb(struct tcpcb *tp) +{ + struct ktest_test_context *ctx = tp->t_fb_ptr; + struct inpcb *inp = &tp->t_inpcb; + + KTEST_LOG(ctx, "tcp_control_block(%p)", tp); + + /* HPTS-specific fields */ + KTEST_LOG(ctx, " t_in_hpts: %d", tp->t_in_hpts); + KTEST_LOG(ctx, " t_hpts_cpu: %u", tp->t_hpts_cpu); + KTEST_LOG(ctx, " t_hpts_slot: %d", tp->t_hpts_slot); + KTEST_LOG(ctx, " t_hpts_gencnt: %u", tp->t_hpts_gencnt); + KTEST_LOG(ctx, " t_hpts_request: %u", tp->t_hpts_request); + + /* LRO CPU field */ + KTEST_LOG(ctx, " t_lro_cpu: %u", tp->t_lro_cpu); + + /* TCP flags that affect HPTS */ + KTEST_LOG(ctx, " t_flags2: 0x%x", tp->t_flags2); + KTEST_LOG(ctx, " TF2_HPTS_CPU_SET: %s", (tp->t_flags2 & TF2_HPTS_CPU_SET) ? "YES" : "NO"); + KTEST_LOG(ctx, " TF2_HPTS_CALLS: %s", (tp->t_flags2 & TF2_HPTS_CALLS) ? "YES" : "NO"); + KTEST_LOG(ctx, " TF2_SUPPORTS_MBUFQ: %s", (tp->t_flags2 & TF2_SUPPORTS_MBUFQ) ? "YES" : "NO"); + + /* Input PCB fields that HPTS uses */ + KTEST_LOG(ctx, " inp_flags: 0x%x", inp->inp_flags); + KTEST_LOG(ctx, " INP_DROPPED: %s", (inp->inp_flags & INP_DROPPED) ? "YES" : "NO"); + KTEST_LOG(ctx, " inp_flowid: 0x%x", inp->inp_flowid); + KTEST_LOG(ctx, " inp_flowtype: %u", inp->inp_flowtype); + KTEST_LOG(ctx, " inp_numa_domain: %d", inp->inp_numa_domain); +} + +/* Enum for call counting indices */ +enum test_call_counts { + CCNT_MICROUPTIME = 0, + CCNT_SWI_ADD, + CCNT_SWI_REMOVE, + CCNT_SWI_SCHED, + CCNT_INTR_EVENT_BIND, + CCNT_INTR_EVENT_BIND_CPUSET, + CCNT_CALLOUT_INIT, + CCNT_CALLOUT_RESET_SBT_ON, + CCNT_CALLOUT_STOP_SAFE, + CCNT_TCP_OUTPUT, + CCNT_TCP_TFB_DO_QUEUED_SEGMENTS, + CCNT_MAX +}; + +static uint32_t call_counts[CCNT_MAX]; + +static uint64_t test_time_usec = 0; + +/* + * Reset all test global variables to a clean state. + */ +static void +test_hpts_init(void) +{ + memset(call_counts, 0, sizeof(call_counts)); + test_time_usec = 0; +} + +static void +test_microuptime(struct timeval *tv) +{ + call_counts[CCNT_MICROUPTIME]++; + tv->tv_sec = test_time_usec / 1000000; + tv->tv_usec = test_time_usec % 1000000; +} + +static int +test_swi_add(struct intr_event **eventp, const char *name, + driver_intr_t handler, void *arg, int pri, enum intr_type flags, + void **cookiep) +{ + call_counts[CCNT_SWI_ADD]++; + /* Simulate successful SWI creation */ + *eventp = (struct intr_event *)0xfeedface; /* Mock event */ + *cookiep = (void *)0xdeadbeef; /* Mock cookie */ + return (0); +} + +static int +test_swi_remove(void *cookie) +{ + call_counts[CCNT_SWI_REMOVE]++; + /* Simulate successful removal */ + return (0); +} + +static void +test_swi_sched(void *cookie, int flags) +{ + call_counts[CCNT_SWI_SCHED]++; + /* Simulate successful SWI scheduling */ +} + +static int +test_intr_event_bind(struct intr_event *ie, int cpu) +{ + call_counts[CCNT_INTR_EVENT_BIND]++; + /* Simulate successful binding */ + return (0); +} + +static int +test_intr_event_bind_ithread_cpuset(struct intr_event *ie, struct _cpuset *mask) +{ + call_counts[CCNT_INTR_EVENT_BIND_CPUSET]++; + /* Simulate successful cpuset binding */ + return (0); +} + +static void +test_callout_init(struct callout *c, int mpsafe) +{ + call_counts[CCNT_CALLOUT_INIT]++; + memset(c, 0, sizeof(*c)); +} + +static int +test_callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, + void (*func)(void *), void *arg, int cpu, int flags) +{ + call_counts[CCNT_CALLOUT_RESET_SBT_ON]++; + /* Return 1 to simulate successful timer scheduling */ + return (1); +} + +static int +test_callout_stop_safe(struct callout *c, int flags) +{ + call_counts[CCNT_CALLOUT_STOP_SAFE]++; + /* Return 1 to simulate successful timer stopping */ + return (1); +} + +static const struct tcp_hptsi_funcs test_funcs = { + .microuptime = test_microuptime, + .swi_add = test_swi_add, + .swi_remove = test_swi_remove, + .swi_sched = test_swi_sched, + .intr_event_bind = test_intr_event_bind, + .intr_event_bind_ithread_cpuset = test_intr_event_bind_ithread_cpuset, + .callout_init = test_callout_init, + .callout_reset_sbt_on = test_callout_reset_sbt_on, + ._callout_stop_safe = test_callout_stop_safe, +}; + +#define TP_REMOVE_FROM_HPTS(tp) tp->bits_spare +#define TP_LOG_TEST(tp) tp->t_log_state_set + +static int +test_tcp_output(struct tcpcb *tp) +{ + struct ktest_test_context *ctx = tp->t_fb_ptr; + struct tcp_hptsi *pace = (struct tcp_hptsi*)tp->t_tfo_pending; + struct tcp_hpts_entry *hpts = pace->rp_ent[tp->t_hpts_cpu]; + + call_counts[CCNT_TCP_OUTPUT]++; + if (TP_LOG_TEST(tp)) { + KTEST_LOG(ctx, "=> tcp_output(%p)", tp); + dump_tcpcb(tp); + dump_hpts_entry(ctx, hpts); + } + + if ((TP_REMOVE_FROM_HPTS(tp) & 1) != 0) { + if (TP_LOG_TEST(tp)) + KTEST_LOG(ctx, "=> tcp_hpts_remove(%p)", tp); + tcp_hpts_remove(pace, tp); + } + + if ((TP_REMOVE_FROM_HPTS(tp) & 2) != 0) { + INP_WUNLOCK(&tp->t_inpcb); /* tcp_output unlocks on error */ + return (-1); /* Simulate tcp_output error */ + } + + return (0); +} + +static int +test_tfb_do_queued_segments(struct tcpcb *tp, int flag) +{ + struct ktest_test_context *ctx = tp->t_fb_ptr; + struct tcp_hptsi *pace = (struct tcp_hptsi*)tp->t_tfo_pending; + struct tcp_hpts_entry *hpts = pace->rp_ent[tp->t_hpts_cpu]; + + call_counts[CCNT_TCP_TFB_DO_QUEUED_SEGMENTS]++; + KTEST_LOG(ctx, "=> tfb_do_queued_segments(%p, %d)", tp, flag); + dump_tcpcb(tp); + dump_hpts_entry(ctx, hpts); + + if ((TP_REMOVE_FROM_HPTS(tp) & 1) != 0) { + if (TP_LOG_TEST(tp)) + KTEST_LOG(ctx, "=> tcp_hpts_remove(%p)", tp); + tcp_hpts_remove(pace, tp); + } + + if ((TP_REMOVE_FROM_HPTS(tp) & 2) != 0) { + INP_WUNLOCK(&tp->t_inpcb); /* do_queued_segments unlocks on error */ + return (-1); /* Simulate do_queued_segments error */ + } + + return (0); +} + +static struct tcp_function_block test_tcp_fb = { + .tfb_tcp_block_name = "hpts_test_tcp", + .tfb_tcp_output = test_tcp_output, + .tfb_do_queued_segments = test_tfb_do_queued_segments, +}; + +/* + * Create a minimally initialized tcpcb that can be safely inserted into HPTS. + * This function allocates and initializes all the fields that HPTS code + * reads or writes. + */ +static struct tcpcb * +test_hpts_create_tcpcb(struct ktest_test_context *ctx, struct tcp_hptsi *pace) +{ + struct tcpcb *tp; + + tp = malloc(sizeof(struct tcpcb), M_TCPHPTS, M_WAITOK | M_ZERO); + if (tp) { + rw_init_flags(&tp->t_inpcb.inp_lock, "test-inp", + RW_RECURSE | RW_DUPOK); + refcount_init(&tp->t_inpcb.inp_refcount, 1); + tp->t_inpcb.inp_pcbinfo = &V_tcbinfo; + tp->t_fb = &test_tcp_fb; + tp->t_hpts_cpu = HPTS_CPU_NONE; + STAILQ_INIT(&tp->t_inqueue); + tcp_hpts_init(pace, tp); + + /* Stuff some pointers in the tcb for test purposes. */ + tp->t_fb_ptr = ctx; + tp->t_tfo_pending = (unsigned int*)pace; + } + + return (tp); +} + +/* + * Free a test tcpcb created by test_hpts_create_tcpcb() + */ +static void +test_hpts_free_tcpcb(struct tcpcb *tp) +{ + if (tp == NULL) + return; + + INP_LOCK_DESTROY(&tp->t_inpcb); + free(tp, M_TCPHPTS); +} + +/* + * *********************************************** + * * KTEST functions for testing the HPTS module * + * *********************************************** + */ + +/* + * Validates that the HPTS module is properly loaded and initialized by checking + * that the minimum HPTS time is configured. + */ +KTEST_FUNC(module_load) +{ + test_hpts_init(); + KTEST_NEQUAL(tcp_min_hptsi_time, 0); + KTEST_VERIFY(tcp_bind_threads >= 0 && tcp_bind_threads <= 2); + KTEST_NEQUAL(tcp_hptsi_pace, NULL); + return (0); +} + +/* + * Validates the creation and destruction of tcp_hptsi structures, ensuring + * proper initialization of internal fields and clean destruction. + */ +KTEST_FUNC(hptsi_create_destroy) +{ + struct tcp_hptsi *pace; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + KTEST_NEQUAL(pace->rp_ent, NULL); + KTEST_NEQUAL(pace->cts_last_ran, NULL); + KTEST_VERIFY(pace->rp_num_hptss > 0); + KTEST_VERIFY(pace->rp_num_hptss <= MAXCPU); /* Reasonable upper bound */ + KTEST_VERIFY(pace->grp_cnt >= 1); /* At least one group */ + KTEST_EQUAL(pace->funcs, &test_funcs); /* Verify function pointer was set */ + + /* Verify individual HPTS entries are properly initialized */ + for (uint32_t i = 0; i < pace->rp_num_hptss; i++) { + KTEST_NEQUAL(pace->rp_ent[i], NULL); + KTEST_EQUAL(pace->rp_ent[i]->p_cpu, i); + KTEST_EQUAL(pace->rp_ent[i]->p_hptsi, pace); + KTEST_EQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); + } + + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates that tcp_hptsi structures can be started and stopped properly, + * including verification that threads are created during start and cleaned up + * during stop operations. + */ +KTEST_FUNC(hptsi_start_stop) +{ + struct tcp_hptsi *pace; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + + tcp_hptsi_start(pace); + + /* Verify that entries have threads started */ + struct tcp_hpts_entry *hpts = pace->rp_ent[0]; + KTEST_NEQUAL(hpts->ie_cookie, NULL); /* Should have SWI handler */ + KTEST_EQUAL(hpts->p_hptsi, pace); /* Should point to our pace */ + + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates that multiple tcp_hptsi instances can coexist independently, with + * different configurations and CPU assignments without interfering with each + * other. + */ +KTEST_FUNC(hptsi_independence) +{ + struct tcp_hptsi *pace1, *pace2; + uint16_t cpu1, cpu2; + + test_hpts_init(); + + pace1 = tcp_hptsi_create(&test_funcs, false); + pace2 = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace1, NULL); + KTEST_NEQUAL(pace2, NULL); + KTEST_NEQUAL(pace2->rp_ent, NULL); + + cpu1 = tcp_hptsi_random_cpu(pace1); + cpu2 = tcp_hptsi_random_cpu(pace2); + KTEST_VERIFY(cpu1 < pace1->rp_num_hptss); + KTEST_VERIFY(cpu2 < pace2->rp_num_hptss); + + /* Verify both instances have independent entry arrays */ + KTEST_NEQUAL(pace1->rp_ent, pace2->rp_ent); + /* Verify they may have different CPU counts but both reasonable */ + KTEST_VERIFY(pace1->rp_num_hptss > 0 && pace1->rp_num_hptss <= MAXCPU); + KTEST_VERIFY(pace2->rp_num_hptss > 0 && pace2->rp_num_hptss <= MAXCPU); + + tcp_hptsi_destroy(pace1); + tcp_hptsi_destroy(pace2); + + return (0); +} + +/* + * Validates that custom function injection works correctly, ensuring that + * test-specific implementations of microuptime and others are properly + * called by the HPTS system. + */ +KTEST_FUNC(function_injection) +{ + struct tcp_hptsi *pace; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + KTEST_EQUAL(pace->funcs, &test_funcs); + KTEST_VERIFY(call_counts[CCNT_MICROUPTIME] > 0); + KTEST_VERIFY(call_counts[CCNT_CALLOUT_INIT] > 0); + + tcp_hptsi_start(pace); + KTEST_VERIFY(call_counts[CCNT_SWI_ADD] > 0); + KTEST_VERIFY(tcp_bind_threads == 0 || + call_counts[CCNT_INTR_EVENT_BIND] > 0 || + call_counts[CCNT_INTR_EVENT_BIND_CPUSET] > 0); + KTEST_VERIFY(call_counts[CCNT_CALLOUT_RESET_SBT_ON] > 0); + + tcp_hptsi_stop(pace); + KTEST_VERIFY(call_counts[CCNT_CALLOUT_STOP_SAFE] > 0); + KTEST_VERIFY(call_counts[CCNT_SWI_REMOVE] > 0); + + tcp_hptsi_destroy(pace); + + /* Verify we have a reasonable balance of create/destroy calls */ + KTEST_EQUAL(call_counts[CCNT_SWI_ADD], call_counts[CCNT_SWI_REMOVE]); + KTEST_VERIFY(call_counts[CCNT_CALLOUT_RESET_SBT_ON] <= call_counts[CCNT_CALLOUT_STOP_SAFE]); + + return (0); +} + +/* + * Validates that a tcpcb can be properly initialized for HPTS compatibility, + * ensuring all required fields are set correctly and function pointers are + * valid for safe HPTS operations. + */ +KTEST_FUNC(tcpcb_initialization) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Verify the tcpcb is properly initialized for HPTS */ + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + KTEST_NEQUAL(tp->t_fb, NULL); + KTEST_NEQUAL(tp->t_fb->tfb_tcp_output, NULL); + KTEST_NEQUAL(tp->t_fb->tfb_do_queued_segments, NULL); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + KTEST_EQUAL((tp->t_flags2 & (TF2_HPTS_CPU_SET | TF2_HPTS_CALLS)), 0); + + /* Verify that HPTS-specific fields are initialized */ + KTEST_EQUAL(tp->t_hpts_gencnt, 0); + KTEST_EQUAL(tp->t_hpts_slot, 0); + KTEST_EQUAL(tp->t_hpts_request, 0); + KTEST_EQUAL(tp->t_lro_cpu, 0); + KTEST_VERIFY(tp->t_hpts_cpu < pace->rp_num_hptss); + KTEST_EQUAL(tp->t_inpcb.inp_refcount, 1); + KTEST_VERIFY(!(tp->t_inpcb.inp_flags & INP_DROPPED)); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates that tcpcb structures can be successfully inserted into and removed + * from the HPTS wheel, with proper state tracking and slot assignment during + * the process. + */ +KTEST_FUNC(tcpcb_insertion) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp; + struct tcp_hpts_entry *hpts; + uint32_t timeout_usecs = 10; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + KTEST_EQUAL((tp->t_flags2 & TF2_HPTS_CALLS), 0); + + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 0); + tcp_hpts_insert(pace, tp, timeout_usecs, NULL); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 1); + KTEST_VERIFY(tcp_in_hpts(tp)); + KTEST_VERIFY(tp->t_hpts_slot >= 0); + KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + + hpts = pace->rp_ent[tp->t_hpts_cpu]; + KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + KTEST_EQUAL(tp->t_hpts_request, 0); + KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(timeout_usecs)); + //KTEST_EQUAL(tp->t_hpts_gencnt, 1); + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_remove(pace, tp); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + KTEST_VERIFY(!tcp_in_hpts(tp)); + + KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates the core HPTS timer functionality by verifying that scheduled + * tcpcb entries trigger tcp_output calls at appropriate times, simulating + * real-world timer-driven TCP processing. + */ +KTEST_FUNC(timer_functionality) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcp_hpts_entry *hpts; + struct tcpcb *tp; + int32_t slots_ran; + uint32_t i; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + for (i = 0; i < pace->rp_num_hptss; i++) + dump_hpts_entry(ctx, pace->rp_ent[i]); + + /* Create and insert the tcpcb into the HPTS wheel to wait for 500 usec */ + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + dump_tcpcb(tp); + TP_LOG_TEST(tp) = 1; /* Enable logging for this tcpcb */ + + KTEST_LOG(ctx, "=> tcp_hpts_insert(%p)", tp); + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; /* Mark as needing HPTS processing */ + tcp_hpts_insert(pace, tp, 500, NULL); + INP_WUNLOCK(&tp->t_inpcb); + + dump_tcpcb(tp); + for (i = 0; i < pace->rp_num_hptss; i++) + dump_hpts_entry(ctx, pace->rp_ent[i]); + + hpts = pace->rp_ent[tp->t_hpts_cpu]; + KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + KTEST_EQUAL(hpts->p_prev_slot, 0); + KTEST_EQUAL(hpts->p_cur_slot, 0); + KTEST_EQUAL(hpts->p_runningslot, 0); + KTEST_EQUAL(hpts->p_nxt_slot, 1); + KTEST_EQUAL(hpts->p_hpts_active, 0); + + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp->t_hpts_request, 0); + KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(500)); + + /* Set our test flag to indicate the tcpcb should be removed from the + * wheel when tcp_output is called. */ + TP_REMOVE_FROM_HPTS(tp) = 1; + + /* Test early exit condition: advance time by insufficient amount */ + KTEST_LOG(ctx, "Testing early exit with insufficient time advancement"); + test_time_usec += 1; /* Very small advancement - should cause early exit */ + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Should return 0 slots due to insufficient time advancement */ + KTEST_EQUAL(slots_ran, 0); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); /* No processing should occur */ + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); /* Connection still queued */ + + /* Wait for 498 more usecs and trigger the HPTS workers and verify + * nothing happens yet (total 499 usec) */ + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + test_time_usec += 498; + for (i = 0; i < pace->rp_num_hptss; i++) { + KTEST_LOG(ctx, "=> tcp_hptsi(%p)", pace->rp_ent[i]); + HPTS_LOCK(pace->rp_ent[i]); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(pace->rp_ent[i], true); + HPTS_UNLOCK(pace->rp_ent[i]); + NET_EPOCH_EXIT(et); + + dump_hpts_entry(ctx, pace->rp_ent[i]); + KTEST_VERIFY(slots_ran >= 0); + KTEST_EQUAL(pace->rp_ent[i]->p_prev_slot, 49); + KTEST_EQUAL(pace->rp_ent[i]->p_cur_slot, 49); + } + + dump_tcpcb(tp); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp->t_hpts_request, 0); + KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(500)); + KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + + /* Wait for 1 more usec and trigger the HPTS workers and verify it + * triggers tcp_output this time */ + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + test_time_usec += 1; + for (i = 0; i < pace->rp_num_hptss; i++) { + KTEST_LOG(ctx, "=> tcp_hptsi(%p)", pace->rp_ent[i]); + HPTS_LOCK(pace->rp_ent[i]); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(pace->rp_ent[i], true); + HPTS_UNLOCK(pace->rp_ent[i]); + NET_EPOCH_EXIT(et); + + dump_hpts_entry(ctx, pace->rp_ent[i]); + KTEST_VERIFY(slots_ran >= 0); + KTEST_EQUAL(pace->rp_ent[i]->p_prev_slot, 50); + KTEST_EQUAL(pace->rp_ent[i]->p_cur_slot, 50); + } + + dump_tcpcb(tp); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates HPTS scalability by creating and inserting a LOT of tcpcbs into + * the HPTS wheel, testing performance under high load conditions. + */ +KTEST_FUNC(scalability_tcpcbs) +{ + struct tcp_hptsi *pace; + struct tcpcb **tcpcbs; + uint32_t i, num_tcpcbs = 100000, total_queued = 0; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Allocate array to hold pointers to all tcpcbs */ + tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); + KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + + /* Create a LOT of tcpcbs */ + KTEST_LOG(ctx, "Creating %u tcpcbs...", num_tcpcbs); + for (i = 0; i < num_tcpcbs; i++) { + tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); + if (tcpcbs[i] == NULL) { + KTEST_ERR(ctx, "FAIL: tcpcbs[i] == NULL"); + return (EINVAL); + } + } + + /* Insert all created tcpcbs into HPTS */ + KTEST_LOG(ctx, "Inserting all tcpcbs into HPTS..."); + for (i = 0; i < num_tcpcbs; i++) { + INP_WLOCK(&tcpcbs[i]->t_inpcb); + tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; + /* Insert with varying future timeouts to distribute across slots */ + tcp_hpts_insert(pace, tcpcbs[i], 100 + (i % 1000), NULL); + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + } + + /* Verify total queue counts across all CPUs */ + for (i = 0; i < pace->rp_num_hptss; i++) { + total_queued += pace->rp_ent[i]->p_on_queue_cnt; + } + KTEST_EQUAL(total_queued, num_tcpcbs); + + for (i = 0; i < pace->rp_num_hptss; i++) + dump_hpts_entry(ctx, pace->rp_ent[i]); + + /* Remove all tcpcbs from HPTS */ + KTEST_LOG(ctx, "Removing all tcpcbs from HPTS..."); + for (i = 0; i < num_tcpcbs; i++) { + INP_WLOCK(&tcpcbs[i]->t_inpcb); + if (tcpcbs[i]->t_in_hpts != IHPTS_NONE) { + tcp_hpts_remove(pace, tcpcbs[i]); + } + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + } + + /* Verify all queues are now empty */ + for (i = 0; i < pace->rp_num_hptss; i++) { + if (pace->rp_ent[i]->p_on_queue_cnt != 0) { + KTEST_ERR(ctx, "FAIL: pace->rp_ent[i]->p_on_queue_cnt != 0"); + return (EINVAL); + } + } + + for (i = 0; i < num_tcpcbs; i++) { + test_hpts_free_tcpcb(tcpcbs[i]); + } + free(tcpcbs, M_TCPHPTS); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates wheel wrap scenarios where the timer falls significantly behind + * and needs to process more than one full wheel revolution worth of slots. + */ +KTEST_FUNC(wheel_wrap_recovery) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb **tcpcbs; + uint32_t i, timeout_usecs, num_tcpcbs = 500; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Allocate array to hold pointers to tcpcbs */ + tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); + KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + + /* Create tcpcbs and insert them across many slots */ + for (i = 0; i < num_tcpcbs; i++) { + tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tcpcbs[i], NULL); + TP_REMOVE_FROM_HPTS(tcpcbs[i]) = 1; + + timeout_usecs = ((i * NUM_OF_HPTSI_SLOTS) / num_tcpcbs) * HPTS_USECS_PER_SLOT; /* Spread across slots */ + + INP_WLOCK(&tcpcbs[i]->t_inpcb); + tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tcpcbs[i], timeout_usecs, NULL); + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + } + + /* Fast forward time significantly to trigger wheel wrap */ + test_time_usec += (NUM_OF_HPTSI_SLOTS + 5000) * HPTS_USECS_PER_SLOT; + + for (i = 0; i < pace->rp_num_hptss; i++) { + KTEST_LOG(ctx, "=> tcp_hptsi(%u)", i); + KTEST_NEQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); + + HPTS_LOCK(pace->rp_ent[i]); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(pace->rp_ent[i], true); + HPTS_UNLOCK(pace->rp_ent[i]); + NET_EPOCH_EXIT(et); + + KTEST_EQUAL(slots_ran, NUM_OF_HPTSI_SLOTS-1); /* Should process all slots */ + KTEST_EQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); + KTEST_NEQUAL(pace->rp_ent[i]->p_cur_slot, + pace->rp_ent[i]->p_prev_slot); + } + + /* Cleanup */ + for (i = 0; i < num_tcpcbs; i++) { + INP_WLOCK(&tcpcbs[i]->t_inpcb); + if (tcpcbs[i]->t_in_hpts != IHPTS_NONE) { + tcp_hpts_remove(pace, tcpcbs[i]); + } + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + test_hpts_free_tcpcb(tcpcbs[i]); + } + free(tcpcbs, M_TCPHPTS); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates proper handling of tcpcbs in the IHPTS_MOVING state, which occurs + * when a tcpcb is being processed by the HPTS thread but gets removed. + */ +KTEST_FUNC(tcpcb_moving_state) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb *tp1, *tp2; + struct tcp_hpts_entry *hpts; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Create two tcpcbs on the same CPU/slot */ + tp1 = test_hpts_create_tcpcb(ctx, pace); + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp1, NULL); + KTEST_NEQUAL(tp2, NULL); + + /* Force them to the same CPU for predictable testing */ + tp1->t_hpts_cpu = 0; + tp2->t_hpts_cpu = 0; + + /* Insert both into the same slot */ + INP_WLOCK(&tp1->t_inpcb); + tp1->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp1, 100, NULL); + INP_WUNLOCK(&tp1->t_inpcb); + + INP_WLOCK(&tp2->t_inpcb); + tp2->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp2, 100, NULL); + INP_WUNLOCK(&tp2->t_inpcb); + + hpts = pace->rp_ent[0]; + + /* Manually transition tp1 to MOVING state to simulate race condition */ + HPTS_LOCK(hpts); + tp1->t_in_hpts = IHPTS_MOVING; + tp1->t_hpts_slot = -1; /* Mark for removal */ + HPTS_UNLOCK(hpts); + + /* Set time and run HPTS to process the moving state */ + test_time_usec += 100; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + KTEST_VERIFY(slots_ran >= 0); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); /* Shouldn't call on both */ + + /* tp1 should be cleaned up and removed */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); + /* tp2 should have been processed normally */ + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); + + test_hpts_free_tcpcb(tp1); + test_hpts_free_tcpcb(tp2); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates that tcpcbs with deferred requests (t_hpts_request > 0) are + * properly handled and re-inserted into appropriate future slots after + * the wheel processes enough slots to accommodate the original request. + */ +KTEST_FUNC(deferred_requests) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb *tp, *tp2; + struct tcp_hpts_entry *hpts; + uint32_t large_timeout_usecs = (NUM_OF_HPTSI_SLOTS + 5000) * HPTS_USECS_PER_SLOT; /* Beyond wheel capacity */ + uint32_t huge_timeout_usecs = (NUM_OF_HPTSI_SLOTS * 3) * HPTS_USECS_PER_SLOT; /* 3x wheel capacity */ + uint32_t initial_request; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + + /* Insert with a request that exceeds current wheel capacity */ + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp, large_timeout_usecs, NULL); + INP_WUNLOCK(&tp->t_inpcb); + + /* Verify it was inserted with a deferred request */ + dump_tcpcb(tp); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_VERIFY(tp->t_hpts_request > 0); + KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + + hpts = pace->rp_ent[tp->t_hpts_cpu]; + + /* Advance time to process deferred requests */ + test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; + + /* Process the wheel to handle deferred requests */ + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + dump_hpts_entry(ctx, hpts); + KTEST_GREATER_THAN(slots_ran, 0); + dump_tcpcb(tp); + KTEST_EQUAL(tp->t_hpts_request, 0); + + /* Test incremental deferred request processing over multiple cycles */ + KTEST_LOG(ctx, "Testing incremental deferred request processing"); + + /* Create a new connection with an even larger request */ + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp2, NULL); + tp2->t_hpts_cpu = tp->t_hpts_cpu; /* Same CPU for predictable testing */ + + INP_WLOCK(&tp2->t_inpcb); + tp2->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp2, huge_timeout_usecs, NULL); + INP_WUNLOCK(&tp2->t_inpcb); + + /* Verify initial deferred request */ + initial_request = tp2->t_hpts_request; + KTEST_VERIFY(initial_request > NUM_OF_HPTSI_SLOTS); + + /* Process one wheel cycle - should reduce but not eliminate request */ + test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Request should be reduced but not zero */ + KTEST_GREATER_THAN(initial_request, tp2->t_hpts_request); + KTEST_VERIFY(tp2->t_hpts_request > 0); + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); /* Still queued */ + + /* For huge_timeout_usecs = NUM_OF_HPTSI_SLOTS * 3 * HPTS_USECS_PER_SLOT, we need ~3 cycles to complete. + * Each cycle can reduce the request by at most NUM_OF_HPTSI_SLOTS. */ + test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* After second cycle, request should be reduced significantly (likely by ~NUM_OF_HPTSI_SLOTS) */ + KTEST_VERIFY(tp2->t_hpts_request < initial_request); + KTEST_VERIFY(tp2->t_hpts_request > 0); /* But not yet zero for such a large request */ + + /* Clean up second connection */ + INP_WLOCK(&tp2->t_inpcb); + if (tp2->t_in_hpts != IHPTS_NONE) { + tcp_hpts_remove(pace, tp2); + } + INP_WUNLOCK(&tp2->t_inpcb); + test_hpts_free_tcpcb(tp2); + + /* Clean up */ + INP_WLOCK(&tp->t_inpcb); + if (tp->t_in_hpts != IHPTS_NONE) { + tcp_hpts_remove(pace, tp); + } + INP_WUNLOCK(&tp->t_inpcb); + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates CPU assignment and affinity mechanisms, including flowid-based + * assignment, random fallback scenarios, and explicit CPU setting. Tests + * the actual cpu assignment logic in hpts_cpuid via tcp_set_hpts. + */ +KTEST_FUNC(cpu_assignment) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp1, *tp2, *tp2_dup, *tp3; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + + /* Test random CPU assignment (no flowid) */ + tp1 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp1, NULL); + tp1->t_inpcb.inp_flowtype = M_HASHTYPE_NONE; + INP_WLOCK(&tp1->t_inpcb); + tcp_set_hpts(pace, tp1); + INP_WUNLOCK(&tp1->t_inpcb); + KTEST_VERIFY(tp1->t_hpts_cpu < pace->rp_num_hptss); + KTEST_VERIFY(tp1->t_flags2 & TF2_HPTS_CPU_SET); + + /* Test flowid-based assignment */ + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp2, NULL); + tp2->t_inpcb.inp_flowtype = M_HASHTYPE_RSS_TCP_IPV4; + tp2->t_inpcb.inp_flowid = 12345; + INP_WLOCK(&tp2->t_inpcb); + tcp_set_hpts(pace, tp2); + INP_WUNLOCK(&tp2->t_inpcb); + KTEST_VERIFY(tp2->t_hpts_cpu < pace->rp_num_hptss); + KTEST_VERIFY(tp2->t_flags2 & TF2_HPTS_CPU_SET); + + /* With the same flowid, should get same CPU assignment */ + tp2_dup = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp2_dup, NULL); + tp2_dup->t_inpcb.inp_flowtype = M_HASHTYPE_RSS_TCP_IPV4; + tp2_dup->t_inpcb.inp_flowid = 12345; + INP_WLOCK(&tp2_dup->t_inpcb); + tcp_set_hpts(pace, tp2_dup); + INP_WUNLOCK(&tp2_dup->t_inpcb); + KTEST_EQUAL(tp2_dup->t_hpts_cpu, tp2->t_hpts_cpu); + + /* Test explicit CPU setting */ + tp3 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp3, NULL); + tp3->t_hpts_cpu = 1; /* Assume we have at least 2 CPUs */ + tp3->t_flags2 |= TF2_HPTS_CPU_SET; + INP_WLOCK(&tp3->t_inpcb); + tcp_set_hpts(pace, tp3); + INP_WUNLOCK(&tp3->t_inpcb); + KTEST_EQUAL(tp3->t_hpts_cpu, 1); + + test_hpts_free_tcpcb(tp1); + test_hpts_free_tcpcb(tp2); + test_hpts_free_tcpcb(tp2_dup); + test_hpts_free_tcpcb(tp3); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates edge cases in slot calculation including boundary conditions + * around slot 0, maximum slots, and slot wrapping arithmetic. + */ +KTEST_FUNC(slot_boundary_conditions) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Test insertion at slot 0 */ + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp, 0, NULL); /* Should insert immediately (0 timeout) */ + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_remove(pace, tp); + INP_WUNLOCK(&tp->t_inpcb); + + /* Test insertion at maximum slot value */ + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp, (NUM_OF_HPTSI_SLOTS - 1) * HPTS_USECS_PER_SLOT, NULL); + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_remove(pace, tp); + INP_WUNLOCK(&tp->t_inpcb); + + /* Test very small timeout values */ + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp, 1, NULL); + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(1)); /* Should convert 1 usec to slot */ + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_remove(pace, tp); + INP_WUNLOCK(&tp->t_inpcb); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates HPTS behavior under high load conditions, including proper + * processing of many connections and connection count tracking. + */ +KTEST_FUNC(dynamic_sleep_adjustment) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb **tcpcbs; + struct tcp_hpts_entry *hpts; + uint32_t i, num_tcpcbs = DEFAULT_CONNECTION_THRESHOLD + 50; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Create many connections to exceed threshold */ + tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); + KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + + for (i = 0; i < num_tcpcbs; i++) { + tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tcpcbs[i], NULL); + tcpcbs[i]->t_hpts_cpu = 0; /* Force all to CPU 0 */ + INP_WLOCK(&tcpcbs[i]->t_inpcb); + tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; + TP_REMOVE_FROM_HPTS(tcpcbs[i]) = 1; /* Will be removed after output */ + tcp_hpts_insert(pace, tcpcbs[i], 100, NULL); + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + } + + hpts = pace->rp_ent[0]; + dump_hpts_entry(ctx, hpts); + + /* Verify we're above threshold */ + KTEST_GREATER_THAN(hpts->p_on_queue_cnt, DEFAULT_CONNECTION_THRESHOLD); + + /* Run HPTS to process many connections */ + test_time_usec += 100; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Verify HPTS processed slots and connections correctly */ + KTEST_GREATER_THAN(slots_ran, 0); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], num_tcpcbs); + + /* Verify all connections were removed from queue */ + KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + + /* Cleanup */ + for (i = 0; i < num_tcpcbs; i++) { + test_hpts_free_tcpcb(tcpcbs[i]); + } + free(tcpcbs, M_TCPHPTS); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates handling of concurrent insert/remove operations and race conditions + * between HPTS processing and user operations. + */ +KTEST_FUNC(concurrent_operations) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp1, *tp2; + struct tcp_hpts_entry *hpts; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp1 = test_hpts_create_tcpcb(ctx, pace); + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp1, NULL); + KTEST_NEQUAL(tp2, NULL); + + /* Force all to CPU 0 */ + tp1->t_hpts_cpu = 0; + tp2->t_hpts_cpu = 0; + + /* Insert tp1 */ + INP_WLOCK(&tp1->t_inpcb); + tp1->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp1, 100, NULL); + INP_WUNLOCK(&tp1->t_inpcb); + + /* Insert tp2 into same slot */ + INP_WLOCK(&tp2->t_inpcb); + tp2->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp2, 100, NULL); + INP_WUNLOCK(&tp2->t_inpcb); + + /* Verify both are inserted */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + + /* Verify they're both assigned to the same slot */ + KTEST_EQUAL(tp1->t_hpts_slot, tp2->t_hpts_slot); + + /* Verify queue count reflects both connections */ + KTEST_EQUAL(tp1->t_hpts_cpu, tp2->t_hpts_cpu); /* Should be on same CPU */ + hpts = pace->rp_ent[tp1->t_hpts_cpu]; + KTEST_EQUAL(hpts->p_on_queue_cnt, 2); + + /* Remove tp1 while tp2 is still there */ + INP_WLOCK(&tp1->t_inpcb); + tcp_hpts_remove(pace, tp1); + INP_WUNLOCK(&tp1->t_inpcb); + + /* Verify tp1 removed, tp2 still there */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + + /* Verify queue count decreased by one */ + KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + + /* Remove tp2 */ + INP_WLOCK(&tp2->t_inpcb); + tcp_hpts_remove(pace, tp2); + INP_WUNLOCK(&tp2->t_inpcb); + + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); + + /* Verify queue is now completely empty */ + KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + + test_hpts_free_tcpcb(tp1); + test_hpts_free_tcpcb(tp2); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates the queued segments processing path via tfb_do_queued_segments, + * which is an alternative to direct tcp_output calls. + */ +KTEST_FUNC(queued_segments_processing) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb *tp; + struct tcp_hpts_entry *hpts; + struct mbuf *fake_mbuf; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + + /* Create a minimal fake mbuf that has valid STAILQ pointers */ + fake_mbuf = malloc(sizeof(struct mbuf), M_TCPHPTS, M_WAITOK | M_ZERO); + KTEST_NEQUAL(fake_mbuf, NULL); + + /* Set up for queued segments path */ + tp->t_flags2 |= (TF2_HPTS_CALLS | TF2_SUPPORTS_MBUFQ); + STAILQ_INSERT_TAIL(&tp->t_inqueue, fake_mbuf, m_stailqpkt); + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_insert(pace, tp, 100, NULL); + INP_WUNLOCK(&tp->t_inpcb); + + hpts = pace->rp_ent[tp->t_hpts_cpu]; + + /* Run HPTS and verify queued segments path is taken */ + test_time_usec += 100; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + KTEST_VERIFY(slots_ran >= 0); + KTEST_EQUAL(call_counts[CCNT_TCP_TFB_DO_QUEUED_SEGMENTS], 1); + + /* Connection should be removed from HPTS after processing */ + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + + /* Clean up the fake mbuf if it's still in the queue */ + if (!STAILQ_EMPTY(&tp->t_inqueue)) { + struct mbuf *m = STAILQ_FIRST(&tp->t_inqueue); + STAILQ_REMOVE_HEAD(&tp->t_inqueue, m_stailqpkt); + free(m, M_TCPHPTS); + } + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates the direct wake mechanism and wake inhibition logic when + * the connection count exceeds thresholds. + */ +KTEST_FUNC(direct_wake_mechanism) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp; + struct tcp_hpts_entry *hpts; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + hpts = pace->rp_ent[tp->t_hpts_cpu]; + + /* Test direct wake when not over threshold */ + HPTS_LOCK(hpts); + hpts->p_on_queue_cnt = 50; /* Below threshold */ + hpts->p_hpts_wake_scheduled = 0; + tcp_hpts_wake(hpts); + KTEST_EQUAL(hpts->p_hpts_wake_scheduled, 1); + KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 1); + HPTS_UNLOCK(hpts); + + /* Reset for next test */ + hpts->p_hpts_wake_scheduled = 0; + call_counts[CCNT_SWI_SCHED] = 0; + + /* Test wake inhibition when over threshold */ + HPTS_LOCK(hpts); + hpts->p_on_queue_cnt = 200; /* Above threshold */ + hpts->p_direct_wake = 1; /* Request direct wake */ + tcp_hpts_wake(hpts); + KTEST_EQUAL(hpts->p_hpts_wake_scheduled, 0); /* Should be inhibited */ + KTEST_EQUAL(hpts->p_direct_wake, 0); /* Should be cleared */ + KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 0); /* No SWI scheduled */ + HPTS_UNLOCK(hpts); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates HPTS collision detection when attempting to run HPTS while + * it's already active. + */ +KTEST_FUNC(hpts_collision_detection) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcp_hpts_entry *hpts; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + hpts = pace->rp_ent[0]; + + /* Mark HPTS as active */ + HPTS_LOCK(hpts); + hpts->p_hpts_active = 1; + HPTS_UNLOCK(hpts); + + /* Attempt to run HPTS again - should detect collision */ + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, false); /* from_callout = false */ + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Should return 0 indicating no work done due to collision */ + KTEST_EQUAL(slots_ran, 0); + + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates generation count handling for race condition detection between + * HPTS processing and connection insertion/removal operations. + */ +KTEST_FUNC(generation_count_validation) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcp_hpts_entry *hpts; + struct tcpcb *tp1, *tp2; + uint32_t initial_gencnt, slot_to_test = 10; + uint32_t timeout_usecs = slot_to_test * HPTS_USECS_PER_SLOT; + uint32_t tp2_original_gencnt; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + hpts = pace->rp_ent[0]; + + /* Record initial generation count for the test slot */ + initial_gencnt = hpts->p_hptss[slot_to_test].gencnt; + + /* Create and insert first connection */ + tp1 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp1, NULL); + tp1->t_hpts_cpu = 0; /* Force to CPU 0 */ + + INP_WLOCK(&tp1->t_inpcb); + tp1->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp1, timeout_usecs, NULL); + INP_WUNLOCK(&tp1->t_inpcb); + + /* Verify connection stored the generation count */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp1->t_hpts_slot, slot_to_test); + KTEST_EQUAL(tp1->t_hpts_gencnt, initial_gencnt); + + /* Create second connection but don't insert yet */ + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp2, NULL); + tp2->t_hpts_cpu = 0; /* Force to CPU 0 */ + + /* Force generation count increment by processing the slot */ + test_time_usec += (slot_to_test + 1) * HPTS_USECS_PER_SLOT; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Verify processing occurred */ + KTEST_VERIFY(slots_ran > 0); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); + + /* Verify generation count was incremented */ + KTEST_EQUAL(hpts->p_hptss[slot_to_test].gencnt, initial_gencnt + 1); + + /* Verify first connection was processed and removed */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); + + /* Insert second connection and record its generation count */ + INP_WLOCK(&tp2->t_inpcb); + tp2->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp2, timeout_usecs, NULL); + INP_WUNLOCK(&tp2->t_inpcb); + + /* Verify connection was inserted successfully */ + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + + /* Record the generation count that tp2 received */ + tp2_original_gencnt = tp2->t_hpts_gencnt; + + /* Test generation count mismatch detection during processing */ + /* Manually set stale generation count to simulate race condition */ + tp2->t_hpts_gencnt = tp2_original_gencnt + 100; /* Force a mismatch */ + + /* Process the slot to trigger generation count validation */ + test_time_usec += (slot_to_test + 1) * HPTS_USECS_PER_SLOT; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Connection should be processed despite generation count mismatch */ + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); /* Processed and released */ + + /* The key test: HPTS should handle mismatched generation counts gracefully */ + KTEST_VERIFY(slots_ran > 0); /* Processing should still occur */ + + test_hpts_free_tcpcb(tp1); + test_hpts_free_tcpcb(tp2); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +static const struct ktest_test_info tests[] = { + KTEST_INFO(module_load), + KTEST_INFO(hptsi_create_destroy), + KTEST_INFO(hptsi_start_stop), + KTEST_INFO(hptsi_independence), + KTEST_INFO(function_injection), + KTEST_INFO(tcpcb_initialization), + KTEST_INFO(tcpcb_insertion), + KTEST_INFO(timer_functionality), + KTEST_INFO(scalability_tcpcbs), + KTEST_INFO(wheel_wrap_recovery), + KTEST_INFO(tcpcb_moving_state), + KTEST_INFO(deferred_requests), + KTEST_INFO(cpu_assignment), + KTEST_INFO(slot_boundary_conditions), + KTEST_INFO(dynamic_sleep_adjustment), + KTEST_INFO(concurrent_operations), + KTEST_INFO(queued_segments_processing), + KTEST_INFO(direct_wake_mechanism), + KTEST_INFO(hpts_collision_detection), + KTEST_INFO(generation_count_validation), +}; + +KTEST_MODULE_DECLARE(ktest_tcphpts, tests); +KTEST_MODULE_DEPEND(ktest_tcphpts, tcphpts); |