/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023-2025 Ruslan Bukin * * This work was supported by Innovate UK project 105694, "Digital Security * by Design (DSbD) Technology Platform Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Hardware Tracing framework. * * The framework manages hardware tracing units that collect information * about software execution and store it as events in highly compressed format * into DRAM. The events cover information about control flow changes of a * program, whether branches taken or not, exceptions taken, timing information, * cycles elapsed and more. That allows us to restore entire program flow of a * given application without performance impact. * * Design overview. 
 * * The framework provides character devices for mmap(2) and ioctl(2) system * calls to allow the user to manage CPU (hardware) tracing units. * * /dev/hwt: * .ioctl: * hwt_ioctl(): * a) HWT_IOC_ALLOC * Allocates kernel tracing context CTX based on requested mode * of operation. Verifies the information that comes with the * request (pid, cpus), allocates unique ID for the context. * Creates a new character device for CTX management. * * /dev/hwt_%d[_%d], ident[, thread_id] * .mmap * Maps tracing buffers of the corresponding thread to userspace. * .ioctl * hwt_thread_ioctl(): * a) HWT_IOC_START * Enables tracing unit for a given context. * b) HWT_IOC_RECORD_GET * Transfers (small) record entries collected during program * execution for a given context to userspace, such as mmaping * tables of executable and dynamic libraries, interpreter, * kernel mappings, tid of threads created, etc. * c) HWT_IOC_SET_CONFIG * Allows specifying backend-specific configuration of the * trace unit. * d) HWT_IOC_WAKEUP * Wakes up a thread that is currently sleeping. * e) HWT_IOC_BUFPTR_GET * Transfers current hardware pointer in the filling buffer * to the userspace. * f) HWT_IOC_SVC_BUF * To avoid data loss, userspace may notify kernel it has * copied out the given buffer, so kernel is ok to overwrite it. * * HWT context lifecycle in THREAD mode of operation: * 1. User invokes HWT_IOC_ALLOC ioctl with information about pid to trace and * size of the buffers for the trace data to allocate. * Some architectures may have different tracing units supported, so user * also provides backend name to use for this context, e.g. "coresight". * 2. Kernel allocates context, looks up the proc for the given pid. Then it * creates first hwt_thread in the context and allocates trace buffers for * it. Immediately, kernel initializes tracing backend. * Kernel creates character device and returns unique identifier of * trace context to the user. * 3. 
To manage the new context, user opens the character device created. * User invokes HWT_IOC_START ioctl, kernel marks context as RUNNING. * At this point any HWT hook invocation by scheduler enables/disables * tracing for threads associated with the context (threads of the proc). * Any new thread creation (of the target proc) will invoke the * corresponding hooks in HWT framework, so that a new hwt_thread and buffers * are allocated and a character device for mmap(2) is created on the fly. * 4. User issues HWT_IOC_RECORD_GET ioctl to fetch information about mmaping * tables and threads created during application startup. * 5. User mmaps tracing buffers of each thread to userspace (using * /dev/hwt_%d_%d % (ident, thread_id) character devices). * 6. User can repeat 4 if expected thread is not yet created during target * application execution. * 7. User issues HWT_IOC_BUFPTR_GET ioctl to get current filling level of the * hardware buffer of a given thread. * 8. User invokes trace decoder library to process available data and see the * results in human readable form. * 9. User repeats 7 if needed. * * HWT context lifecycle in CPU mode of operation: * 1. User invokes HWT_IOC_ALLOC ioctl providing a set of CPUs to trace within * a single CTX. * 2. Kernel verifies the set of CPUs and allocates tracing context, creates * a buffer for each CPU. * Kernel creates a character device for every CPU provided in the request. * Kernel initializes tracing backend. * 3. User opens character devices of interest to map the buffers to userspace. * User can start tracing by invoking HWT_IOC_START on any character * device within the context, entire context will be marked as RUNNING. * 4. The rest is similar to the THREAD mode. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HWT_DEBUG #undef HWT_DEBUG #ifdef HWT_DEBUG #define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) #else #define dprintf(fmt, ...) 
#endif static eventhandler_tag hwt_exit_tag; static struct cdev *hwt_cdev; static struct cdevsw hwt_cdevsw = { .d_version = D_VERSION, .d_name = "hwt", .d_mmap_single = NULL, .d_ioctl = hwt_ioctl }; static void hwt_process_exit(void *arg __unused, struct proc *p) { struct hwt_owner *ho; /* Stop HWTs associated with exiting owner, if any. */ ho = hwt_ownerhash_lookup(p); if (ho) hwt_owner_shutdown(ho); } static int hwt_load(void) { struct make_dev_args args; int error; make_dev_args_init(&args); args.mda_devsw = &hwt_cdevsw; args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; args.mda_uid = UID_ROOT; args.mda_gid = GID_WHEEL; args.mda_mode = 0660; args.mda_si_drv1 = NULL; hwt_backend_load(); hwt_ctx_load(); hwt_contexthash_load(); hwt_ownerhash_load(); hwt_record_load(); error = make_dev_s(&args, &hwt_cdev, "hwt"); if (error != 0) return (error); hwt_exit_tag = EVENTHANDLER_REGISTER(process_exit, hwt_process_exit, NULL, EVENTHANDLER_PRI_ANY); hwt_hook_load(); return (0); } static int hwt_unload(void) { hwt_hook_unload(); EVENTHANDLER_DEREGISTER(process_exit, hwt_exit_tag); destroy_dev(hwt_cdev); hwt_record_unload(); hwt_ownerhash_unload(); hwt_contexthash_unload(); hwt_ctx_unload(); hwt_backend_unload(); return (0); } static int hwt_modevent(module_t mod, int type, void *data) { int error; switch (type) { case MOD_LOAD: error = hwt_load(); break; case MOD_UNLOAD: error = hwt_unload(); break; default: error = 0; break; } return (error); } static moduledata_t hwt_mod = { "hwt", hwt_modevent, NULL }; DECLARE_MODULE(hwt, hwt_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); MODULE_VERSION(hwt, 1);