aboutsummaryrefslogtreecommitdiff
path: root/sideband/src/pt_sb_pevent.c
diff options
context:
space:
mode:
Diffstat (limited to 'sideband/src/pt_sb_pevent.c')
-rw-r--r--sideband/src/pt_sb_pevent.c1722
1 files changed, 1722 insertions, 0 deletions
diff --git a/sideband/src/pt_sb_pevent.c b/sideband/src/pt_sb_pevent.c
new file mode 100644
index 000000000000..b16ca7325075
--- /dev/null
+++ b/sideband/src/pt_sb_pevent.c
@@ -0,0 +1,1722 @@
+/*
+ * Copyright (c) 2017-2019, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libipt-sb.h"
+
+#include "intel-pt.h"
+
+
+#ifndef FEATURE_PEVENT
+
+int pt_sb_alloc_pevent_decoder(struct pt_sb_session *session,
+ const struct pt_sb_pevent_config *config)
+{
+ (void) session;
+ (void) config;
+
+ return -pte_not_supported;
+}
+
+#else /* FEATURE_PEVENT */
+
+#include "pt_sb_pevent.h"
+#include "pt_sb_session.h"
+#include "pt_sb_context.h"
+#include "pt_sb_file.h"
+#include "pt_compiler.h"
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+# define snprintf _snprintf_c
+#endif
+
+
+#ifndef FEATURE_ELF
+
+static int elf_get_abi(FILE *file)
+{
+ if (!file)
+ return -pte_internal;
+
+ return pt_sb_abi_unknown;
+}
+
+#else /* FEATURE_ELF */
+
+#include <elf.h>
+
+
+static int elf_get_abi(FILE *file)
+{
+ uint8_t e_ident[EI_NIDENT];
+ size_t count;
+ int status;
+
+ if (!file)
+ return -pte_internal;
+
+ status = fseek(file, 0, SEEK_SET);
+ if (status < 0)
+ return pt_sb_abi_unknown;
+
+ count = fread(e_ident, sizeof(e_ident), 1, file);
+ if (count != 1)
+ return pt_sb_abi_unknown;
+
+ status = memcmp(e_ident, ELFMAG, SELFMAG);
+ if (status != 0)
+ return pt_sb_abi_unknown;
+
+ if (e_ident[EI_VERSION] != EV_CURRENT)
+ return pt_sb_abi_unknown;
+
+ switch (e_ident[EI_CLASS]) {
+ default:
+ break;
+
+ case ELFCLASS64:
+ return pt_sb_abi_x64;
+
+ case ELFCLASS32: {
+ Elf32_Ehdr ehdr;
+
+ status = fseek(file, 0, SEEK_SET);
+ if (status < 0)
+ break;
+
+ count = fread(&ehdr, sizeof(ehdr), 1, file);
+ if (count != 1)
+ break;
+
+ switch (ehdr.e_machine) {
+ default:
+ break;
+
+ case EM_386:
+ return pt_sb_abi_ia32;
+
+ case EM_X86_64:
+ return pt_sb_abi_x32;
+ }
+ }
+ break;
+ }
+
+ return pt_sb_abi_unknown;
+}
+
+#endif /* FEATURE_ELF */
+
+static int pt_sb_pevent_error(const struct pt_sb_session *session, int errcode,
+ const struct pt_sb_pevent_priv *priv)
+{
+ const char *filename;
+ uint64_t offset;
+
+ filename = NULL;
+ offset = 0ull;
+
+ if (priv) {
+ const uint8_t *pos, *begin;
+
+ pos = priv->current;
+ if (!pos)
+ pos = priv->next;
+
+ begin = priv->begin;
+ if (pos < begin)
+ return -pte_internal;
+
+ filename = priv->filename;
+ offset = (uint64_t) (int64_t) (pos - begin);
+ }
+
+ return pt_sb_error(session, errcode, filename, offset);
+}
+
+static int pt_sb_pevent_track_abi(struct pt_sb_context *context,
+ const char *filename)
+{
+ FILE *file;
+ int abi;
+
+ if (!context || !filename)
+ return -pte_internal;
+
+ if (context->abi)
+ return 0;
+
+ file = fopen(filename, "rb");
+ if (!file)
+ return 0;
+
+ abi = elf_get_abi(file);
+
+ fclose(file);
+
+ if (abi < 0)
+ return abi;
+
+ context->abi = (enum pt_sb_abi) abi;
+
+ return 0;
+}
+
+static int pt_sb_pevent_find_vdso(const char **pvdso,
+ const struct pt_sb_pevent_priv *priv,
+ const struct pt_sb_context *context)
+{
+ const char *vdso;
+
+ if (!pvdso || !priv || !context)
+ return -pte_internal;
+
+ vdso = NULL;
+
+ switch (context->abi) {
+ case pt_sb_abi_unknown:
+ break;
+
+ case pt_sb_abi_x64:
+ vdso = priv->vdso_x64;
+ break;
+
+ case pt_sb_abi_x32:
+ vdso = priv->vdso_x32;
+ break;
+
+ case pt_sb_abi_ia32:
+ vdso = priv->vdso_ia32;
+ break;
+ }
+
+ if (!vdso)
+ return -pte_bad_config;
+
+ *pvdso = vdso;
+
+ return 0;
+}
+
+static void pt_sb_pevent_dtor(void *priv_arg)
+{
+ struct pt_sb_pevent_priv *priv;
+ struct pt_sb_context *context;
+
+ priv = (struct pt_sb_pevent_priv *) priv_arg;
+ if (!priv)
+ return;
+
+ context = priv->next_context;
+ if (context)
+ pt_sb_ctx_put(context);
+
+ context = priv->context;
+ if (context)
+ pt_sb_ctx_put(context);
+
+ free(priv->filename);
+ free(priv->sysroot);
+ free(priv->vdso_x64);
+ free(priv->vdso_x32);
+ free(priv->vdso_ia32);
+ free(priv->begin);
+ free(priv);
+}
+
+static int pt_sb_pevent_init_path(char **dst, const char *src)
+{
+ size_t len;
+ char *copy;
+
+ if (!dst)
+ return -pte_internal;
+
+ if (!src) {
+ *dst = NULL;
+ return 0;
+ }
+
+ len = strnlen(src, FILENAME_MAX);
+ if (len == FILENAME_MAX)
+ return -pte_invalid;
+
+ len += 1;
+ copy = malloc(len);
+ if (!copy)
+ return -pte_nomem;
+
+ memcpy(copy, src, len);
+
+ *dst = copy;
+
+ return 0;
+}
+
+int pt_sb_pevent_init(struct pt_sb_pevent_priv *priv,
+ const struct pt_sb_pevent_config *config)
+{
+ const char *filename;
+ size_t size;
+ void *buffer;
+ int errcode;
+
+ if (!priv || !config)
+ return -pte_internal;
+
+ /* This is the first version - we need all the fields. */
+ if (config->size < sizeof(*config))
+ return -pte_invalid;
+
+ filename = config->filename;
+ if (!filename)
+ return -pte_invalid;
+
+ buffer = NULL;
+ size = 0;
+ errcode = pt_sb_file_load(&buffer, &size, filename,
+ config->begin, config->end);
+ if (errcode < 0)
+ return errcode;
+
+ memset(priv, 0, sizeof(*priv));
+ priv->begin = (uint8_t *) buffer;
+ priv->end = (uint8_t *) buffer + size;
+ priv->next = (uint8_t *) buffer;
+
+ errcode = pt_sb_pevent_init_path(&priv->filename, filename);
+ if (errcode < 0) {
+ pt_sb_pevent_dtor(priv);
+ return errcode;
+ }
+
+ errcode = pt_sb_pevent_init_path(&priv->sysroot, config->sysroot);
+ if (errcode < 0) {
+ pt_sb_pevent_dtor(priv);
+ return errcode;
+ }
+
+ errcode = pt_sb_pevent_init_path(&priv->vdso_x64, config->vdso_x64);
+ if (errcode < 0) {
+ pt_sb_pevent_dtor(priv);
+ return errcode;
+ }
+
+ errcode = pt_sb_pevent_init_path(&priv->vdso_x32, config->vdso_x32);
+ if (errcode < 0) {
+ pt_sb_pevent_dtor(priv);
+ return errcode;
+ }
+
+ errcode = pt_sb_pevent_init_path(&priv->vdso_ia32, config->vdso_ia32);
+ if (errcode < 0) {
+ pt_sb_pevent_dtor(priv);
+ return errcode;
+ }
+
+ pev_config_init(&priv->pev);
+ priv->pev.sample_type = config->sample_type;
+ priv->pev.time_shift = config->time_shift;
+ priv->pev.time_mult = config->time_mult;
+ priv->pev.time_zero = config->time_zero;
+
+ priv->kernel_start = config->kernel_start;
+ priv->tsc_offset = config->tsc_offset;
+ priv->location = ploc_unknown;
+
+ return 0;
+}
+
+static int pt_sb_pevent_fetch(uint64_t *ptsc, struct pt_sb_pevent_priv *priv)
+{
+ struct pev_event *event;
+ const uint8_t *pos;
+ uint64_t tsc, offset;
+ int size;
+
+ if (!ptsc || !priv)
+ return -pte_internal;
+
+ pos = priv->next;
+ event = &priv->event;
+
+ /* Consume the current record early so we get the offset right when
+ * diagnosing fetch errors.
+ */
+ priv->current = pos;
+
+ size = pev_read(event, pos, priv->end, &priv->pev);
+ if (size < 0)
+ return size;
+
+ priv->next = pos + size;
+
+ /* If we don't have a time sample, set @ptsc to zero to process the
+ * record immediately.
+ */
+ if (!event->sample.time) {
+ *ptsc = 0ull;
+ return 0;
+ }
+
+ /* Subtract a pre-defined offset to cause sideband events from this
+ * channel to be applied a little earlier.
+ *
+ * We don't want @tsc to wrap around when subtracting @offset, though.
+ * This would suddenly push the event very far out and essentially block
+ * this sideband channel.
+ *
+ * On the other hand, we want to allow 'negative' offsets. And for
+ * those, we want to avoid wrapping around in the other direction.
+ */
+ offset = priv->tsc_offset;
+ tsc = event->sample.tsc;
+ if (offset <= tsc)
+ tsc -= offset;
+ else {
+ if (0ll <= (int64_t) offset)
+ tsc = 0ull;
+ else {
+ if (tsc <= offset)
+ tsc -= offset;
+ else
+ tsc = UINT64_MAX;
+ }
+ }
+
+ /* We update the event record's timestamp, as well, so we will print the
+ * updated tsc and apply the event at the right time.
+ *
+ * Note that we only update our copy in @priv, not the sideband stream.
+ */
+ event->sample.tsc = tsc;
+ *ptsc = tsc;
+
+ return 0;
+}
+
+static int pt_sb_pevent_print_event(const struct pev_event *event,
+ FILE *stream, uint32_t flags)
+{
+ if (!event)
+ return -pte_internal;
+
+ switch (event->type) {
+ default:
+ if (flags & ptsbp_compact)
+ fprintf(stream, "UNKNOWN (%x, %x)", event->type,
+ event->misc);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "UNKNOWN");
+ fprintf(stream, "\n type: %x", event->type);
+ fprintf(stream, "\n misc: %x", event->misc);
+ }
+
+ break;
+
+ case PERF_RECORD_MMAP: {
+ const struct pev_record_mmap *mmap;
+
+ mmap = event->record.mmap;
+ if (!mmap)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_MMAP %x/%x, %" PRIx64
+ ", %" PRIx64 ", %" PRIx64 ", %s",
+ mmap->pid, mmap->tid, mmap->addr, mmap->len,
+ mmap->pgoff, mmap->filename);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_MMAP");
+ fprintf(stream, "\n pid: %x", mmap->pid);
+ fprintf(stream, "\n tid: %x", mmap->tid);
+ fprintf(stream, "\n addr: %" PRIx64, mmap->addr);
+ fprintf(stream, "\n len: %" PRIx64, mmap->len);
+ fprintf(stream, "\n pgoff: %" PRIx64, mmap->pgoff);
+ fprintf(stream, "\n filename: %s", mmap->filename);
+ }
+ }
+ break;
+
+ case PERF_RECORD_LOST: {
+ const struct pev_record_lost *lost;
+
+ lost = event->record.lost;
+ if (!lost)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_LOST %" PRIx64 ", %"
+ PRIx64, lost->id, lost->lost);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_LOST");
+ fprintf(stream, "\n id: %" PRIx64, lost->id);
+ fprintf(stream, "\n lost: %" PRIx64, lost->lost);
+ }
+ }
+ break;
+
+ case PERF_RECORD_COMM: {
+ const struct pev_record_comm *comm;
+ const char *sfx;
+
+ comm = event->record.comm;
+ if (!comm)
+ return -pte_bad_packet;
+
+ sfx = event->misc & PERF_RECORD_MISC_COMM_EXEC ? ".EXEC" : "";
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_COMM%s %x/%x, %s", sfx,
+ comm->pid, comm->tid, comm->comm);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_COMM%s", sfx);
+ fprintf(stream, "\n pid: %x", comm->pid);
+ fprintf(stream, "\n tid: %x", comm->tid);
+ fprintf(stream, "\n comm: %s", comm->comm);
+ }
+ }
+ break;
+
+ case PERF_RECORD_EXIT: {
+ const struct pev_record_exit *exit;
+
+ exit = event->record.exit;
+ if (!exit)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_EXIT %x/%x, %x/%x, %"
+ PRIx64, exit->pid, exit->tid, exit->ppid,
+ exit->ptid, exit->time);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_EXIT");
+ fprintf(stream, "\n pid: %x", exit->pid);
+ fprintf(stream, "\n ppid: %x", exit->ppid);
+ fprintf(stream, "\n tid: %x", exit->tid);
+ fprintf(stream, "\n ptid: %x", exit->ptid);
+ fprintf(stream, "\n time: %" PRIx64, exit->time);
+ }
+ }
+ break;
+
+ case PERF_RECORD_THROTTLE: {
+ const struct pev_record_throttle *throttle;
+
+ throttle = event->record.throttle;
+ if (!throttle)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_THROTTLE %" PRIx64 ", %"
+ PRIx64 ", %" PRIx64, throttle->time,
+ throttle->id, throttle->stream_id);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_THROTTLE");
+ fprintf(stream, "\n time: %" PRIx64, throttle->time);
+ fprintf(stream, "\n id: %" PRIx64, throttle->id);
+ fprintf(stream, "\n stream_id: %" PRIx64,
+ throttle->stream_id);
+ }
+ }
+ break;
+
+ case PERF_RECORD_UNTHROTTLE: {
+ const struct pev_record_throttle *throttle;
+
+ throttle = event->record.throttle;
+ if (!throttle)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_UNTHROTTLE %" PRIx64
+ ", %" PRIx64 ", %" PRIx64, throttle->time,
+ throttle->id, throttle->stream_id);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_UNTHROTTLE");
+ fprintf(stream, "\n time: %" PRIx64, throttle->time);
+ fprintf(stream, "\n id: %" PRIx64, throttle->id);
+ fprintf(stream, "\n stream_id: %" PRIx64,
+ throttle->stream_id);
+ }
+ }
+ break;
+
+ case PERF_RECORD_FORK: {
+ const struct pev_record_fork *fork;
+
+ fork = event->record.fork;
+ if (!fork)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_FORK %x/%x, %x/%x, %"
+ PRIx64, fork->pid, fork->tid, fork->ppid,
+ fork->ptid, fork->time);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_FORK");
+ fprintf(stream, "\n pid: %x", fork->pid);
+ fprintf(stream, "\n ppid: %x", fork->ppid);
+ fprintf(stream, "\n tid: %x", fork->tid);
+ fprintf(stream, "\n ptid: %x", fork->ptid);
+ fprintf(stream, "\n time: %" PRIx64, fork->time);
+ }
+ }
+ break;
+
+ case PERF_RECORD_MMAP2: {
+ const struct pev_record_mmap2 *mmap2;
+
+ mmap2 = event->record.mmap2;
+ if (!mmap2)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_MMAP2 %x/%x, %" PRIx64
+ ", %" PRIx64 ", %" PRIx64 ", %x, %x, %" PRIx64
+ ", %" PRIx64 ", %x, %x, %s", mmap2->pid,
+ mmap2->tid, mmap2->addr, mmap2->len,
+ mmap2->pgoff, mmap2->maj, mmap2->min,
+ mmap2->ino, mmap2->ino_generation, mmap2->prot,
+ mmap2->flags, mmap2->filename);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_MMAP2");
+ fprintf(stream, "\n pid: %x", mmap2->pid);
+ fprintf(stream, "\n tid: %x", mmap2->tid);
+ fprintf(stream, "\n addr: %" PRIx64, mmap2->addr);
+ fprintf(stream, "\n len: %" PRIx64, mmap2->len);
+ fprintf(stream, "\n pgoff: %" PRIx64, mmap2->pgoff);
+ fprintf(stream, "\n maj: %x", mmap2->maj);
+ fprintf(stream, "\n min: %x", mmap2->min);
+ fprintf(stream, "\n ino: %" PRIx64, mmap2->ino);
+ fprintf(stream, "\n ino_generation: %" PRIx64,
+ mmap2->ino_generation);
+ fprintf(stream, "\n prot: %x", mmap2->prot);
+ fprintf(stream, "\n flags: %x", mmap2->flags);
+ fprintf(stream, "\n filename: %s", mmap2->filename);
+ }
+ }
+ break;
+
+ case PERF_RECORD_AUX: {
+ const struct pev_record_aux *aux;
+ const char *sfx;
+
+ aux = event->record.aux;
+ if (!aux)
+ return -pte_bad_packet;
+
+ sfx = aux->flags & PERF_AUX_FLAG_TRUNCATED ? ".TRUNCATED" : "";
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_AUX%s %" PRIx64 ", %"
+ PRIx64 ", %" PRIx64, sfx, aux->aux_offset,
+ aux->aux_size, aux->flags);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_AUX%s", sfx);
+ fprintf(stream, "\n aux offset: %" PRIx64,
+ aux->aux_offset);
+ fprintf(stream, "\n aux size: %" PRIx64,
+ aux->aux_size);
+ fprintf(stream, "\n flags: %" PRIx64, aux->flags);
+ }
+ }
+ break;
+
+ case PERF_RECORD_ITRACE_START: {
+ const struct pev_record_itrace_start *itrace_start;
+
+ itrace_start = event->record.itrace_start;
+ if (!itrace_start)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_ITRACE_START %x/%x",
+ itrace_start->pid, itrace_start->tid);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_ITRACE_START");
+ fprintf(stream, "\n pid: %x", itrace_start->pid);
+ fprintf(stream, "\n tid: %x", itrace_start->tid);
+ }
+ }
+ break;
+
+ case PERF_RECORD_LOST_SAMPLES: {
+ const struct pev_record_lost_samples *lost_samples;
+
+ lost_samples = event->record.lost_samples;
+ if (!lost_samples)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_LOST_SAMPLES %" PRIx64,
+ lost_samples->lost);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_LOST_SAMPLES");
+ fprintf(stream, "\n lost: %" PRIx64,
+ lost_samples->lost);
+ }
+
+ }
+ break;
+
+ case PERF_RECORD_SWITCH: {
+ const char *sfx;
+
+ sfx = event->misc & PERF_RECORD_MISC_SWITCH_OUT ? "OUT" : "IN";
+
+ if (flags & (ptsbp_compact | ptsbp_verbose))
+ fprintf(stream, "PERF_RECORD_SWITCH.%s", sfx);
+ }
+ break;
+
+ case PERF_RECORD_SWITCH_CPU_WIDE: {
+ const struct pev_record_switch_cpu_wide *switch_cpu_wide;
+ const char *sfx, *pfx;
+
+ if (event->misc & PERF_RECORD_MISC_SWITCH_OUT) {
+ sfx = "OUT";
+ pfx = "next";
+ } else {
+ sfx = "IN";
+ pfx = "prev";
+ }
+
+ switch_cpu_wide = event->record.switch_cpu_wide;
+ if (!switch_cpu_wide)
+ return -pte_bad_packet;
+
+ if (flags & ptsbp_compact)
+ fprintf(stream, "PERF_RECORD_SWITCH_CPU_WIDE.%s %x/%x",
+ sfx, switch_cpu_wide->next_prev_pid,
+ switch_cpu_wide->next_prev_tid);
+
+ if (flags & ptsbp_verbose) {
+ fprintf(stream, "PERF_RECORD_SWITCH_CPU_WIDE.%s", sfx);
+ fprintf(stream, "\n %s pid: %x", pfx,
+ switch_cpu_wide->next_prev_pid);
+ fprintf(stream, "\n %s tid: %x", pfx,
+ switch_cpu_wide->next_prev_tid);
+ }
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int pt_sb_pevent_print_samples_compact(const struct pev_event *event,
+ FILE *stream)
+{
+ if (!event)
+ return -pte_internal;
+
+ fprintf(stream, " {");
+
+ if (event->sample.pid && event->sample.tid)
+ fprintf(stream, " %x/%x", *event->sample.pid,
+ *event->sample.tid);
+
+ if (event->sample.time)
+ fprintf(stream, " %" PRIx64, *event->sample.time);
+
+ if (event->sample.id)
+ fprintf(stream, " %" PRIx64, *event->sample.id);
+
+ if (event->sample.cpu)
+ fprintf(stream, " cpu-%x", *event->sample.cpu);
+
+ if (event->sample.stream_id)
+ fprintf(stream, " %" PRIx64, *event->sample.stream_id);
+
+ if (event->sample.identifier)
+ fprintf(stream, " %" PRIx64, *event->sample.identifier);
+
+ fprintf(stream, " }");
+
+ return 0;
+}
+
+static int pt_sb_pevent_print_samples_verbose(const struct pev_event *event,
+ FILE *stream)
+{
+ if (!event)
+ return -pte_internal;
+
+ if (event->sample.pid && event->sample.tid) {
+ fprintf(stream, "\n pid: %x", *event->sample.pid);
+ fprintf(stream, "\n tid: %x", *event->sample.tid);
+ }
+
+ if (event->sample.time)
+ fprintf(stream, "\n time: %" PRIx64, *event->sample.time);
+
+ if (event->sample.id)
+ fprintf(stream, "\n id: %" PRIx64, *event->sample.id);
+
+ if (event->sample.cpu)
+ fprintf(stream, "\n cpu: %x", *event->sample.cpu);
+
+ if (event->sample.stream_id)
+ fprintf(stream, "\n stream id: %" PRIx64,
+ *event->sample.stream_id);
+
+ if (event->sample.identifier)
+ fprintf(stream, "\n identifier: %" PRIx64,
+ *event->sample.identifier);
+
+ return 0;
+}
+
+static int pt_sb_pevent_print_samples(const struct pev_event *event,
+ FILE *stream, uint32_t flags)
+{
+ int errcode;
+
+ if (flags & ptsbp_compact) {
+ errcode = pt_sb_pevent_print_samples_compact(event, stream);
+ if (errcode < 0)
+ return errcode;
+ }
+
+ if (flags & ptsbp_verbose) {
+ errcode = pt_sb_pevent_print_samples_verbose(event, stream);
+ if (errcode < 0)
+ return errcode;
+ }
+
+ return 0;
+}
+
+static int pt_sb_pevent_print(struct pt_sb_pevent_priv *priv, FILE *stream,
+ uint32_t flags)
+{
+ struct pev_event *event;
+ const uint8_t *pos, *begin;
+ const char *filename;
+ int errcode;
+
+ if (!priv)
+ return -pte_internal;
+
+ /* We should not be called before fetching the first record. */
+ pos = priv->current;
+ if (!pos)
+ return -pte_internal;
+
+ begin = priv->begin;
+ if (pos < begin)
+ return -pte_internal;
+
+ filename = priv->filename;
+ if (!filename)
+ return -pte_internal;
+
+ event = &priv->event;
+
+ /* Print filename and/or file offset before the actual record. */
+ switch (flags & (ptsbp_filename | ptsbp_file_offset)) {
+ case ptsbp_filename | ptsbp_file_offset:
+ fprintf(stream, "%s:%016" PRIx64 " ", filename,
+ (uint64_t) (int64_t) (pos - begin));
+ break;
+
+ case ptsbp_filename:
+ fprintf(stream, "%s ", filename);
+ break;
+
+ case ptsbp_file_offset:
+ fprintf(stream, "%016" PRIx64 " ",
+ (uint64_t) (int64_t) (pos - begin));
+ break;
+ }
+
+ /* Print the timestamp if requested and available. */
+ if ((flags & ptsbp_tsc) && event->sample.time)
+ fprintf(stream, "%016" PRIx64 " ", event->sample.tsc);
+
+ /* Print the actual sideband record. */
+ errcode = pt_sb_pevent_print_event(event, stream, flags);
+ if (errcode < 0)
+ return errcode;
+
+ /* Print samples if configured. */
+ if (priv->pev.sample_type) {
+ errcode = pt_sb_pevent_print_samples(event, stream, flags);
+ if (errcode < 0)
+ return errcode;
+ }
+
+ if (flags)
+ fprintf(stream, "\n");
+
+ return 0;
+}
+
+static int pt_sb_pevent_switch_contexts(struct pt_sb_session *session,
+ struct pt_image **image,
+ struct pt_sb_pevent_priv *priv)
+{
+ struct pt_sb_context *prev, *next;
+ int errcode;
+
+ if (!priv || !image)
+ return -pte_internal;
+
+ prev = priv->context;
+ next = priv->next_context;
+ if (!next)
+ return -pte_internal;
+
+ errcode = pt_sb_ctx_switch_to(image, session, next);
+ if (errcode < 0)
+ return errcode;
+
+ priv->next_context = NULL;
+ priv->context = next;
+
+ return prev ? pt_sb_ctx_put(prev) : 0;
+}
+
+static int pt_sb_pevent_cancel_context_switch(struct pt_sb_pevent_priv *priv)
+{
+ struct pt_sb_context *context;
+
+ if (!priv)
+ return -pte_internal;
+
+ context = priv->next_context;
+ if (!context)
+ return 0;
+
+ priv->next_context = NULL;
+
+ return pt_sb_ctx_put(context);
+}
+
+static int pt_sb_pevent_prepare_context_switch(struct pt_sb_pevent_priv *priv,
+ struct pt_sb_context *context)
+{
+ int errcode;
+
+ if (!priv || !context)
+ return -pte_internal;
+
+ /* There's nothing to do if this switch is already pending.
+ *
+ * This could be the result of applying a cpu-wide switch-out followed
+ * by a cpu-wide switch-in without a chance to actually apply the
+ * context switch in-between.
+ */
+ if (priv->next_context == context)
+ return 0;
+
+ /* This context switch overwrites any previously pending switch.
+ *
+ * We may skip context switches due to imprecise timing or due to
+ * re-synchronization after an error.
+ */
+ errcode = pt_sb_pevent_cancel_context_switch(priv);
+ if (errcode < 0)
+ return errcode;
+
+ /* There's nothing to do if we're switching to the current context.
+ *
+ * This could be the result of switching between threads of the same
+ * process or of applying a cpu-wide switch-out followed by a cpu-wide
+ * switch-in.
+ */
+ if (priv->context == context)
+ return 0;
+
+ errcode = pt_sb_ctx_get(context);
+ if (errcode < 0)
+ return errcode;
+
+ priv->next_context = context;
+
+ return 0;
+}
+
+static int pt_sb_pevent_prepare_switch_to_pid(struct pt_sb_session *session,
+ struct pt_sb_pevent_priv *priv,
+ uint32_t pid)
+{
+ struct pt_sb_context *context;
+ int errcode;
+
+ context = NULL;
+ errcode = pt_sb_get_context_by_pid(&context, session, pid);
+ if (errcode < 0)
+ return errcode;
+
+ return pt_sb_pevent_prepare_context_switch(priv, context);
+}
+
+static int pt_sb_pevent_remove_context_for_pid(struct pt_sb_session *session,
+ uint32_t pid)
+{
+ struct pt_sb_context *context;
+ int errcode;
+
+ context = NULL;
+ errcode = pt_sb_find_context_by_pid(&context, session, pid);
+ if (errcode < 0)
+ return errcode;
+
+ if (!context)
+ return 0;
+
+ return pt_sb_remove_context(session, context);
+}
+
+static int
+pt_sb_pevent_itrace_start(struct pt_sb_session *session,
+ struct pt_image **image,
+ struct pt_sb_pevent_priv *priv,
+ const struct pev_record_itrace_start *record)
+{
+ int errcode;
+
+ if (!image || !record)
+ return -pte_internal;
+
+ errcode = pt_sb_pevent_prepare_switch_to_pid(session, priv,
+ record->pid);
+ if (errcode < 0)
+ return errcode;
+
+ /* We may have already installed the starting context. */
+ if (!priv->next_context)
+ return 0;
+
+ /* If we have not, let's not wait for a suitable event.
+ *
+ * We just started tracing so there's no reason to wait for a suitable
+ * location.
+ */
+ return pt_sb_pevent_switch_contexts(session, image, priv);
+}
+
+static int pt_sb_pevent_fork(struct pt_sb_session *session,
+ const struct pev_record_fork *record)
+{
+ struct pt_sb_context *context, *parent;
+ struct pt_image *image, *pimage;
+ uint32_t ppid, pid;
+ int errcode;
+
+ if (!record)
+ return -pte_internal;
+
+ /* If this is just creating a new thread, there's nothing to do.
+ *
+ * We should already have a context for this process. If we don't, it
+ * doesn't really help to create a new context with an empty process
+ * image at this point.
+ */
+ ppid = record->ppid;
+ pid = record->pid;
+ if (ppid == pid)
+ return 0;
+
+ /* We're creating a new process plus the initial thread.
+ *
+ * That initial thread should get the same id as the process.
+ */
+ if (pid != record->tid)
+ return -pte_internal;
+
+ /* Remove any existing context we might have for @pid.
+ *
+ * We're not removing process contexts when we get the exit event since
+ * that is sent while the process is still running inside the kernel.
+ */
+ errcode = pt_sb_pevent_remove_context_for_pid(session, pid);
+ if (errcode < 0)
+ return errcode;
+
+ /* Create a new context for this new process. */
+ context = NULL;
+ errcode = pt_sb_get_context_by_pid(&context, session, pid);
+ if (errcode < 0)
+ return errcode;
+
+ /* Let's see if we also know about the parent process. */
+ parent = NULL;
+ errcode = pt_sb_find_context_by_pid(&parent, session, ppid);
+ if (errcode < 0)
+ return errcode;
+
+ if (!parent)
+ return 0;
+
+ /* Both parent and child must have valid images. */
+ pimage = pt_sb_ctx_image(parent);
+ image = pt_sb_ctx_image(context);
+ if (!pimage || !image)
+ return -pte_internal;
+
+ /* Initialize the child's image with its parent's. */
+ return pt_image_copy(image, pimage);
+}
+
+static int pt_sb_pevent_exec(struct pt_sb_session *session,
+ struct pt_image **image,
+ struct pt_sb_pevent_priv *priv,
+ const struct pev_record_comm *record)
+{
+ struct pt_sb_context *context;
+ uint32_t pid;
+ int errcode;
+
+ if (!record)
+ return -pte_internal;
+
+ pid = record->pid;
+
+ /* Instead of replacing a context's image, we replace the context.
+ *
+ * This allows us to keep the old image around until we actually switch.
+ * We are likely using it at the moment.
+ */
+ errcode = pt_sb_pevent_remove_context_for_pid(session, pid);
+ if (errcode < 0)
+ return errcode;
+
+ /* This creates a new context and a new image.
+ *
+ * This new image will already be initialized with kernel sections but
+ * will otherwise be empty. We will populate it later with MMAP records
+ * that follow this COMM.EXEC record.
+ */
+ context = NULL;
+ errcode = pt_sb_get_context_by_pid(&context, session, pid);
+ if (errcode < 0)
+ return errcode;
+
+ /* If we're not maintaining a decoder, we're done. */
+ if (!image)
+ return 0;
+
+ /* We replaced the previous context of @pid with @context. Let's
+ * (prepare to) switch to the new @context.
+ *
+ * The actual context switch is postponed until we're in kernel context.
+ *
+ * It is quite likely that we are currently using the previous context
+ * we removed earlier in order to reach the location where we transition
+ * into the kernel. In the trace, we have not yet exec'ed.
+ */
+ return pt_sb_pevent_prepare_context_switch(priv, context);
+}
+
+static int pt_sb_pevent_switch(struct pt_sb_session *session,
+ struct pt_sb_pevent_priv *priv,
+ const uint32_t *pid)
+{
+ if (!pid)
+ return -pte_bad_config;
+
+ return pt_sb_pevent_prepare_switch_to_pid(session, priv, *pid);
+}
+
+static int
+pt_sb_pevent_switch_cpu(struct pt_sb_session *session,
+ struct pt_sb_pevent_priv *priv,
+ const struct pev_record_switch_cpu_wide *record)
+{
+ if (!record)
+ return -pte_internal;
+
+ return pt_sb_pevent_prepare_switch_to_pid(session, priv,
+ record->next_prev_pid);
+}
+
+static int pt_sb_pevent_map(struct pt_sb_session *session,
+ const struct pt_sb_pevent_priv *priv, uint32_t pid,
+ const char *filename, uint64_t offset,
+ uint64_t size, uint64_t vaddr)
+{
+ struct pt_sb_context *context;
+ const char *sysroot;
+ char buffer[FILENAME_MAX];
+ int errcode;
+
+ if (!priv || !filename)
+ return -pte_internal;
+
+ /* Get the context for this process. */
+ context = NULL;
+ errcode = pt_sb_get_context_by_pid(&context, session, pid);
+ if (errcode < 0)
+ return errcode;
+
+ /* The optional system root directoy. */
+ sysroot = priv->sysroot;
+
+ /* Some filenames do not represent actual files on disk. We handle
+ * some of those and ignore the rest.
+ */
+ if (filename[0] == '[') {
+ /* The [vdso] file represents the vdso that is mapped into
+ * every process.
+ *
+ * We expect the user to provide all necessary vdso flavors.
+ */
+ if (strcmp(filename, "[vdso]") == 0) {
+ errcode = pt_sb_pevent_find_vdso(&filename, priv,
+ context);
+ if (errcode != 0)
+ return pt_sb_pevent_error(session, errcode,
+ priv);
+ } else
+ return pt_sb_pevent_error(session, ptse_section_lost,
+ priv);
+
+
+ } else if (strcmp(filename, "//anon") == 0) {
+ /* Those are anonymous mappings that are, for example, used by
+ * JIT compilers to generate code in that is later executed.
+ *
+ * There is no general support for this in perf and JIT enabling
+ * is work-in-progress.
+ *
+ * We will likely fail with -pte_nomap later on.
+ */
+ return pt_sb_pevent_error(session, ptse_section_lost, priv);
+
+ } else if (strstr(filename, " (deleted)")) {
+ /* The file that was mapped as meanwhile been deleted.
+ *
+ * We will likely fail with -pte_nomap later on.
+ */
+ return pt_sb_pevent_error(session, ptse_section_lost, priv);
+
+ } else if (sysroot) {
+ /* Prepend the sysroot to normal files. */
+ errcode = snprintf(buffer, sizeof(buffer), "%s%s", sysroot,
+ filename);
+ if (errcode < 0)
+ return -pte_overflow;
+
+ filename = buffer;
+ }
+
+ errcode = pt_sb_pevent_track_abi(context, filename);
+ if (errcode < 0)
+ return errcode;
+
+ return pt_sb_ctx_mmap(session, context, filename, offset, size, vaddr);
+}
+
+static int pt_sb_pevent_mmap(struct pt_sb_session *session,
+ const struct pt_sb_pevent_priv *priv,
+ const struct pev_record_mmap *record)
+{
+ if (!record)
+ return -pte_internal;
+
+ return pt_sb_pevent_map(session, priv, record->pid, record->filename,
+ record->pgoff, record->len, record->addr);
+}
+
+static int pt_sb_pevent_mmap2(struct pt_sb_session *session,
+ const struct pt_sb_pevent_priv *priv,
+ const struct pev_record_mmap2 *record)
+{
+ if (!record)
+ return -pte_internal;
+
+ return pt_sb_pevent_map(session, priv, record->pid, record->filename,
+ record->pgoff, record->len, record->addr);
+}
+
+static int pt_sb_pevent_aux(const struct pt_sb_session *session,
+ const struct pt_sb_pevent_priv *priv,
+ const struct pev_record_aux *record)
+{
+ if (!record)
+ return -pte_internal;
+
+ if (record->flags & PERF_AUX_FLAG_TRUNCATED)
+ return pt_sb_pevent_error(session, ptse_trace_lost, priv);
+
+ return 0;
+}
+
+static int pt_sb_pevent_ignore_mmap(uint16_t misc)
+{
+ /* We rely on the kernel core file for ring-0 decode.
+ *
+ * Both kernel and kernel modules are modified during boot and insmod
+ * respectively. We can't decode from the respective files on disk.
+ *
+ * Ignore kernel MMAP events so we don't overwrite useful data from
+ * kcore with useless data from binary files.
+ */
+ switch (misc & PERF_RECORD_MISC_CPUMODE_MASK) {
+ case PERF_RECORD_MISC_KERNEL:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+static int pt_sb_pevent_apply_event_record(struct pt_sb_session *session,
+ struct pt_image **image,
+ struct pt_sb_pevent_priv *priv,
+ const struct pev_event *event)
+{
+ if (!event)
+ return -pte_internal;
+
+ switch (event->type) {
+ default:
+ /* Ignore unknown events. */
+ break;
+
+ case PERF_RECORD_ITRACE_START:
+ /* Ignore trace starts from secondary sideband channels. */
+ if (!image)
+ break;
+
+ return pt_sb_pevent_itrace_start(session, image, priv,
+ event->record.itrace_start);
+
+ case PERF_RECORD_FORK:
+ return pt_sb_pevent_fork(session, event->record.fork);
+
+ case PERF_RECORD_COMM:
+ /* We're only interested in COMM.EXEC events. */
+ if (!(event->misc & PERF_RECORD_MISC_COMM_EXEC))
+ break;
+
+ return pt_sb_pevent_exec(session, image, priv,
+ event->record.comm);
+
+ case PERF_RECORD_SWITCH:
+ /* Ignore context switches from secondary sideband channels. */
+ if (!image)
+ break;
+
+ /* Ignore switch-out events. We wait for the switch-in. */
+ if (event->misc & PERF_RECORD_MISC_SWITCH_OUT)
+ break;
+
+ return pt_sb_pevent_switch(session, priv, event->sample.pid);
+
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ /* Ignore context switches from secondary sideband channels. */
+ if (!image)
+ break;
+
+ /* For switch-in events, we use the pid sample, if available.
+ *
+ * For cpu-wide switch events, not sampling pid is acceptable
+ * since we get the pid in @prev_next_pid of a switch-out event.
+ *
+ * We will use a cpu-wide switch-in event, if possible, but we
+ * should be able to do without most of the time.
+ */
+ if (!(event->misc & PERF_RECORD_MISC_SWITCH_OUT)) {
+ if (!event->sample.pid)
+ break;
+
+ return pt_sb_pevent_switch(session, priv,
+ event->sample.pid);
+ }
+
+ return pt_sb_pevent_switch_cpu(session, priv,
+ event->record.switch_cpu_wide);
+
+ case PERF_RECORD_MMAP:
+ /* We intentionally ignore some MMAP records. */
+ if (pt_sb_pevent_ignore_mmap(event->misc))
+ break;
+
+ return pt_sb_pevent_mmap(session, priv, event->record.mmap);
+
+ case PERF_RECORD_MMAP2:
+ /* We intentionally ignore some MMAP records. */
+ if (pt_sb_pevent_ignore_mmap(event->misc))
+ break;
+
+ return pt_sb_pevent_mmap2(session, priv, event->record.mmap2);
+
+ case PERF_RECORD_LOST:
+ /* Warn about losses.
+ *
+ * We put the warning into the output. It is quite likely that
+ * we will run into a decode error shortly after (or ran into it
+ * already); this warning may help explain it.
+ */
+ return pt_sb_pevent_error(session, ptse_lost, priv);
+
+ case PERF_RECORD_AUX:
+ /* Ignore trace losses from secondary sideband channels. */
+ if (!image)
+ break;
+
+ return pt_sb_pevent_aux(session, priv, event->record.aux);
+ }
+
+ return 0;
+}
+
+static int ploc_from_ip(enum pt_sb_pevent_loc *loc,
+ const struct pt_sb_pevent_priv *priv, uint64_t ip)
+{
+ if (!loc || !priv)
+ return -pte_internal;
+
+ *loc = (ip < priv->kernel_start) ? ploc_in_user : ploc_in_kernel;
+
+ return 0;
+}
+
+static int ploc_from_suppressed_ip(enum pt_sb_pevent_loc *loc,
+ enum pt_sb_pevent_loc from)
+{
+ if (!loc)
+ return -pte_internal;
+
+ switch (from) {
+ default:
+ *loc = ploc_unknown;
+ break;
+
+ case ploc_likely_in_kernel:
+ case ploc_in_kernel:
+ *loc = ploc_likely_in_user;
+ break;
+
+ case ploc_likely_in_user:
+ case ploc_in_user:
+ *loc = ploc_likely_in_kernel;
+ break;
+ }
+
+ return 0;
+}
+
+static int ploc_from_event(enum pt_sb_pevent_loc *loc,
+ const struct pt_sb_pevent_priv *priv,
+ const struct pt_event *event)
+{
+ if (!loc || !priv || !event)
+ return -pte_internal;
+
+ switch (event->type) {
+ default:
+ break;
+
+ case ptev_enabled:
+ return ploc_from_ip(loc, priv, event->variant.enabled.ip);
+
+ case ptev_disabled:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.disabled.ip);
+
+ return ploc_from_suppressed_ip(loc, priv->location);
+
+ case ptev_async_disabled: {
+ enum pt_sb_pevent_loc fromloc;
+ int errcode;
+
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.async_disabled.ip);
+
+ errcode = ploc_from_ip(&fromloc, priv,
+ event->variant.async_disabled.at);
+ if (errcode < 0)
+ return errcode;
+
+ return ploc_from_suppressed_ip(loc, fromloc);
+ }
+
+ case ptev_async_branch:
+ return ploc_from_ip(loc, priv, event->variant.async_branch.to);
+
+ case ptev_async_paging:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.async_paging.ip);
+
+ fallthrough;
+ case ptev_paging:
+ *loc = ploc_likely_in_kernel;
+ return 0;
+
+ case ptev_overflow:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.overflow.ip);
+
+ break;
+
+ case ptev_exec_mode:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.exec_mode.ip);
+
+ break;
+
+ case ptev_tsx:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.tsx.ip);
+
+ break;
+
+ case ptev_exstop:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.exstop.ip);
+
+ break;
+
+ case ptev_mwait:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.mwait.ip);
+
+ break;
+
+ case ptev_ptwrite:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.ptwrite.ip);
+
+ break;
+
+ case ptev_tick:
+ if (!event->ip_suppressed)
+ return ploc_from_ip(loc, priv,
+ event->variant.tick.ip);
+
+ break;
+ }
+
+ *loc = ploc_unknown;
+ return 0;
+}
+
+static int pt_sb_pevent_apply(struct pt_sb_session *session,
+ struct pt_image **image,
+ const struct pt_event *event,
+ struct pt_sb_pevent_priv *priv)
+{
+ const struct pev_event *record;
+ enum pt_sb_pevent_loc oldloc;
+ int errcode;
+
+ if (!priv || !event)
+ return -pte_internal;
+
+ /* If the current perf event record is due, apply it.
+ *
+ * We don't need to look at the actual event that provided the
+ * timestamp. It suffices to know that time moved beyond the current
+ * perf event record.
+ *
+ * It is tempting to postpone applying the record until a suitable event
+ * but we need to ensure that records from different channels are
+ * applied in timestamp order.
+ *
+ * So we apply the record solely based on timestamps and postpone its
+ * effect until a suitable event.
+ *
+ * The last record in the trace won't be overridden and we have to take
+ * care to not apply it twice. We need to keep it until we were able to
+ * place the last pending context switch.
+ */
+ record = &priv->event;
+ if ((priv->current != priv->next) &&
+ (!record->sample.time || (record->sample.tsc <= event->tsc)))
+ return pt_sb_pevent_apply_event_record(session, image, priv,
+ record);
+
+ /* We first apply all our sideband records one-by-one until we're in
+ * sync with the event.
+ *
+ * When we get here, we applied all previous sideband records. Let's
+ * use the event to keep track of kernel vs user space and apply any
+ * postponed context switches.
+ *
+ * We preserve the previous location to detect returns from kernel to
+ * user space.
+ */
+ oldloc = priv->location;
+ errcode = ploc_from_event(&priv->location, priv, event);
+ if (errcode < 0)
+ return errcode;
+
+ /* We postpone context switches until we reach a suitable location in
+ * the trace. If we don't have a context switch pending, we're done.
+ */
+ if (!priv->next_context) {
+ /* Signal the end of the trace if the last event did not result
+ * in a postponed context switch or if that context switch had
+ * been applied at a previous event.
+ */
+ if (priv->current == priv->next)
+ return -pte_eos;
+
+ return 0;
+ }
+
+ /* Apply a postponed context switch inside kernel mode.
+ *
+ * For our purposes it does not matter where exactly we are in the
+ * kernel.
+ *
+ * In order to catch the first event window right before a tracing
+ * enabled event after some time of tracing being disabled (or at the
+ * beginning of the trace), we also accept an unknown location.
+ */
+ switch (oldloc) {
+ case ploc_likely_in_kernel:
+ case ploc_in_kernel:
+ case ploc_unknown:
+ return pt_sb_pevent_switch_contexts(session, image, priv);
+
+ default:
+ switch (priv->location) {
+ case ploc_likely_in_kernel:
+ case ploc_in_kernel:
+ case ploc_unknown:
+ return pt_sb_pevent_switch_contexts(session, image,
+ priv);
+
+ default:
+ break;
+ }
+
+ break;
+ };
+
+ return 0;
+}
+
+static int pt_sb_pevent_fetch_callback(struct pt_sb_session *session,
+ uint64_t *tsc, void *priv)
+{
+ int errcode;
+
+ errcode = pt_sb_pevent_fetch(tsc, (struct pt_sb_pevent_priv *) priv);
+ if ((errcode < 0) && (errcode != -pte_eos))
+ pt_sb_pevent_error(session, errcode,
+ (struct pt_sb_pevent_priv *) priv);
+
+ return errcode;
+}
+
+static int pt_sb_pevent_print_callback(struct pt_sb_session *session,
+ FILE *stream, uint32_t flags, void *priv)
+{
+ int errcode;
+
+ errcode = pt_sb_pevent_print((struct pt_sb_pevent_priv *) priv, stream,
+ flags);
+ if (errcode < 0)
+ return pt_sb_pevent_error(session, errcode,
+ (struct pt_sb_pevent_priv *) priv);
+
+ return 0;
+}
+
+static int pt_sb_pevent_apply_callback(struct pt_sb_session *session,
+ struct pt_image **image,
+ const struct pt_event *event, void *priv)
+{
+ int errcode;
+
+ errcode = pt_sb_pevent_apply(session, image, event,
+ (struct pt_sb_pevent_priv *) priv);
+ if ((errcode < 0) && (errcode != -pte_eos))
+ return pt_sb_pevent_error(session, errcode,
+ (struct pt_sb_pevent_priv *) priv);
+
+ return errcode;
+}
+
+int pt_sb_alloc_pevent_decoder(struct pt_sb_session *session,
+ const struct pt_sb_pevent_config *pev)
+{
+ struct pt_sb_decoder_config config;
+ struct pt_sb_pevent_priv *priv;
+ int errcode;
+
+ if (!session || !pev)
+ return -pte_invalid;
+
+ priv = malloc(sizeof(*priv));
+ if (!priv)
+ return -pte_nomem;
+
+ errcode = pt_sb_pevent_init(priv, pev);
+ if (errcode < 0) {
+ free(priv);
+ return errcode;
+ }
+
+ memset(&config, 0, sizeof(config));
+ config.fetch = pt_sb_pevent_fetch_callback;
+ config.apply = pt_sb_pevent_apply_callback;
+ config.print = pt_sb_pevent_print_callback;
+ config.dtor = pt_sb_pevent_dtor;
+ config.priv = priv;
+ config.primary = pev->primary;
+
+ errcode = pt_sb_alloc_decoder(session, &config);
+ if (errcode < 0)
+ free(priv);
+
+ return errcode;
+}
+
+#endif /* FEATURE_PEVENT */