mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-12 01:20:14 +00:00
Add a new event type for deferred callchains and a new callback for the struct perf_tool. For now it doesn't actually handle the deferred callchains, but it just marks the sample if it has PERF_CONTEXT_USER_DEFERRED in the callchain array. At least, perf report can dump the raw data with this change.

Actually this requires the next commit to enable attr.defer_callchain, but if you already have a data file, it'll show the following result.

  $ perf report -D
  ...
  0x2158@perf.data [0x40]: event: 22
  .
  . ... raw event: size 64 bytes
  .  0000:  16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00  ......@.........
  .  0010:  03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00  ..........3.....
  .  0020:  0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00  ..3.....H.3.....
  .  0030:  08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00  .........z.5....

  121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
  ... FP chain: nr:3
  .....  0: 00007f18fe337fa7
  .....  1: 00007f18fe330e0f
  .....  2: 00007f18fe331448
  : unhandled!

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
140 lines
3.1 KiB
C
140 lines
3.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __PERF_SAMPLE_H
|
|
#define __PERF_SAMPLE_H
|
|
|
|
#include <linux/perf_event.h>
|
|
#include <linux/types.h>
|
|
|
|
/* number of register is bound by the number of bits in regs_dump::mask (64) */
|
|
#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
|
|
|
|
struct regs_dump {
|
|
u64 abi;
|
|
u64 mask;
|
|
u64 *regs;
|
|
|
|
/* Cached values/mask filled by first register access. */
|
|
u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
|
|
u64 cache_mask;
|
|
};
|
|
|
|
struct stack_dump {
|
|
u16 offset;
|
|
u64 size;
|
|
char *data;
|
|
};
|
|
|
|
struct sample_read_value {
|
|
u64 value;
|
|
u64 id; /* only if PERF_FORMAT_ID */
|
|
u64 lost; /* only if PERF_FORMAT_LOST */
|
|
};
|
|
|
|
struct sample_read {
|
|
u64 time_enabled;
|
|
u64 time_running;
|
|
union {
|
|
struct {
|
|
u64 nr;
|
|
struct sample_read_value *values;
|
|
} group;
|
|
struct sample_read_value one;
|
|
};
|
|
};
|
|
|
|
/*
 * Size in bytes of one struct sample_read_value entry for @read_format:
 * the full structure when PERF_FORMAT_LOST is set, otherwise only the
 * members that precede 'lost'.
 */
static inline size_t sample_read_value_size(u64 read_format)
{
	/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
	if (!(read_format & PERF_FORMAT_LOST))
		return offsetof(struct sample_read_value, lost);

	return sizeof(struct sample_read_value);
}
|
|
|
|
/*
 * Step from entry @v to the entry that follows it. Entries are packed with
 * a stride of sample_read_value_size(@read_format) bytes, which may be
 * smaller than sizeof(*v), so plain v + 1 would be wrong.
 */
static inline struct sample_read_value *
next_sample_read_value(struct sample_read_value *v, u64 read_format)
{
	size_t stride = sample_read_value_size(read_format);

	return (void *)((char *)v + stride);
}
|
|
|
|
/*
 * Iterate over the @nr entries of a read group.
 *
 * @v:  cursor of type struct sample_read_value *; the caller must point it
 *      at the first entry before the loop, it is advanced after each pass.
 * @nr: number of entries to visit (converted to int).
 * @rf: read_format used to compute the per-entry stride.
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments (e.g. "cond ? a : b") keep their intended meaning.
 */
#define sample_read_group__for_each(v, nr, rf)		\
	for (int __i = 0; __i < (int)(nr);		\
	     (v) = next_sample_read_value((v), (rf)), __i++)
|
|
|
|
/* Capacity, in bytes, of the insn[] buffer in struct perf_sample. */
#define MAX_INSN 16
|
|
|
|
struct aux_sample {
|
|
u64 size;
|
|
void *data;
|
|
};
|
|
|
|
struct simd_flags {
|
|
u8 arch:1, /* architecture (isa) */
|
|
pred:2; /* predication */
|
|
};
|
|
|
|
/* simd architecture flags */
|
|
#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* ARM SVE */
|
|
|
|
/* simd predicate flags */
|
|
#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
|
|
#define SIMD_OP_FLAGS_PRED_EMPTY 0x02 /* empty predicate */
|
|
|
|
struct perf_sample {
|
|
u64 ip;
|
|
u32 pid, tid;
|
|
u64 time;
|
|
u64 addr;
|
|
u64 id;
|
|
u64 stream_id;
|
|
u64 period;
|
|
u64 weight;
|
|
u64 transaction;
|
|
u64 insn_cnt;
|
|
u64 cyc_cnt;
|
|
u32 cpu;
|
|
u32 raw_size;
|
|
u64 data_src;
|
|
u64 phys_addr;
|
|
u64 data_page_size;
|
|
u64 code_page_size;
|
|
u64 cgroup;
|
|
u32 flags;
|
|
u32 machine_pid;
|
|
u32 vcpu;
|
|
u16 insn_len;
|
|
u8 cpumode;
|
|
u16 misc;
|
|
u16 ins_lat;
|
|
/** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
|
|
u16 weight3;
|
|
bool no_hw_idx; /* No hw_idx collected in branch_stack */
|
|
bool deferred_callchain; /* Has deferred user callchains */
|
|
u64 deferred_cookie;
|
|
char insn[MAX_INSN];
|
|
void *raw_data;
|
|
struct ip_callchain *callchain;
|
|
struct branch_stack *branch_stack;
|
|
u64 *branch_stack_cntr;
|
|
struct regs_dump *user_regs;
|
|
struct regs_dump *intr_regs;
|
|
struct stack_dump user_stack;
|
|
struct sample_read read;
|
|
struct aux_sample aux_sample;
|
|
struct simd_flags simd_flags;
|
|
};
|
|
|
|
/*
 * Lifetime helpers for struct perf_sample.
 * NOTE(review): the implementations are not visible in this header; the
 * meaning of 'all' and what exit releases should be confirmed there.
 */
void perf_sample__init(struct perf_sample *sample, bool all);
void perf_sample__exit(struct perf_sample *sample);

/* Accessors returning the user / interrupt register dump of @sample. */
struct regs_dump *perf_sample__user_regs(struct perf_sample *sample);
struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample);
|
|
|
|
/*
|
|
* raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
|
|
* 8-byte alignment.
|
|
*/
|
|
static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
|
|
{
|
|
return sample->raw_data - 4;
|
|
}
|
|
|
|
#endif /* __PERF_SAMPLE_H */
|