|
acme@redhat.com |
|---|
# pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars
ifeq ($(call test-le, $(pahole-ver), 121),y)
pahole-flags-$(call test-ge, $(pahole-ver), 118) += --skip_encoding_btf_vars
endif
pahole-flags-$(call test-ge, $(pahole-ver), 121) += --btf_gen_floats
pahole-flags-$(call test-ge, $(pahole-ver), 122) += -j
pahole-flags-$(CONFIG_PAHOLE_HAS_LANG_EXCLUDE) += --lang_exclude=rust
pahole-flags-$(call test-ge, $(pahole-ver), 125) += --skip_encoding_btf_inconsistent_proto --btf_gen_optimized
export PAHOLE_FLAGS := $(pahole-flags-y)
# Switch to using --btf_features for v1.26 and later.
pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func
$ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l
121
$ bpftool btf dump file .tmp_vmlinux.btf | rg 56337
[56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static
[127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1
# echo 1 > /proc/sys/vm/drop_caches
# perf mem record find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.061 MB perf.data (26 samples) ]
#
# perf evlist
cpu_atom/mem-loads,ldlat=30/P
cpu_atom/mem-stores/P
dummy:u
#
# taskset -c 0 \
perf record --weight --data \
--event '{cpu_core/mem-loads-aux/,cpu_core/mem-loads,ldlat=30/P}:S' \
--event cpu_core/mem-stores/ find / > /dev/null
[ perf record: Woken up 20 times to write data ]
[ perf record: Captured and wrote 5.138 MB perf.data (79628 samples) ]
#
# perf evlist
cpu_core/mem-loads-aux/
cpu_core/mem-loads,ldlat=30/P
cpu_core/mem-stores/
dummy:u
#
# perf mem report
# Total Lost Samples: 0
#
# Samples: 25K of event 'cpu_core/mem-loads-aux/'
# Total weight : 1123282
# Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc
#
# Overhead Samples LocalWeight Mem access Symbol Shared Object Data Symbol
# ........ ....... ............ ........... .......................... ................. ..........................
#
0.50% 1 5635 RAM hit [k] btrfs_bin_search [kernel.kallsyms] [k] 0xffff90b3b9fe0a31
0.22% 1 2504 RAM hit [k] rb_next [kernel.kallsyms] [k] 0xffff90af31bfcda8
0.13% 1 1472 LFB/MAB hit [k] mutex_lock [kernel.kallsyms] [k] 0xffff90adca8c1d18
0.13% 1 1432 LFB/MAB hit [k] btrfs_get_delayed_node [kernel.kallsyms] [k] 0xffff90b4c9a17158
0.12% 1 1376 LFB/MAB hit [k] generic_fillattr [kernel.kallsyms] [k] 0xffff90b422422032
SNIP
0.02% 1 220 L3 hit [k] ktime_get_update_offsets_now [kernel.kallsyms] [k] tk_core+0xc0
SNIP
0.02% 1 216 LFB/MAB hit [k] update_vsyscall [kernel.kallsyms] [k] shadow_timekeeper+0x40
SNIP
0.02% 1 208 LFB/MAB hit [k] _raw_spin_lock [kernel.kallsyms] [k] jiffies_lock+0x0
# perf report --stdio --mem-mode --sort mem
# Samples: 26K of event 'cpu_core/mem-loads,ldlat=30/P'
# Total weight : 1135614
# Sort order : mem
#
# Overhead Memory access
# ........ .............
#
62.32% LFB/MAB hit
24.22% RAM hit
10.28% L1 hit
2.40% L3 hit
0.78% L2 hit
# perf report --dso '[kernel.kallsyms]' --stdio \
--mem-mode --sort sym,local_ins_lat
# Overhead Symbol Local INSTR Latency
# ........ ............................ ...................
#
0.50% [k] btrfs_bin_search 5637
0.22% [k] rb_next 2507
0.18% [k] folio_mark_accessed 419
0.18% [k] __d_lookup 405
0.17% [k] __d_lookup_rcu 389
0.14% [k] down_read 41
0.14% [k] __d_lookup_rcu 390
0.13% [k] mutex_lock 1475
0.13% [k] mutex_lock 487
0.13% [k] btrfs_get_delayed_node 1441
0.12% [k] generic_fillattr 703
0.12% [k] generic_fillattr 1378
0.12% [k] folio_mark_accessed 1371
0.12% [k] _raw_spin_lock 33
0.12% [k] btrfs_get_delayed_node 444
0.11% [k] dcache_readdir 1283
0.11% [k] __d_lookup_rcu 431
0.11% [k] folio_mark_accessed 640
#
# perf report --stdio -s type -i perf.data.mem.find
# Total Lost Samples: 0
#
# Samples: 25K of event 'cpu_core/mem-loads-aux/'
# Event count (approx.): 170070020
#
# Overhead Data Type
# ........ .........
18.34% (stack operation)
15.35% struct btrfs_key
10.83% struct
9.13% (unknown)
8.14% int
7.75% unsigned int
3.69% long long unsigned int
3.02% (stack canary)
2.62% struct _ftsent
2.61% struct extent_buffer
2.50% struct extent_buffer*
2.46% struct __va_list_tag
2.15% struct inode
2.12% long unsigned int
1.03% struct btrfs_delayed_node
0.86% struct nameidata
0.82% struct dentry
0.62% struct mnt_idmap*
0.57% struct malloc_chunk
0.54% struct av_decision
0.41% struct btrfs_path
0.36% struct av_decision*
0.34% unsigned char
0.32% struct hlist_bl_head
# perf report --stdio -s type,sym -i perf.data.mem.find
# Total Lost Samples: 0
#
# Samples: 25K of event 'cpu_core/mem-loads-aux/'
# Event count (approx.): 170070020
#
# Overhead Data Type Symbol
# ........ ......... ...............................
12.56% struct btrfs_key [k] btrfs_real_readdir
7.40% int [.] __GI___readdir64
5.98% unsigned int [k] _raw_spin_lock
4.75% (stack operation) [k] locks_remove_posix
3.24% (stack operation) [k] btrfs_verify_level_key
2.77% (stack operation) [k] check_buffer_tree_ref
2.76% struct [k] up_read
2.47% struct extent_buffer* [k] btrfs_search_slot
2.46% struct __va_list_tag [.] __printf_buffer
2.42% struct btrfs_key [k] btrfs_comp_cpu_keys
2.07% struct [k] down_read
1.81% struct extent_buffer [k] release_extent_buffer
1.59% (unknown) [k] memcpy
1.56% struct [k] check_buffer_tree_ref
1.24% (unknown) [k] __srcu_read_unlock
1.16% struct inode [k] generic_fillattr
1.14% unsigned int [k] find_extent_buffer_nolock
1.14% (stack canary) [k] locks_remove_posix
1.04% struct [k] __fput_sync
1.01% struct _ftsent [.] fts_compare_ino.lto_priv.0
0.97% long long unsigned int [k] mutex_lock
0.93% struct _ftsent [.] consider_visiting
0.89% (stack canary) [k] fsnotify
0.86% (stack operation) [k] read_extent_buffer
0.83% (unknown) [k] __srcu_read_lock
0.83% (stack operation) [k] __btrfs_tree_read_lock
0.81% long long unsigned int [k] lockref_put_return
0.79% (unknown) [.] __memmove_avx_unaligned_erms
0.76% (stack canary) [k] btrfs_verify_level_key
# perf report -s type,typeoff --hierarchy --stdio -i perf.data.mem.find
#
# Overhead Data Type / Data Type Offset
SNIP
2.15% struct inode
0.26% struct inode +40 (i_sb)
0.21% struct inode +356 (i_readcount.counter)
0.15% struct inode +56 (i_security)
0.15% struct inode +13 (i_flags)
0.12% struct inode +8 (i_gid.val)
0.12% struct inode +360 (i_fop)
0.11% struct inode +4 (i_uid.val)
0.10% struct inode +72 (i_nlink)
0.09% struct inode +88 (__i_atime.tv_sec)
0.09% struct inode +32 (i_op)
0.09% struct inode +0 (i_mode)
0.09% struct inode +64 (i_ino)
0.08% struct inode +12 (i_flags)
0.07% struct inode +112 (__i_mtime.tv_nsec)
0.07% struct inode +144 (i_blocks)
0.06% struct inode +96 (__i_atime.tv_nsec)
0.05% struct inode +80 (i_size)
0.05% struct inode +76 (i_rdev)
0.05% struct inode +128 (__i_ctime.tv_nsec)
0.04% struct inode +120 (__i_ctime.tv_sec)
0.04% struct inode +140 (i_bytes)
0.04% struct inode +104 (__i_mtime.tv_sec)
0.03% struct inode +142 (i_blkbits)
SNIP
# perf report -s type,typeoff,sym --hierarchy --stdio -i perf.data.mem.find
SNIP
15.35% struct btrfs_key
7.05% struct btrfs_key +0 (objectid)
6.04% [k] btrfs_real_readdir
0.76% [k] btrfs_comp_cpu_keys
0.26% [k] btrfs_bin_search
4.27% struct btrfs_key +9 (offset)
3.31% [k] btrfs_real_readdir
0.94% [k] btrfs_comp_cpu_keys
0.02% [k] btrfs_bin_search
4.03% struct btrfs_key +8 (type)
3.21% [k] btrfs_real_readdir
0.73% [k] btrfs_comp_cpu_keys
0.09% [k] btrfs_bin_search
SNIP
# perf annotate --stdio --data-type
Annotate type: 'struct btrfs_key' in [kernel.kallsyms] (6282 samples):
event[0] = cpu_core/mem-loads-aux/
event[1] = cpu_core/mem-loads,ldlat=30/P
=========================================================
Percent offset size field
100.00 100.00 0 17 struct btrfs_key {
45.93 45.90 0 8 __u64 objectid;
26.26 26.52 8 1 __u8 type;
27.80 27.58 9 8 __u64 offset;
};
root@number:~# strace -e openat pahole btrfs_key |& tail -11
openat(AT_FDCWD, "/sys/kernel/btf/vmlinux", O_RDONLY) = 3
struct btrfs_key {
__u64 objectid; /* 0 8 */
__u8 type; /* 8 1 */
__u64 offset; /* 9 8 */
/* size: 17, cachelines: 1, members: 3 */
/* last cacheline: 17 bytes */
} __attribute__((__packed__));
+++ exited with 0 +++
root@number:~#
# perf --debug type-profile annotate --data-type
find data type for 0x6(reg7) at intel_pmu_handle_irq+0x53
CU for arch/x86/events/intel/core.c (die:0x1b1f23)
frame base: cfa=1 fbreg=7
found "late_ack" in scope=1/1 (die: 0x1da6df) stack_offset=0x60 type_offset=0
 variable location: use frame base, offset=0xffffffffffffffa6
 type='_Bool' size=0x1 (die:0x1b21d4)
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
int loops;
find data type for 0(reg1, reg0) at arch_asym_cpu_priority+0x1b
CU for arch/x86/kernel/itmt.c (die:0xed3cc9)
frame base: cfa=1 fbreg=7
scope: [1/1] (die:ed5101)
bb: [0 - 1b]
var [0] reg5 type='int' size=0x4 (die:0xed3d3e)
mov [9] reg5 -> reg5 type='int' size=0x4 (die:0xed3d3e)
mov [c] imm=0x19a38 -> reg0
mov [13] percpu base reg1
chk [1b] reg1 offset=0 ok=0 kind=2 cfa
no variable found
/*
 * arch_asym_cpu_priority - read the priority stored for one CPU
 * @cpu: index of the CPU whose entry is read
 *
 * Returns the value of the per-CPU variable sched_core_priority
 * belonging to @cpu.
 */
int arch_asym_cpu_priority(int cpu)
{
	int prio = per_cpu(sched_core_priority, cpu);

	return prio;
}