| acme@redhat.com |
|---|
$ pahole page_pool
struct page_pool {
struct page_pool_params p; /* 0 56 */
struct delayed_work release_dw; /* 56 88 */
/* XXX last struct has 4 bytes of padding */
/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
void (*disconnect)(void *); /* 144 8 */
long unsigned int defer_start; /* 152 8 */
long unsigned int defer_warn; /* 160 8 */
u32 pages_state_hold_cnt; /* 168 4 */
unsigned int frag_offset; /* 172 4 */
struct page * frag_page; /* 176 8 */
long int frag_users; /* 184 8 */
/* --- cacheline 3 boundary (192 bytes) --- */
u32 xdp_mem_id; /* 192 4 */
/* XXX 60 bytes hole, try to pack */
/* --- cacheline 4 boundary (256 bytes) --- */
struct pp_alloc_cache alloc __attribute__((__aligned__(64))); /* 256 1032 */
/* XXX 56 bytes hole, try to pack */
/* --- cacheline 21 boundary (1344 bytes) --- */
struct ptr_ring ring __attribute__((__aligned__(64))); /* 1344 192 */
/* XXX last struct has 48 bytes of padding */
/* --- cacheline 24 boundary (1536 bytes) --- */
atomic_t pages_state_release_cnt; /* 1536 4 */
refcount_t user_cnt; /* 1540 4 */
u64 destroy_cnt; /* 1544 8 */
/* size: 1600, cachelines: 25, members: 15 */
/* sum members: 1436, holes: 2, sum holes: 116 */
/* padding: 48 */
/* paddings: 2, sum paddings: 52 */
/* forced alignments: 2, forced holes: 2, sum forced holes: 116 */
} __attribute__((__aligned__(64)));
$
# perf mem -t load report --sort=mem --stdio
# Total Lost Samples: 0
#
# Samples: 51K of event 'cpu/mem-loads,ldlat=30/P'
# Total weight : 4819902
# Sort order : mem
#
# Overhead Samples Memory access
# ........ ............ ........................
44.87% 20217 LFB or LFB hit
27.30% 18618 L3 or L3 hit
22.53% 11712 L1 or L1 hit
4.85% 637 Local RAM or RAM hit
0.25% 1 Uncached or N/A hit
0.20% 188 L2 or L2 hit
0.00% 35 L3 miss
[root@quaco linux]# perf mem record -a make -j8 O=../build/allyesconfig/
make[1]: Entering directory '/home/acme/git/build/allyesconfig'
# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 7.787 MB perf.data (2450 samples) ]
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
dummy:HG
#
# perf script --cpu 4 --pid 0 | head
swapper 0 [4] 319242.043904: 58 cpu/mem-loads,ldlat=30/P: ffff8d3e49c0e688 11868100242 |OP LOAD|LVL LFB or LFB hit|SNP None|TLB L1 or L2 hit|LCK No|BLK N/A 290 0 ffffffff9a13eb2c __update_load_avg_cfs_rq+0x9c (vmlinux) 9c0e688
swapper 0 [4] 319242.142295: 39 cpu/mem-loads,ldlat=30/P: ffff8d44865f2408 10268100142 |OP LOAD|LVL L1 or L1 hit|SNP None|TLB L1 or L2 hit|LCK No|BLK N/A 335 0 ffffffff9a13eecd update_rt_rq_load_avg+0x17d (vmlinux) 6465f2408
swapper 0 [4] 319242.143587: 99614 cpu/mem-stores/P: ffff8d4486500028 5080184 |OP STORE|LVL L1 miss|SNP N/A|TLB N/A|LCK N/A|BLK N/A 0 0 ffffffff9a001c2f __switch_to_asm+0x1f (vmlinux) 646500028
swapper 0 [4] 319242.174494: 33 cpu/mem-loads,ldlat=30/P: ffff8d3f595ddc38 11a68201042 |OP LOAD|LVL Local RAM or RAM hit|SNP Hit|TLB L1 or L2 hit|LCK No|BLK N/A 176 0 ffffffff9a13e78d __update_load_avg_se+0x1d (vmlinux) 1195ddc38
swapper 0 [4] 319242.178002: 27 cpu/mem-loads,ldlat=30/P: ffff8d44865312c0 10668100842 |OP LOAD|LVL L3 or L3 hit|SNP None|TLB L1 or L2 hit|LCK No|BLK N/A 56 0 ffffffff9a07d74f switch_mm_irqs_off+0x16f (vmlinux) 6465312c0
swapper 0 [4] 319242.212148: 23 cpu/mem-loads,ldlat=30/P: ffff8d44865322e8 10668100842 |OP LOAD|LVL L3 or L3 hit|SNP None|TLB L1 or L2 hit|LCK No|BLK N/A 55 0 ffffffff9a140c22 irqtime_account_process_tick+0xa2 (vmlinux) 6465322e8
swapper 0 [4] 319242.217357: 18 cpu/mem-loads,ldlat=30/P: ffff8d4486532490 10268100142 |OP LOAD|LVL L1 or L1 hit|SNP None|TLB L1 or L2 hit|LCK No|BLK N/A 125 0 ffffffff9a140076 update_irq_load_avg+0xf6 (vmlinux) 646532490
swapper 0 [4] 319242.220573: 15 cpu/mem-loads,ldlat=30/P: ffff8d3f4f35f218 11868100242 |OP LOAD|LVL LFB or LFB hit|SNP None|TLB L1 or L2 hit|LCK No|BLK N/A 383 0 ffffffff9a73b407 rb_erase+0x7 (vmlinux) 10f35f218
swapper 0 [4] 319242.240176: 15 cpu/mem-loads,ldlat=30/P: ffff8d3f6b617be0 10650100842 |OP LOAD|LVL L3 or L3 hit|SNP None|TLB L2 miss|LCK No|BLK N/A 184 0 ffffffff9a129fbb update_blocked_averages+0x1fb (vmlinux) 12b617be0
swapper 0 [4] 319242.243441: 8849 cpu/mem-stores/P: ffff8d3f40c2b1a4 5080144 |OP STORE|LVL L1 hit|SNP N/A|TLB N/A|LCK N/A|BLK N/A 0 0 ffffffff9ad68aed rcu_eqs_exit.constprop.0+0x3d (vmlinux) 100c2b1a4
#
# perf evlist -v | head -1
cpu/mem-loads,ldlat=30/P: type: 4, size: 128, config: 0x1cd, \
{ sample_period, sample_freq }: 4000, \
sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|PHYS_ADDR|WEIGHT_STRUCT, \
read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1, \
{ bp_addr, config1 }: 0x1f
#
$ wc -l tools/perf/util/bpf_skel/*.bpf.c
  191 tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
   78 tools/perf/util/bpf_skel/bperf_follower.bpf.c
   55 tools/perf/util/bpf_skel/bperf_leader.bpf.c
   92 tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
  116 tools/perf/util/bpf_skel/func_latency.bpf.c
  383 tools/perf/util/bpf_skel/kwork_trace.bpf.c
  175 tools/perf/util/bpf_skel/lock_contention.bpf.c
  273 tools/perf/util/bpf_skel/off_cpu.bpf.c
 1363 total
$
FIXME: add a slide about 'perf lock contention -b' to show the BPF-style workflow (in-kernel aggregation + output) as an alternative to the usual record + report flow.