|
acme@redhat.com |
|---|
# perf mem record -a sleep 1
#
# perf mem -t load report --sort=mem --stdio
# Total Lost Samples: 0
#
# Samples: 51K of event 'cpu/mem-loads,ldlat=30/P'
# Total weight : 4819902
# Sort order : mem
#
# Overhead Samples Memory access
# ........ ............ ........................
44.87% 20217 LFB or LFB hit
27.30% 18618 L3 or L3 hit
22.53% 11712 L1 or L1 hit
4.85% 637 Local RAM or RAM hit
0.25% 1 Uncached or N/A hit
0.20% 188 L2 or L2 hit
0.00% 35 L3 miss
# perf mem record sleep 1
#
# perf mem -t load report --sort=mem --stdio
# Total Lost Samples: 0
#
# Samples: 16 of event 'cpu/mem-loads,ldlat=30/P'
# Total weight : 1556
# Sort order : mem
# Overhead Samples Memory access
# ........ ............ ........................
#
64.52% 8 LFB or LFB hit
14.07% 4 L1 or L1 hit
11.05% 3 L3 or L3 hit
10.35% 1 Local RAM or RAM hit
$ sudo perf mem record -- -c 10000 ^C[ perf record: Woken up 227 times to write data ] [ perf record: Captured and wrote 58.760 MB perf.data (836978 samples) ] $ sudo perf mem report -F mem,sample,snoop Samples: 836K of event 'ibs_op//', Event count (approx.): 8418762 Memory access Samples Snoop N/A 700620 N/A L1 hit 126675 N/A L2 hit 424 N/A L3 hit 664 HitM L3 hit 10 N/A Local RAM hit 2 N/A Remote RAM (1 hop) hit 8558 N/A Remote Cache (1 hop) hit 3 N/A Remote Cache (1 hop) hit 2 HitM Remote Cache (2 hops) hit 10 HitM Remote Cache (2 hops) hit 6 N/A Uncached hit 4 N/A $
$ make -j8 O=../build/allmodconfig/ make[1]: Entering directory '/home/acme/git/build/allmodconfig'# perf mem record sleep 1m [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.037 MB perf.data (20 samples) ] #
# perf mem report --stdio
# Total Lost Samples: 0
#
# Samples: 11 of event 'cpu/mem-loads,ldlat=30/P'
# Total weight : 2155
# Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr
#
# Local Mem
#Overhead Weig Access Symbol Sh Object Data Symbol Data Obj
#........ .... ........ ................... ......... ................ ...........
23.94% 516 LocalRAM copy_page [kernel] 0xffff8d42228ea900 [unknown]
15.31% 330 LFB flush_signal_handle [kernel] 0xffff8d3f976020a0 [unknown]
14.66% 316 LFB strlen [kernel] 0xffffffff9b5f4cd3 [kernel].ro
13.36% 288 LFB _dl_relocate_object ld-linux.so 0x00007f6ccdc23068 libc.so.6
11.46% 247 LFB next_uptodate_page [kernel] 0xffffe401957e4df4 [unknown]
7.33% 158 LFB copy_page [kernel] 0xffff8d41f2dae920 [unknown]
4.04% 87 LFB unlock_page_memcg [kernel] 0xffffe4019333d8b8 [unknown]
3.06% 66 L1 check_preemption_di [kernel] 0xffffa8e8622ffc80 [unknown]
2.69% 58 LFB perf_output_begin [kernel] 0xffff8d3f52a1b01c [unknown]
2.13% 46 L3 task_work_run [kernel] 0xffff8d3f4a9c802c [unknown]
2.00% 43 L1 kmem_cache_alloc_tr [kernel] 0xffffa8e8622ffbc8 [unknown]
# perf c2c report --stdio
=================================================
Shared Data Cache Line Table
=================================================
#
# -- Cacheline -- -- Load Hitm ---- Tot Total Total -- Stores --
# Idx Address Hitm Tot LclHitm rec Loads Stores L1Hit L1Miss
# ... ................ ..... ... ....... ... ..... ...... ............
#
0 ffff8d449e7d6380 8.43% 53 53 510 499 11 11 0
1 ffff8d4058209340 6.20% 39 39 371 135 236 223 13
2 ffff8d449e7ff400 5.88% 37 37 501 479 22 22 0
3 ffffffff9bf53980 4.93% 31 31 233 208 25 24 1
4 ffff8d3f49ebd280 3.18% 20 20 162 153 9 9 0
5 ffff8d3f420d4880 2.86% 18 18 126 121 5 5 0
Cacheline 0xffff8d449e7ff400 -HITM- CL --- cycles --- Tot cpu LclHitm Off Code address lcl hitm load rec cnt Symbol Object Source:Line 97.30% 0x0 0xffffffff9a2d293b 113 44 454 8 __mod_node_page_state [kernel] vmstat.c:379 0.00% 0x8 0xffffffff9a2d29bb 0 112 40 8 __mod_node_page_state [kernel] atomic64_64.h:46 2.70% 0x18 0xffffffff9a2d2be5 959 103 2 2 refresh_cpu_vm_stats [kernel] atomic64_64.h:46
# tools/bpf/runqslower/.output/runqslower 200 Tracing run queue latency higher than 200 us TIME COMM PID LAT(us) 08:56:22 rcu_preempt 4105 229 08:56:22 rcu_tasks_trace 4966 525 08:56:22 podman 7238 766 08:56:22 podman 2189 728 08:56:22 swapper/13 721 244 ^C # strip tools/bpf/runqslower/.output/runqslower # ls -lah tools/bpf/runqslower/.output/runqslower -rwxr-xr-x. 1 acme acme 349K tools/bpf/runqslower/.output/runqslower #
$ cat tools/perf/util/bpf_skel/lock_contention.bpf.c
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
struct task_struct *curr;
struct tstamp_data *pelem;
if (!enabled || !can_record())
return 0;
curr = bpf_get_current_task_btf();
pelem = bpf_task_storage_get(&tstamp, curr, NULL,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!pelem || pelem->lock)
return 0;
pelem->timestamp = bpf_ktime_get_ns();
pelem->lock = (__u64)ctx[0];
pelem->flags = (__u32)ctx[1];
pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
lost++;
return 0;
}
$ sudo perf lock contention -b
^C
contended total wait max wait avg wait type caller
42 192.67 us 13.64 us 4.59 us spinlock queue_work_on+0x20
23 85.54 us 10.28 us 3.72 us spinlock worker_thread+0x14a
6 13.92 us 6.51 us 2.32 us mutex kernfs_iop_permission+0x30
3 11.59 us 10.04 us 3.86 us mutex kernfs_dop_revalidate+0x3c
1 7.52 us 7.52 us 7.52 us spinlock kthread+0x115
1 7.24 us 7.24 us 7.24 us rwlock:W sys_epoll_wait+0x148
2 7.08 us 3.99 us 3.54 us spinlock delayed_work_timer_fn+0x1b
1 6.41 us 6.41 us 6.41 us spinlock idle_balance+0xa06
2 2.50 us 1.83 us 1.25 us mutex kernfs_iop_lookup+0x2f
1 1.71 us 1.71 us 1.71 us mutex kernfs_iop_getattr+0x2c
...
# perf kwork report
Total Max Max Max
Kwork Name | Cpu | Runtime |Count | runtime | runtime start | runtime end |
---------------------------------------------------------------------------------------
nvme0q5:130 | 004 | 1.101 ms | 49 | 0.051 ms | 26035.056403 s | 26035.056455 s |
amdgpu:162 | 002 | 0.176 ms | 9 | 0.046 ms | 26035.268020 s | 26035.268066 s |
nvme0q24:149 | 023 | 0.161 ms | 55 | 0.009 ms | 26035.655280 s | 26035.655288 s |
nvme0q20:145 | 019 | 0.090 ms | 33 | 0.014 ms | 26035.939018 s | 26035.939032 s |
nvme0q31:156 | 030 | 0.075 ms | 21 | 0.010 ms | 26035.052237 s | 26035.052247 s |
nvme0q8:133 | 007 | 0.062 ms | 12 | 0.021 ms | 26035.416840 s | 26035.416861 s |
nvme0q6:131 | 005 | 0.054 ms | 22 | 0.010 ms | 26035.199919 s | 26035.199929 s |
nvme0q19:144 | 018 | 0.052 ms | 14 | 0.010 ms | 26035.110615 s | 26035.110625 s |
nvme0q7:132 | 006 | 0.049 ms | 13 | 0.007 ms | 26035.125180 s | 26035.125187 s |
nvme0q18:143 | 017 | 0.033 ms | 14 | 0.007 ms | 26035.169698 s | 26035.169705 s |
nvme0q17:142 | 016 | 0.013 ms | 1 | 0.013 ms | 26035.565147 s | 26035.565160 s |
enp5s0-rx-0:164 | 006 | 0.004 ms | 4 | 0.002 ms | 26035.928882 s | 26035.928884 s |
enp5s0-tx-0:166 | 008 | 0.003 ms | 3 | 0.002 ms | 26035.870923 s | 26035.870925 s |
---------------------------------------------------------------------------------------