现象
通过外部ssh连接登录节点192.168.x.x,输入密码后,控制台马上显示last login信息,但是十几秒之后才真正登录系统,才可以在控制台执行命令,但是在等待的过程中,要是按ctr+c,则立马登录进去,只是bash环境不像完整环境,不显示正常登录的系统主机名。现象看,是登录之后执行相关命令卡顿了。另外在控制终端执行任何命令,哪怕最简单ls,都有卡顿。

系统登录初始化
从上面过程看,是用户登录后初始化步骤慢。所有用户登录系统时,初始化用户环境依赖/etc/profile文件。检查文件内容,该文件会进一步执行/etc/profile.d/*.sh /etc/profile.d/sh.local下面的脚本。HPC登录节点/etc/profile.d/目录下面,自定义加入了其他的脚本,不是新安装的操作系统自带的脚本个数。

模拟测试
-
找一个正常ssh登录的ecs机器,在/etc/profile.d下面增加一个睡眠10s的脚本
[root@nfs-server profile.d]# pwd /etc/profile.d [root@nfs-server profile.d]# [root@nfs-server profile.d]# cat a.sh sleep 10s -
ssh登录该机器,卡顿10s,但是会输入密码后也是立马显示Last login信息,和登录HPC节点类似。

从上面现象看,有可能是/etc/profile.d/的自定义脚本执行耗时过长导致ssh登录慢。
执行命令系统调用跟踪
- 测试在HPC节点/root/rpm目录下面执行ls命令,耗时1s以上,strace跟踪系统调用输出如下(-T参数统计系统调用耗时)
[root@ln01 rpm]# pwd
/root/rpm
[root@ln01 rpm]# strace -tt -T -v -f ls
19:48:57.283001 execve("/usr/bin/ls", ["ls"], ["XDG_SESSION_ID=15", "HOSTNAME=ln01", "TERM=xterm-256color", "SHELL=/bin/bash", "HISTSIZE=1000", "SSH_CLIENT=192.168.173.198 59212"..., "SSH_TTY=/dev/pts/10", "CT_LSF_TOP=__LSF_TOP__", "JRE_HOME=/opt/clustertech/chess/"..., "JAVA_OPTS= -Dctcloud.top=/opt/cl"..., "USER=root", "LS_COLORS=rs=0:di=38;5;27:ln=38;"..., "JPDA_SUSPEND=n", "LD_LIBRARY_PATH=/home/apps/clust"..., "JPDA_ADDRESS=8000", "CT_GUI_TOP=/opt/clustertech/ches"..., "MAIL=/var/spool/mail/root", "PATH=/opt/clustertech/chess/repo"..., "DATABASE_DIR=/opt/clustertech/ch"..., "PWD=/root/rpm", "LANG=en_US.UTF-8", "MODULEPATH=/usr/share/Modules/mo"..., "LOADEDMODULES=", "HISTCONTROL=ignoredups", "SHLVL=1", "HOME=/root", "CHESS_NG_ROOT=/home/apps/cluster"..., "NO_VNC_PORT=6080", "LOGNAME=root", "XDG_DATA_DIRS=/root/.local/share"..., "SSH_CONNECTION=192.168.173.198 5"..., "MODULESHOME=/usr/share/Modules", "LESSOPEN=||/usr/bin/lesspipe.sh "..., "USER_HOME_DIR=/home", "VNC_TYPE=novnc", "XDG_RUNTIME_DIR=/run/user/0", "BASH_FUNC_module()=() { eval `/"..., "_=/usr/bin/strace", "OLDPWD=/root"]) = 0 <0.000177>
19:48:57.283427 brk(NULL) = 0x670000 <0.000007>
19:48:57.283499 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b84f4e22000 <0.000008>
19:48:57.283558 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) <0.000009>
19:48:57.283675 open("/home/apps/clustertech/chess//ng/bin/tls/x86_64/libselinux.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.125462>
19:48:57.409201 stat("/home/apps/clustertech/chess//ng/bin/tls/x86_64", 0x7fff61e268d0) = -1 ENOENT (No such file or directory) <0.086613>
19:48:57.495880 open("/home/apps/clustertech/chess//ng/bin/tls/libselinux.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.063862>
19:48:57.559796 stat("/home/apps/clustertech/chess//ng/bin/tls", 0x7fff61e268d0) = -1 ENOENT (No such file or directory) <0.121587>
19:48:57.681433 open("/home/apps/clustertech/chess//ng/bin/x86_64/libselinux.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.084954>
19:48:57.766438 stat("/home/apps/clustertech/chess//ng/bin/x86_64", 0x7fff61e268d0) = -1 ENOENT (No such file or directory) <0.092809>
19:48:57.859300 open("/home/apps/clustertech/chess//ng/bin/libselinux.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.080747>
19:48:57.940112 stat("/home/apps/clustertech/chess//ng/bin", {st_dev=makedev(0, 42), st_ino=6973949742946233390, st_mode=S_IFDIR|0775, st_nlink=2, st_uid=0, st_gid=0, st_blksize=524288, st_blocks=1, st_size=20, st_atime=1678415586 /* 2023-03-10T10:33:06+0800 */, st_atime_nsec=0, st_mtime=1689902605 /* 2023-07-21T09:23:25+0800 */, st_mtime_nsec=0, st_ctime=1689902605 /* 2023-07-21T09:23:25+0800 */, st_ctime_nsec=0}) = 0 <0.086231>
19:48:58.026460 open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000013>
19:48:58.026538 fstat(3, {st_dev=makedev(8, 84), st_ino=153331770, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=208, st_size=105858, st_atime=1697193601 /* 2023-10-13T18:40:01.052175236+0800 */, st_atime_nsec=52175236, st_mtime=1671532686 /* 2022-12-20T18:38:06.042512557+0800 */, st_mtime_nsec=42512557, st_ctime=1671532686 /* 2022-12-20T18:38:06.042512557+0800 */, st_ctime_nsec=42512557}) = 0 <0.000012>
19:48:58.026605 mmap(NULL, 105858, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b84f4e23000 <0.000012>
19:48:58.026661 close(3) = 0 <0.000008>
19:48:58.026709 open("/lib64/libselinux.so.1", O_RDONLY|O_CLOEXEC) = 3 <0.000011>
19:48:58.026755 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220j\0\0\0\0\0\0"..., 832) = 832 <0.000009>
19:48:58.026810 fstat(3, {st_dev=makedev(8, 84), st_ino=187177, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=312, st_size=155744, st_atime=1697187177 /* 2023-10-13T16:52:57.512177730+0800 */, st_atime_nsec=512177730, st_mtime=1585711000 /* 2020-04-01T11:16:40+0800 */, st_mtime_nsec=0, st_ctime=1670747096 /* 2022-12-11T16:24:56.133050965+0800 */, st_ctime_nsec=133050965}) = 0 <0.000008>
19:48:58.026867 mmap(NULL, 2255216, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b84f5024000 <0.000011>
19:48:58.026911 mprotect(0x2b84f5048000, 2093056, PROT_NONE) = 0 <0.000013>
19:48:58.026956 mmap(0x2b84f5247000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x23000) = 0x2b84f5247000 <0.000014>
19:48:58.027006 mmap(0x2b84f5249000, 6512, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b84f5249000 <0.000010>
19:48:58.027054 close(3) = 0 <0.000008>
19:48:58.027099 open("/home/apps/clustertech/chess//ng/bin/libcap.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.157972>
19:48:58.185131 open("/lib64/libcap.so.2", O_RDONLY|O_CLOEXEC) = 3 <0.000012>
19:48:58.185209 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\26\0\0\0\0\0\0"..., 832) = 832 <0.000011>
19:48:58.185257 fstat(3, {st_dev=makedev(8, 84), st_ino=187438, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=40, st_size=20048, st_atime=1697185801 /* 2023-10-13T16:30:01.444621708+0800 */, st_atime_nsec=444621708, st_mtime=1585710513 /* 2020-04-01T11:08:33+0800 */, st_mtime_nsec=0, st_ctime=1670747108 /* 2022-12-11T16:25:08.076051689+0800 */, st_ctime_nsec=76051689}) = 0 <0.000012>
19:48:58.185324 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b84f4e3d000 <0.000009>
19:48:58.185373 mmap(NULL, 2114112, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b84f524b000 <0.000010>
19:48:58.185415 mprotect(0x2b84f524f000, 2093056, PROT_NONE) = 0 <0.000013>
19:48:58.185457 mmap(0x2b84f544e000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x2b84f544e000 <0.000011>
19:48:58.185508 close(3) = 0 <0.000008>
19:48:58.185550 open("/home/apps/clustertech/chess//ng/bin/libacl.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.042436>
19:48:58.228028 open("/lib64/libacl.so.1", O_RDONLY|O_CLOEXEC) = 3 <0.000011>
19:48:58.228091 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\37\0\0\0\0\0\0"..., 832) = 832 <0.000012>
19:48:58.228140 fstat(3, {st_dev=makedev(8, 84), st_ino=187436, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=80, st_size=37064, st_atime=1697188729 /* 2023-10-13T17:18:49.312129104+0800 */, st_atime_nsec=312129104, st_mtime=1585707373 /* 2020-04-01T10:16:13+0800 */, st_mtime_nsec=0, st_ctime=1670747107 /* 2022-12-11T16:25:07.939051681+0800 */, st_ctime_nsec=939051681}) = 0 <0.000012>
19:48:58.228198 mmap(NULL, 2130560, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b84f5450000 <0.000010>
19:48:58.228241 mprotect(0x2b84f5457000, 2097152, PROT_NONE) = 0 <0.000013>
19:48:58.228284 mmap(0x2b84f5657000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7000) = 0x2b84f5657000 <0.000009>
19:48:58.228332 close(3) = 0 <0.000007>
19:48:58.228373 open("/home/apps/clustertech/chess//ng/bin/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.017137>
19:48:58.245561 open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000013>
19:48:58.245629 read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`&\2\0\0\0\0\0"..., 832) = 832 <0.000012>
19:48:58.245678 fstat(3, {st_dev=makedev(8, 84), st_ino=186754, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=4216, st_size=2156272, st_atime=1697185801 /* 2023-10-13T16:30:01.166622406+0800 */, st_atime_nsec=166622406, st_mtime=1601432063 /* 2020-09-30T10:14:23+0800 */, st_mtime_nsec=0, st_ctime=1670747090 /* 2022-12-11T16:24:50.516050624+0800 */, st_ctime_nsec=516050624}) = 0 <0.000012>
19:48:58.245738 mmap(NULL, 3985920, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b84f5659000 <0.000012>
19:48:58.245783 mprotect(0x2b84f581d000, 2093056, PROT_NONE) = 0 <0.000020>
19:48:58.245841 mmap(0x2b84f5a1c000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c3000) = 0x2b84f5a1c000 <0.000013>
19:48:58.245890 mmap(0x2b84f5a22000, 16896, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b84f5a22000 <0.000010>
19:48:58.245936 close(3) = 0 <0.000008>
19:48:58.245981 open("/home/apps/clustertech/chess//ng/bin/libpcre.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.067200>
19:48:58.313235 open("/lib64/libpcre.so.1", O_RDONLY|O_CLOEXEC) = 3 <0.000012>
19:48:58.313298 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360\25\0\0\0\0\0\0"..., 832) = 832 <0.000008>
19:48:58.313344 fstat(3, {st_dev=makedev(8, 84), st_ino=187168, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=792, st_size=402384, st_atime=1697187177 /* 2023-10-13T16:52:57.054178875+0800 */, st_atime_nsec=54178875, st_mtime=1501643280 /* 2017-08-02T11:08:00+0800 */, st_mtime_nsec=0, st_ctime=1670747096 /* 2022-12-11T16:24:56.071050961+0800 */, st_ctime_nsec=71050961}) = 0 <0.000008>
19:48:58.313417 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b84f4e3e000 <0.000009>
19:48:58.313462 mmap(NULL, 2494984, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b84f5a27000 <0.000011>
19:48:58.313506 mprotect(0x2b84f5a87000, 2097152, PROT_NONE) = 0 <0.000013>
19:48:58.313550 mmap(0x2b84f5c87000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x60000) = 0x2b84f5c87000 <0.000012>
19:48:58.313601 close(3) = 0 <0.000007>
19:48:58.313643 open("/home/apps/clustertech/chess//ng/bin/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.083638>
19:48:58.397333 open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3 <0.000012>
19:48:58.397398 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\16\0\0\0\0\0\0"..., 832) = 832 <0.000009>
19:48:58.397448 fstat(3, {st_dev=makedev(8, 84), st_ino=186760, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=40, st_size=19248, st_atime=1697185801 /* 2023-10-13T16:30:01.206622306+0800 */, st_atime_nsec=206622306, st_mtime=1601432062 /* 2020-09-30T10:14:22+0800 */, st_mtime_nsec=0, st_ctime=1670747090 /* 2022-12-11T16:24:50.525050624+0800 */, st_ctime_nsec=525050624}) = 0 <0.000008>
19:48:58.397509 mmap(NULL, 2109744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b84f5c89000 <0.000012>
19:48:58.397557 mprotect(0x2b84f5c8b000, 2097152, PROT_NONE) = 0 <0.000014>
19:48:58.397601 mmap(0x2b84f5e8b000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x2b84f5e8b000 <0.000011>
19:48:58.397653 close(3) = 0 <0.000008>
19:48:58.397696 open("/home/apps/clustertech/chess//ng/bin/libattr.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.060842>
19:48:58.458585 open("/lib64/libattr.so.1", O_RDONLY|O_CLOEXEC) = 3 <0.000013>
19:48:58.458661 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\23\0\0\0\0\0\0"..., 832) = 832 <0.000012>
19:48:58.458709 fstat(3, {st_dev=makedev(8, 84), st_ino=187434, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=40, st_size=19896, st_atime=1697185801 /* 2023-10-13T16:30:01.440621719+0800 */, st_atime_nsec=440621719, st_mtime=1523407242 /* 2018-04-11T08:40:42+0800 */, st_mtime_nsec=0, st_ctime=1670747107 /* 2022-12-11T16:25:07.921051680+0800 */, st_ctime_nsec=921051680}) = 0 <0.000012>
19:48:58.458768 mmap(NULL, 2113904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b84f5e8d000 <0.000016>
19:48:58.458819 mprotect(0x2b84f5e91000, 2093056, PROT_NONE) = 0 <0.000015>
19:48:58.458866 mmap(0x2b84f6090000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x2b84f6090000 <0.000013>
19:48:58.458918 close(3) = 0 <0.000007>
19:48:58.458962 open("/home/apps/clustertech/chess//ng/bin/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.077284>
19:48:58.536301 open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3 <0.000013>
19:48:58.536375 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200m\0\0\0\0\0\0"..., 832) = 832 <0.000011>
19:48:58.536423 fstat(3, {st_dev=makedev(8, 84), st_ino=186780, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=280, st_size=142144, st_atime=1697185873 /* 2023-10-13T16:31:13.618439610+0800 */, st_atime_nsec=618439610, st_mtime=1601432063 /* 2020-09-30T10:14:23+0800 */, st_mtime_nsec=0, st_ctime=1670747090 /* 2022-12-11T16:24:50.589050628+0800 */, st_ctime_nsec=589050628}) = 0 <0.000008>
19:48:58.536480 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b84f4e3f000 <0.000009>
19:48:58.536525 mmap(NULL, 2208904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b84f6092000 <0.000010>
19:48:58.536567 mprotect(0x2b84f60a9000, 2093056, PROT_NONE) = 0 <0.000012>
19:48:58.536610 mmap(0x2b84f62a8000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x2b84f62a8000 <0.000011>
19:48:58.536657 mmap(0x2b84f62aa000, 13448, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b84f62aa000 <0.000009>
19:48:58.536702 close(3) = 0 <0.000007>
19:48:58.536762 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b84f4e40000 <0.000008>
19:48:58.536803 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b84f4e41000 <0.000007>
19:48:58.536852 arch_prctl(ARCH_SET_FS, 0x2b84f4e40b00) = 0 <0.000008>
19:48:58.536958 mprotect(0x2b84f5a1c000, 16384, PROT_READ) = 0 <0.000011>
19:48:58.537028 mprotect(0x2b84f62a8000, 4096, PROT_READ) = 0 <0.000009>
19:48:58.537075 mprotect(0x2b84f6090000, 4096, PROT_READ) = 0 <0.000011>
19:48:58.537123 mprotect(0x2b84f5e8b000, 4096, PROT_READ) = 0 <0.000010>
19:48:58.537171 mprotect(0x2b84f5c87000, 4096, PROT_READ) = 0 <0.000010>
19:48:58.537215 mprotect(0x2b84f5657000, 4096, PROT_READ) = 0 <0.000010>
19:48:58.537258 mprotect(0x2b84f544e000, 4096, PROT_READ) = 0 <0.000013>
19:48:58.537308 mprotect(0x2b84f5247000, 4096, PROT_READ) = 0 <0.000010>
19:48:58.537364 mprotect(0x61a000, 4096, PROT_READ) = 0 <0.000017>
19:48:58.537433 mprotect(0x2b84f5021000, 4096, PROT_READ) = 0 <0.000015>
19:48:58.537489 munmap(0x2b84f4e23000, 105858) = 0 <0.000013>
19:48:58.537542 set_tid_address(0x2b84f4e40dd0) = 59784 <0.000008>
19:48:58.537591 set_robust_list(0x2b84f4e40de0, 24) = 0 <0.000008>
19:48:58.537636 rt_sigaction(SIGRTMIN, {sa_handler=0x2b84f6098860, sa_mask=[], sa_flags=SA_RESTORER|SA_SIGINFO, sa_restorer=0x2b84f60a1630}, NULL, 8) = 0 <0.000008>
19:48:58.537690 rt_sigaction(SIGRT_1, {sa_handler=0x2b84f60988f0, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x2b84f60a1630}, NULL, 8) = 0 <0.000008>
19:48:58.537734 rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0 <0.000008>
19:48:58.537784 getrlimit(RLIMIT_STACK, {rlim_cur=RLIM64_INFINITY, rlim_max=RLIM64_INFINITY}) = 0 <0.000009>
19:48:58.537860 statfs("/sys/fs/selinux", 0x7fff61e280c0) = -1 ENOENT (No such file or directory) <0.000011>
19:48:58.537912 statfs("/selinux", 0x7fff61e280c0) = -1 ENOENT (No such file or directory) <0.000008>
19:48:58.537998 brk(NULL) = 0x670000 <0.000008>
19:48:58.538036 brk(0x691000) = 0x691000 <0.000009>
19:48:58.538083 open("/proc/filesystems", O_RDONLY) = 3 <0.000012>
19:48:58.538135 fstat(3, {st_dev=makedev(0, 3), st_ino=4026532068, st_mode=S_IFREG|0444, st_nlink=1, st_uid=0, st_gid=0, st_blksize=1024, st_blocks=0, st_size=0, st_atime=1697197738 /* 2023-10-13T19:48:58.537374533+0800 */, st_atime_nsec=537374533, st_mtime=1697197738 /* 2023-10-13T19:48:58.537374533+0800 */, st_mtime_nsec=537374533, st_ctime=1697197738 /* 2023-10-13T19:48:58.537374533+0800 */, st_ctime_nsec=537374533}) = 0 <0.000008>
19:48:58.538193 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b84f4e23000 <0.000009>
19:48:58.538236 read(3, "nodev\tsysfs\nnodev\trootfs\nnodev\tr"..., 1024) = 408 <0.000019>
19:48:58.538287 stat("/etc/sysconfig/64bit_strstr_via_64bit_strstr_sse2_unaligned", 0x7fff61e275f0) = -1 ENOENT (No such file or directory) <0.000009>
19:48:58.538336 read(3, "", 1024) = 0 <0.000008>
19:48:58.538381 close(3) = 0 <0.000009>
19:48:58.538420 munmap(0x2b84f4e23000, 4096) = 0 <0.000011>
19:48:58.538461 access("/etc/selinux/config", F_OK) = 0 <0.000009>
19:48:58.538532 open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 <0.000009>
19:48:58.538574 fstat(3, {st_dev=makedev(8, 84), st_ino=187086, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=207312, st_size=106172832, st_atime=1697188801 /* 2023-10-13T17:20:01.159935111+0800 */, st_atime_nsec=159935111, st_mtime=1670747092 /* 2022-12-11T16:24:52.357050736+0800 */, st_mtime_nsec=357050736, st_ctime=1670747092 /* 2022-12-11T16:24:52.357050736+0800 */, st_ctime_nsec=357050736}) = 0 <0.000008>
19:48:58.538628 mmap(NULL, 106172832, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b84f62ae000 <0.000010>
19:48:58.538672 close(3) = 0 <0.000008>
19:48:58.538740 ioctl(1, TCGETS, {c_iflags=0x500, c_oflags=0x5, c_cflags=0xbf, c_lflags=0x8a3b, c_line=0, c_cc="\x03\x1c\x7f\x15\x04\x00\x01\x00\x11\x13\x1a\x00\x12\x0f\x17\x16\x00\x00\x00"}) = 0 <0.000010>
19:48:58.538805 ioctl(1, TIOCGWINSZ, {ws_row=41, ws_col=134, ws_xpixel=0, ws_ypixel=0}) = 0 <0.000009>
19:48:58.538870 openat(AT_FDCWD, ".", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3 <0.000009>
19:48:58.538918 getdents(3, [{d_ino=406651408, d_off=10, d_reclen=24, d_name=".", d_type=DT_DIR}, {d_ino=268647009, d_off=12, d_reclen=24, d_name="..", d_type=DT_DIR}, {d_ino=406651409, d_off=18, d_reclen=56, d_name="openblas-0.3.2-1.sdl7.x86_64.rpm", d_type=DT_REG}, {d_ino=406651410, d_off=512, d_reclen=64, d_name="openblas-devel-0.3.3-2.el7.x86_64.rpm", d_type=DT_REG}], 32768) = 168 <0.000012>
19:48:58.538976 getdents(3, [], 32768) = 0 <0.000008>
19:48:58.539016 close(3) = 0 <0.000008>
19:48:58.539075 fstat(1, {st_dev=makedev(0, 12), st_ino=13, st_mode=S_IFCHR|0620, st_nlink=1, st_uid=0, st_gid=5, st_blksize=1024, st_blocks=0, st_rdev=makedev(136, 10), st_atime=1697197736 /* 2023-10-13T19:48:56.575893370+0800 */, st_atime_nsec=575893370, st_mtime=1697197736 /* 2023-10-13T19:48:56.575893370+0800 */, st_mtime_nsec=575893370, st_ctime=1697170974 /* 2023-10-13T12:22:54.575893370+0800 */, st_ctime_nsec=575893370}) = 0 <0.000008>
19:48:58.539128 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b84f4e23000 <0.000008>
19:48:58.539175 write(1, "openblas-0.3.2-1.sdl7.x86_64.rpm"..., 72openblas-0.3.2-1.sdl7.x86_64.rpm openblas-devel-0.3.3-2.el7.x86_64.rpm
) = 72 <0.000021>
19:48:58.539236 close(1) = 0 <0.000008>
19:48:58.539278 munmap(0x2b84f4e23000, 4096) = 0 <0.000011>
19:48:58.539320 close(2) = 0 <0.000008>
19:48:58.539374 exit_group(0) = ?
19:48:58.539499 +++ exited with 0 +++
-t 在每行输出的前面,显示秒级别的时间
-tt 在每行输出的前面,显示毫秒级别的时间
-ttt 在每行输出的前面,显示微秒级别的时间
-T 显示每次系统调用所花费的时间
-v 对于某些相关调用,把完整的环境变量,文件stat结构等打出来。
-f 跟踪目标进程,以及目标进程创建的所有子进程
-e 控制要跟踪的事件和跟踪行为,比如指定要跟踪的系统调用名称
-e trace=set 只跟踪指定的系统 调用.例如:-e trace=open,close,read,write表示只跟踪这四个系统调用.默认的为set=all.
-e trace=file 只跟踪有关文件操作的系统调用.
-e trace=process 只跟踪有关进程管理的系统调用.
-e trace=network 跟踪与网络有关的所有系统调用,比如socket/sendto/connect
-e trace=signal 跟踪所有与系统信号有关的系统调用,比如kill/sigaction
-e trace=ipc 跟踪所有与进程通讯有关的系统调用.
-e trace=desc 和文件描述符相关,比如write/read/select/epoll等
-o 把strace的输出单独写到指定的文件
-s 当系统调用的某个参数是字符串时,最多输出指定长度的内容,默认是32个字节
-p 指定要跟踪的进程pid, 要同时跟踪多个pid, 重复多次-p选项即可。
-d 输出strace关于标准错误的调试信息.
注意输出中标黄部分,表示系统调用耗时比较多的部分,耗时比较多的系统调用,都和/home/apps/clustertech/chess//ng/bin/目录关联,在该目录下面检索查找相关文件失效。
19:48:58.027099 open("/home/apps/clustertech/chess//ng/bin/libcap.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.157972>
如上,执行ls命令,打开/home/apps/clustertech/chess//ng/bin/libcap.so.2报错返回-1,显示文件不存在,耗时0.15s。
- 为何执行ls命令会去/home/apps/clustertech/chess/ng/bin目录查找文件。这个是因为ls命令为动态编译,依赖系统库文件才能正常运行,通过ldd命令检查其依赖库文件如下,标黄部分libcap.so.2正是其依赖的一个库文件。
[root@ln01 rpm]# ldd /usr/bin/ls
linux-vdso.so.1 => (0x00007ffe29dae000)
libselinux.so.1 => /lib64/libselinux.so.1 (0x00002ac2a303c000)
libcap.so.2 => /lib64/libcap.so.2 (0x00002ac2a3263000)
libacl.so.1 => /lib64/libacl.so.1 (0x00002ac2a3468000)
libc.so.6 => /lib64/libc.so.6 (0x00002ac2a3671000)
libpcre.so.1 => /lib64/libpcre.so.1 (0x00002ac2a3a3f000)
libdl.so.2 => /lib64/libdl.so.2 (0x00002ac2a3ca1000)
/lib64/ld-linux-x86-64.so.2 (0x00002ac2a2e18000)
libattr.so.1 => /lib64/libattr.so.1 (0x00002ac2a3ea5000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00002ac2a40aa000)
- ldd显示的ls命令依赖的库文件地址在 /lib64/目录,为何去/home/apps/clustertech/chess/ng/bin目录下检索。linux系统库文件检索路径,由LD_LIBRARY_PATH环境变量和/etc/ld.so.conf配置文件控制。而用户的LD_LIBRARY_PATH环境变量被修改为了/home/apps/clustertech/chess//ng/bin。
[root@ln01 profile.d]# env |grep -i LD
LD_LIBRARY_PATH=/home/apps/clustertech/chess//ng/bin
OLDPWD=/root
- 调查LD_LIBRARY_PATH环境变量修改,是在/etc/profile.d目录下的自定义脚本chess-ng.sh引入的。

chess-ng.sh 脚本中,配置语句export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}$CHESS_NG_ROOT/bin
$CHESS_NG_ROOT/bin目录下并没有库文件,是否可以去掉该语句。
执行ls命令,strace显示在open $CHESS_NG_ROOT/bin目录下不存在的库文件时,为何耗时很多?/home目录为beegfs文件系统,和存储关联?
[root@ln01 ~]# df -Th |grep home
beegfs_nodev beegfs 1.9P 1.6P 342T 83% /home
pwd命令执行不卡顿,执行跟踪如下
[root@ln01 ~]# strace -tt -T -v -f pwd
19:03:53.348422 execve("/usr/bin/pwd", ["pwd"], ["XDG_SESSION_ID=1113", "HOSTNAME=ln01", "TERM=xterm-256color", "SHELL=/bin/bash", "HISTSIZE=1000", "SSH_CLIENT=10.20.2.88 62027 22", "SSH_TTY=/dev/pts/30", "CT_LSF_TOP=__LSF_TOP__", "JRE_HOME=/opt/clustertech/chess/"..., "JAVA_OPTS= -Dctcloud.top=/opt/cl"..., "USER=root", "LS_COLORS=rs=0:di=38;5;27:ln=38;"..., "JPDA_SUSPEND=n", "LD_LIBRARY_PATH=/home/apps/clust"..., "JPDA_ADDRESS=8000", "CT_GUI_TOP=/opt/clustertech/ches"..., "MAIL=/var/spool/mail/root", "PATH=/opt/clustertech/chess/repo"..., "DATABASE_DIR=/opt/clustertech/ch"..., "PWD=/root", "LANG=en_US.UTF-8", "MODULEPATH=/usr/share/Modules/mo"..., "LOADEDMODULES=", "HISTCONTROL=ignoredups", "SHLVL=1", "HOME=/root", "CHESS_NG_ROOT=/home/apps/cluster"..., "NO_VNC_PORT=6080", "LOGNAME=root", "XDG_DATA_DIRS=/root/.local/share"..., "SSH_CONNECTION=10.20.2.88 62027 "..., "MODULESHOME=/usr/share/Modules", "LESSOPEN=||/usr/bin/lesspipe.sh "..., "USER_HOME_DIR=/home", "VNC_TYPE=novnc", "XDG_RUNTIME_DIR=/run/user/0", "BASH_FUNC_module()=() { eval `/"..., "_=/usr/bin/strace"]) = 0 <0.000240>
19:03:53.348914 brk(NULL) = 0xf22000 <0.000012>
19:03:53.349006 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2acf4cc8a000 <0.000013>
19:03:53.349095 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) <0.000023>
19:03:53.349239 open("/home/apps/clustertech/chess//ng/bin/tls/x86_64/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.095847>
19:03:53.445157 stat("/home/apps/clustertech/chess//ng/bin/tls/x86_64", 0x7ffe8bba7c10) = -1 ENOENT (No such file or directory) <0.097284>
19:03:53.542509 open("/home/apps/clustertech/chess//ng/bin/tls/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.100245>
19:03:53.642831 stat("/home/apps/clustertech/chess//ng/bin/tls", 0x7ffe8bba7c10) = -1 ENOENT (No such file or directory) <0.077023>
19:03:53.719911 open("/home/apps/clustertech/chess//ng/bin/x86_64/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.041933>
19:03:53.761897 stat("/home/apps/clustertech/chess//ng/bin/x86_64", 0x7ffe8bba7c10) = -1 ENOENT (No such file or directory) <0.110116>
19:03:53.872064 open("/home/apps/clustertech/chess//ng/bin/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) <0.044539>
19:03:53.916653 stat("/home/apps/clustertech/chess//ng/bin", {st_dev=makedev(0, 42), st_ino=6973949742946233390, st_mode=S_IFDIR|0775, st_nlink=2, st_uid=0, st_gid=0, st_blksize=524288, st_blocks=1, st_size=20, st_atime=1678415586 /* 2023-03-10T10:33:06+0800 */, st_atime_nsec=0, st_mtime=1689902605 /* 2023-07-21T09:23:25+0800 */, st_mtime_nsec=0, st_ctime=1689902605 /* 2023-07-21T09:23:25+0800 */, st_ctime_nsec=0}) = 0 <0.117298>
19:03:54.034030 open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000012>
19:03:54.034105 fstat(3, {st_dev=makedev(8, 84), st_ino=153331770, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=208, st_size=105858, st_atime=1697366401 /* 2023-10-15T18:40:01.109574700+0800 */, st_atime_nsec=109574700, st_mtime=1671532686 /* 2022-12-20T18:38:06.042512557+0800 */, st_mtime_nsec=42512557, st_ctime=1671532686 /* 2022-12-20T18:38:06.042512557+0800 */, st_ctime_nsec=42512557}) = 0 <0.000011>
19:03:54.034171 mmap(NULL, 105858, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2acf4cc8b000 <0.000011>
19:03:54.034230 close(3) = 0 <0.000008>
19:03:54.034281 open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000013>
19:03:54.034328 read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`&\2\0\0\0\0\0"..., 832) = 832 <0.000009>
19:03:54.034374 fstat(3, {st_dev=makedev(8, 84), st_ino=186754, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=4216, st_size=2156272, st_atime=1697358601 /* 2023-10-15T16:30:01.035128709+0800 */, st_atime_nsec=35128709, st_mtime=1601432063 /* 2020-09-30T10:14:23+0800 */, st_mtime_nsec=0, st_ctime=1670747090 /* 2022-12-11T16:24:50.516050624+0800 */, st_ctime_nsec=516050624}) = 0 <0.000008>
19:03:54.034434 mmap(NULL, 3985920, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2acf4ce8c000 <0.000011>
19:03:54.034478 mprotect(0x2acf4d050000, 2093056, PROT_NONE) = 0 <0.000013>
19:03:54.034524 mmap(0x2acf4d24f000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c3000) = 0x2acf4d24f000 <0.000014>
19:03:54.034576 mmap(0x2acf4d255000, 16896, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2acf4d255000 <0.000010>
19:03:54.034624 close(3) = 0 <0.000007>
19:03:54.034676 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2acf4cca5000 <0.000008>
19:03:54.034721 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2acf4cca6000 <0.000008>
19:03:54.034766 arch_prctl(ARCH_SET_FS, 0x2acf4cca5b80) = 0 <0.000008>
19:03:54.034885 mprotect(0x2acf4d24f000, 16384, PROT_READ) = 0 <0.000010>
19:03:54.034933 mprotect(0x606000, 4096, PROT_READ) = 0 <0.000010>
19:03:54.034977 mprotect(0x2acf4ce89000, 4096, PROT_READ) = 0 <0.000012>
19:03:54.035020 munmap(0x2acf4cc8b000, 105858) = 0 <0.000019>
19:03:54.035147 brk(NULL) = 0xf22000 <0.000008>
19:03:54.035188 brk(0xf43000) = 0xf43000 <0.000009>
19:03:54.035231 brk(NULL) = 0xf43000 <0.000008>
19:03:54.035278 open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 <0.000010>
19:03:54.035323 fstat(3, {st_dev=makedev(8, 84), st_ino=187086, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=207312, st_size=106172832, st_atime=1697361601 /* 2023-10-15T17:20:01.127241737+0800 */, st_atime_nsec=127241737, st_mtime=1670747092 /* 2022-12-11T16:24:52.357050736+0800 */, st_mtime_nsec=357050736, st_ctime=1670747092 /* 2022-12-11T16:24:52.357050736+0800 */, st_ctime_nsec=357050736}) = 0 <0.000008>
19:03:54.035377 mmap(NULL, 106172832, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2acf4d25a000 <0.000009>
19:03:54.035420 close(3) = 0 <0.000008>
19:03:54.035489 getcwd("/root", 4096) = 6 <0.000009>
19:03:54.035540 fstat(1, {st_dev=makedev(0, 12), st_ino=33, st_mode=S_IFCHR|0620, st_nlink=1, st_uid=0, st_gid=5, st_blksize=1024, st_blocks=0, st_rdev=makedev(136, 30), st_atime=1697367824 /* 2023-10-15T19:03:44.933094389+0800 */, st_atime_nsec=933094389, st_mtime=1697367832 /* 2023-10-15T19:03:52.933094389+0800 */, st_mtime_nsec=933094389, st_ctime=1697367702 /* 2023-10-15T19:01:42.933094389+0800 */, st_ctime_nsec=933094389}) = 0 <0.000008>
19:03:54.035595 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2acf4cc8b000 <0.000008>
19:03:54.035640 write(1, "/root\n", 6/root
) = 6 <0.000023>
19:03:54.035706 close(1) = 0 <0.000008>
19:03:54.035759 munmap(0x2acf4cc8b000, 4096) = 0 <0.000013>
19:03:54.035814 close(2) = 0 <0.000008>
19:03:54.035860 exit_group(0) = ?
19:03:54.035966 +++ exited with 0 +++
[root@ln01 ~]#
[root@ln01 ~]# pwd
/root
[root@ln01 ~]# ldd /usr/bin/pwd
linux-vdso.so.1 => (0x00007ffd9cbcc000)
libc.so.6 => /lib64/libc.so.6 (0x00002b7b0e113000)
/lib64/ld-linux-x86-64.so.2 (0x00002b7b0deef000)
临时去掉 LD_LIBRARY_PATH环境变量
unset LD_LIBRARY_PATH
ls
经测试验证,在终端通过unset LD_LIBRARY_PATH临时去掉该环境变量后,命令执行暂未感知卡顿。
总结
从现象来看,是库路径环境变量配置,指向了一个错误的路径引用,该路径的文件系统并非常见的ext4或者xfs文件系统,而是HPC常用的BeeGFS文件系统,该文件系统为并行文件系统,怀疑是该文件系统层面根因导致的上述问题,潜在可能性能层面有异常,待进一步验证。

被折叠的 条评论
为什么被折叠?



