2.1 环境检查
环境检查
2.1.1 操作系统版本检查
目前内部测试兼容如下版本:CentOS 7.5、CentOS 7.6、Red Hat 7.6。查看操作系统版本的命令如下:
# cat /etc/redhat-release
CentOS Linux release 7.5.1804 (Core)
2.1.2 查看防火墙、Selinux状态,确保关闭
正常情况下,数据中心内部的防火墙、SELinux是关闭的;如有特殊安全需求,需要先确认开放哪些端口,再做进一步处理。
# getenforce
Disabled
# grep 'SELINUX=disabled' /etc/sysconfig/selinux
SELINUX=disabled
# systemctl is-enabled firewalld
disabled
# systemctl status firewalld
● firewalld.service - firewalld - dynamic firewall daemon
Loaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; vendor preset: enabled)
Active: inactive (dead)
Docs: man:firewalld(1)
2.1.3 检查所有主机的/etc/hosts一致性
确保域名和IP映射正确,通过域名能正常解析IP,并能正常通讯。
在每台主机上执行以下命令(netease-nn1请替换为实际的主机域名):各主机的md5sum结果应一致,getent hosts应返回预期的IP,ping应能正常通讯。
# cat /etc/hosts
# md5sum /etc/hosts
# getent hosts netease-nn1
# ping -c 3 netease-nn1
2.1.4 集群内部主机时钟同步检查
检查当前主机的时钟同步情况:使用ntpd服务做时间同步时,使用ntpq -p命令查看;使用chronyd服务做时间同步时,通过chronyc sources -v命令查看。命令输出中带有*标识的时钟服务器,表示本机已与该服务器完成时间同步。如果命令执行报错,表示本机未做时间同步相关的配置,需要先配置时间同步,再进行集群的安装。
# ntpq -p
remote refid st t when poll reach delay offset jitter
==============================================================================
+common-dns0-vip 59.111.239.151 4 u 601 1024 377 2.451 -0.026 0.558
*common-dns1-vip 59.111.239.150 4 u 893 1024 377 2.375 -0.187 0.294
10.160.247.9 .STEP. 16 u - 1024 0 0.000 0.000 0.000
+sa-file-server. 59.111.239.150 4 u 247 1024 377 29.942 -0.038 0.229
puppet.bjyz.163 .STEP. 16 u - 1024 0 0.000 0.000 0.000
+tunnel-t-vip.bj 59.111.239.150 4 u 351 1024 377 29.976 -0.030 0.228
epay-internalpr .STEP. 16 u - 1024 0 0.000 0.000 0.000
epay-internalpr .STEP. 16 u - 1024 0 0.000 0.000 0.000
10.170.206.253 .STEP. 16 u - 1024 0 0.000 0.000 0.000
10.170.206.254 .STEP. 16 u - 1024 0 0.000 0.000 0.000
10.130.18.253 .STEP. 16 u - 1024 0 0.000 0.000 0.000
10.130.18.254 .STEP. 16 u - 1024 0 0.000 0.000 0.000
-tunnel-t-vip.bj 59.111.239.150 4 u 448 1024 377 33.611 -1.873 0.243
# chronyc sources -v
210 Number of sources = 4
.-- Source mode '^' = server, '=' = peer, '#' = local clock.
/ .- Source state '*' = current synced, '+' = combined , '-' = not combined,
| / '?' = unreachable, 'x' = time may be in error, '~' = time too variable.
|| .- xxxx [ yyyy ] +/- zzzz
|| Reachability register (octal) -. | xxxx = adjusted offset,
|| Log2(Polling interval) --. | | yyyy = measured offset,
|| \ | | zzzz = estimated error.
|| | | \
MS Name/IP address Stratum Poll Reach LastRx Last sample
===============================================================================
^* 139.199.215.251 2 10 377 95 +1234us[+1348us] +/- 24ms
^- 47.241.41.246 2 10 174 60m +3091us[+3384us] +/- 100ms
^- fluffykins.positive-inte> 2 10 377 817 +8863us[+8975us] +/- 150ms
^- ntp.wdc1.us.leaseweb.net 2 10 277 657 +6772us[+6884us] +/- 231ms
2.1.5 检查/etc/fstab文件、数据盘挂载路径、容量、inode数
检查/etc/fstab文件配置是否正确,确认没有遗漏数据盘对应的条目。通过执行mount -a验证/etc/fstab文件的正确性,没有输出信息表示/etc/fstab文件编写正常。通过df -TH验证每个数据盘挂载的路径正确,且每个数据盘的容量符合预期;df -THi用于检查数据盘格式化后inode数量是否异常:作为参考,4TB的磁盘正常格式化后inode数在245M左右,如果只有1M左右则是异常的,需要重新正确格式化,使inode数量恢复正常。
# cat /etc/fstab
UUID=6d4f43a4-2167-44f7-8f42-0cf5e7f5ef50 / ext4 defaults 1 1
UUID=c0f30fed-8665-4b09-a325-95b7e1804276 /home ext4 defaults 1 2
UUID=40e44a89-81fb-4460-abbf-d7dca7f634b6 /tmp ext4 defaults 1 2
UUID=ea394126-570c-4111-b383-310346255907 /var ext4 defaults 1 2
UUID=985d3e1b-2ff5-4599-8e9e-35477fa0f464 swap swap defaults 0 0
UUID=17a3c7b6-7a1f-408a-a481-ce38d87aad0f /mnt/dfs/0 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=d97b30f2-394e-4191-9fba-4bdb8a05d82d /mnt/dfs/1 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=ff536fd4-721d-4d53-af84-7b3c96c979b2 /mnt/dfs/2 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=b3fd3767-389c-45ab-8008-84a1cbcc0763 /mnt/dfs/3 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=8c33fd5c-23dc-4335-b6eb-c326aba23f59 /mnt/dfs/4 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=b1f6cff1-bafe-4d64-acbd-4f4e815a8427 /mnt/dfs/5 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=f5e3c14b-c383-4d06-853d-4e76b3f748fe /mnt/dfs/6 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=7ea23cf0-f480-413e-a6db-821c7b4b4e2c /mnt/dfs/7 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=45392e01-ceba-4b74-b99d-a27a1b1b8156 /mnt/dfs/8 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=6935be8b-3beb-41a5-8c92-d985651eabd6 /mnt/dfs/9 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=1bcb53b2-6587-492b-9be9-1f7a54f0bfb4 /mnt/dfs/10 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
UUID=dcd91347-d45c-40fc-bdb2-7adf11ed3973 /mnt/dfs/11 ext4 rw,nofail,noatime,nodiratime,nobarrier,data=writeback,nodev,inode_readahead_blks=128 0 0
# mount -a
# df -THi
Filesystem Type Inodes IUsed IFree IUse% Mounted on
/dev/sda1 ext4 2.1M 113k 2.0M 6% /
/dev/sdd1 ext4 245M 224k 244M 1% /mnt/dfs/2
/dev/sdf1 ext4 245M 552k 244M 1% /mnt/dfs/4
/dev/sdb1 ext4 245M 212k 244M 1% /mnt/dfs/0
/dev/sde1 ext4 245M 237k 244M 1% /mnt/dfs/3
/dev/sdc1 ext4 245M 206k 244M 1% /mnt/dfs/1
/dev/sdl1 ext4 245M 146k 245M 1% /mnt/dfs/10
/dev/sdk1 ext4 245M 146k 245M 1% /mnt/dfs/9
/dev/sdh1 ext4 245M 146k 245M 1% /mnt/dfs/6
/dev/sdm1 ext4 245M 146k 245M 1% /mnt/dfs/11
/dev/sda3 ext4 2.1M 35 2.1M 1% /tmp
/dev/sda5 ext4 2.1M 15k 2.1M 1% /var
/dev/sda6 ext4 29M 25k 29M 1% /home
/dev/sdj1 ext4 245M 146k 245M 1% /mnt/dfs/8
/dev/sdg1 ext4 245M 146k 245M 1% /mnt/dfs/5
/dev/sdi1 ext4 245M 146k 245M 1% /mnt/dfs/7
# df -TH
Filesystem Type Size Used Avail Use% Mounted on
/dev/sda1 ext4 34G 22G 11G 67% /
/dev/sdd1 ext4 4.0T 126G 3.9T 4% /mnt/dfs/2
/dev/sdf1 ext4 4.0T 155G 3.8T 4% /mnt/dfs/4
/dev/sdb1 ext4 4.0T 293G 3.7T 8% /mnt/dfs/0
/dev/sde1 ext4 4.0T 159G 3.8T 5% /mnt/dfs/3
/dev/sdc1 ext4 4.0T 212G 3.8T 6% /mnt/dfs/1
/dev/sdl1 ext4 4.0T 79G 3.9T 2% /mnt/dfs/10
/dev/sdk1 ext4 4.0T 79G 3.9T 2% /mnt/dfs/9
/dev/sdh1 ext4 4.0T 79G 3.9T 2% /mnt/dfs/6
/dev/sdm1 ext4 4.0T 78G 3.9T 2% /mnt/dfs/11
/dev/sda3 ext4 34G 51M 32G 1% /tmp
/dev/sda5 ext4 34G 14G 19G 44% /var
/dev/sda6 ext4 455G 8.1G 424G 2% /home
/dev/sdj1 ext4 4.0T 78G 3.9T 2% /mnt/dfs/8
/dev/sdg1 ext4 4.0T 79G 3.9T 3% /mnt/dfs/5
/dev/sdi1 ext4 4.0T 79G 3.9T 3% /mnt/dfs/7
2.1.6 检查主机内存大小,及swap分区分配情况
检查主机的内存容量是否正常,一般大数据节点内存不低于125GB;swap交换分区一般不要超过4GB,有条件的话,直接永久禁用swap分区。曾有客户现场由于物理内存不足、组件超额使用物理内存,且swap配置为32GB,内存不足时swap空间基本耗尽,经常导致服务器长时间hang住、没有响应。
# free -h
total used free shared buff/cache available
Mem: 125G 110G 438M 4.1G 14G 8.0G
Swap: 4.0G 0B 4.0G
2.1.6 资源限制检查
Linux系统默认给用户分配的系统资源较小,如最大打开文件句柄数为1024,最大进程数为4096,在大数据场景下远远不够。修改完/etc/security/limits.d/20-nproc.conf后,用户需要重新登录shell,再通过ulimit -a验证资源限制修改是否生效。
# cat /etc/security/limits.d/20-nproc.conf
# Default limit for number of user's processes to prevent
# accidental fork bombs.
# See rhbz #432903 for reasoning.
* soft nproc 131072
* soft nofile 655360
* soft memlock unlimited
root soft nproc unlimited
# ulimit -a
core file size (blocks, -c) unlimited
data seg size (kbytes, -d) unlimited
scheduling priority (-e) 0
file size (blocks, -f) unlimited
pending signals (-i) 514519
max locked memory (kbytes, -l) unlimited
max memory size (kbytes, -m) unlimited
open files (-n) 1048576
pipe size (512 bytes, -p) 8
POSIX message queues (bytes, -q) 819200
real-time priority (-r) 0
stack size (kbytes, -s) 8192
cpu time (seconds, -t) unlimited
max user processes (-u) 1048576
virtual memory (kbytes, -v) unlimited
file locks (-x) unlimited
2.1.7 检查内核参数设置
执行以下3条命令,确保输出信息与下面的示例一致。
# cat /sys/kernel/mm/transparent_hugepage/defrag
always madvise [never]
# cat "/sys/kernel/mm/transparent_hugepage/enabled"
always madvise [never]
# sysctl vm.swappiness
vm.swappiness = 1
2.1.8 easyops用户及权限设置
执行ssh easyops@netease-nn1 "sudo ip a"验证easyops用户的sudo权限;如果像下面这样执行异常,说明easyops用户没有正常配置sudo权限。
# ssh easyops@netease-nn1 "sudo ip a"
easyops@netease-nn1's password:
我们信任您已经从系统管理员那里了解了日常注意事项。
总结起来无外乎这三点:
#1) 尊重别人的隐私。
#2) 输入前要先考虑(后果和风险)。
#3) 权力越大,责任越大。
sudo: 没有终端存在,且未指定 askpass 程序
以上内容对您是否有帮助?