Software versions
- Docker: 20.10.0
- k8s: 1.23.6
Initial setup
CentOS:
shell
systemctl stop firewalld
systemctl disable firewalld
sed -i 's/enforcing/disabled/' /etc/selinux/config
setenforce 0
swapoff -a
sed -ri 's/.*swap.*/#&/' /etc/fstab
# set static host mapping
cat >> /etc/hosts << EOF
10.1.72.58 master
10.1.72.12 node1
EOF
# Pass bridged IPv4 traffic to the iptables chains
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
modprobe br_netfilter  # required so the bridge sysctls above exist
sysctl --system
yum install ntpdate -y
ntpdate time.windows.com
# Raise the kernel PID limit
echo "kernel.pid_max=4194304" | tee -a /etc/sysctl.conf && sysctl -w kernel.pid_max=4194304 && sysctl -p
# Process limits (ulimit is per-session; pid_max can be raised further if needed)
ulimit -u 1000000
sysctl -w kernel.pid_max=8388608
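A quick sanity check of the steps above (optional; all standard tools):
shell
getenforce                                 # expect: Permissive or Disabled
swapon --show                              # expect: no output (swap is off)
sysctl net.bridge.bridge-nf-call-iptables  # expect: = 1
cat /proc/sys/kernel/pid_max               # expect: the value set above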
Ubuntu:
shell
set -e
# 1. Disable the firewall (Ubuntu uses ufw)
systemctl stop ufw
systemctl disable ufw
# 2. Disable SELinux (Ubuntu has no SELinux by default, so this can be skipped)
# If you did install SELinux, disable it like this (usually unnecessary):
# apt install selinux-utils -y
# setenforce 0
# sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
# 3. Disable swap
swapoff -a
sed -ri 's/^\s*(.+\s+)?swap\s+(\S+\s+){2}/#&/' /etc/fstab
# 4. Add static name resolution to /etc/hosts
cat >> /etc/hosts << EOF
172.16.4.121 master
172.16.4.122 node1
172.16.4.123 node3
EOF
# 5. Let iptables see bridged IPv4 and IPv6 traffic
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
modprobe br_netfilter
sysctl --system
# 6. Time sync (systemd-timesyncd or chrony is recommended on Ubuntu)
timedatectl set-timezone Asia/Shanghai
timedatectl set-ntp true
# Or sync manually with ntpdate:
apt update && apt install ntpdate -y
ntpdate time.windows.com
# 7. Raise the maximum PID value
echo "kernel.pid_max=4194304" | tee -a /etc/sysctl.conf
sysctl -w kernel.pid_max=4194304
sysctl -p
# 8. Temporarily raise the user process limit (current session only)
ulimit -u 1000000
# Make the limit permanent (recommended)
cat >> /etc/security/limits.conf << EOF
* soft nproc 1000000
* hard nproc 1000000
EOF
Install base packages
CentOS:
shell
curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
cat > /etc/yum.repos.d/kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
       https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
yum clean expire-cache
yum makecache
shell
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install -y docker-ce-20.10.0-3.el7 docker-ce-cli-20.10.0-3.el7 containerd.io
systemctl enable docker --now
Switch Docker's cgroup driver to systemd (to match the kubelet) in /etc/docker/daemon.json:
json
{
  "exec-opts": ["native.cgroupdriver=systemd"]
}
shell
systemctl daemon-reload
systemctl restart docker
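Optionally confirm the driver change took effect (the Go template below is a standard docker CLI format string):
shell
docker info --format '{{.CgroupDriver}}'   # expect: systemd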
shell
yum install -y kubelet-1.23.6 kubeadm-1.23.6 kubectl-1.23.6
systemctl enable kubelet --now
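As a counterpart to the apt-mark hold used on Ubuntu below, yum can pin these versions too; a sketch assuming the versionlock plugin is available:
shell
yum install -y yum-plugin-versionlock
yum versionlock add kubelet kubeadm kubectl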
Ubuntu:
shell
# Ubuntu 24 has no Docker 20 packages; install them via dpkg -i if needed
# Add the Aliyun Kubernetes APT source
cat <<EOF | tee /etc/apt/sources.list.d/kubernetes.list
deb https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial main
EOF
# Add the GPG key
curl -s https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | gpg --dearmor | tee /usr/share/keyrings/kubernetes-archive-keyring.gpg >/dev/null
# Configure the signed-by keyring
sed -i 's|^deb |deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] |' /etc/apt/sources.list.d/kubernetes.list
# Refresh the package index
apt update
apt install -y kubelet=1.23.6-00 kubeadm=1.23.6-00 kubectl=1.23.6-00
# Prevent upgrades
apt-mark hold kubelet kubeadm kubectl
systemctl enable kubelet --now
A fuller daemon.json for GPU nodes (nvidia-container-runtime; the data dir and insecure registry are site-specific):
shell
# Write the config file
sudo tee /etc/docker/daemon.json > /dev/null <<EOF
{
  "default-runtime": "nvidia",
  "runtimes": {
    "nvidia": {
      "path": "nvidia-container-runtime",
      "runtimeArgs": []
    }
  },
  "exec-opts": [
    "native.cgroupdriver=systemd"
  ],
  "graph": "/iflytek/docker/",
  "insecure-registries": [
    "172.29.101.173:8888"
  ],
  "log-driver": "json-file",
  "log-opts": {
    "max-file": "3",
    "max-size": "200m"
  }
}
EOF
# Reload systemd and restart the Docker service
sudo systemctl daemon-reexec
sudo systemctl restart docker
# Verify the config took effect
docker info | grep -i cgroup
Initialize the master
shell
cat <<EOF > kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
nodeRegistration:
  kubeletExtraArgs:
    root-dir: "/iflytek/kubelet"
localAPIEndpoint:
  advertiseAddress: 172.29.101.173
  bindPort: 6443
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: v1.23.6
imageRepository: registry.aliyuncs.com/google_containers
networking:
  podSubnet: 10.244.0.0/16
  serviceSubnet: 10.96.0.0/12
EOF
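Before touching the node, the config can be rehearsed with kubeadm's standard dry-run flag:
shell
kubeadm init --config kubeadm-config.yaml --dry-run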
shell
kubeadm init --config kubeadm-config.yaml
After init succeeds, set up kubectl for the current user:
shell
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
shell
[root@k8s-master ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master NotReady control-plane,master 3m28s v1.23.6
The node stays NotReady until a CNI plugin is deployed (see the Calico section below).
Join nodes
Joining a node requires a token. kubeadm prints it when installation finishes; if it was accidentally cleared, recover it as follows:
shell
# Create a new token
kubeadm token create
# If existing tokens have not expired, list them directly
kubeadm token list
# Or print a complete join command in one step
kubeadm token create --print-join-command
shell
kubeadm join 192.168.52.134:6443 --token mppfaq.kl14qfww5fezbze0 \
--discovery-token-ca-cert-hash sha256:f1b00c093f55bc2a2f61d8bd10a80df7609dd99e05f90d8817c6d4c73f420fb8
# How to compute --discovery-token-ca-cert-hash manually:
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | \
openssl dgst -sha256 -hex | sed 's/^.* //'
shell
[root@k8s-master ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master NotReady control-plane,master 20m v1.23.6
k8s-node1 NotReady <none> 50s v1.23.6
k8s-node2 NotReady <none> 4s v1.23.6
Pre-pull images
shell
# Either mirror works:
kubeadm config images pull --kubernetes-version=v1.23.6 --image-repository k8s-gcr.m.daocloud.io
kubeadm config images pull --kubernetes-version=v1.23.6 --image-repository registry.cn-hangzhou.aliyuncs.com/google_containers
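To see exactly which images a version needs before pulling:
shell
kubeadm config images list --kubernetes-version=v1.23.6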
Deploy the CNI network plugin
Master:
shell
curl https://docs.projectcalico.org/manifests/calico.yaml -O
After the download completes, update the configuration:
shell
# In calico.yaml, uncomment CALICO_IPV4POOL_CIDR and set it to the podSubnet
# used during kubeadm init above:
# - name: CALICO_IPV4POOL_CIDR
#   value: "10.244.0.0/16"
shell
kubectl apply -f calico.yaml
Rollout can be slow due to network issues; watch the status with the following commands:
shell
kubectl get po -n kube-system
kubectl get no
kubectl describe po xxxx -n kube-system
# Pull the images manually if pulls are stuck
docker pull calico/kube-controllers:v3.25.0
docker pull calico/cni:v3.25.0
docker pull calico/node:v3.25.0
Test
shell
kubectl create deployment nginx --image=nginx
kubectl expose deployment nginx --port=80 --type=NodePort
kubectl get pod,svc
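Once the pod is Running, the service is reachable from outside the cluster; a sketch where <node-ip> and <node-port> are placeholders for any node address and the port printed by get svc:
shell
curl http://<node-ip>:<node-port>   # expect the nginx welcome page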
Use kubectl on worker nodes
shell
# On the master, copy the kubeconfig to the node:
scp /etc/kubernetes/admin.conf root@k8s-node1:/etc/kubernetes
shell
# On the node:
echo "export KUBECONFIG=/etc/kubernetes/admin.conf">> ~/.bash_profile
source ~/.bash_profile
For a single-node deployment, remove the control-plane taint:
shell
kubectl taint nodes --all node-role.kubernetes.io/master-
# node/master untainted
Dashboard
shell
wget https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml
# By default the Dashboard is only reachable inside the cluster; change the Service to NodePort to expose it
vim recommended.yaml
# If no type is specified the Service defaults to ClusterIP, which cannot be reached
# from outside the cluster, so set the Service type to NodePort
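Alternatively, apply the stock manifest first and patch the Service type afterwards with the standard kubectl patch subcommand:
shell
kubectl -n kubernetes-dashboard patch svc kubernetes-dashboard -p '{"spec":{"type":"NodePort"}}'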
shell
# Then run on the k8s master node:
kubectl apply -f recommended.yaml
# Watch until the Dashboard components are ready
watch kubectl get all -o wide -n kubernetes-dashboard
# Access the Dashboard UI
# Check the port exposed by the kubernetes-dashboard Service:
kubectl get svc -n kubernetes-dashboard -o wide
shell
# Note: kubectl create token requires kubectl >= 1.24; on 1.23 read the token
# from the ServiceAccount secret instead (see below)
kubectl -n kubernetes-dashboard create token kubernetes-dashboard
# Create a user
kubectl create serviceaccount dashboard-admin -n kubernetes-dashboard
# Grant permissions
kubectl create clusterrolebinding dashboard-admin --clusterrole=cluster-admin --serviceaccount=kubernetes-dashboard:dashboard-admin
# Get the user's token
kubectl create token dashboard-admin -n kubernetes-dashboard
# Log in to the Dashboard with the printed token.
For a long-lived token (and for kubectl 1.23), create the ServiceAccount, binding, and token Secret declaratively:
yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dashboard-admin
  namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: dashboard-admin
subjects:
- kind: ServiceAccount
  name: dashboard-admin
  namespace: kube-system
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: Secret
metadata:
  name: dashboard-admin-token
  namespace: kube-system
  annotations:
    kubernetes.io/service-account.name: "dashboard-admin"
type: kubernetes.io/service-account-token
shell
kubectl get secret -n kube-system
# This token can also be used with a Java client
kubectl get secret dashboard-admin-token-xxxx -n kube-system -o jsonpath='{.data.token}' | base64 --decode
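The same bearer token also authenticates direct API calls; a sketch where the secret-name suffix and <apiserver> address are placeholders:
shell
TOKEN=$(kubectl get secret dashboard-admin-token-xxxx -n kube-system -o jsonpath='{.data.token}' | base64 --decode)
curl -k -H "Authorization: Bearer $TOKEN" https://<apiserver>:6443/api/v1/namespaces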
Token
shell
kubectl get secret -n kube-system|grep admin
# dashboard-admin-token-xxxx kubernetes.io/service-account-token 3 2m
kubectl describe secret dashboard-admin-token-xxxx -n kube-system
A namespace-scoped admin is often safer than cluster-admin; for example:
yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: ns-admin
  namespace: my-namespace
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: ns-admin
  namespace: my-namespace
rules:
- apiGroups: [""] # core resources (pods, services, ...)
  resources: ["pods", "services", "configmaps", "secrets"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
- apiGroups: ["apps"] # deployments and friends
  resources: ["deployments", "replicasets", "statefulsets"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
- apiGroups: ["batch"] # jobs, cronjobs
  resources: ["jobs", "cronjobs"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: ns-admin
  namespace: my-namespace
subjects:
- kind: ServiceAccount
  name: ns-admin
  namespace: my-namespace
roleRef:
  kind: Role
  name: ns-admin
  apiGroup: rbac.authorization.k8s.io
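After applying the manifest, the scoped permissions can be verified with kubectl's impersonation support:
shell
kubectl auth can-i list pods -n my-namespace --as=system:serviceaccount:my-namespace:ns-admin   # expect: yes
kubectl auth can-i list nodes --as=system:serviceaccount:my-namespace:ns-admin                  # expect: no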
Resource reservation
Once the cluster is up, reserve resources on every node for the OS and Kubernetes daemons, so the base environment keeps running and a cluster-wide cascade of failures is avoided.
yaml
# Set per-node values in each node's kubelet configuration (typically /var/lib/kubelet/config.yaml)
systemReserved:
  cpu: "1" # value for node A
  memory: "1Gi"
  ephemeral-storage: "50Gi"
kubeReserved:
  cpu: "1"
  memory: "1Gi"
  ephemeral-storage: "5Gi"
evictionHard:
  memory.available: "200Mi"
  nodefs.available: "10%"
  nodefs.inodesFree: "5%"
  imagefs.available: "15%"
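After editing the kubelet config, restart the kubelet and check that the node's Allocatable shrank by the reserved amounts (<node-name> is a placeholder):
shell
systemctl restart kubelet
kubectl describe node <node-name> | grep -A 6 Allocatable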
Disk management
local-path
shell
kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.29/deploy/local-path-storage.yaml
shell
cat > local-path-v2.yaml << EOF
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: local-path-v2
parameters:
  blockCleanerCommand: /scripts/delete-block.sh
  blockCleanerCommandRetries: "5"
  blockCleanerCommandTimeout: 1m
  mountPath: /iflytek/local-path-v2
provisioner: rancher.io/local-path
reclaimPolicy: Delete
volumeBindingMode: Immediate
EOF
cat > local-path.yaml << EOF
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: local-path
provisioner: rancher.io/local-path
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
EOF
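Apply both manifests and confirm the classes exist:
shell
kubectl apply -f local-path-v2.yaml -f local-path.yaml
kubectl get storageclass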
Ephemeral volumes
Make sure the kubelet's LocalStorageCapacityIsolation feature gate is on (it is enabled by default in recent releases):
yaml
featureGates:
  LocalStorageCapacityIsolation: true
shell
sudo systemctl daemon-reload    # reload systemd configuration
sudo systemctl restart kubelet  # restart the kubelet service
yaml
apiVersion: apps/v1 # extensions/v1beta1 was removed in k8s 1.16
kind: Deployment
metadata:
  name: nginx
  namespace: default
spec:
  selector:
    matchLabels:
      run: nginx
  template:
    metadata:
      labels:
        run: nginx
    spec:
      containers:
      - image: nginx
        name: nginx
        resources:
          limits:
            ephemeral-storage: 2Gi
          requests:
            ephemeral-storage: 2Gi
shell
# Write ~4 GiB inside the container to exceed the 2Gi ephemeral-storage limit
dd if=/dev/zero of=/test bs=4096 count=1024000
The pod should then be evicted for exceeding its limit, and the Deployment recreates it.
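The eviction is visible in events and in the pod description (a sketch; <nginx-pod> is a placeholder):
shell
kubectl get events --field-selector reason=Evicted
kubectl describe pod <nginx-pod> | grep -i ephemeral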
Configuration
Change the NodePort range
shell
vim /etc/kubernetes/manifests/kube-apiserver.yaml
# Add:
- --service-node-port-range=20000-22767
# Find the kube-apiserver pod under kube-system, delete it (it will be recreated),
# then use describe to confirm the flag took effect
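One way to confirm the running apiserver picked up the flag (kubeadm labels the static pod with component=kube-apiserver):
shell
kubectl -n kube-system get pod -l component=kube-apiserver -o yaml | grep service-node-port-range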
Migrate the kubelet data directory
shell
systemctl stop kubelet
shell
mkdir -p /iflytek/kubelet
cp -rf /var/lib/kubelet/pods /iflytek/kubelet/
cp -rf /var/lib/kubelet/pod-resources /iflytek/kubelet/
mv /var/lib/kubelet/pods{,.old}
mv /var/lib/kubelet/pod-resources{,.old}
shell
# Point the kubelet at the new directory; this line goes in /etc/sysconfig/kubelet
# (CentOS) or /etc/default/kubelet (Ubuntu)
KUBELET_EXTRA_ARGS="--root-dir=/iflytek/kubelet"
shell
systemctl daemon-reload && systemctl restart kubelet
systemctl status kubelet
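Finally, confirm the kubelet is using the new root directory:
shell
ps -ef | grep kubelet | grep -o 'root-dir=[^ ]*'   # expect: root-dir=/iflytek/kubelet
ls /iflytek/kubelet                                # pods, pod-resources, ...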