1. Hardware requirements
1) Master host: 2 CPU cores, 4 GB RAM, 20 GB disk
2) Node hosts: 4+ CPU cores, 8+ GB RAM, 40+ GB disk
3) Full network connectivity between every machine in the cluster
4) No duplicate hostnames, MAC addresses, or product_uuid values among the nodes (see the check commands after the hosts list below)
5) The required ports must be open on every machine
6) Swap must be disabled for the kubelet to work properly
- Add the cluster host entries to /etc/hosts on every node:
10.2.xx.215 gz-xx-gw-c7 # master
10.2.xx.128 gz-xx-node1-c7 # node
10.2.xx.246 gz-xx-node2-c7 # node
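To verify node uniqueness before going further, you can use the standard checks from the kubeadm prerequisites (run on every node and compare the output):
ip link                                  # compare MAC addresses across nodes
cat /sys/class/dmi/id/product_uuid       # must differ on every node
hostname                                 # must be unique per node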
2. Server environment configuration
2.1 Disable the firewall (all nodes)
Stop the firewall and disable it at boot:
systemctl stop firewalld
systemctl disable firewalld
2.3 Disable the swap partition (all nodes)
The /etc/fstab change takes effect after a reboot:
swapoff -a
vim /etc/fstab   # disable swap permanently: delete or comment out the swap entry in /etc/fstab
#/dev/mapper/centos-swap swap swap defaults 0 0
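After the reboot you can confirm swap is really off:
free -h   # the Swap line should show 0B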
2.4 Upgrade the CentOS 7 kernel (all nodes)
CentOS 7.9 ships with kernel 3.10 by default, which has many known bugs; the most common is the cgroup memory leak. Run the upgrade on every host.
1) Download the kernel version you need. This guide installs via rpm, so download the rpm package directly:
[root@localhost ~]# wget https://cbs.centos.org/kojifiles/packages/kernel/4.9.220/37.el7/x86_64/kernel-4.9.220-37.el7.x86_64.rpm
2) Upgrade with rpm:
[root@localhost ~]# rpm -ivh kernel-4.9.220-37.el7.x86_64.rpm
# List the available kernels so you can pick the boot entry
[root@gz-bjrd-devops-gw-c7 dd]# sudo awk -F\' '$1=="menuentry " {print i++ " : " $2}' /etc/grub2.cfg
0 : CentOS Linux (4.9.220-37.el7.x86_64) 7 (Core)
1 : CentOS Linux (3.10.0-1160.88.1.el7.x86_64) 7 (Core)
2 : CentOS Linux (3.10.0-1160.76.1.el7.x86_64) 7 (Core)
3 : CentOS Linux (3.10.0-1160.el7.x86_64) 7 (Core)
4 : CentOS Linux (0-rescue-1caefa67ba0d4c758d6742dfc455d487) 7 (Core)
# Set the default boot kernel
grub2-set-default 0    # or by name: grub2-set-default 'CentOS Linux (4.9.220-37.el7.x86_64) 7 (Core)'
# Regenerate the grub configuration
grub2-mkconfig -o /boot/grub2/grub.cfg
3) Reboot after the upgrade, then check that the kernel was upgraded. ################ The reboot is mandatory.
[root@localhost ~]# reboot
# After rebooting, verify:
[root@k8s-master ~]# uname -a
Linux gz-xxs-gw-c7 4.9.220-37.el7.x86_64 #1 SMP Tue Apr 28 10:14:25 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
2.5 Set hostnames (all nodes)
[root@k8s-master ~]# cat /etc/hosts
10.2.xx.215 gz-xx-gw-c7 # master
10.2.xx.128 gz-xx-node1-c7 # node
10.2.xx.246 gz-xx-node2-c7 # node
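The hosts file above only maps names to IPs; the hostname itself is set with hostnamectl. Run the matching command on each machine (names taken from the hosts entries above):
hostnamectl set-hostname gz-xx-gw-c7      # on the master
hostnamectl set-hostname gz-xx-node1-c7   # on node1
hostnamectl set-hostname gz-xx-node2-c7   # on node2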
2.6 Time synchronization (all nodes)
ntpdate cn.pool.ntp.org
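ntpdate performs a one-off sync; to keep the clocks aligned you could add a cron entry (a simple sketch, assuming cn.pool.ntp.org stays reachable and ntpdate is at its usual path):
(crontab -l 2>/dev/null; echo '*/30 * * * * /usr/sbin/ntpdate cn.pool.ntp.org >/dev/null 2>&1') | crontab -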
2.7 Configure iptables rules
Watch out for any rules you already have (this flushes everything, so be careful):
iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat && iptables -P FORWARD ACCEPT
Set the kernel parameters:
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
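These bridge sysctls only exist once the br_netfilter kernel module is loaded; if sysctl --system reports the keys as unknown, load the module and make it persistent (a small addition not in the original steps):
modprobe br_netfilter
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf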
3. Install Docker (all nodes)
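yum-config-manager is provided by the yum-utils package; if it is not already present on your base image (an assumption), install it first:
yum install -y yum-utils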
[root@gz-xx-gw-c7 ~]# yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
[root@gz-xx-gw-c7 ~]# yum install docker-ce-23.0.6-1.el7 -y
[root@gz-xx-gw-c7 ~]# mkdir -pv /opt/docker
[root@gz-xx-gw-c7 ~]# cat /etc/docker/daemon.json
{
"registry-mirrors": ["https://zd6lf0p4.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"],
"data-root":"/opt/docker"
}
[root@gz-xx-gw-c7 ~]# systemctl start docker
[root@gz-xx-gw-c7 ~]# systemctl enable docker
Configure the registry mirror and the cgroup driver
If /etc/docker/daemon.json does not exist yet, create it yourself (contents as shown above).
# "exec-opts": ["native.cgroupdriver=systemd"] sets Docker's cgroup driver; the kubelet also uses systemd, and the two must match.
# Reload the configuration
systemctl restart docker
systemctl enable docker
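You can confirm which driver Docker is actually using:
docker info | grep -i 'cgroup driver'    # should print: Cgroup Driver: systemd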
4. Install cri-dockerd (all nodes)
# Download the rpm from https://github.com/Mirantis/cri-dockerd/releases (0.3.1 is used here); download it manually and upload it to the server
rpm -ivh cri-dockerd-0.3.1-3.el7.x86_64.rpm
# Edit the ExecStart line in /usr/lib/systemd/system/cri-docker.service
vim /usr/lib/systemd/system/cri-docker.service
ExecStart=/usr/bin/cri-dockerd --network-plugin=cni --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.7
systemctl daemon-reload
systemctl enable --now cri-docker
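A quick sanity check that cri-dockerd is running and exposing the CRI socket kubeadm will use later:
systemctl status cri-docker
ls -l /var/run/cri-dockerd.sock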
5. Install kubeadm, kubelet and kubectl via yum (all nodes)
Configure the yum repository:
vim /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
yum install -y kubelet kubeadm kubectl
systemctl enable kubelet --now
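The repository above is unversioned, so yum installs the newest packages it carries; since this guide initializes v1.27.3 below, pinning the package versions avoids a kubeadm/cluster version mismatch (assuming the mirror still provides these builds), e.g.:
yum install -y kubelet-1.27.3 kubeadm-1.27.3 kubectl-1.27.3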
Pre-pull the control-plane images (master only):
kubeadm config images pull --kubernetes-version=v1.27.3 --image-repository registry.aliyuncs.com/google_containers --cri-socket unix:///var/run/cri-dockerd.sock
6. Initialize the control plane (master node)
kubeadm init \
--apiserver-advertise-address=10.2.xx.215 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.27.3 \
--service-cidr=172.18x.0.0/12 \
--pod-network-cidr=172.17x.0.0/16 \
--cri-socket unix:///var/run/cri-dockerd.sock \
--ignore-preflight-errors=all
On success the output ends with something like this:
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 10.2.xx.215:6443 --token 4eozgp.xm7tfxxxxxxxxxz42y \
--discovery-token-ca-cert-hash sha256:66bfxxxxxxxxxxxxxxxx8ac68cbc927e86789a9e4e8183365ded688a1
- Run on the master node:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
- Run on the worker nodes:
kubeadm join 10.2.xx.215:6443 --token 4eozgp.xm7tfxxxxxxxxxz42y --discovery-token-ca-cert-hash sha256:66bfxxxxxxxxxxxxxxxx8ac68cbc927e86789a9e4e8183365ded688a1
Use the join command printed by your own kubeadm init output; run it only on the nodes you are adding, never on the master.
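Two practical notes, both standard kubeadm behaviour rather than commands from the original text: with Docker plus cri-dockerd installed a node usually has two CRI sockets, so if kubeadm join reports "found multiple CRI endpoints", append the same socket flag used during init, i.e. --cri-socket unix:///var/run/cri-dockerd.sock; and if the bootstrap token has expired (default lifetime 24h), regenerate the whole join command on the master:
kubeadm token create --print-join-command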
- Check on the master node:
[root@gz-bjrd-devops-gw-c7 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
gz-xx-gw-c7 NotReady control-plane 22h v1.27.3
gz-xx-node1-c7 NotReady <none> 21h v1.27.3
gz-xx-node2-c7 NotReady <none> 21h v1.27.3
Don't panic: the nodes will turn Ready once the network plugin is in place.
7. Install the network plugin (flannel)
https://github.com/flannel-io...
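The Network value in net-conf.json below must match the --pod-network-cidr passed to kubeadm init (172.17x.0.0/16 in this guide); note that net-conf.json is parsed as JSON, so keep it free of comments.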
[root@gz-bjrd-devops-gw-c7 ~]# cat flannel.yml
apiVersion: v1
kind: Namespace
metadata:
  labels:
    k8s-app: flannel
    pod-security.kubernetes.io/enforce: privileged
  name: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: flannel
  name: flannel
  namespace: kube-flannel
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: flannel
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
- apiGroups:
  - networking.k8s.io
  resources:
  - clustercidrs
  verbs:
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: flannel
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: v1
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "172.17x.0.0/16",
      "Backend": {
        "Type": "vxlan"
      }
    }
kind: ConfigMap
metadata:
  labels:
    app: flannel
    k8s-app: flannel
    tier: node
  name: kube-flannel-cfg
  namespace: kube-flannel
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: flannel
    k8s-app: flannel
    tier: node
  name: kube-flannel-ds
  namespace: kube-flannel
spec:
  selector:
    matchLabels:
      app: flannel
      k8s-app: flannel
  template:
    metadata:
      labels:
        app: flannel
        k8s-app: flannel
        tier: node
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      containers:
      - args:
        - --ip-masq
        - --kube-subnet-mgr
        command:
        - /opt/bin/flanneld
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        image: docker.io/flannel/flannel:v0.21.5
        name: kube-flannel
        resources:
          requests:
            cpu: 100m
            memory: 50Mi
        securityContext:
          capabilities:
            add:
            - NET_ADMIN
            - NET_RAW
          privileged: false
        volumeMounts:
        - mountPath: /run/flannel
          name: run
        - mountPath: /etc/kube-flannel/
          name: flannel-cfg
        - mountPath: /run/xtables.lock
          name: xtables-lock
      hostNetwork: true
      initContainers:
      - args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        command:
        - cp
        image: docker.io/flannel/flannel-cni-plugin:v1.1.2
        name: install-cni-plugin
        volumeMounts:
        - mountPath: /opt/cni/bin
          name: cni-plugin
      - args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        command:
        - cp
        image: docker.io/flannel/flannel:v0.21.5
        name: install-cni
        volumeMounts:
        - mountPath: /etc/cni/net.d
          name: cni
        - mountPath: /etc/kube-flannel/
          name: flannel-cfg
      priorityClassName: system-node-critical
      serviceAccountName: flannel
      tolerations:
      - effect: NoSchedule
        operator: Exists
      volumes:
      - hostPath:
          path: /run/flannel
        name: run
      - hostPath:
          path: /opt/cni/bin
        name: cni-plugin
      - hostPath:
          path: /etc/cni/net.d
        name: cni
      - configMap:
          name: kube-flannel-cfg
        name: flannel-cfg
      - hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
        name: xtables-lock
kubectl apply -f flannel.yml
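Check that the flannel pods come up and the nodes turn Ready:
kubectl get pods -n kube-flannel
kubectl get nodes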
Test
Run a simple smoke test:
[root@k8s-master /data/yaml]# cat nginx.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
  labels:
    app: ceshi
spec:
  replicas: 2
  selector:
    matchLabels:
      school: qinghua
  template:
    metadata:
      name: nginx-pod
      labels:
        school: qinghua
    spec:
      containers:
      - name: nginx-web
        image: nginx:1.20.2
        ports:
        - containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: nginx-service
  labels:
    role: leader
spec:
  type: NodePort
  selector:
    school: qinghua
  ports:
  - port: 8888
    targetPort: 80
    nodePort: 30000
kubectl apply -f nginx.yaml
[root@k8s-master /data/yaml]# kubectl get pods
NAME READY STATUS RESTARTS AGE
nginx-deploy-6659dbd7c4-kldxj 1/1 Running 0 79m
nginx-deploy-6659dbd7c4-qgr4v 1/1 Running 0 79m
The first image pull can be slow; once the pods show Running, the deployment is done.
# Check the Service
If Endpoints lists pod IPs, the Service is wired up correctly.
[root@k8s-master /data/yaml]# kubectl describe svc nginx-service
Name: nginx-service
Namespace: default
Labels: role=leader
Annotations: <none>
Selector: school=qinghua
Type: NodePort
IP Family Policy: SingleStack
IP Families: IPv4
IP: 10.2.2xx.219
IPs: 10.2.2xx.219
Port: <unset> 8888/TCP
TargetPort: 80/TCP
NodePort: <unset> 30000/TCP
Endpoints: 172.17.1.12:80,172.17.1.13:80
Session Affinity: None
External Traffic Policy: Cluster
Events: <none>
Note: since Kubernetes 1.24, NodePort services no longer show up as listening ports on the node itself (e.g. in ss -lntup); just open http://<node-ip>:<nodePort> in a browser.
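A quick check from any machine that can reach the nodes (using one of the node IPs from the hosts list above):
curl -I http://10.2.xx.128:30000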
Troubleshooting
Error registering network: failed to acquire lease: node "caasfaasslave1.XXXXXX.local" pod cidr not assigned
- Check on the master
The nodes were not assigned a podCIDR. I hit this too: even though the kube-controller-manager manifest on the master had the cluster CIDR set, it still did not work and flannel kept running in CrashLoopBackOff.
[root@gz-xxs-gw-c7 ~]# sudo cat /etc/kubernetes/manifests/kube-controller-manager.yaml | grep -i cluster-cidr
- --cluster-cidr=172.17x.0.0/16
kubectl patch node gz-xx-node2-c7 -p '{"spec":{"podCIDR":"172.17x.0.0/16"}}'
kubectl patch node gz-xx-node1-c7 -p '{"spec":{"podCIDR":"172.17x.0.0/16"}}'
https://stackoverflow.com/que...