Kubernetes Operator开发实战
一、Operator概述
Kubernetes Operator是一种软件扩展模式,用于管理复杂的有状态应用。
1.1 Operator模式
┌─────────────────────────────────────────────────────────────┐
│ Operator │
│ ┌───────────────────────────────────────────────────────┐ │
│ │ Controller ──> Watch ──> Reconcile ──> Update │ │
│ └───────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ Custom Resource │
│ (定义应用期望状态) │
└─────────────────────────────────────────────────────────────┘
1.2 Operator组成
| 组件 | 说明 |
|---|---|
| Custom Resource | 定义应用的自定义资源 |
| Controller | 监听资源变化,维持期望状态 |
| Reconcile Loop | 核心控制循环,持续调谐状态 |
二、环境准备
2.1 安装Operator SDK
# 安装Operator SDK
curl -sL https://github.com/operator-framework/operator-sdk/releases/download/v1.32.0/operator-sdk_linux_amd64 -o operator-sdk
chmod +x operator-sdk
sudo mv operator-sdk /usr/local/bin/
# 验证安装
operator-sdk version
2.2 初始化项目
# 创建项目
operator-sdk init --domain example.com --repo github.com/example/myapp-operator
# 添加API
operator-sdk create api --group apps --version v1 --kind MyApp --resource --controller
三、定义Custom Resource
3.1 API定义
// MyAppSpec describes the desired state of a MyApp, i.e. the `spec` section
// of the custom resource.
type MyAppSpec struct {
	// Replicas is used by createDeployment as the replica count of the
	// generated Deployment. It is optional and may therefore be nil.
	Replicas *int32 `json:"replicas,omitempty"`
	// Image is the container image run by the generated Deployment.
	Image string `json:"image,omitempty"`
	// Port is exposed as the container port of the generated container.
	Port int32 `json:"port,omitempty"`

	// Custom configuration.

	// Resources is copied to the container's Resources field.
	// NOTE(review): the Resources type is declared elsewhere; it presumably
	// has to be assignable to corev1.ResourceRequirements — confirm.
	Resources Resources `json:"resources,omitempty"`
	// Env is copied to the container's Env field.
	// NOTE(review): EnvVar is declared elsewhere; presumably compatible with
	// corev1.EnvVar — confirm.
	Env []EnvVar `json:"env,omitempty"`
}
// MyAppStatus holds the observed state of a MyApp, i.e. the `status` section
// of the custom resource. It is written by the controller, not by users.
type MyAppStatus struct {
	// ReadyReplicas mirrors the ready replica count reported by the backing
	// Deployment's status.
	ReadyReplicas int32 `json:"readyReplicas,omitempty"`
	// Phase is a coarse summary set by updateStatus: "Pending", "Partial" or
	// "Ready".
	Phase string `json:"phase,omitempty"`
	// Conditions carries the standard Kubernetes condition list for this
	// resource.
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status

// MyApp is the Schema for the myapps API. It pairs the user-declared Spec
// with the controller-reported Status; the marker above exposes Status as a
// separate subresource.
type MyApp struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the desired state declared by the user.
	Spec MyAppSpec `json:"spec,omitempty"`
	// Status is the state most recently observed by the controller.
	Status MyAppStatus `json:"status,omitempty"`
}
// +kubebuilder:object:root=true

// MyAppList is the list type for MyApp, as returned by list calls on the
// myapps API.
type MyAppList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`

	// Items holds the MyApp objects in this list.
	Items []MyApp `json:"items"`
}
3.2 生成CRD
# 生成CRD
make manifests
# 查看生成的CRD
cat config/crd/bases/apps.example.com_myapps.yaml
四、Controller实现
4.1 Reconcile逻辑
// Reconcile moves the cluster towards the state declared by the MyApp named
// in req. It
//
//  1. loads the MyApp (a missing object is treated as already handled),
//  2. creates the backing Deployment if it does not exist yet,
//  3. aligns the Deployment's replica count with spec.replicas, and
//  4. refreshes the MyApp status from the Deployment.
//
// After creating or updating the Deployment it requests an immediate requeue
// so that the following pass works on the persisted object.
//
// NOTE(review): this snippet uses the alias appsv1 for both the MyApp API
// package and the Deployment type; in a real project the Kubernetes
// apps/v1 package and the operator's own API package need distinct import
// aliases — confirm against the file's imports.
func (r *MyAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	// Named logger rather than log, so the logging package is not shadowed.
	logger := log.FromContext(ctx)

	// 1. Fetch the MyApp resource.
	var myapp appsv1.MyApp
	if err := r.Get(ctx, req.NamespacedName, &myapp); err != nil {
		if apierrors.IsNotFound(err) {
			// The object is gone; there is nothing left to reconcile.
			return ctrl.Result{}, nil
		}
		logger.Error(err, "Unable to fetch MyApp")
		return ctrl.Result{}, err
	}

	// 2. Make sure the Deployment exists; it shares the MyApp's name and
	// namespace.
	var deployment appsv1.Deployment
	key := types.NamespacedName{Name: myapp.Name, Namespace: myapp.Namespace}
	if err := r.Get(ctx, key, &deployment); err != nil {
		if !apierrors.IsNotFound(err) {
			logger.Error(err, "Unable to fetch Deployment")
			return ctrl.Result{}, err
		}

		deployment = r.createDeployment(&myapp)
		if err := r.Create(ctx, &deployment); err != nil {
			logger.Error(err, "Failed to create Deployment")
			return ctrl.Result{}, err
		}
		return ctrl.Result{Requeue: true}, nil
	}

	// 3. Align the replica count. spec.replicas is optional, so both pointers
	// are checked before dereferencing; when the MyApp does not specify a
	// count the Deployment is left untouched.
	if want := myapp.Spec.Replicas; want != nil {
		have := deployment.Spec.Replicas
		if have == nil || *have != *want {
			deployment.Spec.Replicas = want
			if err := r.Update(ctx, &deployment); err != nil {
				logger.Error(err, "Failed to update Deployment")
				return ctrl.Result{}, err
			}
			return ctrl.Result{Requeue: true}, nil
		}
	}

	// 4. Publish the observed state; updateStatus logs its own failures.
	r.updateStatus(ctx, &myapp, &deployment)
	return ctrl.Result{}, nil
}
4.2 创建Deployment
// createDeployment assembles, in memory only, the Deployment that backs the
// given MyApp. The Deployment takes the MyApp's name and namespace, carries a
// controller owner reference pointing back at the MyApp, and runs a single
// container configured from the MyApp spec (image, port, resources, env).
// The caller is responsible for submitting the object to the API server.
func (r *MyAppReconciler) createDeployment(myapp *appsv1.MyApp) appsv1.Deployment {
	// One label set serves as both the selector and the pod template labels,
	// so the Deployment selects exactly the pods it creates.
	podLabels := map[string]string{"app": myapp.Name}

	container := corev1.Container{
		Name:      myapp.Name,
		Image:     myapp.Spec.Image,
		Ports:     []corev1.ContainerPort{{ContainerPort: myapp.Spec.Port}},
		Resources: myapp.Spec.Resources,
		Env:       myapp.Spec.Env,
	}

	ownerRef := *metav1.NewControllerRef(myapp, appsv1.GroupVersion.WithKind("MyApp"))

	var dep appsv1.Deployment
	dep.ObjectMeta = metav1.ObjectMeta{
		Name:            myapp.Name,
		Namespace:       myapp.Namespace,
		OwnerReferences: []metav1.OwnerReference{ownerRef},
	}
	dep.Spec = appsv1.DeploymentSpec{
		Replicas: myapp.Spec.Replicas,
		Selector: &metav1.LabelSelector{MatchLabels: podLabels},
		Template: corev1.PodTemplateSpec{
			ObjectMeta: metav1.ObjectMeta{Labels: podLabels},
			Spec:       corev1.PodSpec{Containers: []corev1.Container{container}},
		},
	}
	return dep
}
4.3 更新状态
// updateStatus derives the MyApp status from the backing Deployment and
// writes it through the status subresource.
//
// Phase is "Ready" when the ready replica count equals the desired count,
// "Partial" when some but not all replicas are ready, and "Pending"
// otherwise. A failed write is logged rather than returned, because the
// function has no error result.
func (r *MyAppReconciler) updateStatus(ctx context.Context, myapp *appsv1.MyApp, deployment *appsv1.Deployment) {
	// spec.replicas is an optional pointer; Kubernetes defaults an unset
	// Deployment replica count to 1, so use that instead of dereferencing nil.
	desired := int32(1)
	if deployment.Spec.Replicas != nil {
		desired = *deployment.Spec.Replicas
	}

	ready := deployment.Status.ReadyReplicas
	phase := "Pending"
	switch {
	case ready == desired:
		phase = "Ready"
	case ready > 0:
		phase = "Partial"
	}

	// Skip the API call when nothing changed; this keeps reconciliation
	// idempotent and avoids needless writes.
	if myapp.Status.ReadyReplicas == ready && myapp.Status.Phase == phase {
		return
	}

	myapp.Status.ReadyReplicas = ready
	myapp.Status.Phase = phase
	if err := r.Status().Update(ctx, myapp); err != nil {
		log.FromContext(ctx).Error(err, "Failed to update MyApp status")
	}
}
4.4 Setup With Manager
// SetupWithManager registers the reconciler with mgr. MyApp is the primary
// resource; Deployments and Services are registered as owned resources.
func (r *MyAppReconciler) SetupWithManager(mgr ctrl.Manager) error {
	bldr := ctrl.NewControllerManagedBy(mgr)
	bldr = bldr.For(&appsv1.MyApp{})
	bldr = bldr.Owns(&appsv1.Deployment{})
	bldr = bldr.Owns(&corev1.Service{})
	return bldr.Complete(r)
}
五、测试Operator
5.1 创建测试CR
apiVersion: apps.example.com/v1
kind: MyApp
metadata:
name: myapp-sample
spec:
replicas: 3
image: nginx:latest
port: 80
resources:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "256Mi"
cpu: "200m"
env:
- name: ENV
value: production
5.2 部署Operator
# 部署CRD
kubectl apply -f config/crd/bases/apps.example.com_myapps.yaml
# 部署Operator
make install
make deploy IMG=myapp-operator:latest
# 创建示例资源
kubectl apply -f config/samples/apps_v1_myapp.yaml
5.3 验证部署
# 查看Operator日志
kubectl logs -n myapp-operator-system -l control-plane=controller-manager
# 查看MyApp状态
kubectl get myapps
kubectl describe myapp myapp-sample
# 查看创建的资源
kubectl get deployments
kubectl get pods
六、高级功能
6.1 事件处理
// Reconcile shows how to attach Kubernetes Events to the MyApp being
// reconciled: one event when reconciliation starts and one when it finishes.
// A MyApp that no longer exists is treated as already handled.
//
// The logger that the original snippet declared but never used has been
// dropped, since Go rejects unused local variables at compile time; obtain
// one with log.FromContext(ctx) where the business logic needs it.
func (r *MyAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	var myapp appsv1.MyApp
	if err := r.Get(ctx, req.NamespacedName, &myapp); err != nil {
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	// Record the start of the pass as an event on the MyApp.
	r.Recorder.Event(&myapp, corev1.EventTypeNormal, "Reconciling", "Starting reconciliation")

	// ... business logic ...

	// Record successful completion.
	r.Recorder.Event(&myapp, corev1.EventTypeNormal, "Reconciled", "Reconciliation completed")
	return ctrl.Result{}, nil
}
6.2 Finalizer处理
// Reconcile shows finalizer handling for MyApp.
//
// While the object is live, the finalizer is added so that deletion blocks
// until this controller has cleaned up. Once the object is marked for
// deletion, external resources are cleaned up and the finalizer is removed,
// which lets the API server finish deleting the object.
func (r *MyAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	// Single definition of the finalizer name used in every branch below.
	const finalizerName = "myapp.finalizer"

	var myapp appsv1.MyApp
	if err := r.Get(ctx, req.NamespacedName, &myapp); err != nil {
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	// The object is being deleted.
	if myapp.GetDeletionTimestamp() != nil {
		// If our finalizer is already gone, cleanup has been done (or was
		// never ours to do); do not repeat it or issue a pointless update.
		if !containsString(myapp.Finalizers, finalizerName) {
			return ctrl.Result{}, nil
		}

		// Clean up resources before releasing the object.
		if err := r.cleanupResources(ctx, &myapp); err != nil {
			return ctrl.Result{}, err
		}

		// Remove the finalizer so deletion can proceed.
		myapp.Finalizers = removeString(myapp.Finalizers, finalizerName)
		if err := r.Update(ctx, &myapp); err != nil {
			return ctrl.Result{}, err
		}
		return ctrl.Result{}, nil
	}

	// The object is live: make sure the finalizer is registered.
	if !containsString(myapp.Finalizers, finalizerName) {
		myapp.Finalizers = append(myapp.Finalizers, finalizerName)
		if err := r.Update(ctx, &myapp); err != nil {
			return ctrl.Result{}, err
		}
	}

	// ... business logic ...

	// The original snippet fell off the end of the function, which does not
	// compile for a function with results.
	return ctrl.Result{}, nil
}
6.3 状态条件
// updateStatusConditions sets the MyAppReady condition on myapp to True or
// False according to ready and persists it with a merge patch against the
// status subresource.
//
// An existing condition of the same type is replaced in place instead of
// being appended again, and its LastTransitionTime is kept when the status
// value did not actually change. A failed patch is logged rather than
// returned, because the function has no error result.
func (r *MyAppReconciler) updateStatusConditions(ctx context.Context, myapp *appsv1.MyApp, ready bool) {
	// Snapshot the object BEFORE mutating it; the merge patch is computed as
	// the difference between this snapshot and the modified object. Taking
	// the copy at patch time would always yield an empty patch.
	base := myapp.DeepCopy()

	cond := metav1.Condition{
		Type:               string(appsv1.MyAppReady),
		Status:             metav1.ConditionFalse,
		LastTransitionTime: metav1.Now(),
		Reason:             "Reconciled",
		Message:            "MyApp is not ready",
	}
	if ready {
		cond.Status = metav1.ConditionTrue
		cond.Message = "MyApp is ready"
	}

	// Upsert: at most one condition per type.
	found := false
	for i := range myapp.Status.Conditions {
		existing := &myapp.Status.Conditions[i]
		if existing.Type != cond.Type {
			continue
		}
		if existing.Status == cond.Status {
			// No transition happened, so keep the original timestamp.
			cond.LastTransitionTime = existing.LastTransitionTime
		}
		*existing = cond
		found = true
		break
	}
	if !found {
		myapp.Status.Conditions = append(myapp.Status.Conditions, cond)
	}

	if err := r.Status().Patch(ctx, myapp, client.MergeFrom(base)); err != nil {
		log.FromContext(ctx).Error(err, "Failed to patch MyApp status conditions")
	}
}
七、部署与分发
7.1 构建镜像
# 构建镜像
make docker-build IMG=myapp-operator:latest
# 推送镜像
make docker-push IMG=myapp-operator:latest
7.2 Helm Chart
apiVersion: v2
name: myapp-operator
description: A Helm chart for MyApp Operator
type: application
version: 0.1.0
appVersion: "1.0"
dependencies:
- name: cert-manager
version: v1.13.0
repository: https://charts.jetstack.io
condition: cert-manager.enabled
# 注意:Chart.yaml 中没有 templates 字段;模板文件应放在 Chart 的 templates/ 目录下:
#   templates/deployment.yaml
#   templates/service.yaml
#   templates/rbac.yaml
7.3 OLM部署
# 在集群中安装OLM(Operator Lifecycle Manager)
operator-sdk olm install
# 打包Operator(operator-sdk v1.x 已移除 `bundle create` 子命令,改用项目Makefile中的目标)
make bundle IMG=myapp-operator:latest
make bundle-build BUNDLE_IMG=myapp-operator-bundle:latest
# 推送Bundle
docker push myapp-operator-bundle:latest
# 订阅Operator
kubectl apply -f deploy/olm-catalog/myapp-operator/subscription.yaml
八、最佳实践
8.1 设计原则
- 幂等性:Reconcile应该是幂等的
- 重试机制:Reconcile返回error时会按退避策略自动重试;需要延后或定期重新调谐时使用Result.RequeueAfter(或Result.Requeue)
- 错误处理:区分可重试和不可重试错误
- 状态管理:合理使用Status字段
8.2 性能优化
// SetupWithManager registers the reconciler for MyApp and installs an event
// filter to cut down on reconcile calls. Note that this is an event
// predicate, not a field selector: GenerationChangedPredicate drops update
// events in which metadata.generation did not change (for example
// status-only updates), so only spec changes trigger reconciliation.
func (r *MyAppReconciler) SetupWithManager(mgr ctrl.Manager) error {
	bldr := ctrl.NewControllerManagedBy(mgr)
	bldr = bldr.For(&appsv1.MyApp{})
	bldr = bldr.WithEventFilter(predicate.GenerationChangedPredicate{})
	return bldr.Complete(r)
}
8.3 测试策略
// TestMyAppReconciler is a table-driven test skeleton for the reconciler.
// Each case has a name, a hook that prepares the envtest environment, and
// the expected error outcome; every case runs as its own subtest.
func TestMyAppReconciler(t *testing.T) {
	type testCase struct {
		name    string
		setup   func(*envtest.Environment)
		wantErr bool
	}

	cases := []testCase{
		{
			name: "create myapp",
			setup: func(env *envtest.Environment) {
				// Test setup goes here.
			},
			wantErr: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Test logic goes here.
		})
	}
}
九、总结
开发Kubernetes Operator需要以下步骤:
- 定义CRD:使用kubebuilder定义自定义资源
- 实现Controller:编写Reconcile逻辑
- 处理状态:更新资源状态
- 测试验证:单元测试和集成测试
- 部署分发:构建镜像和Chart
Operator模式是管理复杂应用的最佳实践,通过声明式API提供一致的管理体验。
2312

被折叠的 条评论
为什么被折叠?



