diff --git a/.evergreen.yml b/.evergreen.yml index 5918cdf0f..a6a3b8407 100644 --- a/.evergreen.yml +++ b/.evergreen.yml @@ -1131,6 +1131,15 @@ task_groups: - e2e_om_ops_manager_prometheus <<: *teardown_group + # Tests features only supported on OM80 + - name: e2e_ops_manager_kind_8_0_only_task_group + max_hosts: -1 + <<: *setup_group + <<: *setup_and_teardown_task + tasks: + - e2e_search_enterprise_tls + <<: *teardown_group + # Tests features only supported on OM70 and OM80, its only upgrade test as we test upgrading from 6 to 7 or 7 to 8 - name: e2e_ops_manager_upgrade_only_task_group max_hosts: -1 @@ -1341,6 +1350,7 @@ buildvariants: - name: e2e_ops_manager_kind_5_0_only_task_group_without_queryable_backup - name: e2e_ops_manager_kind_6_0_only_task_group - name: e2e_ops_manager_upgrade_only_task_group + - name: e2e_ops_manager_kind_8_0_only_task_group - name: e2e_static_om80_kind_ubi display_name: e2e_static_om80_kind_ubi diff --git a/controllers/om/automation_status.go b/controllers/om/automation_status.go index 502628cfa..15e54e6af 100644 --- a/controllers/om/automation_status.go +++ b/controllers/om/automation_status.go @@ -2,7 +2,9 @@ package om import ( "encoding/json" + "errors" "fmt" + "github.com/mongodb/mongodb-kubernetes/controllers/operator/workflow" "maps" "slices" "sort" @@ -40,6 +42,14 @@ func buildAutomationStatusFromBytes(b []byte) (*AutomationStatus, error) { return as, nil } +type PendingErr struct { + msg string +} + +func (e PendingErr) Error() string { + return e.msg +} + // WaitForReadyState waits until the agents for relevant processes reach their state func WaitForReadyState(oc Connection, processNames []string, supressErrors bool, log *zap.SugaredLogger) error { if len(processNames) == 0 { @@ -72,6 +82,41 @@ func WaitForReadyState(oc Connection, processNames []string, supressErrors bool, return nil } +func CheckForReadyState(oc Connection, processNames []string, log *zap.SugaredLogger) workflow.Status { + err := CheckForReadyStateReturningError(oc, processNames, log) + + if err != nil { + pendingErr := PendingErr{} + if ok := errors.As(err, &pendingErr); ok { + return workflow.Pending(pendingErr.Error()) + } + + return workflow.Failed(err) + } + + return workflow.OK() +} + +func CheckForReadyStateReturningError(oc Connection, processNames []string, log *zap.SugaredLogger) error { + if len(processNames) == 0 { + log.Infow("Not checking for MongoDB agents to reach READY state (no expected processes to check)") + return nil + } + + log.Infow("Checking if MongoDB agents reached READY state...", "processes", processNames) + as, err := oc.ReadAutomationStatus() + if err != nil { + return xerrors.Errorf("Error reading Automation Agents status: %s", err) + } + + if allReachedGoalState, msg := checkAutomationStatusIsGoal(as, processNames, log); allReachedGoalState { + log.Info("MongoDB agents have reached READY state") + return nil + } else { + return PendingErr{fmt.Sprintf("MongoDB agents haven't reached READY state; %s", msg)} + } +} + // CheckAutomationStatusIsGoal returns true if all the relevant processes are in Goal // state. 
// Note, that the function is quite tolerant to any situations except for non-matching goal state, for example diff --git a/controllers/om/replicaset/om_replicaset.go b/controllers/om/replicaset/om_replicaset.go index 2e72d2c3e..b98a122da 100644 --- a/controllers/om/replicaset/om_replicaset.go +++ b/controllers/om/replicaset/om_replicaset.go @@ -58,7 +58,7 @@ func PrepareScaleDownFromMap(omClient om.Connection, rsMembers map[string][]stri return xerrors.Errorf("unable to set votes, priority to 0 in Ops Manager, hosts: %v, err: %w", processes, err) } - if err := om.WaitForReadyState(omClient, processesToWaitForGoalState, false, log); err != nil { + if err := om.CheckForReadyStateReturningError(omClient, processesToWaitForGoalState, log); err != nil { return err } diff --git a/controllers/operator/appdbreplicaset_controller.go b/controllers/operator/appdbreplicaset_controller.go index fe68e07c6..b51607f5f 100644 --- a/controllers/operator/appdbreplicaset_controller.go +++ b/controllers/operator/appdbreplicaset_controller.go @@ -2,6 +2,7 @@ package operator import ( "context" + "errors" "fmt" "path" "sort" @@ -550,6 +551,10 @@ func (r *ReconcileAppDbReplicaSet) ReconcileAppDB(ctx context.Context, opsManage // it's possible that Ops Manager will not be available when we attempt to configure AppDB monitoring // in Ops Manager. This is not a blocker to continue with the rest of the reconciliation. if err != nil { + pendingErr := om.PendingErr{} + if ok := errors.As(err, &pendingErr); ok { + return r.updateStatus(ctx, opsManager, workflow.Pending(pendingErr.Error()), log, omStatusOption) + } log.Errorf("Unable to configure monitoring of AppDB: %s, configuration will be attempted next reconciliation.", err) if podVars.ProjectID != "" { diff --git a/controllers/operator/authentication/authentication.go b/controllers/operator/authentication/authentication.go index c2e36735b..3b11e4a1c 100644 --- a/controllers/operator/authentication/authentication.go +++ b/controllers/operator/authentication/authentication.go @@ -91,7 +91,7 @@ func Configure(conn om.Connection, opts Options, isRecovering bool, log *zap.Sug if isRecovering { return nil } - return om.WaitForReadyState(conn, opts.ProcessNames, false, log) + return om.CheckForReadyStateReturningError(conn, opts.ProcessNames, log) } // we need to make sure the desired authentication mechanism for the agent exists. 
If the desired agent @@ -172,6 +172,7 @@ func Disable(conn om.Connection, opts Options, deleteUsers bool, log *zap.Sugare return xerrors.Errorf("error read/updating automation config: %w", err) } + // Disable is called also onDelete, so we cannot requeue here, we must wait if err := om.WaitForReadyState(conn, opts.ProcessNames, false, log); err != nil { return xerrors.Errorf("error waiting for ready state: %w", err) } @@ -222,7 +223,7 @@ func Disable(conn om.Connection, opts Options, deleteUsers bool, log *zap.Sugare return xerrors.Errorf("error read/updating backup agent config: %w", err) } - if err := om.WaitForReadyState(conn, opts.ProcessNames, false, log); err != nil { + if err := om.CheckForReadyStateReturningError(conn, opts.ProcessNames, log); err != nil { return xerrors.Errorf("error waiting for ready state: %w", err) } diff --git a/controllers/operator/common_controller.go b/controllers/operator/common_controller.go index e76cf4e81..43594d0df 100644 --- a/controllers/operator/common_controller.go +++ b/controllers/operator/common_controller.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "encoding/pem" + "errors" "fmt" "path/filepath" "reflect" @@ -427,9 +428,12 @@ func (r *ReconcileCommonController) updateOmAuthentication(ctx context.Context, return workflow.Failed(err), false } - // we need to wait for all agents to be ready before configuring any authentication settings - if err := om.WaitForReadyState(conn, processNames, isRecovering, log); err != nil { - return workflow.Failed(err), false + if !isRecovering { + if workflowStatus := om.CheckForReadyState(conn, processNames, log); !workflowStatus.IsOK() { + return workflowStatus, false + } + } else { + log.Warnf("Ignoring checking for ready state due to recovering") } clientCerts := util.OptionalClientCertficates @@ -515,6 +519,10 @@ func (r *ReconcileCommonController) updateOmAuthentication(ctx context.Context, } if err := authentication.Configure(conn, authOpts, isRecovering, log); err != nil { + pendingErr := om.PendingErr{} + if ok := errors.As(err, &pendingErr); ok { + return workflow.Pending(pendingErr.Error()), false + } return workflow.Failed(err), false } } else if wantToEnableAuthentication { @@ -534,6 +542,7 @@ func (r *ReconcileCommonController) updateOmAuthentication(ctx context.Context, authOpts.UserOptions = userOpts if err := authentication.Disable(conn, authOpts, false, log); err != nil { + return workflow.Failed(err), false } } diff --git a/controllers/operator/mongodbmultireplicaset_controller.go b/controllers/operator/mongodbmultireplicaset_controller.go index 5d963c00e..c42376035 100644 --- a/controllers/operator/mongodbmultireplicaset_controller.go +++ b/controllers/operator/mongodbmultireplicaset_controller.go @@ -3,6 +3,7 @@ package operator import ( "context" "encoding/json" + "errors" "fmt" "reflect" "sort" @@ -213,6 +214,10 @@ func (r *ReconcileMongoDbMultiReplicaSet) Reconcile(ctx context.Context, request status := workflow.RunInGivenOrder(publishAutomationConfigFirst, func() workflow.Status { if err := r.updateOmDeploymentRs(ctx, conn, mrs, agentCertPath, tlsCertPath, internalClusterCertPath, false, log); err != nil { + pendingErr := om.PendingErr{} + if ok := errors.As(err, &pendingErr); ok { + return workflow.Pending(pendingErr.Error()) + } return workflow.Failed(err) } return workflow.OK() @@ -789,9 +794,14 @@ func (r *ReconcileMongoDbMultiReplicaSet) updateOmDeploymentRs(ctx context.Conte reachableProcessNames = append(reachableProcessNames, proc.Name()) } } - if err := 
om.WaitForReadyState(conn, reachableProcessNames, isRecovering, log); err != nil && !isRecovering { + if isRecovering { + return nil + } + + if err := om.CheckForReadyStateReturningError(conn, reachableProcessNames, log); err != nil { return err } + return nil } diff --git a/controllers/operator/mongodbreplicaset_controller.go b/controllers/operator/mongodbreplicaset_controller.go index 470e56716..cc6111a1f 100644 --- a/controllers/operator/mongodbreplicaset_controller.go +++ b/controllers/operator/mongodbreplicaset_controller.go @@ -2,6 +2,7 @@ package operator import ( "context" + goerrors "errors" "fmt" "go.uber.org/zap" @@ -240,6 +241,10 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco if scale.ReplicasThisReconciliation(rs) < rs.Status.Members { if err := replicaset.PrepareScaleDownFromStatefulSet(conn, sts, rs, log); err != nil { + pendingErr := om.PendingErr{} + if ok := goerrors.As(err, &pendingErr); ok { + return r.updateStatus(ctx, rs, workflow.Pending(pendingErr.Error()), log) + } return r.updateStatus(ctx, rs, workflow.Failed(xerrors.Errorf("Failed to prepare Replica Set for scaling down using Ops Manager: %w", err)), log) } } @@ -512,8 +517,12 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c return workflow.Failed(err) } - if err := om.WaitForReadyState(conn, processNames, isRecovering, log); err != nil { - return workflow.Failed(err) + if !isRecovering { + if workflowStatus := om.CheckForReadyState(conn, processNames, log); !workflowStatus.IsOK() { + return workflowStatus + } + } else { + log.Warnf("Ignoring checking for ready state due to recovering") } reconcileResult, _ := ReconcileLogRotateSetting(conn, rs.Spec.Agent, log) diff --git a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go index abe384b16..c9ca7880f 100644 --- a/controllers/operator/mongodbshardedcluster_controller.go +++ b/controllers/operator/mongodbshardedcluster_controller.go @@ -2,7 +2,9 @@ package operator import ( "context" + goerrors "errors" "fmt" + "k8s.io/apimachinery/pkg/api/errors" "slices" "sort" "strings" @@ -11,7 +13,6 @@ import ( "github.com/hashicorp/go-multierror" "go.uber.org/zap" "golang.org/x/xerrors" - "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" @@ -1611,6 +1612,7 @@ func (r *ShardedClusterReconcileHelper) cleanOpsManagerState(ctx context.Context } logDiffOfProcessNames(processNames, r.getHealthyProcessNames(), log.With("ctx", "cleanOpsManagerState")) + // we're onDelete, we cannot requeue, so we need to wait if err := om.WaitForReadyState(conn, r.getHealthyProcessNames(), false, log); err != nil { return err } @@ -1849,13 +1851,12 @@ func (r *ShardedClusterReconcileHelper) updateOmDeploymentShardedCluster(ctx con healthyProcessesToWaitForReadyState := r.getHealthyProcessNamesToWaitForReadyState(conn, log) logDiffOfProcessNames(processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "updateOmDeploymentShardedCluster")) - if err = om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil { - if !isRecovering { - if shardsRemoving { - return workflow.Pending("automation agents haven't reached READY state: shards removal in progress: %v", err) - } - return workflow.Failed(err) + + if !isRecovering { + if workflowStatus := om.CheckForReadyState(conn, healthyProcessesToWaitForReadyState, log); !workflowStatus.IsOK() { + return 
workflowStatus } + } else { logWarnIgnoredDueToRecovery(log, err) } @@ -1873,12 +1874,16 @@ func (r *ShardedClusterReconcileHelper) updateOmDeploymentShardedCluster(ctx con healthyProcessesToWaitForReadyState := r.getHealthyProcessNamesToWaitForReadyState(conn, log) logDiffOfProcessNames(processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "shardsRemoving")) - if err = om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil { - if !isRecovering { - return workflow.Failed(xerrors.Errorf("automation agents haven't reached READY state while cleaning replica set and processes: %w", err)) - } + if isRecovering { logWarnIgnoredDueToRecovery(log, err) } + if err = om.CheckForReadyStateReturningError(conn, healthyProcessesToWaitForReadyState, log); err != nil { + pendingErr := om.PendingErr{} + if ok := goerrors.As(err, &pendingErr); ok { + return workflow.Pending(pendingErr.Error()) + } + return workflow.Failed(err) + } } currentHosts := r.getAllHostnames(false) @@ -2042,8 +2047,13 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c healthyProcessesToWaitForReadyState = r.getHealthyProcessNamesToWaitForReadyState(conn, log) logDiffOfProcessNames(opts.processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "publishDeployment")) - if err := om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil { - return nil, shardsRemoving, workflow.Failed(err) + + if !isRecovering { + if workflowStatus := om.CheckForReadyState(conn, healthyProcessesToWaitForReadyState, log); workflowStatus != workflow.OK() { + return nil, shardsRemoving, workflowStatus + } + } else { + log.Warnf("Ignoring checking for ready state due to recovering") } if additionalReconciliationRequired { diff --git a/controllers/operator/mongodbstandalone_controller.go b/controllers/operator/mongodbstandalone_controller.go index 47ff489bb..ad5b20c13 100644 --- a/controllers/operator/mongodbstandalone_controller.go +++ b/controllers/operator/mongodbstandalone_controller.go @@ -353,8 +353,12 @@ func (r *ReconcileMongoDbStandalone) updateOmDeployment(ctx context.Context, con return workflow.Failed(err) } - if err := om.WaitForReadyState(conn, []string{set.Name}, isRecovering, log); err != nil { - return workflow.Failed(err) + if !isRecovering { + if workflowStatus := om.CheckForReadyState(conn, []string{set.Name}, log); workflowStatus != workflow.OK() { + return workflowStatus + } + } else { + log.Warnf("Ignoring checking for ready state due to recovering") } if additionalReconciliationRequired { diff --git a/docker/mongodb-kubernetes-tests/kubetester/mongodb.py b/docker/mongodb-kubernetes-tests/kubetester/mongodb.py index c4fd2a103..8665f357d 100644 --- a/docker/mongodb-kubernetes-tests/kubetester/mongodb.py +++ b/docker/mongodb-kubernetes-tests/kubetester/mongodb.py @@ -230,7 +230,18 @@ def __repr__(self): def configure( self, - om: MongoDBOpsManager, + om: Optional[MongoDBOpsManager], + project_name: str, + api_client: Optional[client.ApiClient] = None, + ) -> MongoDB: + if om is not None: + return self.configure_ops_manager(om, project_name, api_client=api_client) + else: + return self.configure_cloud_qa(project_name, api_client=api_client) + + def configure_ops_manager( + self, + om: Optional[MongoDBOpsManager], project_name: str, api_client: Optional[client.ApiClient] = None, ) -> MongoDB: @@ -247,6 +258,39 @@ def configure( self["spec"]["credentials"] = om.api_key_secret(self.namespace, api_client=api_client)
return self + def configure_cloud_qa( + self, + project_name: str, + src_project_config_map_name: str = None, + api_client: Optional[client.ApiClient] = None, + ) -> MongoDB: + if "opsManager" in self["spec"]: + del self["spec"]["opsManager"] + + if src_project_config_map_name is None and "cloudManager" in self["spec"]: + src_project_config_map_name = self["spec"]["cloudManager"]["configMapRef"]["name"] + elif src_project_config_map_name is None: + # my-project cm and my-credentials secret are created by scripts/evergreen/e2e/configure_operator.sh + src_project_config_map_name = "my-project" + + try: + src_cm = read_configmap(self.namespace, src_project_config_map_name, api_client=api_client) + except client.ApiException as e: + if e.status == 404: + logger.debug("project config map is not specified, trying my-project as the source") + src_cm = read_configmap(self.namespace, "my-project", api_client=api_client) + else: + raise e + + new_project_config_map_name = f"{self.name}-project-config" + ensure_nested_objects(self, ["spec", "cloudManager", "configMapRef"]) + self["spec"]["cloudManager"]["configMapRef"]["name"] = new_project_config_map_name + + src_cm.update({"projectName": f"{self.namespace}-{project_name}"}) + create_or_update_configmap(self.namespace, new_project_config_map_name, src_cm, api_client=api_client) + + return self + def configure_backup(self, mode: str = "enabled") -> MongoDB: ensure_nested_objects(self, ["spec", "backup"]) self["spec"]["backup"]["mode"] = mode @@ -449,6 +493,9 @@ def get_external_domain(self): def config_map_name(self) -> str: if "opsManager" in self["spec"]: return self["spec"]["opsManager"]["configMapRef"]["name"] + elif "cloudManager" in self["spec"]: + return self["spec"]["cloudManager"]["configMapRef"]["name"] + return self["spec"]["project"] def shard_replicaset_names(self) -> List[str]: diff --git a/docker/mongodb-kubernetes-tests/kubetester/omtester.py b/docker/mongodb-kubernetes-tests/kubetester/omtester.py index 72874d2b6..5798a89d7 100644 --- a/docker/mongodb-kubernetes-tests/kubetester/omtester.py +++ b/docker/mongodb-kubernetes-tests/kubetester/omtester.py @@ -20,16 +20,9 @@ from kubetester.om_queryable_backups import OMQueryableBackup from opentelemetry import trace from requests.adapters import HTTPAdapter, Retry +from tests.common.ops_manager.cloud_manager import is_cloud_qa -from .kubetester import get_env_var_or_fail - - -def running_cloud_manager(): - "Determines if the current test is running against Cloud Manager" - return get_env_var_or_fail("OM_HOST") == "https://cloud-qa.mongodb.com/" - - -skip_if_cloud_manager = pytest.mark.skipif(running_cloud_manager(), reason="Do not run in Cloud Manager") +skip_if_cloud_manager = pytest.mark.skipif(is_cloud_qa(), reason="Do not run in Cloud Manager") class BackupStatus(str, Enum): diff --git a/docker/mongodb-kubernetes-tests/tests/common/ops_manager/cloud_manager.py b/docker/mongodb-kubernetes-tests/tests/common/ops_manager/cloud_manager.py new file mode 100644 index 000000000..ed936858d --- /dev/null +++ b/docker/mongodb-kubernetes-tests/tests/common/ops_manager/cloud_manager.py @@ -0,0 +1,5 @@ +import os + + +def is_cloud_qa() -> bool: + return os.getenv("ops_manager_version", "cloud_qa") == "cloud_qa" diff --git a/docker/mongodb-kubernetes-tests/tests/opsmanager/fixtures/om_ops_manager_basic.yaml b/docker/mongodb-kubernetes-tests/tests/opsmanager/fixtures/om_ops_manager_basic.yaml index daa8b85f5..a4b6ba3d1 100644 --- a/docker/mongodb-kubernetes-tests/tests/opsmanager/fixtures/om_ops_manager_basic.yaml +++
b/docker/mongodb-kubernetes-tests/tests/opsmanager/fixtures/om_ops_manager_basic.yaml @@ -15,3 +15,16 @@ spec: backup: enabled: false + + # adding this just to avoid wizard when opening OM UI + configuration: + automation.versions.source: mongodb + mms.adminEmailAddr: cloud-manager-support@mongodb.com + mms.fromEmailAddr: cloud-manager-support@mongodb.com + mms.ignoreInitialUiSetup: "true" + mms.mail.hostname: email-smtp.us-east-1.amazonaws.com + mms.mail.port: "465" + mms.mail.ssl: "true" + mms.mail.transport: smtp + mms.minimumTLSVersion: TLSv1.2 + mms.replyToEmailAddr: cloud-manager-support@mongodb.com diff --git a/docker/mongodb-kubernetes-tests/tests/search/fixtures/enterprise-replicaset-sample-mflix.yaml b/docker/mongodb-kubernetes-tests/tests/search/fixtures/enterprise-replicaset-sample-mflix.yaml index cf58035f1..926aed93d 100644 --- a/docker/mongodb-kubernetes-tests/tests/search/fixtures/enterprise-replicaset-sample-mflix.yaml +++ b/docker/mongodb-kubernetes-tests/tests/search/fixtures/enterprise-replicaset-sample-mflix.yaml @@ -19,16 +19,15 @@ spec: - SCRAM agent: logLevel: DEBUG - statefulSet: - spec: - template: - spec: - containers: - - name: mongodb-enterprise-database - resources: - limits: - cpu: "2" - memory: 2Gi - requests: - cpu: "1" - memory: 1Gi + podSpec: + podTemplate: + spec: + containers: + - name: mongodb-enterprise-database + resources: + limits: + cpu: "2" + memory: 2Gi + requests: + cpu: "1" + memory: 1Gi diff --git a/docker/mongodb-kubernetes-tests/tests/search/om_deployment.py b/docker/mongodb-kubernetes-tests/tests/search/om_deployment.py new file mode 100644 index 000000000..a027b6f98 --- /dev/null +++ b/docker/mongodb-kubernetes-tests/tests/search/om_deployment.py @@ -0,0 +1,30 @@ +from typing import Optional + +from kubetester import try_load +from kubetester.kubetester import fixture as yaml_fixture +from kubetester.kubetester import is_multi_cluster +from kubetester.opsmanager import MongoDBOpsManager +from pytest import fixture +from tests.common.ops_manager.cloud_manager import is_cloud_qa +from tests.conftest import get_custom_appdb_version, get_custom_om_version +from tests.opsmanager.withMonitoredAppDB.conftest import enable_multi_cluster_deployment + + +def get_ops_manager(namespace: str) -> Optional[MongoDBOpsManager]: + if is_cloud_qa(): + return None + + resource: MongoDBOpsManager = MongoDBOpsManager.from_yaml( + yaml_fixture("om_ops_manager_basic.yaml"), namespace=namespace + ) + + if try_load(resource): + return resource + + resource.set_version(get_custom_om_version()) + resource.set_appdb_version(get_custom_appdb_version()) + + if is_multi_cluster(): + enable_multi_cluster_deployment(resource) + + return resource diff --git a/docker/mongodb-kubernetes-tests/tests/search/search_enterprise_tls.py b/docker/mongodb-kubernetes-tests/tests/search/search_enterprise_tls.py index 887a8e5d0..0ecb1a961 100644 --- a/docker/mongodb-kubernetes-tests/tests/search/search_enterprise_tls.py +++ b/docker/mongodb-kubernetes-tests/tests/search/search_enterprise_tls.py @@ -1,19 +1,21 @@ import pymongo import yaml -from kubetester import create_or_update_secret, try_load +from kubernetes import client +from kubetester import create_or_update_secret, run_periodically, try_load, wait_until from kubetester.certs import create_mongodb_tls_certs, create_tls_certs from kubetester.kubetester import KubernetesTester from kubetester.kubetester import fixture as yaml_fixture from kubetester.mongodb import MongoDB from kubetester.mongodb_search import MongoDBSearch from 
kubetester.mongodb_user import MongoDBUser +from kubetester.omtester import skip_if_cloud_manager from kubetester.phase import Phase from pytest import fixture, mark from tests import test_logger from tests.common.search import movies_search_helper -from tests.common.search.movies_search_helper import SampleMoviesSearchHelper from tests.common.search.search_tester import SearchTester -from tests.conftest import get_default_operator +from tests.conftest import get_default_operator, get_issuer_ca_filepath +from tests.search.om_deployment import get_ops_manager logger = test_logger.get_test_logger(__name__) @@ -26,11 +28,14 @@ USER_NAME = "mdb-user" USER_PASSWORD = f"{USER_NAME}-password" -MDB_RESOURCE_NAME = "mdb-rs" +MDB_RESOURCE_NAME = "mdb-ent-tls" # MongoDBSearch TLS configuration MDBS_TLS_SECRET_NAME = "mdbs-tls-secret" +MDB_VERSION_WITHOUT_BUILT_IN_ROLE = "8.0.10-ent" +MDB_VERSION_WITH_BUILT_IN_ROLE = "8.2.0-ent" + @fixture(scope="function") def mdb(namespace: str, issuer_ca_configmap: str) -> MongoDB: @@ -39,6 +44,8 @@ def mdb(namespace: str, issuer_ca_configmap: str) -> MongoDB: name=MDB_RESOURCE_NAME, namespace=namespace, ) + resource.configure(om=get_ops_manager(namespace), project_name=MDB_RESOURCE_NAME) + resource.set_version(MDB_VERSION_WITHOUT_BUILT_IN_ROLE) if try_load(resource): return resource @@ -73,6 +80,7 @@ def admin_user(namespace: str) -> MongoDBUser: if try_load(resource): return resource + resource["spec"]["mongodbResourceRef"]["name"] = MDB_RESOURCE_NAME resource["spec"]["username"] = resource.name resource["spec"]["passwordSecretKeyRef"]["name"] = f"{resource.name}-password" @@ -86,6 +94,7 @@ def user(namespace: str) -> MongoDBUser: if try_load(resource): return resource + resource["spec"]["mongodbResourceRef"]["name"] = MDB_RESOURCE_NAME resource["spec"]["username"] = resource.name resource["spec"]["passwordSecretKeyRef"]["name"] = f"{resource.name}-password" @@ -103,6 +112,7 @@ def mongot_user(namespace: str, mdbs: MongoDBSearch) -> MongoDBUser: if try_load(resource): return resource + resource["spec"]["mongodbResourceRef"]["name"] = MDB_RESOURCE_NAME resource["spec"]["username"] = MONGOT_USER_NAME resource["spec"]["passwordSecretKeyRef"]["name"] = f"{resource.name}-password" @@ -115,6 +125,15 @@ def test_install_operator(namespace: str, operator_installation_config: dict[str operator.assert_is_running() +@mark.e2e_search_enterprise_tls +@skip_if_cloud_manager +def test_create_ops_manager(namespace: str): + ops_manager = get_ops_manager(namespace) + ops_manager.update() + ops_manager.om_status().assert_reaches_phase(Phase.Running, timeout=1200) + ops_manager.appdb_status().assert_reaches_phase(Phase.Running, timeout=600) + + @mark.e2e_search_enterprise_tls def test_install_tls_secrets_and_configmaps(namespace: str, mdb: MongoDB, mdbs: MongoDBSearch, issuer: str): create_mongodb_tls_certs(issuer, namespace, mdb.name, f"certs-{mdb.name}-cert", mdb.get_members()) @@ -144,19 +163,20 @@ def test_create_users( create_or_update_secret( namespace, name=admin_user["spec"]["passwordSecretKeyRef"]["name"], data={"password": ADMIN_USER_PASSWORD} ) - admin_user.create() - admin_user.assert_reaches_phase(Phase.Updated, timeout=300) + admin_user.update() create_or_update_secret( namespace, name=user["spec"]["passwordSecretKeyRef"]["name"], data={"password": USER_PASSWORD} ) - user.create() + user.update() + + admin_user.assert_reaches_phase(Phase.Updated, timeout=300) user.assert_reaches_phase(Phase.Updated, timeout=300) create_or_update_secret( namespace, 
name=mongot_user["spec"]["passwordSecretKeyRef"]["name"], data={"password": MONGOT_USER_PASSWORD} ) - mongot_user.create() + mongot_user.update() # we deliberately don't wait for this user to be ready, because to be reconciled successfully it needs the searchCoordinator role # which the ReplicaSet reconciler will only define in the automation config after the MongoDBSearch resource is created. @@ -167,29 +187,127 @@ def test_create_search_resource(mdbs: MongoDBSearch): mdbs.assert_reaches_phase(Phase.Running, timeout=300) +@mark.e2e_search_enterprise_tls +def test_wait_for_mongod_parameters(mdb: MongoDB): + # After search CR is deployed, MongoDB controller will pick it up + # and start adding searchCoordinator role and search-related + # parameters to the automation config. + def check_mongod_parameters(): + parameters_are_set = True + pod_parameters = [] + for idx in range(mdb.get_members()): + mongod_config = yaml.safe_load( + KubernetesTester.run_command_in_pod_container( + f"{mdb.name}-{idx}", mdb.namespace, ["cat", "/data/automation-mongod.conf"] + ) + ) + set_parameter = mongod_config.get("setParameter", {}) + parameters_are_set = parameters_are_set and ( + "mongotHost" in set_parameter and "searchIndexManagementHostAndPort" in set_parameter + ) + pod_parameters.append(f"pod {idx} setParameter: {set_parameter}") + + return parameters_are_set, "Not all pods have mongot parameters set:\n" + "\n".join(pod_parameters) + + run_periodically(check_mongod_parameters, timeout=200) + +# After picking up the MongoDBSearch CR, the MongoDB reconciler will add the mongod parameters, +# but it will not immediately mark the MongoDB CR as Pending, so instead of asserting a phase +# change we only wait for the resource to settle back into the Running phase. @mark.e2e_search_enterprise_tls def test_wait_for_database_resource_ready(mdb: MongoDB): - mdb.assert_abandons_phase(Phase.Running, timeout=300) mdb.assert_reaches_phase(Phase.Running, timeout=300) - for idx in range(mdb.get_members()): - mongod_config = yaml.safe_load( - KubernetesTester.run_command_in_pod_container( - f"{mdb.name}-{idx}", mdb.namespace, ["cat", "/data/automation-mongod.conf"] - ) - ) - setParameter = mongod_config.get("setParameter", {}) - assert ( - "mongotHost" in setParameter and "searchIndexManagementHostAndPort" in setParameter - ), "mongot parameters not found in mongod config" + +@mark.e2e_search_enterprise_tls +def test_validate_tls_connections(mdb: MongoDB, mdbs: MongoDBSearch, namespace: str): + validate_tls_connections(mdb, mdbs, namespace) + + +@mark.e2e_search_enterprise_tls +def test_search_restore_sample_database(mdb: MongoDB): + get_admin_sample_movies_helper(mdb).restore_sample_database() @mark.e2e_search_enterprise_tls -def test_validate_tls_connections(mdb: MongoDB, mdbs: MongoDBSearch, namespace: str, issuer_ca_filepath: str): +def test_search_create_search_index(mdb: MongoDB): + get_user_sample_movies_helper(mdb).create_search_index() + + +@mark.e2e_search_enterprise_tls +def test_search_assert_search_query(mdb: MongoDB): + get_user_sample_movies_helper(mdb).assert_search_query(retry_timeout=60) + + +@mark.e2e_search_enterprise_tls +# This test class verifies that MongoDB <8.2 can be upgraded to MongoDB >=8.2. +# For mongod <8.2 the operator automatically creates the searchCoordinator customRole. +# We test here that the role exists before the upgrade, because +# after MongoDB is upgraded, the role should be removed from the AC. +# From 8.2 on, searchCoordinator is a built-in role.
+class TestUpgradeMongod: + def test_mongod_version(self, mdb: MongoDB): + # This test is redundant when looking at the context of the full test file, + # as we deploy MDB_VERSION_WITHOUT_BUILT_IN_ROLE initially + # But it makes sense if we take into consideration TestUpgradeMongod test class alone. + # This checks the most important prerequisite for this test class to work. + # We check the version in case the test class is reused in another place + # or executed again when running locally. + mdb.tester(ca_path=get_issuer_ca_filepath(), use_ssl=True).assert_version(MDB_VERSION_WITHOUT_BUILT_IN_ROLE) + + def test_check_polyfilled_role_in_ac(self, mdb: MongoDB): + custom_roles = mdb.get_automation_config_tester().automation_config.get("roles", []) + assert len(custom_roles) > 0 + assert "searchCoordinator" in [role["role"] for role in custom_roles] + + def test_upgrade_to_mongo_8_2(self, mdb: MongoDB): + mdb.set_version(MDB_VERSION_WITH_BUILT_IN_ROLE) + mdb.update() + mdb.assert_reaches_phase(Phase.Running, timeout=600) + + def test_check_polyfilled_role_not_in_ac(self, mdb: MongoDB): + custom_roles = mdb.get_automation_config_tester().automation_config.get("roles", []) + assert len(custom_roles) >= 0 + assert "searchCoordinator" not in [role["role"] for role in custom_roles] + + def test_mongod_version_after_upgrade(self, mdb: MongoDB): + mdb_tester = mdb.tester(ca_path=get_issuer_ca_filepath(), use_ssl=True) + mdb_tester.assert_scram_sha_authentication( + ADMIN_USER_NAME, ADMIN_USER_PASSWORD, "SCRAM-SHA-256", 1, ssl=True, tlsCAFile=get_issuer_ca_filepath() + ) + mdb_tester.assert_version(MDB_VERSION_WITH_BUILT_IN_ROLE) + + def test_search_assert_search_query_after_upgrade(self, mdb: MongoDB): + get_user_sample_movies_helper(mdb).assert_search_query(retry_timeout=60) + + +def get_connection_string(mdb: MongoDB, user_name: str, user_password: str) -> str: + return f"mongodb://{user_name}:{user_password}@{mdb.name}-0.{mdb.name}-svc.{mdb.namespace}.svc.cluster.local:27017/?replicaSet={mdb.name}" + + +def get_admin_sample_movies_helper(mdb): + return movies_search_helper.SampleMoviesSearchHelper( + SearchTester( + get_connection_string(mdb, ADMIN_USER_NAME, ADMIN_USER_PASSWORD), + use_ssl=True, + ca_path=get_issuer_ca_filepath(), + ) + ) + + +def get_user_sample_movies_helper(mdb): + return movies_search_helper.SampleMoviesSearchHelper( + SearchTester( + get_connection_string(mdb, USER_NAME, USER_PASSWORD), use_ssl=True, ca_path=get_issuer_ca_filepath() + ) + ) + + +def validate_tls_connections(mdb: MongoDB, mdbs: MongoDBSearch, namespace: str): with pymongo.MongoClient( f"mongodb://{mdb.name}-0.{mdb.name}-svc.{namespace}.svc.cluster.local:27017/?replicaSet={mdb.name}", tls=True, - tlsCAFile=issuer_ca_filepath, + tlsCAFile=get_issuer_ca_filepath(), tlsAllowInvalidHostnames=False, serverSelectionTimeoutMS=30000, connectTimeoutMS=20000, @@ -200,40 +318,10 @@ def test_validate_tls_connections(mdb: MongoDB, mdbs: MongoDBSearch, namespace: with pymongo.MongoClient( f"mongodb://{mdbs.name}-search-svc.{namespace}.svc.cluster.local:27027", tls=True, - tlsCAFile=issuer_ca_filepath, + tlsCAFile=get_issuer_ca_filepath(), tlsAllowInvalidHostnames=False, serverSelectionTimeoutMS=10000, connectTimeoutMS=10000, ) as search_client: search_info = search_client.admin.command("hello") assert search_info.get("ok") == 1, "MongoDBSearch connection failed" - - -@mark.e2e_search_enterprise_tls -def test_search_restore_sample_database(mdb: MongoDB, issuer_ca_filepath: str): - sample_movies_helper = 
movies_search_helper.SampleMoviesSearchHelper( - SearchTester( - get_connection_string(mdb, ADMIN_USER_NAME, ADMIN_USER_PASSWORD), use_ssl=True, ca_path=issuer_ca_filepath - ) - ) - sample_movies_helper.restore_sample_database() - - -@mark.e2e_search_enterprise_tls -def test_search_create_search_index(mdb: MongoDB, issuer_ca_filepath: str): - sample_movies_helper = movies_search_helper.SampleMoviesSearchHelper( - SearchTester(get_connection_string(mdb, USER_NAME, USER_PASSWORD), use_ssl=True, ca_path=issuer_ca_filepath) - ) - sample_movies_helper.create_search_index() - - -@mark.e2e_search_enterprise_tls -def test_search_assert_search_query(mdb: MongoDB, issuer_ca_filepath: str): - sample_movies_helper = movies_search_helper.SampleMoviesSearchHelper( - SearchTester(get_connection_string(mdb, USER_NAME, USER_PASSWORD), use_ssl=True, ca_path=issuer_ca_filepath) - ) - sample_movies_helper.assert_search_query(retry_timeout=60) - - -def get_connection_string(mdb: MongoDB, user_name: str, user_password: str) -> str: - return f"mongodb://{user_name}:{user_password}@{mdb.name}-0.{mdb.name}-svc.{mdb.namespace}.svc.cluster.local:27017/?replicaSet={mdb.name}" diff --git a/scripts/dev/contexts/variables/om80 b/scripts/dev/contexts/variables/om80 index f0b677d90..06f10fe7a 100644 --- a/scripts/dev/contexts/variables/om80 +++ b/scripts/dev/contexts/variables/om80 @@ -19,3 +19,5 @@ export AGENT_IMAGE="${MDB_AGENT_IMAGE_REPOSITORY}:${AGENT_VERSION}" export CUSTOM_APPDB_VERSION=8.0.6-ent export TEST_MODE=opsmanager export OPS_MANAGER_REGISTRY="${BASE_REPO_URL}" + +export ops_manager_version="${CUSTOM_OM_VERSION}" diff --git a/scripts/funcs/kubernetes b/scripts/funcs/kubernetes index 11250422d..247da29b9 100644 --- a/scripts/funcs/kubernetes +++ b/scripts/funcs/kubernetes @@ -98,7 +98,7 @@ create_image_registries_secret() { context=$1 namespace=$2 secret_name=$3 - + # Detect the correct config file path based on container runtime local config_file local temp_config_file="" @@ -106,7 +106,7 @@ create_image_registries_secret() { # For Podman, use root's auth.json since minikube uses sudo podman config_file="/root/.config/containers/auth.json" echo "Using Podman config: ${config_file}" - + # Create a temporary copy that the current user can read temp_config_file=$(mktemp) sudo cp "${config_file}" "${temp_config_file}" @@ -117,7 +117,7 @@ create_image_registries_secret() { config_file="${HOME}/.docker/config.json" echo "Using Docker config: ${config_file}" fi - + # shellcheck disable=SC2154 if kubectl --context "${context}" get namespace "${namespace}"; then kubectl --context "${context}" -n "${namespace}" delete secret "${secret_name}" --ignore-not-found @@ -127,7 +127,7 @@ create_image_registries_secret() { else echo "Skipping creating pull secret in ${context}/${namespace}. The namespace doesn't exist yet." fi - + # Clean up temporary file if [[ -n "${temp_config_file}" ]] && [[ -f "${temp_config_file}" ]]; then rm -f "${temp_config_file}" @@ -255,7 +255,6 @@ run_script_with_wrapped_kubectl() { cat > "${wrapper_script}" << EOF #!/bin/bash # Define kubectl function to include the context -set -x kubectl() { command kubectl --context "${context}" "\$@" }
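
Reviewer note: every controller touched above applies the same error-classification idiom — run the non-blocking readiness check and turn a PendingErr into a workflow.Pending requeue, while any other error becomes workflow.Failed. The following is a minimal, self-contained sketch of that idiom; it uses stand-in types and functions (PendingErr, checkReady, classify) rather than the real om and workflow packages, so treat it as an illustration of the pattern, not repository code.

package main

import (
	"errors"
	"fmt"
)

// PendingErr mirrors om.PendingErr: an error type that signals "the agents have
// not reached goal state yet", so the reconciler should requeue rather than fail.
type PendingErr struct {
	msg string
}

func (e PendingErr) Error() string {
	return e.msg
}

// checkReady is a stand-in for om.CheckForReadyStateReturningError: nil when all
// processes reached goal state, PendingErr when they have not yet.
func checkReady(allReachedGoalState bool) error {
	if allReachedGoalState {
		return nil
	}
	return PendingErr{msg: "MongoDB agents haven't reached READY state"}
}

// classify shows the caller-side pattern used by the reconcilers in this PR:
// PendingErr -> requeue (Pending), anything else -> hard failure.
func classify(err error) string {
	if err == nil {
		return "OK"
	}
	pendingErr := PendingErr{}
	if errors.As(err, &pendingErr) {
		return "Pending: " + pendingErr.Error()
	}
	return "Failed: " + err.Error()
}

func main() {
	fmt.Println(classify(checkReady(true)))
	fmt.Println(classify(checkReady(false)))
	fmt.Println(classify(fmt.Errorf("error reading Automation Agents status")))
}

The design point is that Pending is not treated as a hard error by the operator's workflow machinery, so a deployment that is not yet ready requeues the reconciliation instead of blocking inside WaitForReadyState; the blocking variant is kept only for the onDelete paths, which cannot requeue.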