From 97883a06cfefa54d178b0e5e8dc84a896ac15531 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 14 Aug 2025 08:31:05 +0000 Subject: [PATCH 01/27] chore(deps): bump the all-gomod group with 3 updates Bumps the all-gomod group with 3 updates: [k8s.io/apimachinery](https://github.com/kubernetes/apimachinery), [k8s.io/cli-runtime](https://github.com/kubernetes/cli-runtime) and [k8s.io/client-go](https://github.com/kubernetes/client-go). Updates `k8s.io/apimachinery` from 0.33.3 to 0.33.4 - [Commits](https://github.com/kubernetes/apimachinery/compare/v0.33.3...v0.33.4) Updates `k8s.io/cli-runtime` from 0.33.3 to 0.33.4 - [Commits](https://github.com/kubernetes/cli-runtime/compare/v0.33.3...v0.33.4) Updates `k8s.io/client-go` from 0.33.3 to 0.33.4 - [Changelog](https://github.com/kubernetes/client-go/blob/master/CHANGELOG.md) - [Commits](https://github.com/kubernetes/client-go/compare/v0.33.3...v0.33.4) --- updated-dependencies: - dependency-name: k8s.io/apimachinery dependency-version: 0.33.4 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: all-gomod - dependency-name: k8s.io/cli-runtime dependency-version: 0.33.4 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: all-gomod - dependency-name: k8s.io/client-go dependency-version: 0.33.4 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: all-gomod ... Signed-off-by: dependabot[bot] --- go.mod | 8 ++++---- go.sum | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 4ac04f3..c371660 100644 --- a/go.mod +++ b/go.mod @@ -22,9 +22,9 @@ require ( go.opentelemetry.io/otel/sdk v1.37.0 go.opentelemetry.io/otel/trace v1.37.0 helm.sh/helm/v3 v3.18.5 - k8s.io/apimachinery v0.33.3 - k8s.io/cli-runtime v0.33.3 - k8s.io/client-go v0.33.3 + k8s.io/apimachinery v0.33.4 + k8s.io/cli-runtime v0.33.4 + k8s.io/client-go v0.33.4 ) require ( @@ -153,7 +153,7 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.33.3 // indirect + k8s.io/api v0.33.4 // indirect k8s.io/apiextensions-apiserver v0.33.3 // indirect k8s.io/apiserver v0.33.3 // indirect k8s.io/component-base v0.33.3 // indirect diff --git a/go.sum b/go.sum index e24b2b5..8e331ce 100644 --- a/go.sum +++ b/go.sum @@ -521,18 +521,18 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= helm.sh/helm/v3 v3.18.5 h1:Cc3Z5vd6kDrZq9wO9KxKLNEickiTho6/H/dBNRVSos4= helm.sh/helm/v3 v3.18.5/go.mod h1:L/dXDR2r539oPlFP1PJqKAC1CUgqHJDLkxKpDGrWnyg= -k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8= -k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE= +k8s.io/api v0.33.4 h1:oTzrFVNPXBjMu0IlpA2eDDIU49jsuEorGHB4cvKupkk= +k8s.io/api v0.33.4/go.mod h1:VHQZ4cuxQ9sCUMESJV5+Fe8bGnqAARZ08tSTdHWfeAc= k8s.io/apiextensions-apiserver v0.33.3 h1:qmOcAHN6DjfD0v9kxL5udB27SRP6SG/MTopmge3MwEs= k8s.io/apiextensions-apiserver v0.33.3/go.mod h1:oROuctgo27mUsyp9+Obahos6CWcMISSAPzQ77CAQGz8= -k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA= -k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= +k8s.io/apimachinery v0.33.4 h1:SOf/JW33TP0eppJMkIgQ+L6atlDiP/090oaX0y9pd9s= +k8s.io/apimachinery v0.33.4/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= k8s.io/apiserver v0.33.3 h1:Wv0hGc+QFdMJB4ZSiHrCgN3zL3QRatu56+rpccKC3J4= k8s.io/apiserver v0.33.3/go.mod h1:05632ifFEe6TxwjdAIrwINHWE2hLwyADFk5mBsQa15E= -k8s.io/cli-runtime v0.33.3 h1:Dgy4vPjNIu8LMJBSvs8W0LcdV0PX/8aGG1DA1W8lklA= -k8s.io/cli-runtime v0.33.3/go.mod h1:yklhLklD4vLS8HNGgC9wGiuHWze4g7x6XQZ+8edsKEo= -k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA= -k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg= +k8s.io/cli-runtime v0.33.4 h1:V8NSxGfh24XzZVhXmIGzsApdBpGq0RQS2u/Fz1GvJwk= +k8s.io/cli-runtime v0.33.4/go.mod h1:V+ilyokfqjT5OI+XE+O515K7jihtr0/uncwoyVqXaIU= +k8s.io/client-go v0.33.4 h1:TNH+CSu8EmXfitntjUPwaKVPN0AYMbc9F1bBS8/ABpw= +k8s.io/client-go v0.33.4/go.mod h1:LsA0+hBG2DPwovjd931L/AoaezMPX9CmBgyVyBZmbCY= k8s.io/component-base v0.33.3 h1:mlAuyJqyPlKZM7FyaoM/LcunZaaY353RXiOd2+B5tGA= k8s.io/component-base v0.33.3/go.mod h1:ktBVsBzkI3imDuxYXmVxZ2zxJnYTZ4HAsVj9iF09qp4= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= From dd8df163bd2433424b645eda2af84224c3e0d2f1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 14 Aug 2025 09:04:11 +0000 Subject: [PATCH 02/27] chore(deps): bump golang in the all-gomod group Bumps the all-gomod group with 1 update: golang. Updates `golang` from 1.24-alpine to 1.25-alpine --- updated-dependencies: - dependency-name: golang dependency-version: 1.25-alpine dependency-type: direct:production dependency-group: all-gomod ... Signed-off-by: dependabot[bot] --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index eb06608..3736c07 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ # Linux Dockerfile for aks-mcp # Build stage -FROM golang:1.24-alpine AS builder +FROM golang:1.25-alpine AS builder ARG TARGETOS=linux ARG TARGETARCH ARG VERSION From 2222f200795ea998248a935cad6d0c5d66fd9862 Mon Sep 17 00:00:00 2001 From: Marcos Blount Date: Thu, 14 Aug 2025 15:21:21 -0400 Subject: [PATCH 03/27] makefile/docs: enforce bash for multi-line targets and document in README --- Makefile | 5 +++++ README.md | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/Makefile b/Makefile index 5c52ca8..6acfcbe 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,10 @@ # AKS-MCP Makefile # Provides convenience targets for development, building, testing, and releasing +# Run multi-line recipes in bash and fail on any command error +SHELL := /usr/bin/env bash +SHELLFLAGS := -eo pipefail -c + # Variables BINARY_NAME = aks-mcp MAIN_PATH = ./cmd/aks-mcp @@ -183,6 +187,7 @@ version: ## Show version information .PHONY: info info: ## Show build information + @echo "Shell: $(SHELL) $(SHELLFLAGS)" @echo "Binary Name: $(BINARY_NAME)" @echo "Main Path: $(MAIN_PATH)" @echo "Docker Image: $(DOCKER_IMAGE):$(DOCKER_TAG)" diff --git a/README.md b/README.md index 59f5c32..de8676e 100644 --- a/README.md +++ b/README.md @@ -525,6 +525,15 @@ Usage of ./aks-mcp: ## Development +### Prerequisites + +- **Go** ≥ `1.24.x` installed on your local machine +- **Bash** available as `/usr/bin/env bash` (Makefile targets use multi-line recipes with fail-fast mode) +- **GNU Make** `4.x` or later +- **Docker** *(optional, for container builds and testing)* + +> **Note:** If your login shell is different (e.g., `zsh` on **macOS**), you do **not** need to change it — the Makefile sets variables to run all recipes in `bash` for consistent behavior across platforms. + ### Building from Source This project includes a Makefile for convenient development, building, and testing. To see all available targets: From 1e5e87995ef7b17af4c9082ca05972f086077b5d Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Fri, 15 Aug 2025 10:14:32 +0800 Subject: [PATCH 04/27] feat: add cluster health check prompt Add new health.go prompt module with check_cluster_health prompt for comprehensive AKS cluster health analysis including nodes, pods, services, and resource utilization. --- internal/prompts/health.go | 90 ++++++++++++++++++++++++++++++++++++++ internal/server/server.go | 5 ++- 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 internal/prompts/health.go diff --git a/internal/prompts/health.go b/internal/prompts/health.go new file mode 100644 index 0000000..6a6a437 --- /dev/null +++ b/internal/prompts/health.go @@ -0,0 +1,90 @@ +package prompts + +import ( + "context" + + "github.com/Azure/aks-mcp/internal/config" + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" +) + +// RegisterHealthPrompts registers comprehensive AKS cluster health assessment prompts. +func RegisterHealthPrompts(s *server.MCPServer, cfg *config.ConfigData) { + // Prompt: check_cluster_health + s.AddPrompt(mcp.NewPrompt("check_cluster_health", + mcp.WithPromptDescription("Comprehensive AKS cluster health assessment including platform health, diagnostics, cluster detectors, node health, and connectivity analysis"), + ), func(ctx context.Context, request mcp.GetPromptRequest) (*mcp.GetPromptResult, error) { + promptContent := `# Comprehensive AKS Cluster Health Assessment + +This guide performs a thorough health evaluation of your AKS cluster across multiple dimensions: cluster metadata, platform health, diagnostics configuration, detector analysis, and provides actionable recommendations. + +## Steps + +### 1. Retrieve Control Plane FQDN +Invoke kubectl_cluster tool: +{ + "operation": "cluster-info", + "resource": "", + "args": "" +} +Extract the Kubernetes control plane endpoint URL (FQDN) for cluster identification. + +### 2. Identify AKS Cluster Metadata +Invoke az_aks_operations tool: +{ + "operation": "list", + "args": "--query \"[].{id:id, fqdn:fqdn, resourceGroup:resourceGroup, name:name}\" -o json" +} +Match the control plane FQDN from Step 1 with the cluster list to determine subscription ID, resource group, and cluster name. Extract the full AKS resource ID for subsequent steps. + +### 3. Check Azure Resource Health Status +Invoke az_monitoring tool: +{ + "operation": "resource_health", + "subscription_id": "", + "resource_group": "", + "cluster_name": "", + "parameters": "{\"start_time\":\"\",\"end_time\":\"\"}" +} +Analyze: Identify any Azure platform incidents, service health issues, or resource degradation events that may impact cluster availability. + +### 4. Run Cluster and Control Plane Availability Detectors +Invoke run_detectors_by_category tool: +{ + "cluster_resource_id": "", + "category": "Cluster and Control Plane Availability and Performance", + "start_time": "", + "end_time": "" +} +Analyze: Review API server responsiveness, control plane scaling issues, etcd health, and cluster networking performance problems. + +### 5. Run Node Health Detectors +Invoke run_detectors_by_category tool: +{ + "cluster_resource_id": "", + "category": "Node Health", + "start_time": "", + "end_time": "" +} +Analyze: Examine node readiness issues, kubelet problems, container runtime health, disk pressure, memory pressure, and node pool scaling issues. + +### 6. Run Connectivity Issue Detectors +Invoke run_detectors_by_category tool: +{ + "cluster_resource_id": "", + "category": "Connectivity Issues", + "start_time": "", + "end_time": "" +} +Analyze: Investigate DNS resolution problems, network policy conflicts, ingress/egress connectivity, load balancer issues, and service mesh problems. + +### 7. Generate Comprehensive Health Report + +Generate a comprehensive health report and recommendations based on the findings from the previous steps. + +Provide specific commands, configurations, or Azure portal links where applicable for implementing recommendations. +` + return &mcp.GetPromptResult{Description: "Comprehensive AKS cluster health assessment including platform health, diagnostics, availability detectors, node health, and connectivity analysis", Messages: []mcp.PromptMessage{{Role: mcp.RoleAssistant, Content: mcp.TextContent{Type: "text", Text: promptContent}}}}, nil + }) + +} diff --git a/internal/server/server.go b/internal/server/server.go index d69fbc3..67844c9 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -96,8 +96,11 @@ func (s *Service) registerAllComponents() { func (s *Service) registerPrompts() { log.Println("Registering Prompts...") - log.Println("Registering prompt: query_aks_cluster_metadata_from_kubeconfig") + log.Println("Registering config prompts (query_aks_cluster_metadata_from_kubeconfig)") prompts.RegisterQueryAKSMetadataFromKubeconfigPrompt(s.mcpServer, s.cfg) + + log.Println("Registering health prompts (check_cluster_health)") + prompts.RegisterHealthPrompts(s.mcpServer, s.cfg) } // Run starts the service with the specified transport From 704582539c405f66388c2a9c85f02244405a3976 Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Fri, 15 Aug 2025 10:49:09 +0800 Subject: [PATCH 05/27] chore: add a help flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, an error of "pflag: help requested" would be reported. │ $ ./aks-mcp --help │ │ Usage of ./aks-mcp: │ │ ... │ │ pflag: help requested --- internal/config/config.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index c22eb88..075d3e6 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -2,7 +2,9 @@ package config import ( "context" + "fmt" "log" + "os" "strings" "time" @@ -78,7 +80,24 @@ func (cfg *ConfigData) ParseFlags() { // OTLP settings flag.StringVar(&cfg.OTLPEndpoint, "otlp-endpoint", "", "OTLP endpoint for OpenTelemetry traces (e.g. localhost:4317)") - flag.Parse() + // Custom help handling. + var showHelp bool + flag.BoolVarP(&showHelp, "help", "h", false, "Show help message") + + // Parse flags and handle errors properly + err := flag.CommandLine.Parse(os.Args[1:]) + if err != nil { + fmt.Printf("\nUsage of %s:\n", os.Args[0]) + flag.PrintDefaults() + os.Exit(1) + } + + // Handle help manually with proper exit code + if showHelp { + fmt.Printf("Usage of %s:\n", os.Args[0]) + flag.PrintDefaults() + os.Exit(0) + } // Update security config cfg.SecurityConfig.AccessLevel = cfg.AccessLevel From 53f5035b488ca4eb9653b72480418a8c1572f39d Mon Sep 17 00:00:00 2001 From: Guoxun Wei Date: Fri, 15 Aug 2025 09:55:50 +0800 Subject: [PATCH 06/27] fix inconsistent version sources --- .slsa-goreleaser/darwin-amd64.yml | 7 +++++++ .slsa-goreleaser/darwin-arm64.yml | 7 +++++++ .slsa-goreleaser/linux-amd64.yml | 7 +++++++ .slsa-goreleaser/linux-arm64.yml | 7 +++++++ .slsa-goreleaser/windows-amd64.yml | 7 +++++++ .slsa-goreleaser/windows-arm64.yml | 7 +++++++ Dockerfile | 11 +++++++++-- Makefile | 2 ++ internal/config/config.go | 22 +++++++++++++++++++++- internal/version/version.go | 2 +- 10 files changed, 75 insertions(+), 4 deletions(-) diff --git a/.slsa-goreleaser/darwin-amd64.yml b/.slsa-goreleaser/darwin-amd64.yml index 1f4555f..a60fd74 100644 --- a/.slsa-goreleaser/darwin-amd64.yml +++ b/.slsa-goreleaser/darwin-amd64.yml @@ -20,6 +20,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/darwin-arm64.yml b/.slsa-goreleaser/darwin-arm64.yml index 4c3b0d9..cb655d8 100644 --- a/.slsa-goreleaser/darwin-arm64.yml +++ b/.slsa-goreleaser/darwin-arm64.yml @@ -20,6 +20,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/linux-amd64.yml b/.slsa-goreleaser/linux-amd64.yml index 7006d4b..2169bb5 100644 --- a/.slsa-goreleaser/linux-amd64.yml +++ b/.slsa-goreleaser/linux-amd64.yml @@ -20,6 +20,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/linux-arm64.yml b/.slsa-goreleaser/linux-arm64.yml index 95bb339..af3871e 100644 --- a/.slsa-goreleaser/linux-arm64.yml +++ b/.slsa-goreleaser/linux-arm64.yml @@ -20,6 +20,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/windows-amd64.yml b/.slsa-goreleaser/windows-amd64.yml index 6bfc47a..58ed7d2 100644 --- a/.slsa-goreleaser/windows-amd64.yml +++ b/.slsa-goreleaser/windows-amd64.yml @@ -19,6 +19,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/windows-arm64.yml b/.slsa-goreleaser/windows-arm64.yml index 29d64f2..3868d9a 100644 --- a/.slsa-goreleaser/windows-arm64.yml +++ b/.slsa-goreleaser/windows-arm64.yml @@ -19,6 +19,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/Dockerfile b/Dockerfile index 3736c07..05264f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,8 +20,15 @@ RUN go mod download # Copy source code COPY . . -# Build the application for target platform -RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o aks-mcp ./cmd/aks-mcp +# Build the application for target platform with version injection +RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build \ + -trimpath \ + -tags withoutebpf \ + -ldflags "-X github.com/Azure/aks-mcp/internal/version.GitVersion=${VERSION} \ + -X github.com/Azure/aks-mcp/internal/version.GitCommit=${GIT_COMMIT} \ + -X github.com/Azure/aks-mcp/internal/version.GitTreeState=${GIT_TREE_STATE} \ + -X github.com/Azure/aks-mcp/internal/version.BuildMetadata=${BUILD_DATE}" \ + -o aks-mcp ./cmd/aks-mcp # Runtime stage FROM alpine:3.22 diff --git a/Makefile b/Makefile index 6acfcbe..743e60a 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,8 @@ DOCKER_IMAGE = aks-mcp DOCKER_TAG ?= latest # Version information +# Version is automatically derived from git tags using 'git describe --tags --always --dirty' +# This provides proper semantic versioning based on GitHub releases and tags VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev") GIT_COMMIT ?= $(shell git rev-parse HEAD 2>/dev/null || echo "unknown") GIT_TREE_STATE ?= $(shell if git diff --quiet 2>/dev/null; then echo "clean"; else echo "dirty"; fi) diff --git a/internal/config/config.go b/internal/config/config.go index 075d3e6..967fd21 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -10,6 +10,7 @@ import ( "github.com/Azure/aks-mcp/internal/security" "github.com/Azure/aks-mcp/internal/telemetry" + "github.com/Azure/aks-mcp/internal/version" flag "github.com/spf13/pflag" ) @@ -80,10 +81,13 @@ func (cfg *ConfigData) ParseFlags() { // OTLP settings flag.StringVar(&cfg.OTLPEndpoint, "otlp-endpoint", "", "OTLP endpoint for OpenTelemetry traces (e.g. localhost:4317)") - // Custom help handling. + // Custom help handling var showHelp bool flag.BoolVarP(&showHelp, "help", "h", false, "Show help message") + // Version flag + showVersion := flag.Bool("version", false, "Show version information and exit") + // Parse flags and handle errors properly err := flag.CommandLine.Parse(os.Args[1:]) if err != nil { @@ -99,6 +103,12 @@ func (cfg *ConfigData) ParseFlags() { os.Exit(0) } + // Handle version flag + if *showVersion { + cfg.PrintVersion() + os.Exit(0) + } + // Update security config cfg.SecurityConfig.AccessLevel = cfg.AccessLevel cfg.SecurityConfig.AllowedNamespaces = cfg.AllowNamespaces @@ -132,3 +142,13 @@ func (cfg *ConfigData) InitializeTelemetry(ctx context.Context, serviceName, ser // Track MCP server startup cfg.TelemetryService.TrackServiceStartup(ctx) } + +// PrintVersion prints version information +func (cfg *ConfigData) PrintVersion() { + versionInfo := version.GetVersionInfo() + fmt.Printf("aks-mcp version %s\n", versionInfo["version"]) + fmt.Printf("Git commit: %s\n", versionInfo["gitCommit"]) + fmt.Printf("Git tree state: %s\n", versionInfo["gitTreeState"]) + fmt.Printf("Go version: %s\n", versionInfo["goVersion"]) + fmt.Printf("Platform: %s\n", versionInfo["platform"]) +} diff --git a/internal/version/version.go b/internal/version/version.go index 2768557..746e805 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -8,7 +8,7 @@ import ( // Version information var ( // GitVersion is the git tag version - GitVersion = "1" + GitVersion = "dev" // BuildMetadata is extra build time data BuildMetadata = "" // GitCommit is the git sha1 From 9bc849bd7443b4edfcc0d2047be69507b20c5b7d Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Fri, 15 Aug 2025 19:13:11 +0800 Subject: [PATCH 07/27] feat: add start and stop operations for AKS clusters Added start and stop cluster operations to the azaks component, allowing users to start stopped clusters and stop running clusters. --- README.md | 2 ++ internal/components/azaks/registry.go | 19 ++++++++++++------- internal/components/azaks/registry_test.go | 8 +++++++- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index de8676e..e3d4be2 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,8 @@ Unified tool for managing Azure Kubernetes Service (AKS) clusters and related op - `create`: Create new cluster - `delete`: Delete cluster - `scale`: Scale cluster node count + - `start`: Start a stopped cluster + - `stop`: Stop a running cluster - `update`: Update cluster configuration - `upgrade`: Upgrade Kubernetes version - `nodepool-add`: Add node pool to cluster diff --git a/internal/components/azaks/registry.go b/internal/components/azaks/registry.go index 8b8be26..3bb37e9 100644 --- a/internal/components/azaks/registry.go +++ b/internal/components/azaks/registry.go @@ -18,6 +18,8 @@ const ( OpClusterCreate AksOperationType = "create" OpClusterDelete AksOperationType = "delete" OpClusterScale AksOperationType = "scale" + OpClusterStart AksOperationType = "start" + OpClusterStop AksOperationType = "stop" OpClusterUpdate AksOperationType = "update" OpClusterUpgrade AksOperationType = "upgrade" OpClusterGetVersions AksOperationType = "get-versions" @@ -51,7 +53,7 @@ func generateToolDescription(accessLevel string) string { // Add read-write operations for readwrite and admin if accessLevel == "readwrite" || accessLevel == "admin" { - clusterOps = append(clusterOps, "create", "delete", "scale", "update", "upgrade") + clusterOps = append(clusterOps, "create", "delete", "scale", "update", "upgrade", "start", "stop") nodepoolOps = append(nodepoolOps, "nodepool-add", "nodepool-delete", "nodepool-scale", "nodepool-upgrade") accountOps = append(accountOps, "account-set", "login") } @@ -122,9 +124,10 @@ func GetOperationAccessLevel(operation string) string { readWriteOps := []string{ string(OpClusterCreate), string(OpClusterDelete), string(OpClusterScale), - string(OpClusterUpdate), string(OpClusterUpgrade), string(OpNodepoolAdd), - string(OpNodepoolDelete), string(OpNodepoolScale), string(OpNodepoolUpgrade), - string(OpAccountSet), string(OpLogin), + string(OpClusterUpdate), string(OpClusterUpgrade), string(OpClusterStart), + string(OpClusterStop), string(OpNodepoolAdd), string(OpNodepoolDelete), + string(OpNodepoolScale), string(OpNodepoolUpgrade), string(OpAccountSet), + string(OpLogin), } adminOps := []string{ @@ -177,6 +180,8 @@ func MapOperationToCommand(operation string) (string, error) { string(OpClusterCreate): "az aks create", string(OpClusterDelete): "az aks delete", string(OpClusterScale): "az aks scale", + string(OpClusterStart): "az aks start", + string(OpClusterStop): "az aks stop", string(OpClusterUpdate): "az aks update", string(OpClusterUpgrade): "az aks upgrade", string(OpClusterGetVersions): "az aks get-versions", @@ -210,9 +215,9 @@ func GetSupportedOperations() []string { return []string{ // Cluster operations string(OpClusterShow), string(OpClusterList), string(OpClusterCreate), - string(OpClusterDelete), string(OpClusterScale), string(OpClusterUpdate), - string(OpClusterUpgrade), string(OpClusterGetVersions), string(OpClusterCheckNetwork), - string(OpClusterGetCredentials), + string(OpClusterDelete), string(OpClusterScale), string(OpClusterStart), + string(OpClusterStop), string(OpClusterUpdate), string(OpClusterUpgrade), + string(OpClusterGetVersions), string(OpClusterCheckNetwork), string(OpClusterGetCredentials), // Nodepool operations string(OpNodepoolList), string(OpNodepoolShow), string(OpNodepoolAdd), string(OpNodepoolDelete), string(OpNodepoolScale), string(OpNodepoolUpgrade), diff --git a/internal/components/azaks/registry_test.go b/internal/components/azaks/registry_test.go index 8e6a782..3e55b4b 100644 --- a/internal/components/azaks/registry_test.go +++ b/internal/components/azaks/registry_test.go @@ -25,7 +25,7 @@ func TestGetSupportedOperations_ContainsExpectedOps(t *testing.T) { operations := GetSupportedOperations() expectedOps := []string{ - "show", "list", "create", "delete", "scale", "update", "upgrade", + "show", "list", "create", "delete", "scale", "start", "stop", "update", "upgrade", "nodepool-list", "nodepool-show", "nodepool-add", "nodepool-delete", "account-list", "account-set", "login", "get-credentials", } @@ -56,6 +56,12 @@ func TestValidateOperationAccess_ChecksAccessLevels(t *testing.T) { {"create", "readonly", false}, {"create", "readwrite", true}, {"create", "admin", true}, + {"start", "readonly", false}, + {"start", "readwrite", true}, + {"start", "admin", true}, + {"stop", "readonly", false}, + {"stop", "readwrite", true}, + {"stop", "admin", true}, {"get-credentials", "readonly", false}, {"get-credentials", "readwrite", false}, {"get-credentials", "admin", true}, From 4e14a2bbe9ec65e418772e9d6113302b45ec150e Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Sat, 16 Aug 2025 19:33:41 -0700 Subject: [PATCH 08/27] feat: implement azcli login functionality with multiple authentication methods --- internal/azcli/login.go | 163 +++++++++++++++++ internal/azcli/login_test.go | 320 +++++++++++++++++++++++++++++++++ internal/command/command.go | 2 +- internal/server/server.go | 47 ++++- internal/server/server_test.go | 63 ++++--- 5 files changed, 563 insertions(+), 32 deletions(-) create mode 100644 internal/azcli/login.go create mode 100644 internal/azcli/login_test.go diff --git a/internal/azcli/login.go b/internal/azcli/login.go new file mode 100644 index 0000000..7be5404 --- /dev/null +++ b/internal/azcli/login.go @@ -0,0 +1,163 @@ +// This includes automatic login detection and supports multiple authentication methods +// including service principals, managed identities, and federated tokens. +package azcli + +import ( + "fmt" + "os" + "os/exec" + "strings" + + "github.com/Azure/aks-mcp/internal/command" + "github.com/Azure/aks-mcp/internal/config" +) + +// Login types +const ( + AuthTypeExisting = "existing_login" + AuthTypeServicePrincipal = "service_principal" + AuthTypeFederatedToken = "federated_token" + AuthTypeUserAssignedManagedID = "user_assigned_managed_identity" + AuthTypeSystemAssignedManagedID = "system_assigned_managed_identity" +) + +// Proc is a minimal interface used by this package so tests can inject a fake process. +type Proc interface { + // Run executes the given command (arguments may be included in the string) and + // returns the command output and an error. Implementations MUST return the + // combined stdout+stderr output in the string return value. The error should + // be non-nil when the underlying process fails to start or returns a + // non-zero exit status. This contract is relied upon by callers that inspect + // output text (for example searching for an "ERROR:" prefix) as well as + // the returned error value. + Run(cmd string) (string, error) +} + +// EnsureAzCliLogin ensures az CLI is available and attempts to auto-login using environment variables +func EnsureAzCliLogin(cfg *config.ConfigData) (string, error) { + if _, err := exec.LookPath("az"); err != nil { + return "", fmt.Errorf("az cli is not installed or not in PATH: %w", err) + } + proc := NewShellProc(cfg.Timeout) + return EnsureAzCliLoginWithProc(proc, cfg) +} + +// NewShellProc is a package-level Proc factory used so tests can override process creation and avoid invoking the real `az` binary. +var NewShellProc = func(timeout int) Proc { + return command.NewShellProcess("az", timeout) +} + +// EnsureAzCliLoginWithProc is the testable implementation that uses an injected Proc. +func EnsureAzCliLoginWithProc(proc Proc, cfg *config.ConfigData) (string, error) { + // If there's a valid account, skip auto-login. + out, err := proc.Run("account show --query id -o tsv") + if err == nil && strings.TrimSpace(out) != "" { + return AuthTypeExisting, nil + } + + // Read environment variables to determine which auth methods to try first. + tenantID := os.Getenv("AZURE_TENANT_ID") + clientID := os.Getenv("AZURE_CLIENT_ID") + clientSecret := os.Getenv("AZURE_CLIENT_SECRET") + federatedTokenFile := os.Getenv("AZURE_FEDERATED_TOKEN_FILE") + subscriptionID := os.Getenv("AZURE_SUBSCRIPTION_ID") + + // 1) Service Principal with secret + if clientID != "" && clientSecret != "" && tenantID != "" { + if err := runLoginCommand(proc, fmt.Sprintf("login --service-principal -u %s -p %s --tenant %s", clientID, clientSecret, tenantID), "service principal"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "service principal"); err != nil { + return "", err + } + if err := showAccount(proc, "service principal"); err != nil { + return "", err + } + return AuthTypeServicePrincipal, nil + } + + // 2) Workload Identity (federated token) + if clientID != "" && tenantID != "" && federatedTokenFile != "" { + federatedTokenData, err := os.ReadFile(federatedTokenFile) + if err != nil { + return "", fmt.Errorf("failed to read federated token file %s: %w", federatedTokenFile, err) + } + federatedToken := strings.TrimSpace(string(federatedTokenData)) + if federatedToken == "" { + return "", fmt.Errorf("federated token file %s is empty", federatedTokenFile) + } + if err := runLoginCommand(proc, fmt.Sprintf("login --service-principal -u %s --tenant %s --federated-token %s", clientID, tenantID, federatedToken), "federated token"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "federated token"); err != nil { + return "", err + } + if err := showAccount(proc, "federated token"); err != nil { + return "", err + } + return AuthTypeFederatedToken, nil + } + + // 3) User-assigned Managed Identity (client ID provided) + if clientID != "" { + if err := runLoginCommand(proc, fmt.Sprintf("login --identity -u %s", clientID), "user-assigned managed identity"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "user-assigned managed identity"); err != nil { + return "", err + } + if err := showAccount(proc, "user-assigned managed identity"); err != nil { + return "", err + } + return AuthTypeUserAssignedManagedID, nil + } + + // 4) Fallback to System-assigned Managed Identity even when no AZURE_* hints are set. + if err := runLoginCommand(proc, "login --identity", "system-assigned managed identity"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "system-assigned managed identity"); err != nil { + return "", err + } + if err := showAccount(proc, "system-assigned managed identity"); err != nil { + return "", err + } + return AuthTypeSystemAssignedManagedID, nil +} + +// Runs a command and returns a formatted error if the output indicates an ERROR or the command failed. +func runLoginCommand(proc Proc, cmd string, loginMethod string) error { + // proc.Run is expected to return combined stdout+stderr. Some `az` + // subcommands write human-readable error text to stderr (which will be + // included in the returned string). We therefore inspect the returned + // output for an "ERROR:" prefix as a quick check for failure messages, + // and also respect the returned error (which should be non-nil for + // non-zero exit codes). + out, err := proc.Run(cmd) + if strings.HasPrefix(strings.TrimSpace(out), "ERROR:") { + return fmt.Errorf("%s login failed: %s", loginMethod, out) + } + if err != nil { + return fmt.Errorf("%s login failed: %w", loginMethod, err) + } + return nil +} + +// Sets the subscription when provided and wraps errors with context. +func setSubscription(proc Proc, subscriptionID, loginMethod string) error { + if subscriptionID == "" { + return nil + } + if _, err := proc.Run(fmt.Sprintf("account set --subscription %s", subscriptionID)); err != nil { + return fmt.Errorf("%s login failed: %w", loginMethod, err) + } + return nil +} + +// Checks that account info is returned after a login attempt. +func showAccount(proc Proc, loginMethod string) error { + if _, err := proc.Run("account show --query id -o tsv"); err != nil { + return fmt.Errorf("%s login failed: %w", loginMethod, err) + } + return nil +} diff --git a/internal/azcli/login_test.go b/internal/azcli/login_test.go new file mode 100644 index 0000000..d8fee93 --- /dev/null +++ b/internal/azcli/login_test.go @@ -0,0 +1,320 @@ +package azcli + +import ( + "errors" + "fmt" + "os" + "strings" + "testing" + + "github.com/Azure/aks-mcp/internal/config" +) + +// Command to be run and expected output/error +type loginCommandResponses struct { + cmd string + out string + err error +} + +// Interface to mock azcli commands in tests +type loginCommands struct { + idx int + resp []loginCommandResponses +} + +// Simulate running a command and return the expected output/error +func (c *loginCommands) Run(cmd string) (string, error) { + if c.idx >= len(c.resp) { + return "", fmt.Errorf("no more responses, unexpected command: %s", cmd) + } + expected := c.resp[c.idx] + c.idx++ + // match prefix so tests are less brittle + if expected.cmd != "" && !strings.HasPrefix(cmd, expected.cmd) { + return "", fmt.Errorf("expected cmd prefix %q but got %q", expected.cmd, cmd) + } + return expected.out, expected.err +} + +func TestEnsureAzCliLogin_Existing(t *testing.T) { + cfg := config.NewConfig() + // ensure auto-login is attempted + t.Setenv("AZURE_CLIENT_ID", "cid") + + // add command to simulate existing_login + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + // should return existing_login without error + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "existing_login" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ServicePrincipal(t *testing.T) { + cfg := config.NewConfig() + // set envs for service principal (dummy values for testing only) + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + // To exercise the login flow we must simulate probe failing (not logged in) + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + // login command succeeds (matches dummy env values) + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, + // re-probe succeeds + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "service_principal" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ServicePrincipal_ErrorOutput(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + // probe fails so login attempt is performed + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + // login returns an ERROR: prefixed message on stderr (simulated via out) + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "ERROR: something went wrong", err: nil}, + }} + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "service principal login failed: ERROR: something went wrong") { + t.Fatalf("expected service principal error containing ERROR:, got %v", err) + } +} + +func TestEnsureAzCliLogin_ServicePrincipal_CommandError(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + // login command fails to execute + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: errors.New("exec failed")}, + }} + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "service principal login failed") { + t.Fatalf("expected wrapped service principal error, got %v", err) + } +} + +func TestEnsureAzCliLogin_NoAutoLogin(t *testing.T) { + cfg := config.NewConfig() + // ensure no AZURE_* env vars (explicitly clear for this test) + t.Setenv("AZURE_CLIENT_ID", "") + t.Setenv("AZURE_CLIENT_SECRET", "") + t.Setenv("AZURE_TENANT_ID", "") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") + t.Setenv("AZURE_SUBSCRIPTION_ID", "") + + // proc should not be called; queueProc with no responses will fail if Run is invoked + p := &loginCommands{resp: []loginCommandResponses{}} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil { + t.Fatalf("expected error when no automatic credentials are set, got success: %s", got) + } +} + +func TestEnsureAzCliLogin_SubscriptionSetFailure(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, + // account set fails + {cmd: "account set --subscription dummy-subscription-id", out: "", err: errors.New("set failed")}, + }} + + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "service principal login failed") { + t.Fatalf("expected subscription set failure wrapped in service principal context, got %v", err) + } +} + +func TestEnsureAzCliLogin_ReprobeFailure(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, + {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, + // re-probe fails + {cmd: "account show --query id -o tsv", out: "", err: errors.New("probe failed")}, + }} + + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "service principal login failed") { + t.Fatalf("expected reprobe failure wrapped in service principal context, got %v", err) + } +} + +func TestEnsureAzCliLogin_Federated(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "/tmp/token") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + {cmd: "login --service-principal -u dummy-client-id --tenant dummy-tenant-id --federated-token /tmp/token", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil && got == "federated_token" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_Federated_Success(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + + // Create a dummy token file for testing + tokenFile, err := os.CreateTemp("", "federated-token-test-*") + if err != nil { + t.Fatalf("failed to create token file: %v", err) + } + defer func() { + _ = tokenFile.Close() + _ = os.Remove(tokenFile.Name()) + }() + _, err = tokenFile.WriteString("dummy-federated-token") + if err != nil { + t.Fatalf("failed to write to token file: %v", err) + } + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", tokenFile.Name()) + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + {cmd: "login --service-principal -u dummy-client-id --tenant dummy-tenant-id --federated-token dummy-federated-token", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "federated_token" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ManagedIdentity_UserAssigned(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-managed-identity-client-id") + t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + {cmd: "login --identity -u dummy-managed-identity-client-id", out: "", err: nil}, + {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "user_assigned_managed_identity" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ManagedIdentity_SystemAssigned(t *testing.T) { + cfg := config.NewConfig() + // Clear all Azure env vars to test fallback to system-assigned MI + t.Setenv("AZURE_CLIENT_ID", "") + t.Setenv("AZURE_CLIENT_SECRET", "") + t.Setenv("AZURE_TENANT_ID", "") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") + t.Setenv("AZURE_SUBSCRIPTION_ID", "") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + {cmd: "login --identity", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "system_assigned_managed_identity" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ManagedIdentity_SystemAssigned_Success(t *testing.T) { + cfg := config.NewConfig() + // Fallback to system-assigned MI with subscription set + t.Setenv("AZURE_CLIENT_ID", "") + t.Setenv("AZURE_CLIENT_SECRET", "") + t.Setenv("AZURE_TENANT_ID", "") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") + t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + {cmd: "login --identity", out: "", err: nil}, + {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "system_assigned_managed_identity" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_LoginPromptInOutput(t *testing.T) { + cfg := config.NewConfig() + // ensure auto-login is attempted + t.Setenv("AZURE_CLIENT_ID", "cid") + + // Simulate the case where Azure CLI returns "Please run 'az login'" message with an error + // After the command.go fix, stderr content is returned WITH the error, not instead of it + // This test ensures we don't incorrectly think there's a valid login when there isn't + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "ERROR: Please run 'az login' to setup account.", err: errors.New("command failed")}, + {cmd: "login --identity -u cid", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + // should NOT return existing_login, should proceed with authentication + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "user_assigned_managed_identity" { + t.Fatalf("unexpected result: %s", got) + } +} diff --git a/internal/command/command.go b/internal/command/command.go index 7f43255..c88ad94 100644 --- a/internal/command/command.go +++ b/internal/command/command.go @@ -82,7 +82,7 @@ func (s *ShellProcess) Exec(commands string) (string, error) { // Handle errors if err != nil { if s.ReturnErrOutput && stderr.Len() > 0 { - return stderr.String(), nil + return stderr.String(), err } return "", err } diff --git a/internal/server/server.go b/internal/server/server.go index 67844c9..dfefe31 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -28,16 +28,29 @@ import ( // Service represents the AKS MCP service type Service struct { - cfg *config.ConfigData - mcpServer *server.MCPServer - azClient *azureclient.AzureClient + cfg *config.ConfigData + mcpServer *server.MCPServer + azClient *azureclient.AzureClient + azcliProcFactory func(timeout int) azcli.Proc } -// NewService creates a new AKS MCP service -func NewService(cfg *config.ConfigData) *Service { - return &Service{ - cfg: cfg, +// ServiceOption defines a function that configures the AKS MCP service +type ServiceOption func(*Service) + +// WithAzCliProcFactory allows callers to inject a Proc factory for azcli execution. +// The factory returns an azcli.Proc which can be faked in tests. +func WithAzCliProcFactory(f func(timeout int) azcli.Proc) ServiceOption { + return func(s *Service) { s.azcliProcFactory = f } +} + +// NewService creates a new AKS MCP service with the provided configuration and options. +// Options can be used to inject dependencies like azcli execution factories. +func NewService(cfg *config.ConfigData, opts ...ServiceOption) *Service { + s := &Service{cfg: cfg} + for _, opt := range opts { + opt(s) } + return s } // Initialize initializes the service @@ -61,11 +74,28 @@ func (s *Service) initializeInfrastructure() error { // Create shared Azure client azClient, err := azureclient.NewAzureClient(s.cfg) if err != nil { - return fmt.Errorf("failed to create Azure client: %v", err) + return fmt.Errorf("failed to create Azure client: %w", err) } s.azClient = azClient log.Println("Azure client initialized successfully") + // Ensure Azure CLI exists and is logged in + if s.azcliProcFactory != nil { + // Use injected factory to create an azcli.Proc + proc := s.azcliProcFactory(s.cfg.Timeout) + if loginType, err := azcli.EnsureAzCliLoginWithProc(proc, s.cfg); err != nil { + return fmt.Errorf("azure cli authentication failed: %w", err) + } else { + log.Printf("Azure CLI initialized successfully (%s)", loginType) + } + } else { + if loginType, err := azcli.EnsureAzCliLogin(s.cfg); err != nil { + return fmt.Errorf("azure cli authentication failed: %w", err) + } else { + log.Printf("Azure CLI initialized successfully (%s)", loginType) + } + } + // Create MCP server s.mcpServer = server.NewMCPServer( "AKS MCP", @@ -110,7 +140,6 @@ func (s *Service) Run() error { // Start the server switch s.cfg.Transport { case "stdio": - log.Println("AKS MCP version:", version.GetVersion()) log.Println("Listening for requests on STDIO...") return server.ServeStdio(s.mcpServer) case "sse": diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 4a6313c..c525a49 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -4,6 +4,7 @@ import ( "os" "testing" + "github.com/Azure/aks-mcp/internal/azcli" "github.com/Azure/aks-mcp/internal/components/azaks" "github.com/Azure/aks-mcp/internal/config" "github.com/Azure/mcp-kubernetes/pkg/kubectl" @@ -62,11 +63,12 @@ func (m *MockToolCounter) GetToolNames() []string { // TestService tests the service initialization and expected tool counts func TestService(t *testing.T) { - // Set environment variables for testing to avoid Azure auth issues - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + // Set environment variables for testing to avoid Azure auth issues. + // These are dummy values for tests only — do not commit real credentials here. + _ = os.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + _ = os.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + _ = os.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") defer func() { _ = os.Unsetenv("AZURE_TENANT_ID") _ = os.Unsetenv("AZURE_CLIENT_ID") @@ -121,6 +123,12 @@ func TestService(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + // Create test configuration + cfg := createTestConfig(tt.accessLevel, tt.additionalTools) + + // Create service with injected fake Proc factory so Initialize doesn't call the real az binary + service := NewService(cfg, WithAzCliProcFactory(func(timeout int) azcli.Proc { return &fakeProc{} })) + // Calculate expected kubectl tools count kubectlTools := kubectl.RegisterKubectlTools(tt.accessLevel) expectedKubectlCount := len(kubectlTools) @@ -142,11 +150,10 @@ func TestService(t *testing.T) { t.Logf("Expected total K8s tools: %d", expectedTotalK8sTools) t.Logf("Expected Azure tools: %d", tt.expectedAzureTools) - // Create test configuration - cfg := createTestConfig(tt.accessLevel, tt.additionalTools) - - // Create service - service := NewService(cfg) + // If service wasn't created above (some test paths), create it with the fake factory + if service == nil { + service = NewService(cfg, WithAzCliProcFactory(func(timeout int) azcli.Proc { return &fakeProc{} })) + } // Verify service was created properly if service == nil { //nolint:staticcheck // False positive: t.Fatal prevents nil dereference @@ -171,6 +178,18 @@ func TestService(t *testing.T) { } } +// fakeProc is a minimal Proc implementation for tests. +type fakeProc struct{} + +func (f *fakeProc) Run(cmd string) (string, error) { + // For probing account show, return a non-error id + if cmd == "account show --query id -o tsv" { + return "00000000-0000-0000-0000-000000000000", nil + } + // For any other command, return empty output and nil error to simulate success + return "", nil +} + // TestComponentToolCounts tests individual component tool registration counts func TestComponentToolCounts(t *testing.T) { t.Run("AzureComponents", func(t *testing.T) { @@ -250,11 +269,11 @@ func TestAKSOperationsAccess(t *testing.T) { // TestServiceInitialization tests basic service initialization func TestServiceInitialization(t *testing.T) { - // Set test environment variables - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + // Set test environment variables (dummy values) + _ = os.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + _ = os.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + _ = os.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") defer func() { _ = os.Unsetenv("AZURE_TENANT_ID") _ = os.Unsetenv("AZURE_CLIENT_ID") @@ -263,7 +282,7 @@ func TestServiceInitialization(t *testing.T) { }() cfg := createTestConfig("readonly", map[string]bool{}) - service := NewService(cfg) + service := NewService(cfg, WithAzCliProcFactory(func(timeout int) azcli.Proc { return &fakeProc{} })) // Test service creation - must be non-nil if service == nil { //nolint:staticcheck // False positive: t.Fatal prevents nil dereference @@ -347,11 +366,11 @@ func containsPrefix(s string, prefix string) bool { // BenchmarkServiceInitialization benchmarks service initialization func BenchmarkServiceInitialization(b *testing.B) { - // Set test environment variables - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + // Set test environment variables for benchmark (dummy values) + _ = os.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + _ = os.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + _ = os.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") defer func() { _ = os.Unsetenv("AZURE_TENANT_ID") _ = os.Unsetenv("AZURE_CLIENT_ID") @@ -363,7 +382,7 @@ func BenchmarkServiceInitialization(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - service := NewService(cfg) + service := NewService(cfg, WithAzCliProcFactory(func(timeout int) azcli.Proc { return &fakeProc{} })) err := service.Initialize() if err != nil { b.Fatalf("Initialize failed: %v", err) From 1ec270dc83880c073436f4d1fbb4a631aad0466f Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Sun, 17 Aug 2025 19:25:19 -0700 Subject: [PATCH 09/27] feat: add validation for federated token file path to enhance security --- internal/azcli/login.go | 41 ++++++++++++++++++-- internal/azcli/login_test.go | 73 +++++++++++++++++++++++++++--------- 2 files changed, 93 insertions(+), 21 deletions(-) diff --git a/internal/azcli/login.go b/internal/azcli/login.go index 7be5404..3703f7c 100644 --- a/internal/azcli/login.go +++ b/internal/azcli/login.go @@ -4,6 +4,7 @@ package azcli import ( "fmt" + "io" "os" "os/exec" "strings" @@ -21,6 +22,22 @@ const ( AuthTypeSystemAssignedManagedID = "system_assigned_managed_identity" ) +// validateFederatedTokenFile only allows the fixed AKS identity token path. +func validateFederatedTokenFile(filePath string) (string, error) { + const allowedTokenPath = "/var/run/secrets/azure/tokens/azure-identity-token" // #nosec G101 -- not a credential, this is a fixed AKS token path + if filePath != allowedTokenPath { + return "", fmt.Errorf("federated token file path must be exactly %s", allowedTokenPath) + } + fileInfo, err := os.Stat(allowedTokenPath) + if err != nil { + return "", fmt.Errorf("cannot stat federated token file %s: %w", allowedTokenPath, err) + } + if !fileInfo.Mode().IsRegular() { + return "", fmt.Errorf("federated token file is not a regular file: %s", allowedTokenPath) + } + return allowedTokenPath, nil +} + // Proc is a minimal interface used by this package so tests can inject a fake process. type Proc interface { // Run executes the given command (arguments may be included in the string) and @@ -78,13 +95,29 @@ func EnsureAzCliLoginWithProc(proc Proc, cfg *config.ConfigData) (string, error) // 2) Workload Identity (federated token) if clientID != "" && tenantID != "" && federatedTokenFile != "" { - federatedTokenData, err := os.ReadFile(federatedTokenFile) + // Validate the federated token file path for security and get canonical path + validatedPath, err := validateFederatedTokenFile(federatedTokenFile) + if err != nil { + return "", fmt.Errorf("federated token file validation failed: %w", err) + } + + // Open the only allowed federated token file (fixed path, safe) + f, err := os.Open(validatedPath) // #nosec G304 -- validated fixed path, not user-controlled + if err != nil { + return "", fmt.Errorf("failed to open federated token file %s: %w", validatedPath, err) + } + defer f.Close() + + // Limit token size to 16KB which is far larger than typical JWT or k8s tokens + // but protects against very large files. + const maxTokenSize = 16 * 1024 + data, err := io.ReadAll(io.LimitReader(f, maxTokenSize)) if err != nil { - return "", fmt.Errorf("failed to read federated token file %s: %w", federatedTokenFile, err) + return "", fmt.Errorf("failed to read federated token file %s: %w", validatedPath, err) } - federatedToken := strings.TrimSpace(string(federatedTokenData)) + federatedToken := strings.TrimSpace(string(data)) if federatedToken == "" { - return "", fmt.Errorf("federated token file %s is empty", federatedTokenFile) + return "", fmt.Errorf("federated token file %s is empty", validatedPath) } if err := runLoginCommand(proc, fmt.Sprintf("login --service-principal -u %s --tenant %s --federated-token %s", clientID, tenantID, federatedToken), "federated token"); err != nil { return "", err diff --git a/internal/azcli/login_test.go b/internal/azcli/login_test.go index d8fee93..e8139d5 100644 --- a/internal/azcli/login_test.go +++ b/internal/azcli/login_test.go @@ -176,43 +176,82 @@ func TestEnsureAzCliLogin_Federated(t *testing.T) { cfg := config.NewConfig() t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") - t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "/tmp/token") + + // Only allow the fixed AKS federated token path + const allowedTokenPath = "/var/run/secrets/azure/tokens/azure-identity-token" + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", allowedTokenPath) + + // If the file does not exist (not running in AKS), skip the test + if _, err := os.Stat(allowedTokenPath); err != nil { + t.Skipf("skipping: %s not present (only available in AKS)", allowedTokenPath) + } p := &loginCommands{resp: []loginCommandResponses{ {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, - {cmd: "login --service-principal -u dummy-client-id --tenant dummy-tenant-id --federated-token /tmp/token", out: "", err: nil}, - {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, }} got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil && !strings.Contains(err.Error(), allowedTokenPath) { + t.Fatalf("unexpected error: %v", err) + } if err == nil && got == "federated_token" { t.Fatalf("unexpected result: %s", got) } } -func TestEnsureAzCliLogin_Federated_Success(t *testing.T) { +func TestEnsureAzCliLogin_Federated_InvalidFile(t *testing.T) { cfg := config.NewConfig() t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "/tmp/non-existent-file") - // Create a dummy token file for testing - tokenFile, err := os.CreateTemp("", "federated-token-test-*") - if err != nil { - t.Fatalf("failed to create token file: %v", err) + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + }} + + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "federated token file validation failed") { + t.Fatalf("expected federated token file validation error, got %v", err) } - defer func() { - _ = tokenFile.Close() - _ = os.Remove(tokenFile.Name()) - }() - _, err = tokenFile.WriteString("dummy-federated-token") - if err != nil { - t.Fatalf("failed to write to token file: %v", err) +} + +func TestEnsureAzCliLogin_Federated_DirectoryTraversal(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "../../../etc/passwd") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + }} + + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "federated token file validation failed") { + t.Fatalf("expected federated token file validation error for directory traversal, got %v", err) } - t.Setenv("AZURE_FEDERATED_TOKEN_FILE", tokenFile.Name()) +} + +func TestEnsureAzCliLogin_Federated_Success(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + + // Only allow the fixed AKS federated token path + const allowedTokenPath = "/var/run/secrets/azure/tokens/azure-identity-token" + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", allowedTokenPath) + + // If the file does not exist (not running in AKS), skip the test + tokenData := "dummy-federated-token" + if _, err := os.Stat(allowedTokenPath); err != nil { + t.Skipf("skipping: %s not present (only available in AKS)", allowedTokenPath) + } + + // Optionally, try to write a dummy token if running in a testable AKS env (may require root) + // _ = os.WriteFile(allowedTokenPath, []byte(tokenData), 0600) p := &loginCommands{resp: []loginCommandResponses{ {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, - {cmd: "login --service-principal -u dummy-client-id --tenant dummy-tenant-id --federated-token dummy-federated-token", out: "", err: nil}, + {cmd: "login --service-principal -u dummy-client-id --tenant dummy-tenant-id --federated-token " + tokenData, out: "", err: nil}, {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, }} From 1e753028af8ded005bcd8b33dce18767249ad793 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Sun, 17 Aug 2025 20:00:17 -0700 Subject: [PATCH 10/27] fix: handle error when closing federated token file --- internal/azcli/login.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/azcli/login.go b/internal/azcli/login.go index 3703f7c..3f14a27 100644 --- a/internal/azcli/login.go +++ b/internal/azcli/login.go @@ -106,7 +106,11 @@ func EnsureAzCliLoginWithProc(proc Proc, cfg *config.ConfigData) (string, error) if err != nil { return "", fmt.Errorf("failed to open federated token file %s: %w", validatedPath, err) } - defer f.Close() + defer func() { + if err := f.Close(); err != nil { + fmt.Fprintf(os.Stderr, "error closing file: %v\n", err) + } + }() // Limit token size to 16KB which is far larger than typical JWT or k8s tokens // but protects against very large files. From 84cc1174a0c0aca3d4640cdeee0ea22eaf872cda Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Mon, 18 Aug 2025 16:33:54 +0800 Subject: [PATCH 11/27] fix: remove action_params as required for inspektor_gadget_observability tool --- internal/components/inspektorgadget/registry.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/components/inspektorgadget/registry.go b/internal/components/inspektorgadget/registry.go index d301ad1..ea02b4d 100644 --- a/internal/components/inspektorgadget/registry.go +++ b/internal/components/inspektorgadget/registry.go @@ -28,7 +28,6 @@ func RegisterInspektorGadgetTool() mcp.Tool { ), mcp.WithObject("action_params", mcp.Description("Parameters for the action"), - mcp.Required(), mcp.Properties(map[string]any{ "gadget_name": map[string]any{ "type": "string", From c2400b3c18bf69e5bb51128dbb5d58c6b77a04b5 Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Tue, 19 Aug 2025 06:37:03 +0000 Subject: [PATCH 12/27] chore: cleanup unused files --- CREATE_PR_INSTRUCTIONS.md | 80 ----------------- PR_DESCRIPTION.md | 57 ------------ PULL_REQUEST.md | 179 -------------------------------------- 3 files changed, 316 deletions(-) delete mode 100644 CREATE_PR_INSTRUCTIONS.md delete mode 100644 PR_DESCRIPTION.md delete mode 100644 PULL_REQUEST.md diff --git a/CREATE_PR_INSTRUCTIONS.md b/CREATE_PR_INSTRUCTIONS.md deleted file mode 100644 index 55993fb..0000000 --- a/CREATE_PR_INSTRUCTIONS.md +++ /dev/null @@ -1,80 +0,0 @@ -# GitHub Pull Request Instructions - -## 📋 **Pull Request Details** - -**Title:** `feat: Add Azure Advisor recommendation tool for AKS clusters with logging and testing` - -**Base Branch:** `main` -**Compare Branch:** `feature/azure-diagnostics-prompts` - -## 🚀 **How to Create the Pull Request** - -### **Option 1: GitHub Web Interface** -1. Go to: https://github.com/Azure/aks-mcp -2. Click "Pull requests" tab -3. Click "New pull request" -4. Set base: `main` <- compare: `feature/azure-diagnostics-prompts` -5. Copy the title above -6. Copy the description from `PR_DESCRIPTION.md` (created above) -7. Add labels: `enhancement`, `feature`, `aks`, `advisor` -8. Click "Create pull request" - -### **Option 2: GitHub CLI (if available)** -```bash -# Install GitHub CLI first if not available -gh pr create \ - --title "feat: Add Azure Advisor recommendation tool for AKS clusters with logging and testing" \ - --body-file PR_DESCRIPTION.md \ - --base main \ - --head feature/azure-diagnostics-prompts \ - --label enhancement,feature,aks,advisor -``` - -### **Option 3: Direct GitHub URL** -Navigate to: -``` -https://github.com/Azure/aks-mcp/compare/main...feature/azure-diagnostics-prompts -``` - -## 📄 **Files to Reference** -- **Detailed Description:** `PULL_REQUEST.md` (comprehensive documentation) -- **GitHub PR Description:** `PR_DESCRIPTION.md` (concise version for GitHub) - -## 🏷️ **Suggested Labels** -- `enhancement` -- `feature` -- `aks` -- `advisor` -- `logging` -- `testing` - -## 👥 **Suggested Reviewers** -- Team leads familiar with AKS -- Azure CLI integration experts -- MCP server maintainers -- Security/access control reviewers - -## ✅ **Pre-submission Checklist** -- [x] All tests pass (10/10 advisor tests) -- [x] Project builds successfully -- [x] Code follows project conventions -- [x] Comprehensive logging implemented -- [x] Security validation integrated -- [x] Documentation complete -- [x] VS Code MCP extension compatibility verified -- [x] ResourceID field properly implemented -- [x] Error handling comprehensive - -## 📊 **Commit Summary** -``` -9370274 fix: Update Azure Advisor recommendation tool with logging and resourceID field -0e47372 fix: resolve MCP tool parameter validation error for Azure Advisor -06c90db docs: add Azure Advisor tool usage documentation -11c71fc feat: implement Azure Advisor recommendations for AKS clusters -439e603 chore: remove pull request template and exe files from tracking -``` - -**Total commits:** 5 -**Files changed:** 6 files (+247 insertions, -104 deletions) - -The pull request is ready to be created! 🚀 diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md deleted file mode 100644 index b3f082c..0000000 --- a/PR_DESCRIPTION.md +++ /dev/null @@ -1,57 +0,0 @@ -## 🎯 Summary -Implements Azure Advisor recommendation tool for AKS clusters with comprehensive logging, testing, and VS Code MCP extension integration. - -## 🚀 Key Features -- ✅ **Azure Advisor Integration**: Fetch real-time AKS recommendations via Azure CLI -- ✅ **AKS-Specific Filtering**: Auto-filter recommendations for AKS resources only -- ✅ **Multiple Operations**: Support for `list`, `details`, and `report` operations -- ✅ **ResourceID Field**: Returns proper Azure resource IDs instead of generic values -- ✅ **Comprehensive Logging**: Detailed `[ADVISOR]` prefixed logs for debugging -- ✅ **Complete Test Coverage**: 10 test cases covering all functionality -- ✅ **Security Integration**: Works with readonly/readwrite/admin access levels - -## 📋 Changes Made - -### New Files -- `internal/azure/advisor/aks_recommendations.go` - Main advisor logic -- `internal/azure/advisor/types.go` - Data structures -- `internal/azure/advisor/advisor_test.go` - Test suite -- `docs/logging.md` - Logging documentation - -### Modified Files -- `internal/security/validator.go` - Added advisor tool validation -- `.gitignore` - Added `*.ps1` for development scripts - -## 🧪 Testing -```bash -=== RUN TestFilterAKSRecommendationsFromCLI ---- PASS: TestFilterAKSRecommendationsFromCLI (0.00s) -# ... 10/10 tests pass -PASS -ok github.com/Azure/aks-mcp/internal/azure/advisor -``` - -## 🔧 Usage Example -```json -{ - "operation": "list", - "subscription_id": "your-subscription-id", - "resource_group": "your-resource-group", - "severity": "High" -} -``` - -## ✅ Verification -- ✅ All tests pass -- ✅ Project builds successfully -- ✅ Works with VS Code MCP extension -- ✅ Proper logging and monitoring -- ✅ Security validation integrated - -## 🎯 Benefits -- Enables AI assistants to provide AKS optimization recommendations -- Proactive monitoring and cost optimization insights -- Security recommendations for AKS resources -- Actionable insights for cluster management - -Ready for review! 🚀 diff --git a/PULL_REQUEST.md b/PULL_REQUEST.md deleted file mode 100644 index 5d5df16..0000000 --- a/PULL_REQUEST.md +++ /dev/null @@ -1,179 +0,0 @@ -# Azure Advisor Recommendations Tool for AKS Clusters - -## 🎯 **Overview** -This PR implements a comprehensive Azure Advisor recommendation tool specifically designed for AKS (Azure Kubernetes Service) clusters. The tool integrates with the AKS-MCP server to provide AI assistants with the ability to retrieve, analyze, and report on Azure Advisor recommendations for AKS resources. - -## 🚀 **Features Implemented** - -### **Core Functionality** -- ✅ **Azure Advisor Integration**: Direct integration with Azure CLI to fetch real-time recommendations -- ✅ **AKS-Specific Filtering**: Automatically filters recommendations to only include AKS-related resources -- ✅ **Multiple Operations**: Support for `list`, `details`, and `report` operations -- ✅ **Comprehensive Logging**: Detailed logging with `[ADVISOR]` prefix for debugging and monitoring -- ✅ **Security Integration**: Works with readonly, readwrite, and admin access levels - -### **Data Structure & API** -- ✅ **ResourceID Field**: Returns `resourceID` (Azure resource ID) instead of generic impacted value -- ✅ **Structured Response**: Well-defined JSON response format with AKS-specific metadata -- ✅ **Filtering Support**: Filter by severity, resource group, category, and cluster names -- ✅ **Report Generation**: Comprehensive reports with summaries, action items, and cluster breakdowns - -### **Quality & Testing** -- ✅ **Complete Test Coverage**: 10 comprehensive test cases covering all functionality -- ✅ **Error Handling**: Robust error handling with informative error messages -- ✅ **Input Validation**: Proper parameter validation and type checking -- ✅ **Documentation**: Comprehensive usage documentation and examples - -## 📋 **Changes Made** - -### **New Files Added** -``` -internal/azure/advisor/ -├── aks_recommendations.go # Main advisor logic with comprehensive logging -├── types.go # Data structures and type definitions -└── advisor_test.go # Complete test suite (10 test cases) - -docs/ -└── logging.md # Logging setup and monitoring documentation -``` - -### **Files Modified** -``` -internal/security/validator.go # Added advisor tool to security validation -.gitignore # Added *.ps1 to ignore development scripts -``` - -### **Key Technical Improvements** -1. **Data Structure Fix**: Updated `CLIRecommendation` struct to match actual Azure CLI output (flattened fields) -2. **ResourceID Integration**: Changed from `ImpactedResource` to `ResourceID` field for better clarity -3. **Comprehensive Logging**: Added detailed logging throughout the tool for debugging and monitoring -4. **Test Coverage**: Fixed all test cases to work with the new data structure and added ResourceID validation - -## 🔧 **Usage Examples** - -### **List AKS Recommendations** -```json -{ - "operation": "list", - "subscription_id": "your-subscription-id", - "resource_group": "your-resource-group", - "severity": "High" -} -``` - -### **Get Recommendation Details** -```json -{ - "operation": "details", - "recommendation_id": "/subscriptions/.../recommendations/rec-id" -} -``` - -### **Generate Comprehensive Report** -```json -{ - "operation": "report", - "subscription_id": "your-subscription-id", - "format": "detailed" -} -``` - -## 📊 **Response Format** -```json -{ - "id": "/subscriptions/.../recommendations/rec-id", - "category": "Cost", - "impact": "High", - "cluster_name": "my-aks-cluster", - "resource_group": "my-resource-group", - "resource_id": "/subscriptions/.../managedClusters/my-aks-cluster", - "description": "Detailed recommendation with solution", - "severity": "High", - "last_updated": "2024-01-15T10:30:00Z", - "status": "Active", - "aks_specific": { - "configuration_area": "compute" - } -} -``` - -## 🧪 **Testing** - -### **Test Coverage** -- ✅ `TestFilterAKSRecommendationsFromCLI` - AKS-specific filtering -- ✅ `TestIsAKSRelatedCLI` - Resource ID validation -- ✅ `TestExtractAKSClusterNameFromCLI` - Cluster name extraction -- ✅ `TestExtractResourceGroupFromResourceID` - Resource group parsing -- ✅ `TestConvertToAKSRecommendationSummary` - Data transformation & ResourceID validation -- ✅ `TestFilterBySeverity` - Severity-based filtering -- ✅ `TestGenerateAKSAdvisorReport` - Report generation -- ✅ `TestMapCategoryToConfigArea` - Category mapping -- ✅ `TestHandleAdvisorRecommendationInvalidOperation` - Error handling -- ✅ `TestHandleAdvisorRecommendationMissingOperation` - Parameter validation - -### **Test Results** -```bash -=== RUN TestFilterAKSRecommendationsFromCLI ---- PASS: TestFilterAKSRecommendationsFromCLI (0.00s) -=== RUN TestIsAKSRelatedCLI ---- PASS: TestIsAKSRelatedCLI (0.00s) -# ... all 10 tests pass -PASS -ok github.com/Azure/aks-mcp/internal/azure/advisor -``` - -## 🔒 **Security & Access Control** - -- **Readonly Mode**: ✅ Advisor tool works in readonly mode (listing recommendations is read-only) -- **Input Validation**: ✅ All parameters are validated and sanitized -- **Access Control**: ✅ Integrated with existing security validation framework -- **Command Injection Protection**: ✅ Uses validated Azure CLI executor - -## 📝 **Logging & Monitoring** - -The tool includes comprehensive logging with the `[ADVISOR]` prefix: -``` -[ADVISOR] Handling operation: list -[ADVISOR] Listing recommendations for subscription: xxx, resource_group: yyy -[ADVISOR] Executing command: az advisor recommendation list --subscription xxx -[ADVISOR] Found 15 total recommendations -[ADVISOR] Found 3 AKS-related recommendations -[ADVISOR] After severity filter: 1 recommendations -[ADVISOR] Returning 1 recommendation summaries -``` - -## 🔄 **Integration Points** - -1. **MCP Server**: Registered as `az_advisor_recommendation` tool -2. **VS Code Extension**: Compatible with VS Code MCP extension -3. **Azure CLI**: Uses existing Azure CLI integration for authentication -4. **Security Layer**: Integrated with access level validation - -## ✅ **Verification Steps** - -1. **Build Success**: ✅ Project builds without errors -2. **Test Suite**: ✅ All 10 advisor tests pass -3. **Integration Test**: ✅ Tool works with VS Code MCP extension -4. **Security Validation**: ✅ Works in readonly mode as expected -5. **Logging Verification**: ✅ Logs are written to debug file and can be monitored - -## 🎯 **Benefits** - -1. **Enhanced AI Assistant Capabilities**: AI assistants can now provide AKS optimization recommendations -2. **Proactive Monitoring**: Enables proactive identification of AKS issues and optimizations -3. **Cost Optimization**: Helps identify cost-saving opportunities in AKS clusters -4. **Security Improvements**: Surfaces security-related recommendations for AKS resources -5. **Operational Excellence**: Provides actionable insights for AKS cluster management - -## 🔮 **Future Enhancements** - -- Integration with Azure Resource Graph for advanced querying -- Support for custom recommendation filtering rules -- Integration with Azure Policy for automated remediation -- Enhanced reporting with trend analysis - ---- - -**Ready for Review** ✅ - -This PR is ready for review and testing. All tests pass, documentation is complete, and the tool has been verified to work with the VS Code MCP extension in a real environment. From 13a537f2892141aace80c2ea40279cb5ad7225fb Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Tue, 19 Aug 2025 14:42:37 +0800 Subject: [PATCH 13/27] feat: add helpful 404 responses for HTTP/SSE server endpoints Enhances user experience when accessing incorrect endpoints by providing: - Custom 404 responses with JSON format for both SSE and streamable-http transports - Clear guidance on correct MCP endpoint usage (/mcp, /sse, /message) - Comprehensive test coverage for new HTTP server behavior --- internal/server/server.go | 103 +++++++- internal/server/server_test.go | 416 +++++++++++++++++++++++++++++++++ 2 files changed, 515 insertions(+), 4 deletions(-) diff --git a/internal/server/server.go b/internal/server/server.go index 67844c9..51e7f50 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -1,8 +1,10 @@ package server import ( + "encoding/json" "fmt" "log" + "net/http" "github.com/Azure/aks-mcp/internal/azcli" "github.com/Azure/aks-mcp/internal/azureclient" @@ -103,6 +105,72 @@ func (s *Service) registerPrompts() { prompts.RegisterHealthPrompts(s.mcpServer, s.cfg) } +// createCustomHTTPServerWithHelp404 creates a custom HTTP server that provides +// helpful 404 responses for the MCP server +func (s *Service) createCustomHTTPServerWithHelp404(addr string) *http.Server { + mux := http.NewServeMux() + + // Handle all other paths with a helpful 404 response + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/mcp" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + + response := map[string]interface{}{ + "error": "Not Found", + "message": "This is an MCP (Model Context Protocol) server. Please send POST requests to /mcp to initialize a session and obtain an Mcp-Session-Id for subsequent requests.", + "endpoints": map[string]string{ + "initialize": "POST /mcp - Initialize MCP session", + "requests": "POST /mcp - Send MCP requests (requires Mcp-Session-Id header)", + "listen": "GET /mcp - Listen for notifications (requires Mcp-Session-Id header)", + "terminate": "DELETE /mcp - Terminate session (requires Mcp-Session-Id header)", + }, + } + + json.NewEncoder(w).Encode(response) + } + }) + + return &http.Server{ + Addr: addr, + Handler: mux, + } +} + +// createCustomSSEServerWithHelp404 creates a custom HTTP server for SSE that provides +// helpful 404 responses for non-MCP endpoints +func (s *Service) createCustomSSEServerWithHelp404(sseServer *server.SSEServer, addr string) *http.Server { + mux := http.NewServeMux() + + // Register SSE and Message handlers + mux.Handle("/sse", sseServer.SSEHandler()) + mux.Handle("/message", sseServer.MessageHandler()) + + // Handle all other paths with a helpful 404 response + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/sse" && r.URL.Path != "/message" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + + response := map[string]interface{}{ + "error": "Not Found", + "message": "This is an MCP (Model Context Protocol) server using SSE transport. Use the SSE endpoint to establish connections and the message endpoint to send requests.", + "endpoints": map[string]string{ + "sse": "GET /sse - Establish SSE connection for real-time notifications", + "message": "POST /message - Send MCP JSON-RPC messages", + }, + } + + json.NewEncoder(w).Encode(response) + } + }) + + return &http.Server{ + Addr: addr, + Handler: mux, + } +} + // Run starts the service with the specified transport func (s *Service) Run() error { log.Println("AKS MCP version:", version.GetVersion()) @@ -114,15 +182,42 @@ func (s *Service) Run() error { log.Println("Listening for requests on STDIO...") return server.ServeStdio(s.mcpServer) case "sse": - sse := server.NewSSEServer(s.mcpServer) addr := fmt.Sprintf("%s:%d", s.cfg.Host, s.cfg.Port) + + // Create SSE server first + sse := server.NewSSEServer(s.mcpServer) + + // Create custom HTTP server with helpful 404 responses + customServer := s.createCustomSSEServerWithHelp404(sse, addr) + log.Printf("SSE server listening on %s", addr) - return sse.Start(addr) + log.Printf("SSE endpoint available at: http://%s/sse", addr) + log.Printf("Message endpoint available at: http://%s/message", addr) + log.Printf("Connect to /sse for real-time events, send JSON-RPC to /message") + + return customServer.ListenAndServe() case "streamable-http": - streamableServer := server.NewStreamableHTTPServer(s.mcpServer) addr := fmt.Sprintf("%s:%d", s.cfg.Host, s.cfg.Port) + + // Create a custom HTTP server with helpful 404 responses + customServer := s.createCustomHTTPServerWithHelp404(addr) + + // Create the streamable HTTP server with the custom HTTP server + streamableServer := server.NewStreamableHTTPServer( + s.mcpServer, + server.WithStreamableHTTPServer(customServer), + ) + + // Update the mux to use the actual streamable server as the MCP handler + if mux, ok := customServer.Handler.(*http.ServeMux); ok { + mux.Handle("/mcp", streamableServer) + } + log.Printf("Streamable HTTP server listening on %s", addr) - return streamableServer.Start(addr) + log.Printf("MCP endpoint available at: http://%s/mcp", addr) + log.Printf("Send POST requests to /mcp to initialize session and obtain Mcp-Session-Id") + + return customServer.ListenAndServe() default: return fmt.Errorf("invalid transport type: %s (must be 'stdio', 'sse' or 'streamable-http')", s.cfg.Transport) } diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 4a6313c..e3f6cf2 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -1,12 +1,17 @@ package server import ( + "encoding/json" + "net/http" + "net/http/httptest" "os" + "strings" "testing" "github.com/Azure/aks-mcp/internal/components/azaks" "github.com/Azure/aks-mcp/internal/config" "github.com/Azure/mcp-kubernetes/pkg/kubectl" + "github.com/mark3labs/mcp-go/server" ) // MockToolCounter tracks registered tools for testing @@ -370,3 +375,414 @@ func BenchmarkServiceInitialization(b *testing.B) { } } } + +// TestCreateCustomHTTPServerWithHelp404 tests the custom HTTP server creation for streamable-http transport +func TestCreateCustomHTTPServerWithHelp404(t *testing.T) { + // Set test environment variables + _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") + _ = os.Setenv("AZURE_CLIENT_ID", "test-client") + _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + defer func() { + _ = os.Unsetenv("AZURE_TENANT_ID") + _ = os.Unsetenv("AZURE_CLIENT_ID") + _ = os.Unsetenv("AZURE_CLIENT_SECRET") + _ = os.Unsetenv("AZURE_SUBSCRIPTION_ID") + }() + + cfg := createTestConfig("readonly", map[string]bool{}) + service := NewService(cfg) + err := service.Initialize() + if err != nil { + t.Fatalf("Failed to initialize service: %v", err) + } + + // Test server creation + addr := "localhost:8080" + customServer := service.createCustomHTTPServerWithHelp404(addr) + + if customServer == nil { + t.Fatal("Custom server should not be nil") + } + + if customServer.Addr != addr { + t.Errorf("Expected server address %s, got %s", addr, customServer.Addr) + } + + if customServer.Handler == nil { + t.Fatal("Custom server handler should not be nil") + } + + // Test the 404 response for non-MCP paths + testCases := []struct { + path string + method string + expectedStatusCode int + expectedContentType string + description string + }{ + {"/", "GET", http.StatusNotFound, "application/json", "root path should return helpful 404"}, + {"/invalid", "GET", http.StatusNotFound, "application/json", "invalid path should return helpful 404"}, + {"/api", "POST", http.StatusNotFound, "application/json", "non-MCP path should return helpful 404"}, + {"/health", "GET", http.StatusNotFound, "application/json", "health path should return helpful 404"}, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + req, err := http.NewRequest(tc.method, tc.path, nil) + if err != nil { + t.Fatalf("Failed to create request: %v", err) + } + + rr := httptest.NewRecorder() + customServer.Handler.ServeHTTP(rr, req) + + if rr.Code != tc.expectedStatusCode { + t.Errorf("Expected status code %d, got %d", tc.expectedStatusCode, rr.Code) + } + + contentType := rr.Header().Get("Content-Type") + if contentType != tc.expectedContentType { + t.Errorf("Expected content type %s, got %s", tc.expectedContentType, contentType) + } + + // Parse and validate JSON response + var response map[string]interface{} + err = json.Unmarshal(rr.Body.Bytes(), &response) + if err != nil { + t.Fatalf("Failed to parse JSON response: %v", err) + } + + // Check required fields + if response["error"] != "Not Found" { + t.Errorf("Expected error 'Not Found', got %v", response["error"]) + } + + message, ok := response["message"].(string) + if !ok { + t.Fatal("Message field should be a string") + } + if !strings.Contains(message, "MCP") { + t.Error("Message should mention MCP") + } + if !strings.Contains(message, "/mcp") { + t.Error("Message should mention /mcp endpoint") + } + + endpoints, ok := response["endpoints"].(map[string]interface{}) + if !ok { + t.Fatal("Endpoints field should be a map") + } + + expectedEndpoints := []string{"initialize", "requests", "listen", "terminate"} + for _, endpoint := range expectedEndpoints { + if _, exists := endpoints[endpoint]; !exists { + t.Errorf("Expected endpoint %s not found in response", endpoint) + } + } + }) + } +} + +// TestCreateCustomSSEServerWithHelp404 tests the custom HTTP server creation for SSE transport +func TestCreateCustomSSEServerWithHelp404(t *testing.T) { + // Set test environment variables + _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") + _ = os.Setenv("AZURE_CLIENT_ID", "test-client") + _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + defer func() { + _ = os.Unsetenv("AZURE_TENANT_ID") + _ = os.Unsetenv("AZURE_CLIENT_ID") + _ = os.Unsetenv("AZURE_CLIENT_SECRET") + _ = os.Unsetenv("AZURE_SUBSCRIPTION_ID") + }() + + cfg := createTestConfig("readonly", map[string]bool{}) + service := NewService(cfg) + err := service.Initialize() + if err != nil { + t.Fatalf("Failed to initialize service: %v", err) + } + + // Create SSE server + sseServer := server.NewSSEServer(service.mcpServer) + + // Test custom server creation + addr := "localhost:8081" + customServer := service.createCustomSSEServerWithHelp404(sseServer, addr) + + if customServer == nil { + t.Fatal("Custom SSE server should not be nil") + } + + if customServer.Addr != addr { + t.Errorf("Expected server address %s, got %s", addr, customServer.Addr) + } + + if customServer.Handler == nil { + t.Fatal("Custom SSE server handler should not be nil") + } + + // Test the 404 response for non-SSE paths + testCases := []struct { + path string + method string + expectedStatusCode int + expectedContentType string + description string + }{ + {"/", "GET", http.StatusNotFound, "application/json", "root path should return helpful 404"}, + {"/invalid", "GET", http.StatusNotFound, "application/json", "invalid path should return helpful 404"}, + {"/api", "POST", http.StatusNotFound, "application/json", "non-SSE path should return helpful 404"}, + {"/health", "GET", http.StatusNotFound, "application/json", "health path should return helpful 404"}, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + req, err := http.NewRequest(tc.method, tc.path, nil) + if err != nil { + t.Fatalf("Failed to create request: %v", err) + } + + rr := httptest.NewRecorder() + customServer.Handler.ServeHTTP(rr, req) + + if rr.Code != tc.expectedStatusCode { + t.Errorf("Expected status code %d, got %d", tc.expectedStatusCode, rr.Code) + } + + contentType := rr.Header().Get("Content-Type") + if contentType != tc.expectedContentType { + t.Errorf("Expected content type %s, got %s", tc.expectedContentType, contentType) + } + + // Parse and validate JSON response + var response map[string]interface{} + err = json.Unmarshal(rr.Body.Bytes(), &response) + if err != nil { + t.Fatalf("Failed to parse JSON response: %v", err) + } + + // Check required fields + if response["error"] != "Not Found" { + t.Errorf("Expected error 'Not Found', got %v", response["error"]) + } + + message, ok := response["message"].(string) + if !ok { + t.Fatal("Message field should be a string") + } + if !strings.Contains(message, "MCP") { + t.Error("Message should mention MCP") + } + if !strings.Contains(message, "SSE") { + t.Error("Message should mention SSE transport") + } + + endpoints, ok := response["endpoints"].(map[string]interface{}) + if !ok { + t.Fatal("Endpoints field should be a map") + } + + expectedEndpoints := []string{"sse", "message"} + for _, endpoint := range expectedEndpoints { + if _, exists := endpoints[endpoint]; !exists { + t.Errorf("Expected endpoint %s not found in response", endpoint) + } + } + + // Verify SSE-specific endpoint descriptions + sseEndpoint, ok := endpoints["sse"].(string) + if !ok { + t.Fatal("SSE endpoint description should be a string") + } + if !strings.Contains(sseEndpoint, "GET /sse") { + t.Error("SSE endpoint should mention GET /sse") + } + + messageEndpoint, ok := endpoints["message"].(string) + if !ok { + t.Fatal("Message endpoint description should be a string") + } + if !strings.Contains(messageEndpoint, "POST /message") { + t.Error("Message endpoint should mention POST /message") + } + }) + } +} + +// TestSSEServerEndpointsAccessible tests that SSE endpoints are still accessible +func TestSSEServerEndpointsAccessible(t *testing.T) { + // Set test environment variables + _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") + _ = os.Setenv("AZURE_CLIENT_ID", "test-client") + _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + defer func() { + _ = os.Unsetenv("AZURE_TENANT_ID") + _ = os.Unsetenv("AZURE_CLIENT_ID") + _ = os.Unsetenv("AZURE_CLIENT_SECRET") + _ = os.Unsetenv("AZURE_SUBSCRIPTION_ID") + }() + + cfg := createTestConfig("readonly", map[string]bool{}) + service := NewService(cfg) + err := service.Initialize() + if err != nil { + t.Fatalf("Failed to initialize service: %v", err) + } + + // Create SSE server + sseServer := server.NewSSEServer(service.mcpServer) + + // Test custom server creation + addr := "localhost:8082" + customServer := service.createCustomSSEServerWithHelp404(sseServer, addr) + + // Test that SSE endpoints are accessible (don't return our custom 404) + // Note: We only test that they don't return our custom 404 response, + // not the actual SSE functionality which would require persistent connections + testCases := []struct { + path string + method string + shouldBe404 bool + description string + }{ + {"/message", "POST", false, "Message endpoint should be handled by SSE server"}, + {"/message", "GET", false, "Message endpoint with GET should be handled by SSE server"}, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + req, err := http.NewRequest(tc.method, tc.path, nil) + if err != nil { + t.Fatalf("Failed to create request: %v", err) + } + + rr := httptest.NewRecorder() + customServer.Handler.ServeHTTP(rr, req) + + if tc.shouldBe404 { + if rr.Code != http.StatusNotFound { + t.Errorf("Expected 404 for %s %s, got %d", tc.method, tc.path, rr.Code) + } + } else { + if rr.Code == http.StatusNotFound { + t.Errorf("Should not get 404 for %s %s, but got %d", tc.method, tc.path, rr.Code) + } + // Additional check: ensure it's not our custom 404 JSON response + if rr.Header().Get("Content-Type") == "application/json" { + var response map[string]interface{} + if json.Unmarshal(rr.Body.Bytes(), &response) == nil { + if response["error"] == "Not Found" && strings.Contains(response["message"].(string), "SSE transport") { + t.Errorf("Got our custom 404 response for %s %s, should be handled by SSE server", tc.method, tc.path) + } + } + } + // Note: We don't check for specific success codes since the SSE server + // may return various codes based on the request content/headers + } + }) + } +} + +// TestJSONResponseFormat tests the format of JSON error responses +func TestJSONResponseFormat(t *testing.T) { + // Set test environment variables + _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") + _ = os.Setenv("AZURE_CLIENT_ID", "test-client") + _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + defer func() { + _ = os.Unsetenv("AZURE_TENANT_ID") + _ = os.Unsetenv("AZURE_CLIENT_ID") + _ = os.Unsetenv("AZURE_CLIENT_SECRET") + _ = os.Unsetenv("AZURE_SUBSCRIPTION_ID") + }() + + cfg := createTestConfig("readonly", map[string]bool{}) + service := NewService(cfg) + err := service.Initialize() + if err != nil { + t.Fatalf("Failed to initialize service: %v", err) + } + + tests := []struct { + name string + serverFunc func() *http.Server + expectedKeys []string + transportType string + }{ + { + name: "StreamableHTTP_JSONFormat", + serverFunc: func() *http.Server { + return service.createCustomHTTPServerWithHelp404("localhost:8080") + }, + expectedKeys: []string{"error", "message", "endpoints"}, + transportType: "streamable-http", + }, + { + name: "SSE_JSONFormat", + serverFunc: func() *http.Server { + sseServer := server.NewSSEServer(service.mcpServer) + return service.createCustomSSEServerWithHelp404(sseServer, "localhost:8081") + }, + expectedKeys: []string{"error", "message", "endpoints"}, + transportType: "sse", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + customServer := tt.serverFunc() + + req, err := http.NewRequest("GET", "/invalid", nil) + if err != nil { + t.Fatalf("Failed to create request: %v", err) + } + + rr := httptest.NewRecorder() + customServer.Handler.ServeHTTP(rr, req) + + // Verify it's valid JSON + var response map[string]interface{} + err = json.Unmarshal(rr.Body.Bytes(), &response) + if err != nil { + t.Fatalf("Response should be valid JSON: %v", err) + } + + // Verify all expected keys are present + for _, key := range tt.expectedKeys { + if _, exists := response[key]; !exists { + t.Errorf("Expected key '%s' not found in response", key) + } + } + + // Verify error field value + if response["error"] != "Not Found" { + t.Errorf("Expected error field to be 'Not Found', got %v", response["error"]) + } + + // Verify message is informative + message, ok := response["message"].(string) + if !ok { + t.Fatal("Message should be a string") + } + if len(message) < 20 { + t.Error("Message should be informative (at least 20 characters)") + } + + // Verify endpoints structure + endpoints, ok := response["endpoints"].(map[string]interface{}) + if !ok { + t.Fatal("Endpoints should be a map") + } + if len(endpoints) == 0 { + t.Error("Endpoints map should not be empty") + } + + t.Logf("Verified JSON format for %s transport", tt.transportType) + }) + } +} From 7cd018ccfbcb111540fcdd7b592ea1687eee3e66 Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Tue, 19 Aug 2025 15:03:25 +0800 Subject: [PATCH 14/27] chore: fix the lint errors --- internal/server/server.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/internal/server/server.go b/internal/server/server.go index 51e7f50..1ef5a61 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -5,6 +5,7 @@ import ( "fmt" "log" "net/http" + "time" "github.com/Azure/aks-mcp/internal/azcli" "github.com/Azure/aks-mcp/internal/azureclient" @@ -127,13 +128,16 @@ func (s *Service) createCustomHTTPServerWithHelp404(addr string) *http.Server { }, } - json.NewEncoder(w).Encode(response) + if err := json.NewEncoder(w).Encode(response); err != nil { + http.Error(w, "Failed to encode response", http.StatusInternalServerError) + } } }) return &http.Server{ - Addr: addr, - Handler: mux, + Addr: addr, + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, } } @@ -161,13 +165,16 @@ func (s *Service) createCustomSSEServerWithHelp404(sseServer *server.SSEServer, }, } - json.NewEncoder(w).Encode(response) + if err := json.NewEncoder(w).Encode(response); err != nil { + http.Error(w, "Failed to encode response", http.StatusInternalServerError) + } } }) return &http.Server{ - Addr: addr, - Handler: mux, + Addr: addr, + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, } } From fe32cbd05bc88411093e8f1514b0b82879c4e83f Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 19 Aug 2025 14:51:54 -0700 Subject: [PATCH 15/27] feat: update logic to fallback to existing login handling when no env vars are present and add explicit support for system-assigned managed identity --- internal/azcli/login.go | 34 +++++++++++------------ internal/azcli/login_test.go | 53 +++++++++++++++--------------------- 2 files changed, 39 insertions(+), 48 deletions(-) diff --git a/internal/azcli/login.go b/internal/azcli/login.go index 3f14a27..2a6cbb1 100644 --- a/internal/azcli/login.go +++ b/internal/azcli/login.go @@ -66,18 +66,13 @@ var NewShellProc = func(timeout int) Proc { // EnsureAzCliLoginWithProc is the testable implementation that uses an injected Proc. func EnsureAzCliLoginWithProc(proc Proc, cfg *config.ConfigData) (string, error) { - // If there's a valid account, skip auto-login. - out, err := proc.Run("account show --query id -o tsv") - if err == nil && strings.TrimSpace(out) != "" { - return AuthTypeExisting, nil - } - - // Read environment variables to determine which auth methods to try first. + // Read environment variables to determine which auth methods to try tenantID := os.Getenv("AZURE_TENANT_ID") clientID := os.Getenv("AZURE_CLIENT_ID") clientSecret := os.Getenv("AZURE_CLIENT_SECRET") federatedTokenFile := os.Getenv("AZURE_FEDERATED_TOKEN_FILE") subscriptionID := os.Getenv("AZURE_SUBSCRIPTION_ID") + azureManagedIdentityType := os.Getenv("AZURE_MANAGED_IDENTITY") // 1) Service Principal with secret if clientID != "" && clientSecret != "" && tenantID != "" { @@ -149,17 +144,22 @@ func EnsureAzCliLoginWithProc(proc Proc, cfg *config.ConfigData) (string, error) return AuthTypeUserAssignedManagedID, nil } - // 4) Fallback to System-assigned Managed Identity even when no AZURE_* hints are set. - if err := runLoginCommand(proc, "login --identity", "system-assigned managed identity"); err != nil { - return "", err - } - if err := setSubscription(proc, subscriptionID, "system-assigned managed identity"); err != nil { - return "", err - } - if err := showAccount(proc, "system-assigned managed identity"); err != nil { - return "", err + // 4) System-assigned Managed Identity + if azureManagedIdentityType == "system" { + if err := runLoginCommand(proc, "login --identity", "system-assigned managed identity"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "system-assigned managed identity"); err != nil { + return "", err + } + if err := showAccount(proc, "system-assigned managed identity"); err != nil { + return "", err + } + return AuthTypeSystemAssignedManagedID, nil } - return AuthTypeSystemAssignedManagedID, nil + + // 5) Existing login (no credentials override provided) + return AuthTypeExisting, nil } // Runs a command and returns a formatted error if the output indicates an ERROR or the command failed. diff --git a/internal/azcli/login_test.go b/internal/azcli/login_test.go index e8139d5..f10e594 100644 --- a/internal/azcli/login_test.go +++ b/internal/azcli/login_test.go @@ -39,15 +39,16 @@ func (c *loginCommands) Run(cmd string) (string, error) { func TestEnsureAzCliLogin_Existing(t *testing.T) { cfg := config.NewConfig() - // ensure auto-login is attempted - t.Setenv("AZURE_CLIENT_ID", "cid") + // ensure no env-based auth triggers; default to existing login + t.Setenv("AZURE_CLIENT_ID", "") + t.Setenv("AZURE_CLIENT_SECRET", "") + t.Setenv("AZURE_TENANT_ID", "") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") + t.Setenv("AZURE_SUBSCRIPTION_ID", "") - // add command to simulate existing_login - p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, - }} + // proc should not be invoked + p := &loginCommands{resp: []loginCommandResponses{}} - // should return existing_login without error got, err := EnsureAzCliLoginWithProc(p, cfg) if err != nil { t.Fatalf("expected no error, got %v", err) @@ -65,10 +66,9 @@ func TestEnsureAzCliLogin_ServicePrincipal(t *testing.T) { t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") // To exercise the login flow we must simulate probe failing (not logged in) p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, // login command succeeds (matches dummy env values) {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, - // re-probe succeeds + // probe after login succeeds {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, }} got, err := EnsureAzCliLoginWithProc(p, cfg) @@ -87,7 +87,6 @@ func TestEnsureAzCliLogin_ServicePrincipal_ErrorOutput(t *testing.T) { t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") // probe fails so login attempt is performed p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, // login returns an ERROR: prefixed message on stderr (simulated via out) {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "ERROR: something went wrong", err: nil}, }} @@ -103,7 +102,6 @@ func TestEnsureAzCliLogin_ServicePrincipal_CommandError(t *testing.T) { t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, // login command fails to execute {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: errors.New("exec failed")}, }} @@ -122,12 +120,14 @@ func TestEnsureAzCliLogin_NoAutoLogin(t *testing.T) { t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") t.Setenv("AZURE_SUBSCRIPTION_ID", "") - // proc should not be called; queueProc with no responses will fail if Run is invoked + // should default to existing login without invoking proc p := &loginCommands{resp: []loginCommandResponses{}} - got, err := EnsureAzCliLoginWithProc(p, cfg) - if err == nil { - t.Fatalf("expected error when no automatic credentials are set, got success: %s", got) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "existing_login" { + t.Fatalf("unexpected result: %s", got) } } @@ -139,7 +139,6 @@ func TestEnsureAzCliLogin_SubscriptionSetFailure(t *testing.T) { t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, // account set fails {cmd: "account set --subscription dummy-subscription-id", out: "", err: errors.New("set failed")}, @@ -159,7 +158,6 @@ func TestEnsureAzCliLogin_ReprobeFailure(t *testing.T) { t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, // re-probe fails @@ -186,9 +184,7 @@ func TestEnsureAzCliLogin_Federated(t *testing.T) { t.Skipf("skipping: %s not present (only available in AKS)", allowedTokenPath) } - p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, - }} + p := &loginCommands{resp: []loginCommandResponses{}} got, err := EnsureAzCliLoginWithProc(p, cfg) if err != nil && !strings.Contains(err.Error(), allowedTokenPath) { @@ -205,9 +201,7 @@ func TestEnsureAzCliLogin_Federated_InvalidFile(t *testing.T) { t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "/tmp/non-existent-file") - p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, - }} + p := &loginCommands{resp: []loginCommandResponses{}} _, err := EnsureAzCliLoginWithProc(p, cfg) if err == nil || !strings.Contains(err.Error(), "federated token file validation failed") { @@ -250,7 +244,6 @@ func TestEnsureAzCliLogin_Federated_Success(t *testing.T) { // _ = os.WriteFile(allowedTokenPath, []byte(tokenData), 0600) p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, {cmd: "login --service-principal -u dummy-client-id --tenant dummy-tenant-id --federated-token " + tokenData, out: "", err: nil}, {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, }} @@ -270,7 +263,6 @@ func TestEnsureAzCliLogin_ManagedIdentity_UserAssigned(t *testing.T) { t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, {cmd: "login --identity -u dummy-managed-identity-client-id", out: "", err: nil}, {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, @@ -287,15 +279,15 @@ func TestEnsureAzCliLogin_ManagedIdentity_UserAssigned(t *testing.T) { func TestEnsureAzCliLogin_ManagedIdentity_SystemAssigned(t *testing.T) { cfg := config.NewConfig() - // Clear all Azure env vars to test fallback to system-assigned MI + // Trigger system-assigned MI via env t.Setenv("AZURE_CLIENT_ID", "") t.Setenv("AZURE_CLIENT_SECRET", "") t.Setenv("AZURE_TENANT_ID", "") t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") t.Setenv("AZURE_SUBSCRIPTION_ID", "") + t.Setenv("AZURE_MANAGED_IDENTITY", "system") p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, {cmd: "login --identity", out: "", err: nil}, {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, }} @@ -311,15 +303,15 @@ func TestEnsureAzCliLogin_ManagedIdentity_SystemAssigned(t *testing.T) { func TestEnsureAzCliLogin_ManagedIdentity_SystemAssigned_Success(t *testing.T) { cfg := config.NewConfig() - // Fallback to system-assigned MI with subscription set + // System-assigned MI with subscription set via env flag t.Setenv("AZURE_CLIENT_ID", "") t.Setenv("AZURE_CLIENT_SECRET", "") t.Setenv("AZURE_TENANT_ID", "") t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") + t.Setenv("AZURE_MANAGED_IDENTITY", "system") p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, {cmd: "login --identity", out: "", err: nil}, {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, @@ -336,14 +328,13 @@ func TestEnsureAzCliLogin_ManagedIdentity_SystemAssigned_Success(t *testing.T) { func TestEnsureAzCliLogin_LoginPromptInOutput(t *testing.T) { cfg := config.NewConfig() - // ensure auto-login is attempted + // ensure auto-login is attempted via user-assigned MI t.Setenv("AZURE_CLIENT_ID", "cid") // Simulate the case where Azure CLI returns "Please run 'az login'" message with an error // After the command.go fix, stderr content is returned WITH the error, not instead of it // This test ensures we don't incorrectly think there's a valid login when there isn't p := &loginCommands{resp: []loginCommandResponses{ - {cmd: "account show --query id -o tsv", out: "ERROR: Please run 'az login' to setup account.", err: errors.New("command failed")}, {cmd: "login --identity -u cid", out: "", err: nil}, {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, }} From 4f0b549c44f93355bf2efd4d5c1985e845cf02a6 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 19 Aug 2025 15:01:45 -0700 Subject: [PATCH 16/27] doc: add Azure CLI authentication section in README with detailed login methods --- README.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/README.md b/README.md index de8676e..17c6db8 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,39 @@ AI assistants can use to interact with AKS resources. It leverages the Model Context Protocol (MCP) to facilitate this communication, enabling AI tools to make API calls to Azure and interpret the responses. +## Azure CLI Authentication + +AKS-MCP uses Azure CLI (az) for AKS operations. Azure CLI authentication is attempted in this order: + +1. Service Principal (client secret): When `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`, `AZURE_TENANT_ID` environment variables are present, a service principal login is performed using the following command: `az login --service-principal -u CLIENT_ID -p CLIENT_SECRET --tenant TENANT_ID` + +1. Workload Identity (federated token): When `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_FEDERATED_TOKEN_FILE` environment variables are present, a federated token login is performed using the following command: `az login --service-principal -u CLIENT_ID --tenant TENANT_ID --federated-token TOKEN` + +1. User-assigned Managed Identity (managed identity client ID): When only `AZURE_CLIENT_ID` environment variable is present, a user-assigned managed identity login is performed using the following command: `az login --identity -u CLIENT_ID` + +1. System-assigned Managed Identity: When `AZURE_MANAGED_IDENTITY` is set to `system`, a system-assigned managed identity login is performed using the following command: `az login --identity` + +1. Existing Login: When none of the above environment variables are set, AKS-MCP assumes you have already authenticated (for example, via `az login`) and uses the existing session. + +Optional subscription selection: + +- If `AZURE_SUBSCRIPTION_ID` is set, AKS-MCP will run `az account set --subscription SUBSCRIPTION_ID` after login. + +Notes and security: + +- The federated token file must be exactly `/var/run/secrets/azure/tokens/azure-identity-token` and is strictly validated; other paths are rejected. +- After each login, AKS-MCP verifies authentication with `az account show --query id -o tsv`. +- Ensure the Azure CLI is installed and on PATH. + +Environment variables used: + +- `AZURE_TENANT_ID` +- `AZURE_CLIENT_ID` +- `AZURE_CLIENT_SECRET` +- `AZURE_FEDERATED_TOKEN_FILE` +- `AZURE_SUBSCRIPTION_ID` +- `AZURE_MANAGED_IDENTITY` (set to `system` to opt into system-assigned managed identity) + ## Available Tools The AKS-MCP server provides consolidated tools for interacting with AKS From 1ad66f04092e009226e7b3cdf1219e5af9f40fa8 Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Wed, 20 Aug 2025 14:47:29 +0800 Subject: [PATCH 17/27] chore: fix the unit tests around authz With the new login logic added via environment variables, the MCP server would try to login with test-client in our tests. Hence, those environment variables should be removed so that unit tests would skip the login. --- internal/server/server_test.go | 48 ---------------------------------- 1 file changed, 48 deletions(-) diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 08d6d54..6b863a7 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -397,18 +397,6 @@ func BenchmarkServiceInitialization(b *testing.B) { // TestCreateCustomHTTPServerWithHelp404 tests the custom HTTP server creation for streamable-http transport func TestCreateCustomHTTPServerWithHelp404(t *testing.T) { - // Set test environment variables - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") - defer func() { - _ = os.Unsetenv("AZURE_TENANT_ID") - _ = os.Unsetenv("AZURE_CLIENT_ID") - _ = os.Unsetenv("AZURE_CLIENT_SECRET") - _ = os.Unsetenv("AZURE_SUBSCRIPTION_ID") - }() - cfg := createTestConfig("readonly", map[string]bool{}) service := NewService(cfg) err := service.Initialize() @@ -505,18 +493,6 @@ func TestCreateCustomHTTPServerWithHelp404(t *testing.T) { // TestCreateCustomSSEServerWithHelp404 tests the custom HTTP server creation for SSE transport func TestCreateCustomSSEServerWithHelp404(t *testing.T) { - // Set test environment variables - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") - defer func() { - _ = os.Unsetenv("AZURE_TENANT_ID") - _ = os.Unsetenv("AZURE_CLIENT_ID") - _ = os.Unsetenv("AZURE_CLIENT_SECRET") - _ = os.Unsetenv("AZURE_SUBSCRIPTION_ID") - }() - cfg := createTestConfig("readonly", map[string]bool{}) service := NewService(cfg) err := service.Initialize() @@ -633,18 +609,6 @@ func TestCreateCustomSSEServerWithHelp404(t *testing.T) { // TestSSEServerEndpointsAccessible tests that SSE endpoints are still accessible func TestSSEServerEndpointsAccessible(t *testing.T) { - // Set test environment variables - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") - defer func() { - _ = os.Unsetenv("AZURE_TENANT_ID") - _ = os.Unsetenv("AZURE_CLIENT_ID") - _ = os.Unsetenv("AZURE_CLIENT_SECRET") - _ = os.Unsetenv("AZURE_SUBSCRIPTION_ID") - }() - cfg := createTestConfig("readonly", map[string]bool{}) service := NewService(cfg) err := service.Initialize() @@ -708,18 +672,6 @@ func TestSSEServerEndpointsAccessible(t *testing.T) { // TestJSONResponseFormat tests the format of JSON error responses func TestJSONResponseFormat(t *testing.T) { - // Set test environment variables - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") - defer func() { - _ = os.Unsetenv("AZURE_TENANT_ID") - _ = os.Unsetenv("AZURE_CLIENT_ID") - _ = os.Unsetenv("AZURE_CLIENT_SECRET") - _ = os.Unsetenv("AZURE_SUBSCRIPTION_ID") - }() - cfg := createTestConfig("readonly", map[string]bool{}) service := NewService(cfg) err := service.Initialize() From 3cc625ded7b280e6a69fe73eed7aa76fb6e3472d Mon Sep 17 00:00:00 2001 From: Qasim Sarfraz Date: Sun, 3 Aug 2025 19:58:09 +0200 Subject: [PATCH 18/27] doc: add "Docker Desktop" instructions Signed-off-by: Qasim Sarfraz --- README.md | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bdb4d00..af1667e 100644 --- a/README.md +++ b/README.md @@ -454,7 +454,49 @@ For other MCP-compatible AI clients like [Claude Desktop](https://claude.ai/), c } ``` -#### 🐋 Docker MCP configuration +#### 🐳 Docker MCP Toolkit + +You can enable the [AKS-MCP server directly from MCP Toolkit](https://hub.docker.com/mcp/server/aks/overview): + +1. Open Docker Desktop +2. Click "MCP Toolkit" in the left sidebar +3. Search for "aks" in Catalog tab +4. Click on the AKS-MCP server card +5. Enable the server by clicking "+" in the top right corner +6. Configure the server using "Configuration" tab: + - **azure_dir** `[REQUIRED]`: Path to your Azure credentials directory e.g `/home/user/.azure` (must be absolute – without `$HOME` or `~`) + - **kubeconfig** `[REQUIRED]`: Path to your kubeconfig file e.g `/home/user/.kube/config` (must be absolute – without `$HOME` or `~`) + - **access_level** `[REQUIRED]`: Set to `readonly`, `readwrite`, or `admin` as needed +7. You are now ready to use the AKS-MCP server with your [preferred MCP client](https://hub.docker.com/mcp/server/aks/manual), see an example [here](https://docs.docker.com/ai/mcp-catalog-and-toolkit/toolkit/#install-an-mcp-client). + +On **Windows**, the Azure credentials won't work by default, but you have two options: + +1. **Long-lived servers**: Configure the [MCP gateway](https://docs.docker.com/ai/mcp-gateway/) to use long-lived servers using `--long-lived` flag and then authenticate with Azure CLI in the container, see option B in Containerized MCP configuration below on how to fetch credentials inside the container. +2. **Custom Azure Directory**: Set up a custom Azure directory: + ```powershell + # Set custom Azure config directory + $env:AZURE_CONFIG_DIR = "$env:USERPROFILE\.azure-for-docker" + + # Disable token cache encryption (to match behavior with Linux/macOS) + $env:AZURE_CORE_ENCRYPT_TOKEN_CACHE = "false" + + # Login to Azure CLI + az login + ``` + + This will store the credentials in `$env:USERPROFILE\.azure-for-docker` (e.g. `C:\Users\\.azure-for-docker`), + use this path in the AKS-MCP server configuration `azure_dir`. + +You can also use the [MCP Gateway](https://docs.docker.com/ai/mcp-gateway/) to enable the AKS-MCP server directly using: + +```bash +# Enable AKS-MCP server in Docker MCP Gateway +docker mcp server enable aks +``` + +Note: You still need to configure the server (e.g. using `docker mcp config`) with your Azure credentials, kubeconfig file, and access level. + +#### 🐋 Containerized MCP configuration For containerized deployment, you can run AKS-MCP server using the official Docker image: From dab2ddfbfcb19c755bc00dd2e0946cedb82fcfcd Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Thu, 21 Aug 2025 10:31:51 +0800 Subject: [PATCH 19/27] chore: bump mcp-go to v0.38.0 --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index c371660..0d71ef4 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/Azure/mcp-kubernetes v0.0.8 github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 github.com/inspektor-gadget/inspektor-gadget v0.43.0 - github.com/mark3labs/mcp-go v0.37.0 + github.com/mark3labs/mcp-go v0.38.0 github.com/microsoft/ApplicationInsights-Go v0.4.4 github.com/spf13/pflag v1.0.7 go.opentelemetry.io/otel v1.37.0 diff --git a/go.sum b/go.sum index 8e331ce..8876acf 100644 --- a/go.sum +++ b/go.sum @@ -245,8 +245,8 @@ github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhn github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= -github.com/mark3labs/mcp-go v0.37.0 h1:BywvZLPRT6Zx6mMG/MJfxLSZQkTGIcJSEGKsvr4DsoQ= -github.com/mark3labs/mcp-go v0.37.0/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g= +github.com/mark3labs/mcp-go v0.38.0 h1:E5tmJiIXkhwlV0pLAwAT0O5ZjUZSISE/2Jxg+6vpq4I= +github.com/mark3labs/mcp-go v0.38.0/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= From 31f8179954198c8387344ae265a6c6270d761d37 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 21 Aug 2025 09:11:26 +0000 Subject: [PATCH 20/27] chore(deps): bump helm.sh/helm/v3 in the all-gomod group Bumps the all-gomod group with 1 update: [helm.sh/helm/v3](https://github.com/helm/helm). Updates `helm.sh/helm/v3` from 3.18.5 to 3.18.6 - [Release notes](https://github.com/helm/helm/releases) - [Commits](https://github.com/helm/helm/compare/v3.18.5...v3.18.6) --- updated-dependencies: - dependency-name: helm.sh/helm/v3 dependency-version: 3.18.6 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: all-gomod ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 0d71ef4..decc3c5 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0 go.opentelemetry.io/otel/sdk v1.37.0 go.opentelemetry.io/otel/trace v1.37.0 - helm.sh/helm/v3 v3.18.5 + helm.sh/helm/v3 v3.18.6 k8s.io/apimachinery v0.33.4 k8s.io/cli-runtime v0.33.4 k8s.io/client-go v0.33.4 diff --git a/go.sum b/go.sum index 8876acf..52d8f7b 100644 --- a/go.sum +++ b/go.sum @@ -519,8 +519,8 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -helm.sh/helm/v3 v3.18.5 h1:Cc3Z5vd6kDrZq9wO9KxKLNEickiTho6/H/dBNRVSos4= -helm.sh/helm/v3 v3.18.5/go.mod h1:L/dXDR2r539oPlFP1PJqKAC1CUgqHJDLkxKpDGrWnyg= +helm.sh/helm/v3 v3.18.6 h1:S/2CqcYnNfLckkHLI0VgQbxgcDaU3N4A/46E3n9wSNY= +helm.sh/helm/v3 v3.18.6/go.mod h1:L/dXDR2r539oPlFP1PJqKAC1CUgqHJDLkxKpDGrWnyg= k8s.io/api v0.33.4 h1:oTzrFVNPXBjMu0IlpA2eDDIU49jsuEorGHB4cvKupkk= k8s.io/api v0.33.4/go.mod h1:VHQZ4cuxQ9sCUMESJV5+Fe8bGnqAARZ08tSTdHWfeAc= k8s.io/apiextensions-apiserver v0.33.3 h1:qmOcAHN6DjfD0v9kxL5udB27SRP6SG/MTopmge3MwEs= From f5e848562d0bc70a8fa862bf152fb8b3914b8a99 Mon Sep 17 00:00:00 2001 From: Marcos Blount Date: Wed, 20 Aug 2025 21:45:08 -0400 Subject: [PATCH 21/27] docs/tests(k8s): add GoDoc comments and unit tests with benchmarks for coverage --- internal/k8s/adapter.go | 21 ++-- internal/k8s/adapter_test.go | 218 +++++++++++++++++++++++++++++++++++ 2 files changed, 229 insertions(+), 10 deletions(-) create mode 100644 internal/k8s/adapter_test.go diff --git a/internal/k8s/adapter.go b/internal/k8s/adapter.go index fcf7b46..98b9841 100644 --- a/internal/k8s/adapter.go +++ b/internal/k8s/adapter.go @@ -1,3 +1,6 @@ +// Package k8s provides adapters that let aks-mcp interoperate with the +// mcp-kubernetes libraries. It maps aks-mcp configuration and executors +// to the types expected by mcp-kubernetes without altering behavior. package k8s import ( @@ -9,16 +12,13 @@ import ( k8stools "github.com/Azure/mcp-kubernetes/pkg/tools" ) -// ConfigAdapter converts aks-mcp config to mcp-kubernetes config +// ConvertConfig maps an aks-mcp ConfigData into the equivalent +// mcp-kubernetes ConfigData without mutating the input. func ConvertConfig(cfg *config.ConfigData) *k8sconfig.ConfigData { - // Create K8s security config k8sSecurityConfig := k8ssecurity.NewSecurityConfig() - - // Map allowed namespaces k8sSecurityConfig.SetAllowedNamespaces(cfg.AllowNamespaces) k8sSecurityConfig.AccessLevel = k8ssecurity.AccessLevel(cfg.AccessLevel) - // Create K8s config k8sCfg := &k8sconfig.ConfigData{ AdditionalTools: cfg.AdditionalTools, Timeout: cfg.Timeout, @@ -35,20 +35,21 @@ func ConvertConfig(cfg *config.ConfigData) *k8sconfig.ConfigData { return k8sCfg } -// WrapK8sExecutor wraps a mcp-kubernetes executor to work with aks-mcp config +// WrapK8sExecutor makes an mcp-kubernetes CommandExecutor +// compatible with the aks-mcp tools.CommandExecutor interface. func WrapK8sExecutor(k8sExecutor k8stools.CommandExecutor) tools.CommandExecutor { return &executorAdapter{k8sExecutor: k8sExecutor} } -// executorAdapter adapts between aks-mcp and mcp-kubernetes configs +// executorAdapter bridges aks-mcp execution to mcp-kubernetes. +// Unexported; behavior is defined by the wrapped executor. type executorAdapter struct { k8sExecutor k8stools.CommandExecutor } +// Execute adapts aks-mcp execution by converting its config +// and delegating to the wrapped mcp-kubernetes executor. func (a *executorAdapter) Execute(params map[string]interface{}, cfg *config.ConfigData) (string, error) { - // Convert aks-mcp config to k8s config k8sCfg := ConvertConfig(cfg) - - // Execute using the k8s executor return a.k8sExecutor.Execute(params, k8sCfg) } diff --git a/internal/k8s/adapter_test.go b/internal/k8s/adapter_test.go new file mode 100644 index 0000000..262bd8d --- /dev/null +++ b/internal/k8s/adapter_test.go @@ -0,0 +1,218 @@ +package k8s + +import ( + "errors" + "reflect" + "testing" + + "github.com/Azure/aks-mcp/internal/config" + k8sconfig "github.com/Azure/mcp-kubernetes/pkg/config" + k8ssecurity "github.com/Azure/mcp-kubernetes/pkg/security" + k8stools "github.com/Azure/mcp-kubernetes/pkg/tools" +) + +// This test suite verifies config mapping (without mutating input), adapter delegation, +// error propagation, and the current nil-config behavior. Benchmarks provide a baseline +// for detecting performance regressions. + +// mustEqual keeps assertions concise with consistent failure messages. +func mustEqual[T comparable](t *testing.T, got, want T, msg string) { + t.Helper() + if got != want { + t.Fatalf("%s: got %v, want %v", msg, got, want) + } +} + +// mustDeepEqual keeps deep-structure assertions concise with consistent messages. +func mustDeepEqual(t *testing.T, got, want interface{}, msg string) { + t.Helper() + if !reflect.DeepEqual(got, want) { + t.Fatalf("%s: got %#v, want %#v", msg, got, want) + } +} + +// fakeExecutor captures inputs and returns preset output/error to observe delegation. +type fakeExecutor struct { + lastParams map[string]interface{} + lastCfg *k8sconfig.ConfigData + out string + err error +} + +var _ k8stools.CommandExecutor = (*fakeExecutor)(nil) + +func (f *fakeExecutor) Execute(params map[string]interface{}, cfg *k8sconfig.ConfigData) (string, error) { + f.lastParams = params + f.lastCfg = cfg + return f.out, f.err +} + +func TestConvertConfig_MapsFields(t *testing.T) { + t.Parallel() + + in := &config.ConfigData{ + Timeout: 600, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true, "cilium": false}, + AllowNamespaces: "default,platform", + OTLPEndpoint: "otel:4317", + } + + got := ConvertConfig(in) + if got == nil { + t.Fatal("ConvertConfig returned nil") + } + + mustEqual(t, got.Timeout, in.Timeout, "Timeout") + mustEqual(t, got.Transport, in.Transport, "Transport") + mustEqual(t, got.Host, in.Host, "Host") + mustEqual(t, got.Port, in.Port, "Port") + mustEqual(t, got.AccessLevel, in.AccessLevel, "AccessLevel") + mustEqual(t, got.OTLPEndpoint, in.OTLPEndpoint, "OTLPEndpoint") + mustDeepEqual(t, got.AdditionalTools, in.AdditionalTools, "AdditionalTools") + mustEqual(t, got.AllowNamespaces, in.AllowNamespaces, "AllowNamespaces") + + if got.SecurityConfig == nil { + t.Fatal("SecurityConfig is nil") + } + mustEqual(t, got.SecurityConfig.AccessLevel, k8ssecurity.AccessLevel(in.AccessLevel), "SecurityConfig.AccessLevel") +} + +func TestConvertConfig_DoesNotMutateInput(t *testing.T) { + t.Parallel() + + in := &config.ConfigData{ + Timeout: 42, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true}, + AllowNamespaces: "default", + OTLPEndpoint: "otel:4317", + } + + // Verify the “no input mutation” guarantee by comparing to a copy. + orig := *in + orig.AdditionalTools = map[string]bool{} + for k, v := range in.AdditionalTools { + orig.AdditionalTools[k] = v + } + + _ = ConvertConfig(in) + mustDeepEqual(t, in, &orig, "input should remain unchanged") +} + +func TestConvertConfig_ZeroValueCfg(t *testing.T) { + t.Parallel() + // Zero-value config should be accepted (no panic). + in := &config.ConfigData{} + _ = ConvertConfig(in) +} + +func TestExecutorAdapter_DelegatesAndForwards(t *testing.T) { + t.Parallel() + + fe := &fakeExecutor{out: "ok"} + adapter := WrapK8sExecutor(fe) + + params := map[string]interface{}{"k": "v"} + inCfg := &config.ConfigData{ + Timeout: 10, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true}, + AllowNamespaces: "default", + } + + got, err := adapter.Execute(params, inCfg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + mustEqual(t, got, "ok", "adapter output") + mustDeepEqual(t, fe.lastParams, params, "params forwarded") + + if fe.lastCfg == nil || fe.lastCfg.SecurityConfig == nil { + t.Fatalf("expected non-nil converted cfg + SecurityConfig, got %#v", fe.lastCfg) + } + mustEqual(t, fe.lastCfg.Port, inCfg.Port, "Port") + mustEqual(t, fe.lastCfg.AccessLevel, inCfg.AccessLevel, "AccessLevel") + mustDeepEqual(t, fe.lastCfg.AdditionalTools, inCfg.AdditionalTools, "AdditionalTools") + mustEqual(t, fe.lastCfg.AllowNamespaces, inCfg.AllowNamespaces, "AllowNamespaces") + mustEqual(t, fe.lastCfg.SecurityConfig.AccessLevel, k8ssecurity.AccessLevel("readonly"), "SecurityConfig.AccessLevel") +} + +func TestExecutorAdapter_PropagatesError(t *testing.T) { + t.Parallel() + + fe := &fakeExecutor{err: errors.New("boom")} + adapter := WrapK8sExecutor(fe) + + _, err := adapter.Execute(map[string]interface{}{"x": 1}, &config.ConfigData{}) + if err == nil { + t.Fatalf("expected error, got nil") + } +} + +func TestExecutorAdapter_PanicsOnNilConfig_CurrentBehavior(t *testing.T) { + t.Parallel() + + // Document the current precondition: cfg must be non-nil. + defer func() { + if r := recover(); r == nil { + t.Fatalf("expected panic when cfg is nil") + } + }() + + fe := &fakeExecutor{} + adapter := WrapK8sExecutor(fe) + _, _ = adapter.Execute(map[string]interface{}{"x": 1}, nil) +} + +// BenchmarkConvertConfig tracks drift in allocation/time costs over time. +// Helps detect subtle regressions when config mapping logic evolves. +func BenchmarkConvertConfig(b *testing.B) { + in := &config.ConfigData{ + Timeout: 600, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true, "cilium": false}, + AllowNamespaces: "default,platform", + OTLPEndpoint: "otel:4317", + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = ConvertConfig(in) + } +} + +// BenchmarkExecutorAdapter measures adapter overhead to ensure delegation +// stays cheap and doesn’t become a bottleneck as layers evolve. +func BenchmarkExecutorAdapter(b *testing.B) { + fe := &fakeExecutor{out: "ok"} + adapter := WrapK8sExecutor(fe) + inCfg := &config.ConfigData{ + Timeout: 42, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true}, + AllowNamespaces: "default", + } + + params := map[string]interface{}{"k": "v"} + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = adapter.Execute(params, inCfg) + } +} From c16ccf42aae7e7eb6b60be1c38463d2f4b931814 Mon Sep 17 00:00:00 2001 From: Marcos Blount Date: Thu, 21 Aug 2025 11:59:04 -0400 Subject: [PATCH 22/27] tests: drop BenchmarkExecutorAdapter as redundant --- internal/k8s/adapter_test.go | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/internal/k8s/adapter_test.go b/internal/k8s/adapter_test.go index 262bd8d..f88a9fd 100644 --- a/internal/k8s/adapter_test.go +++ b/internal/k8s/adapter_test.go @@ -193,26 +193,3 @@ func BenchmarkConvertConfig(b *testing.B) { _ = ConvertConfig(in) } } - -// BenchmarkExecutorAdapter measures adapter overhead to ensure delegation -// stays cheap and doesn’t become a bottleneck as layers evolve. -func BenchmarkExecutorAdapter(b *testing.B) { - fe := &fakeExecutor{out: "ok"} - adapter := WrapK8sExecutor(fe) - inCfg := &config.ConfigData{ - Timeout: 42, - Transport: "stdio", - Host: "127.0.0.1", - Port: 8000, - AccessLevel: "readonly", - AdditionalTools: map[string]bool{"helm": true}, - AllowNamespaces: "default", - } - - params := map[string]interface{}{"k": "v"} - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, _ = adapter.Execute(params, inCfg) - } -} From 883e731b509f698b1a8f990293f27c95e98c4563 Mon Sep 17 00:00:00 2001 From: Marcos Blount Date: Thu, 21 Aug 2025 12:56:46 -0400 Subject: [PATCH 23/27] tests: validate ConvertConfig input immutability and zero-value behavior --- internal/k8s/adapter_test.go | 47 ++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/internal/k8s/adapter_test.go b/internal/k8s/adapter_test.go index f88a9fd..703fdd6 100644 --- a/internal/k8s/adapter_test.go +++ b/internal/k8s/adapter_test.go @@ -11,8 +11,10 @@ import ( k8stools "github.com/Azure/mcp-kubernetes/pkg/tools" ) +var benchOut *k8sconfig.ConfigData + // This test suite verifies config mapping (without mutating input), adapter delegation, -// error propagation, and the current nil-config behavior. Benchmarks provide a baseline +// error propagation, and current nil-config behavior. The benchmark provides a baseline // for detecting performance regressions. // mustEqual keeps assertions concise with consistent failure messages. @@ -102,15 +104,48 @@ func TestConvertConfig_DoesNotMutateInput(t *testing.T) { orig.AdditionalTools[k] = v } - _ = ConvertConfig(in) + out := ConvertConfig(in) mustDeepEqual(t, in, &orig, "input should remain unchanged") + + if out == nil || out.SecurityConfig == nil { + t.Fatalf("expected non-nil output and SecurityConfig, got %#v", out) + } + + mustEqual(t, out.Timeout, in.Timeout, "Timeout") + mustEqual(t, out.Transport, in.Transport, "Transport") + mustEqual(t, out.Host, in.Host, "Host") + mustEqual(t, out.Port, in.Port, "Port") + mustEqual(t, out.AccessLevel, in.AccessLevel, "AccessLevel") + mustEqual(t, out.OTLPEndpoint, in.OTLPEndpoint, "OTLPEndpoint") + mustEqual(t, out.AllowNamespaces, in.AllowNamespaces, "AllowNamespaces") + mustDeepEqual(t, out.AdditionalTools, in.AdditionalTools, "AdditionalTools") + mustEqual(t, out.SecurityConfig.AccessLevel, k8ssecurity.AccessLevel(in.AccessLevel), "SecurityConfig.AccessLevel") } func TestConvertConfig_ZeroValueCfg(t *testing.T) { t.Parallel() - // Zero-value config should be accepted (no panic). + + // Document current behavior when callers pass an uninitialized config. in := &config.ConfigData{} - _ = ConvertConfig(in) + orig := *in + + out := ConvertConfig(in) + + mustDeepEqual(t, in, &orig, "input unchanged") + + if out == nil || out.SecurityConfig == nil { + t.Fatalf("non-nil out and SecurityConfig required, got %#v", out) + } + + mustEqual(t, out.Timeout, 0, "Timeout") + mustEqual(t, out.Transport, "", "Transport") + mustEqual(t, out.Host, "", "Host") + mustEqual(t, out.Port, 0, "Port") + mustEqual(t, out.AccessLevel, "", "AccessLevel") + mustEqual(t, out.OTLPEndpoint, "", "OTLPEndpoint") + mustEqual(t, out.AllowNamespaces, "", "AllowNamespaces") + mustDeepEqual(t, out.AdditionalTools, map[string]bool(nil), "AdditionalTools") + mustEqual(t, out.SecurityConfig.AccessLevel, k8ssecurity.AccessLevel(""), "SecurityConfig.AccessLevel") } func TestExecutorAdapter_DelegatesAndForwards(t *testing.T) { @@ -134,12 +169,14 @@ func TestExecutorAdapter_DelegatesAndForwards(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } + mustEqual(t, got, "ok", "adapter output") mustDeepEqual(t, fe.lastParams, params, "params forwarded") if fe.lastCfg == nil || fe.lastCfg.SecurityConfig == nil { t.Fatalf("expected non-nil converted cfg + SecurityConfig, got %#v", fe.lastCfg) } + mustEqual(t, fe.lastCfg.Port, inCfg.Port, "Port") mustEqual(t, fe.lastCfg.AccessLevel, inCfg.AccessLevel, "AccessLevel") mustDeepEqual(t, fe.lastCfg.AdditionalTools, inCfg.AdditionalTools, "AdditionalTools") @@ -190,6 +227,6 @@ func BenchmarkConvertConfig(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - _ = ConvertConfig(in) + benchOut = ConvertConfig(in) } } From 89aabe55f9cf089d673555be3335701b13a2f684 Mon Sep 17 00:00:00 2001 From: Qasim Sarfraz Date: Thu, 21 Aug 2025 21:48:52 +0200 Subject: [PATCH 24/27] Chore/inspektor_gadget_observability: Include addtional context/examples Signed-off-by: Qasim Sarfraz --- .../components/inspektorgadget/handlers.go | 4 +-- .../components/inspektorgadget/registry.go | 25 +++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/internal/components/inspektorgadget/handlers.go b/internal/components/inspektorgadget/handlers.go index bbedf53..3107f9d 100644 --- a/internal/components/inspektorgadget/handlers.go +++ b/internal/components/inspektorgadget/handlers.go @@ -93,7 +93,7 @@ func handleRunAction(ctx context.Context, mgr GadgetManager, actionParams map[st gadget, ok := getGadgetByName(gadgetName) if !ok { - return "", fmt.Errorf("invalid or unsupported gadget name: %s", gadgetName) + return "", fmt.Errorf("invalid or unsupported gadget name: %s: expected one of %v", gadgetName, getGadgetNames()) } duration, ok := actionParams["duration"].(float64) @@ -132,7 +132,7 @@ func handleStartAction(ctx context.Context, mgr GadgetManager, actionParams map[ gadget, ok := getGadgetByName(gadgetName) if !ok { - return "", fmt.Errorf("invalid or unsupported gadget name: %s", gadgetName) + return "", fmt.Errorf("invalid or unsupported gadget name: %s: expected one of %v", gadgetName, getGadgetNames()) } // TODO: Use GetGadgetInfo to validate gadgetParams to ensure compatibility with different gadget versions diff --git a/internal/components/inspektorgadget/registry.go b/internal/components/inspektorgadget/registry.go index ea02b4d..c9f3bb2 100644 --- a/internal/components/inspektorgadget/registry.go +++ b/internal/components/inspektorgadget/registry.go @@ -1,6 +1,9 @@ package inspektorgadget -import "github.com/mark3labs/mcp-go/mcp" +import ( + "github.com/mark3labs/mcp-go/mcp" + "strings" +) // ============================================================================= // Inspektor Gadget related Tool Registrations @@ -10,15 +13,27 @@ import "github.com/mark3labs/mcp-go/mcp" func RegisterInspektorGadgetTool() mcp.Tool { return mcp.NewTool( "inspektor_gadget_observability", - mcp.WithDescription("Real-time observability tool for Azure Kubernetes Service (AKS) clusters, allowing users to manage gadgets for monitoring and debugging"), + mcp.WithDescription("Real-time observability tool for Azure Kubernetes Service (AKS) clusters, allowing users to manage gadgets for monitoring and debugging\n\n"+ + "Apart from 'action' param:\n\n"+ + "It supports 'action_params' (type=object) to specify parameters for the action."+ + "Available params are: "+ + "gadget_name, duration, gadget_id, chart_version. "+ + "Available Gadget names are: "+strings.Join(getGadgetNames(), ", ")+". "+ + "Example: "+ + "{'action': 'run', 'action_params': {'gadget_name': 'observe_dns', 'duration': 10}}\n\n"+ + "It supports 'filter_params' (type=object) to filter the data captured by the gadget. "+ + "Available params are: "+ + "namespace, pod, container, selector,"+strings.Join(getGadgetParamsKeys(), ", ")+". "+ + "Example: "+ + "{'action': 'run', 'filter_params': {'namespace': 'default', 'selector': 'app=myapp', 'observe_dns.unsuccessful_only': true}}"), mcp.WithString("action", mcp.Required(), mcp.Description("Action to perform on the gadget: "+ runAction+" to run a gadget for a specific duration, "+ startAction+" to start a gadget for continuous observation, "+ - stopAction+" to stop a running gadget, "+ - getResultsAction+" to retrieve results of a gadget run (only available before stopping the gadget), "+ - listGadgetsAction+" to list all running gadgets"+ + stopAction+" to stop a running gadget using gadget_id, "+ + getResultsAction+" to retrieve results of a gadget run using gadget_id (only available before stopping the gadget), "+ + listGadgetsAction+" to list all running (not available) gadgets"+ deployAction+" to deploy Inspektor Gadget, "+ undeployAction+" to undeploy Inspektor Gadget"+ upgradeAction+" to upgrade Inspektor Gadget, "+ From 1f94983fee2b3ab898723217723a94d085924bf4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 22 Aug 2025 03:07:07 +0000 Subject: [PATCH 25/27] chore(deps): bump github.com/go-viper/mapstructure/v2 Bumps [github.com/go-viper/mapstructure/v2](https://github.com/go-viper/mapstructure) from 2.3.0 to 2.4.0. - [Release notes](https://github.com/go-viper/mapstructure/releases) - [Changelog](https://github.com/go-viper/mapstructure/blob/main/CHANGELOG.md) - [Commits](https://github.com/go-viper/mapstructure/compare/v2.3.0...v2.4.0) --- updated-dependencies: - dependency-name: github.com/go-viper/mapstructure/v2 dependency-version: 2.4.0 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index decc3c5..4a199e1 100644 --- a/go.mod +++ b/go.mod @@ -66,7 +66,7 @@ require ( github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/swag v0.23.1 // indirect - github.com/go-viper/mapstructure/v2 v2.3.0 // indirect + github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/gobwas/glob v0.2.3 // indirect github.com/gofrs/uuid v3.3.0+incompatible // indirect github.com/gogo/protobuf v1.3.2 // indirect diff --git a/go.sum b/go.sum index 52d8f7b..0c11c60 100644 --- a/go.sum +++ b/go.sum @@ -151,8 +151,8 @@ github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpv github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/go-viper/mapstructure/v2 v2.3.0 h1:27XbWsHIqhbdR5TIC911OfYvgSaW93HM+dX7970Q7jk= -github.com/go-viper/mapstructure/v2 v2.3.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= +github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/gofrs/uuid v3.3.0+incompatible h1:8K4tyRfvU1CYPgJsveYFQMhpFd/wXNM7iK6rR7UHz84= From 156617c916145eb4f3b40036c3abe75de501fd48 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 22 Aug 2025 08:27:20 +0000 Subject: [PATCH 26/27] chore(deps): bump github.com/Azure/azure-sdk-for-go/sdk/azcore Bumps the all-gomod group with 1 update: [github.com/Azure/azure-sdk-for-go/sdk/azcore](https://github.com/Azure/azure-sdk-for-go). Updates `github.com/Azure/azure-sdk-for-go/sdk/azcore` from 1.18.2 to 1.19.0 - [Release notes](https://github.com/Azure/azure-sdk-for-go/releases) - [Changelog](https://github.com/Azure/azure-sdk-for-go/blob/main/documentation/go-mgmt-sdk-release-guideline.md) - [Commits](https://github.com/Azure/azure-sdk-for-go/compare/sdk/azcore/v1.18.2...sdk/azcore/v1.19.0) --- updated-dependencies: - dependency-name: github.com/Azure/azure-sdk-for-go/sdk/azcore dependency-version: 1.19.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: all-gomod ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 4a199e1..38e1fad 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.24.0 toolchain go1.24.5 require ( - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.2 + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.0 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.11.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4 v4.2.1 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v2 v2.4.0 diff --git a/go.sum b/go.sum index 0c11c60..c9d4d08 100644 --- a/go.sum +++ b/go.sum @@ -6,8 +6,8 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.2 h1:Hr5FTipp7SL07o2FvoVOX9HRiRH3CR3Mj8pxqCcdD5A= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.2/go.mod h1:QyVsSSN64v5TGltphKLQ2sQxe4OBQg0J1eKRcVBnfgE= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.0 h1:ci6Yd6nysBRLEodoziB6ah1+YOzZbZk+NYneoA6q+6E= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.0/go.mod h1:QyVsSSN64v5TGltphKLQ2sQxe4OBQg0J1eKRcVBnfgE= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.11.0 h1:MhRfI58HblXzCtWEZCO0feHs8LweePB3s90r7WaR1KU= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.11.0/go.mod h1:okZ+ZURbArNdlJ+ptXoyHNuOETzOl1Oww19rm8I2WLA= github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY= From 75a0698d69561fee5c80adece5e17903ad068d90 Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Mon, 25 Aug 2025 09:47:45 +0800 Subject: [PATCH 27/27] docs: add comprehensive contribution guides - Add detailed CONTRIBUTING.md with development setup instructions - Include local testing procedures with GitHub Copilot and Claude Desktop - Document component-based architecture for adding new MCP tools - Add testing guidelines for both CLI and SDK-based tools - Update README.md with prominent link to contribution guide - Provide quick start guide for new contributors --- CONTRIBUTING.md | 543 ++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 34 ++- 2 files changed, 567 insertions(+), 10 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..42fc3b6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,543 @@ +# Contributing to AKS-MCP + +Thank you for your interest in contributing to AKS-MCP! This guide will help you get started with contributing to the project. + +## Code of Conduct + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +## Getting Started + +### Prerequisites + +Before contributing, ensure you have the following installed: + +- **Go** ≥ `1.24.x` - [Download Go](https://golang.org/dl/) +- **Azure CLI** - [Install Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) +- **Git** - [Install Git](https://git-scm.com/downloads) +- **GNU Make** `4.x` or later +- **Docker** _(optional, for container builds and testing)_ +- **Node.js** _(optional, for MCP inspector)_ + +### Fork and Clone + +1. Fork the repository on GitHub +2. Clone your fork locally: + ```bash + git clone https://github.com/YOUR-USERNAME/aks-mcp.git + cd aks-mcp + ``` +3. Add the upstream repository: + ```bash + git remote add upstream https://github.com/Azure/aks-mcp.git + ``` + +## Development Environment Setup + +### 1. Authenticate with Azure + +```bash +# Login to Azure CLI +az login + +# Set your preferred subscription (optional) +az account set --subscription "Your Subscription Name or ID" +``` + +### 2. Build the Project + +```bash +# Build the binary +make build +``` + +### 3. Verify Installation + +```bash +# Run the binary to see help +./aks-mcp --help + +# Or using make +make run +``` + +## Running AKS-MCP Locally + +### Basic Local Testing + +1. **Start the MCP server locally:** + + ```bash + # Run with default settings (readonly access) + ./aks-mcp --transport stdio + + # Run with elevated permissions for testing + ./aks-mcp --transport stdio --access-level readwrite + + # Run with admin permissions (full access) + ./aks-mcp --transport stdio --access-level admin + ``` + +2. **Run with HTTP transport for debugging:** + + ```bash + # Start HTTP server on localhost:8000 + ./aks-mcp --transport sse --host 127.0.0.1 --port 8000 + + # Or streamable HTTP + ./aks-mcp --transport streamable-http --host 127.0.0.1 --port 8000 + ``` + +3. **Use MCP Inspector for debugging:** + ```bash + npx @modelcontextprotocol/inspector ./aks-mcp --access-level=readwrite + ``` + +## Testing with AI Agents + +### GitHub Copilot in VS Code + +1. **Create MCP configuration for your workspace:** + + ```bash + mkdir -p .vscode + ``` + +2. **Create `.vscode/mcp.json`:** + + ```json + { + "servers": { + "aks-mcp-dev": { + "type": "stdio", + "command": "./aks-mcp", + "args": [ + "--transport", + "stdio", + "--access-level", + "readwrite", + "--verbose" + ] + } + } + } + ``` + +3. **Restart VS Code and test:** + - Open GitHub Copilot Chat + - Switch to Agent mode + - Verify the tools are loaded by clicking **Tools** button + - Try: _"List all my AKS clusters"_ + +### Claude Desktop + +1. **Create MCP configuration for Claude Desktop:** + + ```json + { + "mcpServers": { + "aks-mcp-dev": { + "command": "/absolute/path/to/your/aks-mcp/aks-mcp", + "args": ["--transport", "stdio", "--access-level", "readwrite"] + } + } + } + ``` + +2. **Test with Claude:** + - Start a conversation + - Ask: _"What AKS clusters do I have?"_ + - Verify the MCP tools are working + +### Docker Testing + +Test the containerized version: + +```bash +# Build Docker image +make docker-build + +# Test with Docker (with streamable HTTP mode) +make docker-run +``` + +## Making Changes + +### Branching Strategy + +1. **Create a feature branch:** + + ```bash + git checkout -b feature/your-feature-name + # or + git checkout -b fix/issue-description + ``` + +2. **Keep your branch updated:** + ```bash + git fetch upstream + git rebase upstream/main + ``` + +### Adding New Features + +The project uses a component-based architecture where each Azure service has its own component. Here's how to add new MCP tools: + +#### 1. **Choose the Right Component Type:** + +- **Azure CLI-based tools** (like `az aks`, `az fleet`): Use `CommandExecutor` interface +- **Azure SDK-based tools** (like monitoring, network resources): Use `ResourceHandler` interface + +#### 2. **For Azure CLI-based Tools:** + +1. **Create executor in the appropriate component directory:** + + ```go + // internal/components/yourcomponent/executor.go + type YourExecutor struct{} + + func NewYourExecutor() *YourExecutor { + return &YourExecutor{} + } + + func (e *YourExecutor) Execute(params map[string]interface{}, cfg *config.ConfigData) (string, error) { + // Your CLI command logic here + return result, nil + } + ``` + +2. **Create tool registration:** + + ```go + // internal/components/yourcomponent/registry.go + func RegisterYourTool(cfg *config.ConfigData) mcp.Tool { + return mcp.NewTool( + "your_tool_name", + mcp.WithDescription("Description of your tool"), + mcp.WithString("operation", mcp.Description("Operation to perform"), mcp.Required()), + // Add other parameters as needed + ) + } + ``` + +3. **Register in server.go:** + ```go + // internal/server/server.go - Add to appropriate register function + func (s *Service) registerYourComponent() { + log.Println("Registering your tool: your_tool_name") + yourTool := yourcomponent.RegisterYourTool(s.cfg) + s.mcpServer.AddTool(yourTool, tools.CreateToolHandler(yourcomponent.NewYourExecutor(), s.cfg)) + } + ``` + +#### 3. **For Azure SDK-based Tools:** + +1. **Create handler:** + + ```go + // internal/components/yourcomponent/handlers.go + func GetYourHandler(azClient *azureclient.AzureClient, cfg *config.ConfigData) tools.ResourceHandler { + return tools.ResourceHandlerFunc(func(params map[string]interface{}, _ *config.ConfigData) (string, error) { + // Your Azure SDK logic here + return result, nil + }) + } + ``` + +2. **Create tool registration:** (same as above) + +3. **Register in server.go:** + ```go + // internal/server/server.go - Add to appropriate register function + func (s *Service) registerYourComponent() { + log.Println("Registering your tool: your_tool_name") + yourTool := yourcomponent.RegisterYourTool() + s.mcpServer.AddTool(yourTool, tools.CreateResourceHandler(yourcomponent.GetYourHandler(s.azClient, s.cfg), s.cfg)) + } + ``` + +#### 4. **Component Structure:** + +Each component should follow this structure: + +``` +internal/components/yourcomponent/ +├── registry.go # Tool definitions and parameters +├── handlers.go # Business logic handlers +├── executor.go # CLI command executors (if needed) +├── types.go # Data structures (if needed) +├── helpers.go # Helper functions (if needed) +└── *_test.go # Unit tests +``` + +#### 5. **Interface Definitions:** + +```go +// For CLI-based tools +type CommandExecutor interface { + Execute(params map[string]interface{}, cfg *config.ConfigData) (string, error) +} + +// For SDK-based tools +type ResourceHandler interface { + Handle(params map[string]interface{}, cfg *config.ConfigData) (string, error) +} +``` + +#### 6. **Access Level Validation:** + +Tools automatically respect access levels through the configuration. For custom validation: + +```go +import "github.com/Azure/aks-mcp/internal/security" + +// In your handler/executor +if !security.ValidateAccessLevel(cfg.AccessLevel, "readwrite") { + return "", fmt.Errorf("this operation requires readwrite or admin access") +} +``` + +#### 7. **Register Your Component:** + +Add your component registration to the appropriate function in `server.go`: + +- `registerAzureComponents()` for Azure-specific tools +- `registerKubernetesComponents()` for Kubernetes-related tools + +#### 8. **Testing Your Component:** + +Create comprehensive tests: + +```go +// internal/components/yourcomponent/handlers_test.go +func TestYourHandler(t *testing.T) { + // Test your handler implementation +} + +// internal/components/yourcomponent/registry_test.go +func TestRegisterYourTool(t *testing.T) { + // Test tool registration +} +``` + +## Testing + +### Running Tests + +```bash +# Run all tests +make test + +# Run tests with race detection +make test-race + +# Run tests with coverage +make test-coverage + +# Run specific package tests +go test -v ./internal/server/... + +# Run tests in verbose mode +make test-verbose +``` + +### Code Quality Checks + +```bash +# Run all quality checks +make check + +# Individual checks +make fmt # Format code +make vet # Run go vet +make lint # Run golangci-lint + +# Fix linting issues automatically +make lint-fix +``` + +## Submitting Changes + +### Before Submitting + +1. **Ensure code quality:** + + ```bash + make check + ``` + +2. **Update documentation if needed:** + + - Update README.md for user-facing changes + - Add inline code comments for complex logic + +3. **Test thoroughly:** + ```bash + make test-coverage + ``` + +### Pull Request Process + +1. **Commit your changes:** + + ```bash + git add . + git commit -m "feat: add new AKS monitoring tool" + ``` + +2. **Push to your fork:** + + ```bash + git push origin feature/your-feature-name + ``` + +3. **Create Pull Request:** + - Use clear, descriptive titles + - Include detailed description of changes + - Reference related issues: "Fixes #123" + - Ensure CI checks pass + +### Commit Message Guidelines + +Use [Conventional Commits](https://www.conventionalcommits.org/) format: + +``` +[(optional scope)]: + +[optional body] + +[optional footer(s)] +``` + +**Types:** + +- `feat`: New features +- `fix`: Bug fixes +- `docs`: Documentation updates +- `style`: Code style changes (no logic changes) +- `refactor`: Code refactoring +- `test`: Adding or updating tests +- `chore`: Maintenance tasks + +**Examples:** + +- `feat(azure): add support for AKS diagnostic settings` +- `fix(security): validate access level for admin operations` +- `docs: update contribution guidelines` + +## Release Process + +### Version Management + +Versions are managed using Git tags following [Semantic Versioning](https://semver.org/): + +- `MAJOR.MINOR.PATCH` (e.g., `1.0.0`) +- Pre-release: `1.0.0-alpha.1`, `1.0.0-beta.1`, `1.0.0-rc.1` + +### Creating Releases + +1. **Prepare release branch:** + + ```bash + git checkout -b release/v1.0.0 + ``` + +2. **Update version-related files:** + + - Ensure changelog is updated + - Update version references in docs + +3. **Test release build:** + + ```bash + make release + make checksums + ``` + +4. **Create release PR and merge** + +5. **Tag the release:** + ```bash + git tag -a v1.0.0 -m "Release v1.0.0" + git push upstream v1.0.0 + ``` + +### Automated Release + +The project uses GitHub Actions for automated releases: + +- **SLSA3 compliant** release artifacts +- **Multi-platform** binaries (Linux, macOS, Windows) +- **Docker images** published to GitHub Container Registry +- **Checksums** and signatures for verification + +## Development Guidelines + +### Code Style + +- Follow standard Go conventions +- Use `gofmt` for code formatting +- Run `golangci-lint` for linting +- Write clear, self-documenting code +- Add comments for complex logic + +### Error Handling + +```go +// Good: Specific error messages +if err != nil { + return fmt.Errorf("failed to list AKS clusters in resource group %s: %w", resourceGroup, err) +} + +// Avoid: Generic error messages +if err != nil { + return err +} +``` + +### Logging + +```go +// Use structured logging +log.Info("Processing AKS operation", + "operation", operation, + "cluster", clusterName, + "resourceGroup", resourceGroup) +``` + +### Testing Guidelines + +- Write unit tests for all new functionality +- Use table-driven tests for multiple scenarios +- Mock external dependencies +- Test error conditions +- Aim for >80% code coverage + +### Security Best Practices + +- Validate all user inputs +- Respect access level permissions +- Never log sensitive information +- Use secure defaults +- Follow principle of least privilege + +## Getting Help + +### Documentation + +- [README.md](README.md) - Project overview and installation +- [Documentation](docs/) - Detailed technical documentation + +### Communication + +- **GitHub Issues**: Report bugs or request features +- **GitHub Discussions**: Ask questions or discuss ideas +- **Security Issues**: Report [security advisory](https://github.com/Azure/aks-mcp/security/advisories) + +## License + +By contributing to AKS-MCP, you agree that your contributions will be licensed under the project's [MIT License](LICENSE). + +--- + +Thank you for contributing to AKS-MCP! Your contributions help make Azure Kubernetes Service more accessible through AI assistants. diff --git a/README.md b/README.md index af1667e..2d48189 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# AKS-MCP +# AKS-MCP The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters. It serves as a bridge @@ -700,17 +700,31 @@ To opt out, set the environment variable `AKS_MCP_COLLECT_TELEMETRY=false`. ## Contributing -This project welcomes contributions and suggestions. Most contributions require you to agree to a -Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. +We welcome contributions to AKS-MCP! Whether you're fixing bugs, adding features, or improving documentation, your help makes this project better. -When you submit a pull request, a CLA bot will automatically determine whether you need to provide -a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. +**📖 [Read our detailed Contributing Guide](CONTRIBUTING.md)** for comprehensive information on: -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +- Setting up your development environment +- Running AKS-MCP locally and testing with AI agents +- Understanding the codebase architecture +- Adding new MCP tools and features +- Testing guidelines and best practices +- Submitting pull requests + +### Quick Start for Contributors + +1. **Prerequisites**: Go ≥ 1.24.x, Azure CLI, Git +2. **Setup**: Fork the repo, clone locally, run `make deps && make build` +3. **Test**: Run `make test` and `make check` +4. **Develop**: Follow the component-based architecture in [CONTRIBUTING.md](CONTRIBUTING.md) + +### Contributor License Agreement + +Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. + +When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. ## Trademarks