diff --git a/.slsa-goreleaser/darwin-amd64.yml b/.slsa-goreleaser/darwin-amd64.yml index 1f4555f..a60fd74 100644 --- a/.slsa-goreleaser/darwin-amd64.yml +++ b/.slsa-goreleaser/darwin-amd64.yml @@ -20,6 +20,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/darwin-arm64.yml b/.slsa-goreleaser/darwin-arm64.yml index 4c3b0d9..cb655d8 100644 --- a/.slsa-goreleaser/darwin-arm64.yml +++ b/.slsa-goreleaser/darwin-arm64.yml @@ -20,6 +20,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. 
(default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/linux-amd64.yml b/.slsa-goreleaser/linux-amd64.yml index 7006d4b..2169bb5 100644 --- a/.slsa-goreleaser/linux-amd64.yml +++ b/.slsa-goreleaser/linux-amd64.yml @@ -20,6 +20,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/linux-arm64.yml b/.slsa-goreleaser/linux-arm64.yml index 95bb339..af3871e 100644 --- a/.slsa-goreleaser/linux-arm64.yml +++ b/.slsa-goreleaser/linux-arm64.yml @@ -20,6 +20,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. 
(default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/windows-amd64.yml b/.slsa-goreleaser/windows-amd64.yml index 6bfc47a..58ed7d2 100644 --- a/.slsa-goreleaser/windows-amd64.yml +++ b/.slsa-goreleaser/windows-amd64.yml @@ -19,6 +19,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/.slsa-goreleaser/windows-arm64.yml b/.slsa-goreleaser/windows-arm64.yml index 29d64f2..3868d9a 100644 --- a/.slsa-goreleaser/windows-arm64.yml +++ b/.slsa-goreleaser/windows-arm64.yml @@ -19,6 +19,13 @@ flags: - -trimpath - -tags=withoutebpf # need by inspektor-gadget +# (Optional) ldflags for version injection +ldflags: + - "-X github.com/Azure/aks-mcp/internal/version.GitVersion={{ .Env.VERSION }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitCommit={{ .Env.COMMIT }}" + - "-X github.com/Azure/aks-mcp/internal/version.GitTreeState={{ .Env.TREE_STATE }}" + - "-X github.com/Azure/aks-mcp/internal/version.BuildMetadata={{ .Env.COMMIT_DATE }}" + # (Optional) Working directory. (default: root of the project) # dir: ./relative/path/to/dir diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..42fc3b6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,543 @@ +# Contributing to AKS-MCP + +Thank you for your interest in contributing to AKS-MCP! This guide will help you get started with contributing to the project. 
+ +## Code of Conduct + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +## Getting Started + +### Prerequisites + +Before contributing, ensure you have the following installed: + +- **Go** ≥ `1.24.x` - [Download Go](https://golang.org/dl/) +- **Azure CLI** - [Install Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) +- **Git** - [Install Git](https://git-scm.com/downloads) +- **GNU Make** `4.x` or later +- **Docker** _(optional, for container builds and testing)_ +- **Node.js** _(optional, for MCP inspector)_ + +### Fork and Clone + +1. Fork the repository on GitHub +2. Clone your fork locally: + ```bash + git clone https://github.com/YOUR-USERNAME/aks-mcp.git + cd aks-mcp + ``` +3. Add the upstream repository: + ```bash + git remote add upstream https://github.com/Azure/aks-mcp.git + ``` + +## Development Environment Setup + +### 1. Authenticate with Azure + +```bash +# Login to Azure CLI +az login + +# Set your preferred subscription (optional) +az account set --subscription "Your Subscription Name or ID" +``` + +### 2. Build the Project + +```bash +# Build the binary +make build +``` + +### 3. Verify Installation + +```bash +# Run the binary to see help +./aks-mcp --help + +# Or using make +make run +``` + +## Running AKS-MCP Locally + +### Basic Local Testing + +1. **Start the MCP server locally:** + + ```bash + # Run with default settings (readonly access) + ./aks-mcp --transport stdio + + # Run with elevated permissions for testing + ./aks-mcp --transport stdio --access-level readwrite + + # Run with admin permissions (full access) + ./aks-mcp --transport stdio --access-level admin + ``` + +2. 
**Run with HTTP transport for debugging:** + + ```bash + # Start HTTP server on localhost:8000 + ./aks-mcp --transport sse --host 127.0.0.1 --port 8000 + + # Or streamable HTTP + ./aks-mcp --transport streamable-http --host 127.0.0.1 --port 8000 + ``` + +3. **Use MCP Inspector for debugging:** + ```bash + npx @modelcontextprotocol/inspector ./aks-mcp --access-level=readwrite + ``` + +## Testing with AI Agents + +### GitHub Copilot in VS Code + +1. **Create MCP configuration for your workspace:** + + ```bash + mkdir -p .vscode + ``` + +2. **Create `.vscode/mcp.json`:** + + ```json + { + "servers": { + "aks-mcp-dev": { + "type": "stdio", + "command": "./aks-mcp", + "args": [ + "--transport", + "stdio", + "--access-level", + "readwrite", + "--verbose" + ] + } + } + } + ``` + +3. **Restart VS Code and test:** + - Open GitHub Copilot Chat + - Switch to Agent mode + - Verify the tools are loaded by clicking **Tools** button + - Try: _"List all my AKS clusters"_ + +### Claude Desktop + +1. **Create MCP configuration for Claude Desktop:** + + ```json + { + "mcpServers": { + "aks-mcp-dev": { + "command": "/absolute/path/to/your/aks-mcp/aks-mcp", + "args": ["--transport", "stdio", "--access-level", "readwrite"] + } + } + } + ``` + +2. **Test with Claude:** + - Start a conversation + - Ask: _"What AKS clusters do I have?"_ + - Verify the MCP tools are working + +### Docker Testing + +Test the containerized version: + +```bash +# Build Docker image +make docker-build + +# Test with Docker (with streamable HTTP mode) +make docker-run +``` + +## Making Changes + +### Branching Strategy + +1. **Create a feature branch:** + + ```bash + git checkout -b feature/your-feature-name + # or + git checkout -b fix/issue-description + ``` + +2. **Keep your branch updated:** + ```bash + git fetch upstream + git rebase upstream/main + ``` + +### Adding New Features + +The project uses a component-based architecture where each Azure service has its own component. 
Here's how to add new MCP tools: + +#### 1. **Choose the Right Component Type:** + +- **Azure CLI-based tools** (like `az aks`, `az fleet`): Use `CommandExecutor` interface +- **Azure SDK-based tools** (like monitoring, network resources): Use `ResourceHandler` interface + +#### 2. **For Azure CLI-based Tools:** + +1. **Create executor in the appropriate component directory:** + + ```go + // internal/components/yourcomponent/executor.go + type YourExecutor struct{} + + func NewYourExecutor() *YourExecutor { + return &YourExecutor{} + } + + func (e *YourExecutor) Execute(params map[string]interface{}, cfg *config.ConfigData) (string, error) { + // Your CLI command logic here + return result, nil + } + ``` + +2. **Create tool registration:** + + ```go + // internal/components/yourcomponent/registry.go + func RegisterYourTool(cfg *config.ConfigData) mcp.Tool { + return mcp.NewTool( + "your_tool_name", + mcp.WithDescription("Description of your tool"), + mcp.WithString("operation", mcp.Description("Operation to perform"), mcp.Required()), + // Add other parameters as needed + ) + } + ``` + +3. **Register in server.go:** + ```go + // internal/server/server.go - Add to appropriate register function + func (s *Service) registerYourComponent() { + log.Println("Registering your tool: your_tool_name") + yourTool := yourcomponent.RegisterYourTool(s.cfg) + s.mcpServer.AddTool(yourTool, tools.CreateToolHandler(yourcomponent.NewYourExecutor(), s.cfg)) + } + ``` + +#### 3. **For Azure SDK-based Tools:** + +1. **Create handler:** + + ```go + // internal/components/yourcomponent/handlers.go + func GetYourHandler(azClient *azureclient.AzureClient, cfg *config.ConfigData) tools.ResourceHandler { + return tools.ResourceHandlerFunc(func(params map[string]interface{}, _ *config.ConfigData) (string, error) { + // Your Azure SDK logic here + return result, nil + }) + } + ``` + +2. **Create tool registration:** (same as above) + +3. 
**Register in server.go:** + ```go + // internal/server/server.go - Add to appropriate register function + func (s *Service) registerYourComponent() { + log.Println("Registering your tool: your_tool_name") + yourTool := yourcomponent.RegisterYourTool() + s.mcpServer.AddTool(yourTool, tools.CreateResourceHandler(yourcomponent.GetYourHandler(s.azClient, s.cfg), s.cfg)) + } + ``` + +#### 4. **Component Structure:** + +Each component should follow this structure: + +``` +internal/components/yourcomponent/ +├── registry.go # Tool definitions and parameters +├── handlers.go # Business logic handlers +├── executor.go # CLI command executors (if needed) +├── types.go # Data structures (if needed) +├── helpers.go # Helper functions (if needed) +└── *_test.go # Unit tests +``` + +#### 5. **Interface Definitions:** + +```go +// For CLI-based tools +type CommandExecutor interface { + Execute(params map[string]interface{}, cfg *config.ConfigData) (string, error) +} + +// For SDK-based tools +type ResourceHandler interface { + Handle(params map[string]interface{}, cfg *config.ConfigData) (string, error) +} +``` + +#### 6. **Access Level Validation:** + +Tools automatically respect access levels through the configuration. For custom validation: + +```go +import "github.com/Azure/aks-mcp/internal/security" + +// In your handler/executor +if !security.ValidateAccessLevel(cfg.AccessLevel, "readwrite") { + return "", fmt.Errorf("this operation requires readwrite or admin access") +} +``` + +#### 7. **Register Your Component:** + +Add your component registration to the appropriate function in `server.go`: + +- `registerAzureComponents()` for Azure-specific tools +- `registerKubernetesComponents()` for Kubernetes-related tools + +#### 8. 
**Testing Your Component:** + +Create comprehensive tests: + +```go +// internal/components/yourcomponent/handlers_test.go +func TestYourHandler(t *testing.T) { + // Test your handler implementation +} + +// internal/components/yourcomponent/registry_test.go +func TestRegisterYourTool(t *testing.T) { + // Test tool registration +} +``` + +## Testing + +### Running Tests + +```bash +# Run all tests +make test + +# Run tests with race detection +make test-race + +# Run tests with coverage +make test-coverage + +# Run specific package tests +go test -v ./internal/server/... + +# Run tests in verbose mode +make test-verbose +``` + +### Code Quality Checks + +```bash +# Run all quality checks +make check + +# Individual checks +make fmt # Format code +make vet # Run go vet +make lint # Run golangci-lint + +# Fix linting issues automatically +make lint-fix +``` + +## Submitting Changes + +### Before Submitting + +1. **Ensure code quality:** + + ```bash + make check + ``` + +2. **Update documentation if needed:** + + - Update README.md for user-facing changes + - Add inline code comments for complex logic + +3. **Test thoroughly:** + ```bash + make test-coverage + ``` + +### Pull Request Process + +1. **Commit your changes:** + + ```bash + git add . + git commit -m "feat: add new AKS monitoring tool" + ``` + +2. **Push to your fork:** + + ```bash + git push origin feature/your-feature-name + ``` + +3. 
**Create Pull Request:** + - Use clear, descriptive titles + - Include detailed description of changes + - Reference related issues: "Fixes #123" + - Ensure CI checks pass + +### Commit Message Guidelines + +Use [Conventional Commits](https://www.conventionalcommits.org/) format: + +``` +<type>[optional scope]: <description> + +[optional body] + +[optional footer(s)] +``` + +**Types:** + +- `feat`: New features +- `fix`: Bug fixes +- `docs`: Documentation updates +- `style`: Code style changes (no logic changes) +- `refactor`: Code refactoring +- `test`: Adding or updating tests +- `chore`: Maintenance tasks + +**Examples:** + +- `feat(azure): add support for AKS diagnostic settings` +- `fix(security): validate access level for admin operations` +- `docs: update contribution guidelines` + +## Release Process + +### Version Management + +Versions are managed using Git tags following [Semantic Versioning](https://semver.org/): + +- `MAJOR.MINOR.PATCH` (e.g., `1.0.0`) +- Pre-release: `1.0.0-alpha.1`, `1.0.0-beta.1`, `1.0.0-rc.1` + +### Creating Releases + +1. **Prepare release branch:** + + ```bash + git checkout -b release/v1.0.0 + ``` + +2. **Update version-related files:** + + - Ensure changelog is updated + - Update version references in docs + +3. **Test release build:** + + ```bash + make release + make checksums + ``` + +4. **Create release PR and merge** + +5. 
**Tag the release:** + ```bash + git tag -a v1.0.0 -m "Release v1.0.0" + git push upstream v1.0.0 + ``` + +### Automated Release + +The project uses GitHub Actions for automated releases: + +- **SLSA3 compliant** release artifacts +- **Multi-platform** binaries (Linux, macOS, Windows) +- **Docker images** published to GitHub Container Registry +- **Checksums** and signatures for verification + +## Development Guidelines + +### Code Style + +- Follow standard Go conventions +- Use `gofmt` for code formatting +- Run `golangci-lint` for linting +- Write clear, self-documenting code +- Add comments for complex logic + +### Error Handling + +```go +// Good: Specific error messages +if err != nil { + return fmt.Errorf("failed to list AKS clusters in resource group %s: %w", resourceGroup, err) +} + +// Avoid: Generic error messages +if err != nil { + return err +} +``` + +### Logging + +```go +// Use structured logging +log.Info("Processing AKS operation", + "operation", operation, + "cluster", clusterName, + "resourceGroup", resourceGroup) +``` + +### Testing Guidelines + +- Write unit tests for all new functionality +- Use table-driven tests for multiple scenarios +- Mock external dependencies +- Test error conditions +- Aim for >80% code coverage + +### Security Best Practices + +- Validate all user inputs +- Respect access level permissions +- Never log sensitive information +- Use secure defaults +- Follow principle of least privilege + +## Getting Help + +### Documentation + +- [README.md](README.md) - Project overview and installation +- [Documentation](docs/) - Detailed technical documentation + +### Communication + +- **GitHub Issues**: Report bugs or request features +- **GitHub Discussions**: Ask questions or discuss ideas +- **Security Issues**: Report [security advisory](https://github.com/Azure/aks-mcp/security/advisories) + +## License + +By contributing to AKS-MCP, you agree that your contributions will be licensed under the project's [MIT 
License](LICENSE). + +--- + +Thank you for contributing to AKS-MCP! Your contributions help make Azure Kubernetes Service more accessible through AI assistants. diff --git a/CREATE_PR_INSTRUCTIONS.md b/CREATE_PR_INSTRUCTIONS.md deleted file mode 100644 index 55993fb..0000000 --- a/CREATE_PR_INSTRUCTIONS.md +++ /dev/null @@ -1,80 +0,0 @@ -# GitHub Pull Request Instructions - -## 📋 **Pull Request Details** - -**Title:** `feat: Add Azure Advisor recommendation tool for AKS clusters with logging and testing` - -**Base Branch:** `main` -**Compare Branch:** `feature/azure-diagnostics-prompts` - -## 🚀 **How to Create the Pull Request** - -### **Option 1: GitHub Web Interface** -1. Go to: https://github.com/Azure/aks-mcp -2. Click "Pull requests" tab -3. Click "New pull request" -4. Set base: `main` <- compare: `feature/azure-diagnostics-prompts` -5. Copy the title above -6. Copy the description from `PR_DESCRIPTION.md` (created above) -7. Add labels: `enhancement`, `feature`, `aks`, `advisor` -8. 
Click "Create pull request" - -### **Option 2: GitHub CLI (if available)** -```bash -# Install GitHub CLI first if not available -gh pr create \ - --title "feat: Add Azure Advisor recommendation tool for AKS clusters with logging and testing" \ - --body-file PR_DESCRIPTION.md \ - --base main \ - --head feature/azure-diagnostics-prompts \ - --label enhancement,feature,aks,advisor -``` - -### **Option 3: Direct GitHub URL** -Navigate to: -``` -https://github.com/Azure/aks-mcp/compare/main...feature/azure-diagnostics-prompts -``` - -## 📄 **Files to Reference** -- **Detailed Description:** `PULL_REQUEST.md` (comprehensive documentation) -- **GitHub PR Description:** `PR_DESCRIPTION.md` (concise version for GitHub) - -## 🏷️ **Suggested Labels** -- `enhancement` -- `feature` -- `aks` -- `advisor` -- `logging` -- `testing` - -## 👥 **Suggested Reviewers** -- Team leads familiar with AKS -- Azure CLI integration experts -- MCP server maintainers -- Security/access control reviewers - -## ✅ **Pre-submission Checklist** -- [x] All tests pass (10/10 advisor tests) -- [x] Project builds successfully -- [x] Code follows project conventions -- [x] Comprehensive logging implemented -- [x] Security validation integrated -- [x] Documentation complete -- [x] VS Code MCP extension compatibility verified -- [x] ResourceID field properly implemented -- [x] Error handling comprehensive - -## 📊 **Commit Summary** -``` -9370274 fix: Update Azure Advisor recommendation tool with logging and resourceID field -0e47372 fix: resolve MCP tool parameter validation error for Azure Advisor -06c90db docs: add Azure Advisor tool usage documentation -11c71fc feat: implement Azure Advisor recommendations for AKS clusters -439e603 chore: remove pull request template and exe files from tracking -``` - -**Total commits:** 5 -**Files changed:** 6 files (+247 insertions, -104 deletions) - -The pull request is ready to be created! 
🚀 diff --git a/Dockerfile b/Dockerfile index eb06608..05264f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ # Linux Dockerfile for aks-mcp # Build stage -FROM golang:1.24-alpine AS builder +FROM golang:1.25-alpine AS builder ARG TARGETOS=linux ARG TARGETARCH ARG VERSION @@ -20,8 +20,15 @@ RUN go mod download # Copy source code COPY . . -# Build the application for target platform -RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o aks-mcp ./cmd/aks-mcp +# Build the application for target platform with version injection +RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build \ + -trimpath \ + -tags withoutebpf \ + -ldflags "-X github.com/Azure/aks-mcp/internal/version.GitVersion=${VERSION} \ + -X github.com/Azure/aks-mcp/internal/version.GitCommit=${GIT_COMMIT} \ + -X github.com/Azure/aks-mcp/internal/version.GitTreeState=${GIT_TREE_STATE} \ + -X github.com/Azure/aks-mcp/internal/version.BuildMetadata=${BUILD_DATE}" \ + -o aks-mcp ./cmd/aks-mcp # Runtime stage FROM alpine:3.22 diff --git a/Makefile b/Makefile index 5c52ca8..743e60a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,10 @@ # AKS-MCP Makefile # Provides convenience targets for development, building, testing, and releasing +# Run multi-line recipes in bash and fail on any command error +SHELL := /usr/bin/env bash +.SHELLFLAGS := -eo pipefail -c + # Variables BINARY_NAME = aks-mcp MAIN_PATH = ./cmd/aks-mcp @@ -8,6 +12,8 @@ DOCKER_IMAGE = aks-mcp DOCKER_TAG ?= latest # Version information +# Version is automatically derived from git tags using 'git describe --tags --always --dirty' +# This provides proper semantic versioning based on GitHub releases and tags VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev") GIT_COMMIT ?= $(shell git rev-parse HEAD 2>/dev/null || echo "unknown") GIT_TREE_STATE ?= $(shell if git diff --quiet 2>/dev/null; then echo "clean"; else echo "dirty"; fi) @@ -183,6 +189,7 @@ version: ## Show version information
.PHONY: info info: ## Show build information + @echo "Shell: $(SHELL) $(SHELLFLAGS)" @echo "Binary Name: $(BINARY_NAME)" @echo "Main Path: $(MAIN_PATH)" @echo "Docker Image: $(DOCKER_IMAGE):$(DOCKER_TAG)" diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md deleted file mode 100644 index b3f082c..0000000 --- a/PR_DESCRIPTION.md +++ /dev/null @@ -1,57 +0,0 @@ -## 🎯 Summary -Implements Azure Advisor recommendation tool for AKS clusters with comprehensive logging, testing, and VS Code MCP extension integration. - -## 🚀 Key Features -- ✅ **Azure Advisor Integration**: Fetch real-time AKS recommendations via Azure CLI -- ✅ **AKS-Specific Filtering**: Auto-filter recommendations for AKS resources only -- ✅ **Multiple Operations**: Support for `list`, `details`, and `report` operations -- ✅ **ResourceID Field**: Returns proper Azure resource IDs instead of generic values -- ✅ **Comprehensive Logging**: Detailed `[ADVISOR]` prefixed logs for debugging -- ✅ **Complete Test Coverage**: 10 test cases covering all functionality -- ✅ **Security Integration**: Works with readonly/readwrite/admin access levels - -## 📋 Changes Made - -### New Files -- `internal/azure/advisor/aks_recommendations.go` - Main advisor logic -- `internal/azure/advisor/types.go` - Data structures -- `internal/azure/advisor/advisor_test.go` - Test suite -- `docs/logging.md` - Logging documentation - -### Modified Files -- `internal/security/validator.go` - Added advisor tool validation -- `.gitignore` - Added `*.ps1` for development scripts - -## 🧪 Testing -```bash -=== RUN TestFilterAKSRecommendationsFromCLI ---- PASS: TestFilterAKSRecommendationsFromCLI (0.00s) -# ... 
10/10 tests pass -PASS -ok github.com/Azure/aks-mcp/internal/azure/advisor -``` - -## 🔧 Usage Example -```json -{ - "operation": "list", - "subscription_id": "your-subscription-id", - "resource_group": "your-resource-group", - "severity": "High" -} -``` - -## ✅ Verification -- ✅ All tests pass -- ✅ Project builds successfully -- ✅ Works with VS Code MCP extension -- ✅ Proper logging and monitoring -- ✅ Security validation integrated - -## 🎯 Benefits -- Enables AI assistants to provide AKS optimization recommendations -- Proactive monitoring and cost optimization insights -- Security recommendations for AKS resources -- Actionable insights for cluster management - -Ready for review! 🚀 diff --git a/PULL_REQUEST.md b/PULL_REQUEST.md deleted file mode 100644 index 5d5df16..0000000 --- a/PULL_REQUEST.md +++ /dev/null @@ -1,179 +0,0 @@ -# Azure Advisor Recommendations Tool for AKS Clusters - -## 🎯 **Overview** -This PR implements a comprehensive Azure Advisor recommendation tool specifically designed for AKS (Azure Kubernetes Service) clusters. The tool integrates with the AKS-MCP server to provide AI assistants with the ability to retrieve, analyze, and report on Azure Advisor recommendations for AKS resources. 
- -## 🚀 **Features Implemented** - -### **Core Functionality** -- ✅ **Azure Advisor Integration**: Direct integration with Azure CLI to fetch real-time recommendations -- ✅ **AKS-Specific Filtering**: Automatically filters recommendations to only include AKS-related resources -- ✅ **Multiple Operations**: Support for `list`, `details`, and `report` operations -- ✅ **Comprehensive Logging**: Detailed logging with `[ADVISOR]` prefix for debugging and monitoring -- ✅ **Security Integration**: Works with readonly, readwrite, and admin access levels - -### **Data Structure & API** -- ✅ **ResourceID Field**: Returns `resourceID` (Azure resource ID) instead of generic impacted value -- ✅ **Structured Response**: Well-defined JSON response format with AKS-specific metadata -- ✅ **Filtering Support**: Filter by severity, resource group, category, and cluster names -- ✅ **Report Generation**: Comprehensive reports with summaries, action items, and cluster breakdowns - -### **Quality & Testing** -- ✅ **Complete Test Coverage**: 10 comprehensive test cases covering all functionality -- ✅ **Error Handling**: Robust error handling with informative error messages -- ✅ **Input Validation**: Proper parameter validation and type checking -- ✅ **Documentation**: Comprehensive usage documentation and examples - -## 📋 **Changes Made** - -### **New Files Added** -``` -internal/azure/advisor/ -├── aks_recommendations.go # Main advisor logic with comprehensive logging -├── types.go # Data structures and type definitions -└── advisor_test.go # Complete test suite (10 test cases) - -docs/ -└── logging.md # Logging setup and monitoring documentation -``` - -### **Files Modified** -``` -internal/security/validator.go # Added advisor tool to security validation -.gitignore # Added *.ps1 to ignore development scripts -``` - -### **Key Technical Improvements** -1. **Data Structure Fix**: Updated `CLIRecommendation` struct to match actual Azure CLI output (flattened fields) -2. 
**ResourceID Integration**: Changed from `ImpactedResource` to `ResourceID` field for better clarity -3. **Comprehensive Logging**: Added detailed logging throughout the tool for debugging and monitoring -4. **Test Coverage**: Fixed all test cases to work with the new data structure and added ResourceID validation - -## 🔧 **Usage Examples** - -### **List AKS Recommendations** -```json -{ - "operation": "list", - "subscription_id": "your-subscription-id", - "resource_group": "your-resource-group", - "severity": "High" -} -``` - -### **Get Recommendation Details** -```json -{ - "operation": "details", - "recommendation_id": "/subscriptions/.../recommendations/rec-id" -} -``` - -### **Generate Comprehensive Report** -```json -{ - "operation": "report", - "subscription_id": "your-subscription-id", - "format": "detailed" -} -``` - -## 📊 **Response Format** -```json -{ - "id": "/subscriptions/.../recommendations/rec-id", - "category": "Cost", - "impact": "High", - "cluster_name": "my-aks-cluster", - "resource_group": "my-resource-group", - "resource_id": "/subscriptions/.../managedClusters/my-aks-cluster", - "description": "Detailed recommendation with solution", - "severity": "High", - "last_updated": "2024-01-15T10:30:00Z", - "status": "Active", - "aks_specific": { - "configuration_area": "compute" - } -} -``` - -## 🧪 **Testing** - -### **Test Coverage** -- ✅ `TestFilterAKSRecommendationsFromCLI` - AKS-specific filtering -- ✅ `TestIsAKSRelatedCLI` - Resource ID validation -- ✅ `TestExtractAKSClusterNameFromCLI` - Cluster name extraction -- ✅ `TestExtractResourceGroupFromResourceID` - Resource group parsing -- ✅ `TestConvertToAKSRecommendationSummary` - Data transformation & ResourceID validation -- ✅ `TestFilterBySeverity` - Severity-based filtering -- ✅ `TestGenerateAKSAdvisorReport` - Report generation -- ✅ `TestMapCategoryToConfigArea` - Category mapping -- ✅ `TestHandleAdvisorRecommendationInvalidOperation` - Error handling -- ✅ 
`TestHandleAdvisorRecommendationMissingOperation` - Parameter validation - -### **Test Results** -```bash -=== RUN TestFilterAKSRecommendationsFromCLI ---- PASS: TestFilterAKSRecommendationsFromCLI (0.00s) -=== RUN TestIsAKSRelatedCLI ---- PASS: TestIsAKSRelatedCLI (0.00s) -# ... all 10 tests pass -PASS -ok github.com/Azure/aks-mcp/internal/azure/advisor -``` - -## 🔒 **Security & Access Control** - -- **Readonly Mode**: ✅ Advisor tool works in readonly mode (listing recommendations is read-only) -- **Input Validation**: ✅ All parameters are validated and sanitized -- **Access Control**: ✅ Integrated with existing security validation framework -- **Command Injection Protection**: ✅ Uses validated Azure CLI executor - -## 📝 **Logging & Monitoring** - -The tool includes comprehensive logging with the `[ADVISOR]` prefix: -``` -[ADVISOR] Handling operation: list -[ADVISOR] Listing recommendations for subscription: xxx, resource_group: yyy -[ADVISOR] Executing command: az advisor recommendation list --subscription xxx -[ADVISOR] Found 15 total recommendations -[ADVISOR] Found 3 AKS-related recommendations -[ADVISOR] After severity filter: 1 recommendations -[ADVISOR] Returning 1 recommendation summaries -``` - -## 🔄 **Integration Points** - -1. **MCP Server**: Registered as `az_advisor_recommendation` tool -2. **VS Code Extension**: Compatible with VS Code MCP extension -3. **Azure CLI**: Uses existing Azure CLI integration for authentication -4. **Security Layer**: Integrated with access level validation - -## ✅ **Verification Steps** - -1. **Build Success**: ✅ Project builds without errors -2. **Test Suite**: ✅ All 10 advisor tests pass -3. **Integration Test**: ✅ Tool works with VS Code MCP extension -4. **Security Validation**: ✅ Works in readonly mode as expected -5. **Logging Verification**: ✅ Logs are written to debug file and can be monitored - -## 🎯 **Benefits** - -1. 
**Enhanced AI Assistant Capabilities**: AI assistants can now provide AKS optimization recommendations -2. **Proactive Monitoring**: Enables proactive identification of AKS issues and optimizations -3. **Cost Optimization**: Helps identify cost-saving opportunities in AKS clusters -4. **Security Improvements**: Surfaces security-related recommendations for AKS resources -5. **Operational Excellence**: Provides actionable insights for AKS cluster management - -## 🔮 **Future Enhancements** - -- Integration with Azure Resource Graph for advanced querying -- Support for custom recommendation filtering rules -- Integration with Azure Policy for automated remediation -- Enhanced reporting with trend analysis - ---- - -**Ready for Review** ✅ - -This PR is ready for review and testing. All tests pass, documentation is complete, and the tool has been verified to work with the VS Code MCP extension in a real environment. diff --git a/README.md b/README.md index 59f5c32..2d48189 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# AKS-MCP +# AKS-MCP The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters. It serves as a bridge @@ -19,6 +19,39 @@ AI assistants can use to interact with AKS resources. It leverages the Model Context Protocol (MCP) to facilitate this communication, enabling AI tools to make API calls to Azure and interpret the responses. +## Azure CLI Authentication + +AKS-MCP uses Azure CLI (az) for AKS operations. Azure CLI authentication is attempted in this order: + +1. Service Principal (client secret): When `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`, `AZURE_TENANT_ID` environment variables are present, a service principal login is performed using the following command: `az login --service-principal -u CLIENT_ID -p CLIENT_SECRET --tenant TENANT_ID` + +1. 
Workload Identity (federated token): When `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_FEDERATED_TOKEN_FILE` environment variables are present, a federated token login is performed using the following command: `az login --service-principal -u CLIENT_ID --tenant TENANT_ID --federated-token TOKEN` + +1. User-assigned Managed Identity (managed identity client ID): When only `AZURE_CLIENT_ID` environment variable is present, a user-assigned managed identity login is performed using the following command: `az login --identity -u CLIENT_ID` + +1. System-assigned Managed Identity: When `AZURE_MANAGED_IDENTITY` is set to `system`, a system-assigned managed identity login is performed using the following command: `az login --identity` + +1. Existing Login: When none of the above environment variables are set, AKS-MCP assumes you have already authenticated (for example, via `az login`) and uses the existing session. + +Optional subscription selection: + +- If `AZURE_SUBSCRIPTION_ID` is set, AKS-MCP will run `az account set --subscription SUBSCRIPTION_ID` after login. + +Notes and security: + +- The federated token file must be exactly `/var/run/secrets/azure/tokens/azure-identity-token` and is strictly validated; other paths are rejected. +- After each login, AKS-MCP verifies authentication with `az account show --query id -o tsv`. +- Ensure the Azure CLI is installed and on PATH. 
+ +Environment variables used: + +- `AZURE_TENANT_ID` +- `AZURE_CLIENT_ID` +- `AZURE_CLIENT_SECRET` +- `AZURE_FEDERATED_TOKEN_FILE` +- `AZURE_SUBSCRIPTION_ID` +- `AZURE_MANAGED_IDENTITY` (set to `system` to opt into system-assigned managed identity) + ## Available Tools The AKS-MCP server provides consolidated tools for interacting with AKS @@ -62,6 +95,8 @@ Unified tool for managing Azure Kubernetes Service (AKS) clusters and related op - `create`: Create new cluster - `delete`: Delete cluster - `scale`: Scale cluster node count + - `start`: Start a stopped cluster + - `stop`: Stop a running cluster - `update`: Update cluster configuration - `upgrade`: Upgrade Kubernetes version - `nodepool-add`: Add node pool to cluster @@ -419,7 +454,49 @@ For other MCP-compatible AI clients like [Claude Desktop](https://claude.ai/), c } ``` -#### 🐋 Docker MCP configuration +#### 🐳 Docker MCP Toolkit + +You can enable the [AKS-MCP server directly from MCP Toolkit](https://hub.docker.com/mcp/server/aks/overview): + +1. Open Docker Desktop +2. Click "MCP Toolkit" in the left sidebar +3. Search for "aks" in Catalog tab +4. Click on the AKS-MCP server card +5. Enable the server by clicking "+" in the top right corner +6. Configure the server using "Configuration" tab: + - **azure_dir** `[REQUIRED]`: Path to your Azure credentials directory e.g `/home/user/.azure` (must be absolute – without `$HOME` or `~`) + - **kubeconfig** `[REQUIRED]`: Path to your kubeconfig file e.g `/home/user/.kube/config` (must be absolute – without `$HOME` or `~`) + - **access_level** `[REQUIRED]`: Set to `readonly`, `readwrite`, or `admin` as needed +7. You are now ready to use the AKS-MCP server with your [preferred MCP client](https://hub.docker.com/mcp/server/aks/manual), see an example [here](https://docs.docker.com/ai/mcp-catalog-and-toolkit/toolkit/#install-an-mcp-client). + +On **Windows**, the Azure credentials won't work by default, but you have two options: + +1. 
**Long-lived servers**: Configure the [MCP gateway](https://docs.docker.com/ai/mcp-gateway/) to use long-lived servers using `--long-lived` flag and then authenticate with Azure CLI in the container, see option B in Containerized MCP configuration below on how to fetch credentials inside the container. +2. **Custom Azure Directory**: Set up a custom Azure directory: + ```powershell + # Set custom Azure config directory + $env:AZURE_CONFIG_DIR = "$env:USERPROFILE\.azure-for-docker" + + # Disable token cache encryption (to match behavior with Linux/macOS) + $env:AZURE_CORE_ENCRYPT_TOKEN_CACHE = "false" + + # Login to Azure CLI + az login + ``` + + This will store the credentials in `$env:USERPROFILE\.azure-for-docker` (e.g. `C:\Users\<username>\.azure-for-docker`), + use this path in the AKS-MCP server configuration `azure_dir`. + +You can also use the [MCP Gateway](https://docs.docker.com/ai/mcp-gateway/) to enable the AKS-MCP server directly using: + +```bash +# Enable AKS-MCP server in Docker MCP Gateway +docker mcp server enable aks +``` + +Note: You still need to configure the server (e.g. using `docker mcp config`) with your Azure credentials, kubeconfig file, and access level. + +#### 🐋 Containerized MCP configuration For containerized deployment, you can run AKS-MCP server using the official Docker image: @@ -525,6 +602,15 @@ Usage of ./aks-mcp: ## Development +### Prerequisites + +- **Go** ≥ `1.24.x` installed on your local machine +- **Bash** available as `/usr/bin/env bash` (Makefile targets use multi-line recipes with fail-fast mode) +- **GNU Make** `4.x` or later +- **Docker** *(optional, for container builds and testing)* + +> **Note:** If your login shell is different (e.g., `zsh` on **macOS**), you do **not** need to change it — the Makefile sets variables to run all recipes in `bash` for consistent behavior across platforms. + ### Building from Source This project includes a Makefile for convenient development, building, and testing.
To see all available targets: @@ -614,17 +700,31 @@ To opt out, set the environment variable `AKS_MCP_COLLECT_TELEMETRY=false`. ## Contributing -This project welcomes contributions and suggestions. Most contributions require you to agree to a -Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. +We welcome contributions to AKS-MCP! Whether you're fixing bugs, adding features, or improving documentation, your help makes this project better. + +**📖 [Read our detailed Contributing Guide](CONTRIBUTING.md)** for comprehensive information on: + +- Setting up your development environment +- Running AKS-MCP locally and testing with AI agents +- Understanding the codebase architecture +- Adding new MCP tools and features +- Testing guidelines and best practices +- Submitting pull requests + +### Quick Start for Contributors + +1. **Prerequisites**: Go ≥ 1.24.x, Azure CLI, Git +2. **Setup**: Fork the repo, clone locally, run `make deps && make build` +3. **Test**: Run `make test` and `make check` +4. **Develop**: Follow the component-based architecture in [CONTRIBUTING.md](CONTRIBUTING.md) + +### Contributor License Agreement + +Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. -When you submit a pull request, a CLA bot will automatically determine whether you need to provide -a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. +When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). 
Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. ## Trademarks diff --git a/go.mod b/go.mod index 4ac04f3..38e1fad 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.24.0 toolchain go1.24.5 require ( - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.2 + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.0 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.11.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4 v4.2.1 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v2 v2.4.0 @@ -14,17 +14,17 @@ require ( github.com/Azure/mcp-kubernetes v0.0.8 github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 github.com/inspektor-gadget/inspektor-gadget v0.43.0 - github.com/mark3labs/mcp-go v0.37.0 + github.com/mark3labs/mcp-go v0.38.0 github.com/microsoft/ApplicationInsights-Go v0.4.4 github.com/spf13/pflag v1.0.7 go.opentelemetry.io/otel v1.37.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0 go.opentelemetry.io/otel/sdk v1.37.0 go.opentelemetry.io/otel/trace v1.37.0 - helm.sh/helm/v3 v3.18.5 - k8s.io/apimachinery v0.33.3 - k8s.io/cli-runtime v0.33.3 - k8s.io/client-go v0.33.3 + helm.sh/helm/v3 v3.18.6 + k8s.io/apimachinery v0.33.4 + 
k8s.io/cli-runtime v0.33.4 + k8s.io/client-go v0.33.4 ) require ( @@ -66,7 +66,7 @@ require ( github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/swag v0.23.1 // indirect - github.com/go-viper/mapstructure/v2 v2.3.0 // indirect + github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/gobwas/glob v0.2.3 // indirect github.com/gofrs/uuid v3.3.0+incompatible // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -153,7 +153,7 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.33.3 // indirect + k8s.io/api v0.33.4 // indirect k8s.io/apiextensions-apiserver v0.33.3 // indirect k8s.io/apiserver v0.33.3 // indirect k8s.io/component-base v0.33.3 // indirect diff --git a/go.sum b/go.sum index e24b2b5..c9d4d08 100644 --- a/go.sum +++ b/go.sum @@ -6,8 +6,8 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.2 h1:Hr5FTipp7SL07o2FvoVOX9HRiRH3CR3Mj8pxqCcdD5A= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.2/go.mod h1:QyVsSSN64v5TGltphKLQ2sQxe4OBQg0J1eKRcVBnfgE= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.0 h1:ci6Yd6nysBRLEodoziB6ah1+YOzZbZk+NYneoA6q+6E= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.0/go.mod h1:QyVsSSN64v5TGltphKLQ2sQxe4OBQg0J1eKRcVBnfgE= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.11.0 h1:MhRfI58HblXzCtWEZCO0feHs8LweePB3s90r7WaR1KU= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.11.0/go.mod 
h1:okZ+ZURbArNdlJ+ptXoyHNuOETzOl1Oww19rm8I2WLA= github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY= @@ -151,8 +151,8 @@ github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpv github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/go-viper/mapstructure/v2 v2.3.0 h1:27XbWsHIqhbdR5TIC911OfYvgSaW93HM+dX7970Q7jk= -github.com/go-viper/mapstructure/v2 v2.3.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= +github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/gofrs/uuid v3.3.0+incompatible h1:8K4tyRfvU1CYPgJsveYFQMhpFd/wXNM7iK6rR7UHz84= @@ -245,8 +245,8 @@ github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhn github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= -github.com/mark3labs/mcp-go v0.37.0 h1:BywvZLPRT6Zx6mMG/MJfxLSZQkTGIcJSEGKsvr4DsoQ= -github.com/mark3labs/mcp-go v0.37.0/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g= +github.com/mark3labs/mcp-go v0.38.0 h1:E5tmJiIXkhwlV0pLAwAT0O5ZjUZSISE/2Jxg+6vpq4I= +github.com/mark3labs/mcp-go v0.38.0/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g= github.com/mattn/go-colorable v0.1.9/go.mod 
h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= @@ -519,20 +519,20 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -helm.sh/helm/v3 v3.18.5 h1:Cc3Z5vd6kDrZq9wO9KxKLNEickiTho6/H/dBNRVSos4= -helm.sh/helm/v3 v3.18.5/go.mod h1:L/dXDR2r539oPlFP1PJqKAC1CUgqHJDLkxKpDGrWnyg= -k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8= -k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE= +helm.sh/helm/v3 v3.18.6 h1:S/2CqcYnNfLckkHLI0VgQbxgcDaU3N4A/46E3n9wSNY= +helm.sh/helm/v3 v3.18.6/go.mod h1:L/dXDR2r539oPlFP1PJqKAC1CUgqHJDLkxKpDGrWnyg= +k8s.io/api v0.33.4 h1:oTzrFVNPXBjMu0IlpA2eDDIU49jsuEorGHB4cvKupkk= +k8s.io/api v0.33.4/go.mod h1:VHQZ4cuxQ9sCUMESJV5+Fe8bGnqAARZ08tSTdHWfeAc= k8s.io/apiextensions-apiserver v0.33.3 h1:qmOcAHN6DjfD0v9kxL5udB27SRP6SG/MTopmge3MwEs= k8s.io/apiextensions-apiserver v0.33.3/go.mod h1:oROuctgo27mUsyp9+Obahos6CWcMISSAPzQ77CAQGz8= -k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA= -k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= +k8s.io/apimachinery v0.33.4 h1:SOf/JW33TP0eppJMkIgQ+L6atlDiP/090oaX0y9pd9s= +k8s.io/apimachinery v0.33.4/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= k8s.io/apiserver v0.33.3 h1:Wv0hGc+QFdMJB4ZSiHrCgN3zL3QRatu56+rpccKC3J4= k8s.io/apiserver v0.33.3/go.mod h1:05632ifFEe6TxwjdAIrwINHWE2hLwyADFk5mBsQa15E= -k8s.io/cli-runtime v0.33.3 h1:Dgy4vPjNIu8LMJBSvs8W0LcdV0PX/8aGG1DA1W8lklA= -k8s.io/cli-runtime v0.33.3/go.mod h1:yklhLklD4vLS8HNGgC9wGiuHWze4g7x6XQZ+8edsKEo= 
-k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA= -k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg= +k8s.io/cli-runtime v0.33.4 h1:V8NSxGfh24XzZVhXmIGzsApdBpGq0RQS2u/Fz1GvJwk= +k8s.io/cli-runtime v0.33.4/go.mod h1:V+ilyokfqjT5OI+XE+O515K7jihtr0/uncwoyVqXaIU= +k8s.io/client-go v0.33.4 h1:TNH+CSu8EmXfitntjUPwaKVPN0AYMbc9F1bBS8/ABpw= +k8s.io/client-go v0.33.4/go.mod h1:LsA0+hBG2DPwovjd931L/AoaezMPX9CmBgyVyBZmbCY= k8s.io/component-base v0.33.3 h1:mlAuyJqyPlKZM7FyaoM/LcunZaaY353RXiOd2+B5tGA= k8s.io/component-base v0.33.3/go.mod h1:ktBVsBzkI3imDuxYXmVxZ2zxJnYTZ4HAsVj9iF09qp4= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= diff --git a/internal/azcli/login.go b/internal/azcli/login.go new file mode 100644 index 0000000..2a6cbb1 --- /dev/null +++ b/internal/azcli/login.go @@ -0,0 +1,200 @@ +// This includes automatic login detection and supports multiple authentication methods +// including service principals, managed identities, and federated tokens. +package azcli + +import ( + "fmt" + "io" + "os" + "os/exec" + "strings" + + "github.com/Azure/aks-mcp/internal/command" + "github.com/Azure/aks-mcp/internal/config" +) + +// Login types +const ( + AuthTypeExisting = "existing_login" + AuthTypeServicePrincipal = "service_principal" + AuthTypeFederatedToken = "federated_token" + AuthTypeUserAssignedManagedID = "user_assigned_managed_identity" + AuthTypeSystemAssignedManagedID = "system_assigned_managed_identity" +) + +// validateFederatedTokenFile only allows the fixed AKS identity token path. 
+func validateFederatedTokenFile(filePath string) (string, error) { + const allowedTokenPath = "/var/run/secrets/azure/tokens/azure-identity-token" // #nosec G101 -- not a credential, this is a fixed AKS token path + if filePath != allowedTokenPath { + return "", fmt.Errorf("federated token file path must be exactly %s", allowedTokenPath) + } + fileInfo, err := os.Stat(allowedTokenPath) + if err != nil { + return "", fmt.Errorf("cannot stat federated token file %s: %w", allowedTokenPath, err) + } + if !fileInfo.Mode().IsRegular() { + return "", fmt.Errorf("federated token file is not a regular file: %s", allowedTokenPath) + } + return allowedTokenPath, nil +} + +// Proc is a minimal interface used by this package so tests can inject a fake process. +type Proc interface { + // Run executes the given command (arguments may be included in the string) and + // returns the command output and an error. Implementations MUST return the + // combined stdout+stderr output in the string return value. The error should + // be non-nil when the underlying process fails to start or returns a + // non-zero exit status. This contract is relied upon by callers that inspect + // output text (for example searching for an "ERROR:" prefix) as well as + // the returned error value. + Run(cmd string) (string, error) +} + +// EnsureAzCliLogin ensures az CLI is available and attempts to auto-login using environment variables +func EnsureAzCliLogin(cfg *config.ConfigData) (string, error) { + if _, err := exec.LookPath("az"); err != nil { + return "", fmt.Errorf("az cli is not installed or not in PATH: %w", err) + } + proc := NewShellProc(cfg.Timeout) + return EnsureAzCliLoginWithProc(proc, cfg) +} + +// NewShellProc is a package-level Proc factory used so tests can override process creation and avoid invoking the real `az` binary. 
+var NewShellProc = func(timeout int) Proc { + return command.NewShellProcess("az", timeout) +} + +// EnsureAzCliLoginWithProc is the testable implementation that uses an injected Proc. +func EnsureAzCliLoginWithProc(proc Proc, cfg *config.ConfigData) (string, error) { + // Read environment variables to determine which auth methods to try + tenantID := os.Getenv("AZURE_TENANT_ID") + clientID := os.Getenv("AZURE_CLIENT_ID") + clientSecret := os.Getenv("AZURE_CLIENT_SECRET") + federatedTokenFile := os.Getenv("AZURE_FEDERATED_TOKEN_FILE") + subscriptionID := os.Getenv("AZURE_SUBSCRIPTION_ID") + azureManagedIdentityType := os.Getenv("AZURE_MANAGED_IDENTITY") + + // 1) Service Principal with secret + if clientID != "" && clientSecret != "" && tenantID != "" { + if err := runLoginCommand(proc, fmt.Sprintf("login --service-principal -u %s -p %s --tenant %s", clientID, clientSecret, tenantID), "service principal"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "service principal"); err != nil { + return "", err + } + if err := showAccount(proc, "service principal"); err != nil { + return "", err + } + return AuthTypeServicePrincipal, nil + } + + // 2) Workload Identity (federated token) + if clientID != "" && tenantID != "" && federatedTokenFile != "" { + // Validate the federated token file path for security and get canonical path + validatedPath, err := validateFederatedTokenFile(federatedTokenFile) + if err != nil { + return "", fmt.Errorf("federated token file validation failed: %w", err) + } + + // Open the only allowed federated token file (fixed path, safe) + f, err := os.Open(validatedPath) // #nosec G304 -- validated fixed path, not user-controlled + if err != nil { + return "", fmt.Errorf("failed to open federated token file %s: %w", validatedPath, err) + } + defer func() { + if err := f.Close(); err != nil { + fmt.Fprintf(os.Stderr, "error closing file: %v\n", err) + } + }() + + // Limit token size to 16KB which is far 
larger than typical JWT or k8s tokens + // but protects against very large files. + const maxTokenSize = 16 * 1024 + data, err := io.ReadAll(io.LimitReader(f, maxTokenSize)) + if err != nil { + return "", fmt.Errorf("failed to read federated token file %s: %w", validatedPath, err) + } + federatedToken := strings.TrimSpace(string(data)) + if federatedToken == "" { + return "", fmt.Errorf("federated token file %s is empty", validatedPath) + } + if err := runLoginCommand(proc, fmt.Sprintf("login --service-principal -u %s --tenant %s --federated-token %s", clientID, tenantID, federatedToken), "federated token"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "federated token"); err != nil { + return "", err + } + if err := showAccount(proc, "federated token"); err != nil { + return "", err + } + return AuthTypeFederatedToken, nil + } + + // 3) User-assigned Managed Identity (client ID provided) + if clientID != "" { + if err := runLoginCommand(proc, fmt.Sprintf("login --identity -u %s", clientID), "user-assigned managed identity"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "user-assigned managed identity"); err != nil { + return "", err + } + if err := showAccount(proc, "user-assigned managed identity"); err != nil { + return "", err + } + return AuthTypeUserAssignedManagedID, nil + } + + // 4) System-assigned Managed Identity + if azureManagedIdentityType == "system" { + if err := runLoginCommand(proc, "login --identity", "system-assigned managed identity"); err != nil { + return "", err + } + if err := setSubscription(proc, subscriptionID, "system-assigned managed identity"); err != nil { + return "", err + } + if err := showAccount(proc, "system-assigned managed identity"); err != nil { + return "", err + } + return AuthTypeSystemAssignedManagedID, nil + } + + // 5) Existing login (no credentials override provided) + return AuthTypeExisting, nil +} + +// Runs a command and returns a 
formatted error if the output indicates an ERROR or the command failed. +func runLoginCommand(proc Proc, cmd string, loginMethod string) error { + // proc.Run is expected to return combined stdout+stderr. Some `az` + // subcommands write human-readable error text to stderr (which will be + // included in the returned string). We therefore inspect the returned + // output for an "ERROR:" prefix as a quick check for failure messages, + // and also respect the returned error (which should be non-nil for + // non-zero exit codes). + out, err := proc.Run(cmd) + if strings.HasPrefix(strings.TrimSpace(out), "ERROR:") { + return fmt.Errorf("%s login failed: %s", loginMethod, out) + } + if err != nil { + return fmt.Errorf("%s login failed: %w", loginMethod, err) + } + return nil +} + +// Sets the subscription when provided and wraps errors with context. +func setSubscription(proc Proc, subscriptionID, loginMethod string) error { + if subscriptionID == "" { + return nil + } + if _, err := proc.Run(fmt.Sprintf("account set --subscription %s", subscriptionID)); err != nil { + return fmt.Errorf("%s login failed: %w", loginMethod, err) + } + return nil +} + +// Checks that account info is returned after a login attempt. 
+func showAccount(proc Proc, loginMethod string) error { + if _, err := proc.Run("account show --query id -o tsv"); err != nil { + return fmt.Errorf("%s login failed: %w", loginMethod, err) + } + return nil +} diff --git a/internal/azcli/login_test.go b/internal/azcli/login_test.go new file mode 100644 index 0000000..f10e594 --- /dev/null +++ b/internal/azcli/login_test.go @@ -0,0 +1,350 @@ +package azcli + +import ( + "errors" + "fmt" + "os" + "strings" + "testing" + + "github.com/Azure/aks-mcp/internal/config" +) + +// Command to be run and expected output/error +type loginCommandResponses struct { + cmd string + out string + err error +} + +// Interface to mock azcli commands in tests +type loginCommands struct { + idx int + resp []loginCommandResponses +} + +// Simulate running a command and return the expected output/error +func (c *loginCommands) Run(cmd string) (string, error) { + if c.idx >= len(c.resp) { + return "", fmt.Errorf("no more responses, unexpected command: %s", cmd) + } + expected := c.resp[c.idx] + c.idx++ + // match prefix so tests are less brittle + if expected.cmd != "" && !strings.HasPrefix(cmd, expected.cmd) { + return "", fmt.Errorf("expected cmd prefix %q but got %q", expected.cmd, cmd) + } + return expected.out, expected.err +} + +func TestEnsureAzCliLogin_Existing(t *testing.T) { + cfg := config.NewConfig() + // ensure no env-based auth triggers; default to existing login + t.Setenv("AZURE_CLIENT_ID", "") + t.Setenv("AZURE_CLIENT_SECRET", "") + t.Setenv("AZURE_TENANT_ID", "") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") + t.Setenv("AZURE_SUBSCRIPTION_ID", "") + + // proc should not be invoked + p := &loginCommands{resp: []loginCommandResponses{}} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "existing_login" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ServicePrincipal(t *testing.T) { + cfg := config.NewConfig() + // set 
envs for service principal (dummy values for testing only) + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + // To exercise the login flow we must simulate probe failing (not logged in) + p := &loginCommands{resp: []loginCommandResponses{ + // login command succeeds (matches dummy env values) + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, + // probe after login succeeds + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "service_principal" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ServicePrincipal_ErrorOutput(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + // probe fails so login attempt is performed + p := &loginCommands{resp: []loginCommandResponses{ + // login returns an ERROR: prefixed message on stderr (simulated via out) + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "ERROR: something went wrong", err: nil}, + }} + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "service principal login failed: ERROR: something went wrong") { + t.Fatalf("expected service principal error containing ERROR:, got %v", err) + } +} + +func TestEnsureAzCliLogin_ServicePrincipal_CommandError(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + p := &loginCommands{resp: []loginCommandResponses{ + // login command fails to 
execute + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: errors.New("exec failed")}, + }} + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "service principal login failed") { + t.Fatalf("expected wrapped service principal error, got %v", err) + } +} + +func TestEnsureAzCliLogin_NoAutoLogin(t *testing.T) { + cfg := config.NewConfig() + // ensure no AZURE_* env vars (explicitly clear for this test) + t.Setenv("AZURE_CLIENT_ID", "") + t.Setenv("AZURE_CLIENT_SECRET", "") + t.Setenv("AZURE_TENANT_ID", "") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") + t.Setenv("AZURE_SUBSCRIPTION_ID", "") + + // should default to existing login without invoking proc + p := &loginCommands{resp: []loginCommandResponses{}} + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "existing_login" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_SubscriptionSetFailure(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, + // account set fails + {cmd: "account set --subscription dummy-subscription-id", out: "", err: errors.New("set failed")}, + }} + + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "service principal login failed") { + t.Fatalf("expected subscription set failure wrapped in service principal context, got %v", err) + } +} + +func TestEnsureAzCliLogin_ReprobeFailure(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", 
"dummy-client-id") + t.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "login --service-principal -u dummy-client-id -p dummy-client-secret --tenant dummy-tenant-id", out: "", err: nil}, + {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, + // re-probe fails + {cmd: "account show --query id -o tsv", out: "", err: errors.New("probe failed")}, + }} + + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "service principal login failed") { + t.Fatalf("expected reprobe failure wrapped in service principal context, got %v", err) + } +} + +func TestEnsureAzCliLogin_Federated(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + + // Only allow the fixed AKS federated token path + const allowedTokenPath = "/var/run/secrets/azure/tokens/azure-identity-token" + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", allowedTokenPath) + + // If the file does not exist (not running in AKS), skip the test + if _, err := os.Stat(allowedTokenPath); err != nil { + t.Skipf("skipping: %s not present (only available in AKS)", allowedTokenPath) + } + + p := &loginCommands{resp: []loginCommandResponses{}} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil && !strings.Contains(err.Error(), allowedTokenPath) { + t.Fatalf("unexpected error: %v", err) + } + if err == nil && got == "federated_token" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_Federated_InvalidFile(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "/tmp/non-existent-file") + + p := &loginCommands{resp: []loginCommandResponses{}} + 
+ _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "federated token file validation failed") { + t.Fatalf("expected federated token file validation error, got %v", err) + } +} + +func TestEnsureAzCliLogin_Federated_DirectoryTraversal(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "../../../etc/passwd") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "account show --query id -o tsv", out: "", err: errors.New("not logged in")}, + }} + + _, err := EnsureAzCliLoginWithProc(p, cfg) + if err == nil || !strings.Contains(err.Error(), "federated token file validation failed") { + t.Fatalf("expected federated token file validation error for directory traversal, got %v", err) + } +} + +func TestEnsureAzCliLogin_Federated_Success(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + t.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + + // Only allow the fixed AKS federated token path + const allowedTokenPath = "/var/run/secrets/azure/tokens/azure-identity-token" + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", allowedTokenPath) + + // If the file does not exist (not running in AKS), skip the test + tokenData := "dummy-federated-token" + if _, err := os.Stat(allowedTokenPath); err != nil { + t.Skipf("skipping: %s not present (only available in AKS)", allowedTokenPath) + } + + // Optionally, try to write a dummy token if running in a testable AKS env (may require root) + // _ = os.WriteFile(allowedTokenPath, []byte(tokenData), 0600) + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "login --service-principal -u dummy-client-id --tenant dummy-tenant-id --federated-token " + tokenData, out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + 
t.Fatalf("expected no error, got %v", err) + } + if got != "federated_token" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ManagedIdentity_UserAssigned(t *testing.T) { + cfg := config.NewConfig() + t.Setenv("AZURE_CLIENT_ID", "dummy-managed-identity-client-id") + t.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "login --identity -u dummy-managed-identity-client-id", out: "", err: nil}, + {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "user_assigned_managed_identity" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ManagedIdentity_SystemAssigned(t *testing.T) { + cfg := config.NewConfig() + // Trigger system-assigned MI via env + t.Setenv("AZURE_CLIENT_ID", "") + t.Setenv("AZURE_CLIENT_SECRET", "") + t.Setenv("AZURE_TENANT_ID", "") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") + t.Setenv("AZURE_SUBSCRIPTION_ID", "") + t.Setenv("AZURE_MANAGED_IDENTITY", "system") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "login --identity", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "system_assigned_managed_identity" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_ManagedIdentity_SystemAssigned_Success(t *testing.T) { + cfg := config.NewConfig() + // System-assigned MI with subscription set via env flag + t.Setenv("AZURE_CLIENT_ID", "") + t.Setenv("AZURE_CLIENT_SECRET", "") + t.Setenv("AZURE_TENANT_ID", "") + t.Setenv("AZURE_FEDERATED_TOKEN_FILE", "") + t.Setenv("AZURE_SUBSCRIPTION_ID", 
"dummy-subscription-id") + t.Setenv("AZURE_MANAGED_IDENTITY", "system") + + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "login --identity", out: "", err: nil}, + {cmd: "account set --subscription dummy-subscription-id", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "system_assigned_managed_identity" { + t.Fatalf("unexpected result: %s", got) + } +} + +func TestEnsureAzCliLogin_LoginPromptInOutput(t *testing.T) { + cfg := config.NewConfig() + // ensure auto-login is attempted via user-assigned MI + t.Setenv("AZURE_CLIENT_ID", "cid") + + // Simulate the case where Azure CLI returns "Please run 'az login'" message with an error + // After the command.go fix, stderr content is returned WITH the error, not instead of it + // This test ensures we don't incorrectly think there's a valid login when there isn't + p := &loginCommands{resp: []loginCommandResponses{ + {cmd: "login --identity -u cid", out: "", err: nil}, + {cmd: "account show --query id -o tsv", out: "sub-id", err: nil}, + }} + + // should NOT return existing_login, should proceed with authentication + got, err := EnsureAzCliLoginWithProc(p, cfg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if got != "user_assigned_managed_identity" { + t.Fatalf("unexpected result: %s", got) + } +} diff --git a/internal/command/command.go b/internal/command/command.go index 7f43255..c88ad94 100644 --- a/internal/command/command.go +++ b/internal/command/command.go @@ -82,7 +82,7 @@ func (s *ShellProcess) Exec(commands string) (string, error) { // Handle errors if err != nil { if s.ReturnErrOutput && stderr.Len() > 0 { - return stderr.String(), nil + return stderr.String(), err } return "", err } diff --git a/internal/components/azaks/registry.go b/internal/components/azaks/registry.go index 8b8be26..3bb37e9 100644 --- 
a/internal/components/azaks/registry.go +++ b/internal/components/azaks/registry.go @@ -18,6 +18,8 @@ const ( OpClusterCreate AksOperationType = "create" OpClusterDelete AksOperationType = "delete" OpClusterScale AksOperationType = "scale" + OpClusterStart AksOperationType = "start" + OpClusterStop AksOperationType = "stop" OpClusterUpdate AksOperationType = "update" OpClusterUpgrade AksOperationType = "upgrade" OpClusterGetVersions AksOperationType = "get-versions" @@ -51,7 +53,7 @@ func generateToolDescription(accessLevel string) string { // Add read-write operations for readwrite and admin if accessLevel == "readwrite" || accessLevel == "admin" { - clusterOps = append(clusterOps, "create", "delete", "scale", "update", "upgrade") + clusterOps = append(clusterOps, "create", "delete", "scale", "update", "upgrade", "start", "stop") nodepoolOps = append(nodepoolOps, "nodepool-add", "nodepool-delete", "nodepool-scale", "nodepool-upgrade") accountOps = append(accountOps, "account-set", "login") } @@ -122,9 +124,10 @@ func GetOperationAccessLevel(operation string) string { readWriteOps := []string{ string(OpClusterCreate), string(OpClusterDelete), string(OpClusterScale), - string(OpClusterUpdate), string(OpClusterUpgrade), string(OpNodepoolAdd), - string(OpNodepoolDelete), string(OpNodepoolScale), string(OpNodepoolUpgrade), - string(OpAccountSet), string(OpLogin), + string(OpClusterUpdate), string(OpClusterUpgrade), string(OpClusterStart), + string(OpClusterStop), string(OpNodepoolAdd), string(OpNodepoolDelete), + string(OpNodepoolScale), string(OpNodepoolUpgrade), string(OpAccountSet), + string(OpLogin), } adminOps := []string{ @@ -177,6 +180,8 @@ func MapOperationToCommand(operation string) (string, error) { string(OpClusterCreate): "az aks create", string(OpClusterDelete): "az aks delete", string(OpClusterScale): "az aks scale", + string(OpClusterStart): "az aks start", + string(OpClusterStop): "az aks stop", string(OpClusterUpdate): "az aks update", 
string(OpClusterUpgrade): "az aks upgrade", string(OpClusterGetVersions): "az aks get-versions", @@ -210,9 +215,9 @@ func GetSupportedOperations() []string { return []string{ // Cluster operations string(OpClusterShow), string(OpClusterList), string(OpClusterCreate), - string(OpClusterDelete), string(OpClusterScale), string(OpClusterUpdate), - string(OpClusterUpgrade), string(OpClusterGetVersions), string(OpClusterCheckNetwork), - string(OpClusterGetCredentials), + string(OpClusterDelete), string(OpClusterScale), string(OpClusterStart), + string(OpClusterStop), string(OpClusterUpdate), string(OpClusterUpgrade), + string(OpClusterGetVersions), string(OpClusterCheckNetwork), string(OpClusterGetCredentials), // Nodepool operations string(OpNodepoolList), string(OpNodepoolShow), string(OpNodepoolAdd), string(OpNodepoolDelete), string(OpNodepoolScale), string(OpNodepoolUpgrade), diff --git a/internal/components/azaks/registry_test.go b/internal/components/azaks/registry_test.go index 8e6a782..3e55b4b 100644 --- a/internal/components/azaks/registry_test.go +++ b/internal/components/azaks/registry_test.go @@ -25,7 +25,7 @@ func TestGetSupportedOperations_ContainsExpectedOps(t *testing.T) { operations := GetSupportedOperations() expectedOps := []string{ - "show", "list", "create", "delete", "scale", "update", "upgrade", + "show", "list", "create", "delete", "scale", "start", "stop", "update", "upgrade", "nodepool-list", "nodepool-show", "nodepool-add", "nodepool-delete", "account-list", "account-set", "login", "get-credentials", } @@ -56,6 +56,12 @@ func TestValidateOperationAccess_ChecksAccessLevels(t *testing.T) { {"create", "readonly", false}, {"create", "readwrite", true}, {"create", "admin", true}, + {"start", "readonly", false}, + {"start", "readwrite", true}, + {"start", "admin", true}, + {"stop", "readonly", false}, + {"stop", "readwrite", true}, + {"stop", "admin", true}, {"get-credentials", "readonly", false}, {"get-credentials", "readwrite", false}, 
{"get-credentials", "admin", true}, diff --git a/internal/components/inspektorgadget/handlers.go b/internal/components/inspektorgadget/handlers.go index bbedf53..3107f9d 100644 --- a/internal/components/inspektorgadget/handlers.go +++ b/internal/components/inspektorgadget/handlers.go @@ -93,7 +93,7 @@ func handleRunAction(ctx context.Context, mgr GadgetManager, actionParams map[st gadget, ok := getGadgetByName(gadgetName) if !ok { - return "", fmt.Errorf("invalid or unsupported gadget name: %s", gadgetName) + return "", fmt.Errorf("invalid or unsupported gadget name: %s: expected one of %v", gadgetName, getGadgetNames()) } duration, ok := actionParams["duration"].(float64) @@ -132,7 +132,7 @@ func handleStartAction(ctx context.Context, mgr GadgetManager, actionParams map[ gadget, ok := getGadgetByName(gadgetName) if !ok { - return "", fmt.Errorf("invalid or unsupported gadget name: %s", gadgetName) + return "", fmt.Errorf("invalid or unsupported gadget name: %s: expected one of %v", gadgetName, getGadgetNames()) } // TODO: Use GetGadgetInfo to validate gadgetParams to ensure compatibility with different gadget versions diff --git a/internal/components/inspektorgadget/registry.go b/internal/components/inspektorgadget/registry.go index d301ad1..c9f3bb2 100644 --- a/internal/components/inspektorgadget/registry.go +++ b/internal/components/inspektorgadget/registry.go @@ -1,6 +1,9 @@ package inspektorgadget -import "github.com/mark3labs/mcp-go/mcp" +import ( + "github.com/mark3labs/mcp-go/mcp" + "strings" +) // ============================================================================= // Inspektor Gadget related Tool Registrations @@ -10,15 +13,27 @@ import "github.com/mark3labs/mcp-go/mcp" func RegisterInspektorGadgetTool() mcp.Tool { return mcp.NewTool( "inspektor_gadget_observability", - mcp.WithDescription("Real-time observability tool for Azure Kubernetes Service (AKS) clusters, allowing users to manage gadgets for monitoring and debugging"), + 
mcp.WithDescription("Real-time observability tool for Azure Kubernetes Service (AKS) clusters, allowing users to manage gadgets for monitoring and debugging\n\n"+ + "Apart from 'action' param:\n\n"+ + "It supports 'action_params' (type=object) to specify parameters for the action."+ + "Available params are: "+ + "gadget_name, duration, gadget_id, chart_version. "+ + "Available Gadget names are: "+strings.Join(getGadgetNames(), ", ")+". "+ + "Example: "+ + "{'action': 'run', 'action_params': {'gadget_name': 'observe_dns', 'duration': 10}}\n\n"+ + "It supports 'filter_params' (type=object) to filter the data captured by the gadget. "+ + "Available params are: "+ + "namespace, pod, container, selector, "+strings.Join(getGadgetParamsKeys(), ", ")+". "+ + "Example: "+ + "{'action': 'run', 'filter_params': {'namespace': 'default', 'selector': 'app=myapp', 'observe_dns.unsuccessful_only': true}}"), mcp.WithString("action", mcp.Required(), mcp.Description("Action to perform on the gadget: "+ runAction+" to run a gadget for a specific duration, "+ startAction+" to start a gadget for continuous observation, "+ - stopAction+" to stop a running gadget, "+ - getResultsAction+" to retrieve results of a gadget run (only available before stopping the gadget), "+ - listGadgetsAction+" to list all running gadgets"+ + stopAction+" to stop a running gadget using gadget_id, "+ + getResultsAction+" to retrieve results of a gadget run using gadget_id (only available before stopping the gadget), "+ + listGadgetsAction+" to list all running gadgets, "+ deployAction+" to deploy Inspektor Gadget, "+ undeployAction+" to undeploy Inspektor Gadget"+ upgradeAction+" to upgrade Inspektor Gadget, "+ @@ -28,7 +43,6 @@ func RegisterInspektorGadgetTool() mcp.Tool { ), mcp.WithObject("action_params", mcp.Description("Parameters for the action"), - mcp.Required(), mcp.Properties(map[string]any{ "gadget_name": map[string]any{ "type": "string", diff --git a/internal/config/config.go 
b/internal/config/config.go index c22eb88..967fd21 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -2,12 +2,15 @@ package config import ( "context" + "fmt" "log" + "os" "strings" "time" "github.com/Azure/aks-mcp/internal/security" "github.com/Azure/aks-mcp/internal/telemetry" + "github.com/Azure/aks-mcp/internal/version" flag "github.com/spf13/pflag" ) @@ -78,7 +81,33 @@ func (cfg *ConfigData) ParseFlags() { // OTLP settings flag.StringVar(&cfg.OTLPEndpoint, "otlp-endpoint", "", "OTLP endpoint for OpenTelemetry traces (e.g. localhost:4317)") - flag.Parse() + // Custom help handling + var showHelp bool + flag.BoolVarP(&showHelp, "help", "h", false, "Show help message") + + // Version flag + showVersion := flag.Bool("version", false, "Show version information and exit") + + // Parse flags and handle errors properly + err := flag.CommandLine.Parse(os.Args[1:]) + if err != nil { + fmt.Printf("\nUsage of %s:\n", os.Args[0]) + flag.PrintDefaults() + os.Exit(1) + } + + // Handle help manually with proper exit code + if showHelp { + fmt.Printf("Usage of %s:\n", os.Args[0]) + flag.PrintDefaults() + os.Exit(0) + } + + // Handle version flag + if *showVersion { + cfg.PrintVersion() + os.Exit(0) + } // Update security config cfg.SecurityConfig.AccessLevel = cfg.AccessLevel @@ -113,3 +142,13 @@ func (cfg *ConfigData) InitializeTelemetry(ctx context.Context, serviceName, ser // Track MCP server startup cfg.TelemetryService.TrackServiceStartup(ctx) } + +// PrintVersion prints version information +func (cfg *ConfigData) PrintVersion() { + versionInfo := version.GetVersionInfo() + fmt.Printf("aks-mcp version %s\n", versionInfo["version"]) + fmt.Printf("Git commit: %s\n", versionInfo["gitCommit"]) + fmt.Printf("Git tree state: %s\n", versionInfo["gitTreeState"]) + fmt.Printf("Go version: %s\n", versionInfo["goVersion"]) + fmt.Printf("Platform: %s\n", versionInfo["platform"]) +} diff --git a/internal/k8s/adapter.go b/internal/k8s/adapter.go index 
fcf7b46..98b9841 100644 --- a/internal/k8s/adapter.go +++ b/internal/k8s/adapter.go @@ -1,3 +1,6 @@ +// Package k8s provides adapters that let aks-mcp interoperate with the +// mcp-kubernetes libraries. It maps aks-mcp configuration and executors +// to the types expected by mcp-kubernetes without altering behavior. package k8s import ( @@ -9,16 +12,13 @@ import ( k8stools "github.com/Azure/mcp-kubernetes/pkg/tools" ) -// ConfigAdapter converts aks-mcp config to mcp-kubernetes config +// ConvertConfig maps an aks-mcp ConfigData into the equivalent +// mcp-kubernetes ConfigData without mutating the input. func ConvertConfig(cfg *config.ConfigData) *k8sconfig.ConfigData { - // Create K8s security config k8sSecurityConfig := k8ssecurity.NewSecurityConfig() - - // Map allowed namespaces k8sSecurityConfig.SetAllowedNamespaces(cfg.AllowNamespaces) k8sSecurityConfig.AccessLevel = k8ssecurity.AccessLevel(cfg.AccessLevel) - // Create K8s config k8sCfg := &k8sconfig.ConfigData{ AdditionalTools: cfg.AdditionalTools, Timeout: cfg.Timeout, @@ -35,20 +35,21 @@ func ConvertConfig(cfg *config.ConfigData) *k8sconfig.ConfigData { return k8sCfg } -// WrapK8sExecutor wraps a mcp-kubernetes executor to work with aks-mcp config +// WrapK8sExecutor makes an mcp-kubernetes CommandExecutor +// compatible with the aks-mcp tools.CommandExecutor interface. func WrapK8sExecutor(k8sExecutor k8stools.CommandExecutor) tools.CommandExecutor { return &executorAdapter{k8sExecutor: k8sExecutor} } -// executorAdapter adapts between aks-mcp and mcp-kubernetes configs +// executorAdapter bridges aks-mcp execution to mcp-kubernetes. +// Unexported; behavior is defined by the wrapped executor. type executorAdapter struct { k8sExecutor k8stools.CommandExecutor } +// Execute adapts aks-mcp execution by converting its config +// and delegating to the wrapped mcp-kubernetes executor. 
func (a *executorAdapter) Execute(params map[string]interface{}, cfg *config.ConfigData) (string, error) { - // Convert aks-mcp config to k8s config k8sCfg := ConvertConfig(cfg) - - // Execute using the k8s executor return a.k8sExecutor.Execute(params, k8sCfg) } diff --git a/internal/k8s/adapter_test.go b/internal/k8s/adapter_test.go new file mode 100644 index 0000000..703fdd6 --- /dev/null +++ b/internal/k8s/adapter_test.go @@ -0,0 +1,232 @@ +package k8s + +import ( + "errors" + "reflect" + "testing" + + "github.com/Azure/aks-mcp/internal/config" + k8sconfig "github.com/Azure/mcp-kubernetes/pkg/config" + k8ssecurity "github.com/Azure/mcp-kubernetes/pkg/security" + k8stools "github.com/Azure/mcp-kubernetes/pkg/tools" +) + +var benchOut *k8sconfig.ConfigData + +// This test suite verifies config mapping (without mutating input), adapter delegation, +// error propagation, and current nil-config behavior. The benchmark provides a baseline +// for detecting performance regressions. + +// mustEqual keeps assertions concise with consistent failure messages. +func mustEqual[T comparable](t *testing.T, got, want T, msg string) { + t.Helper() + if got != want { + t.Fatalf("%s: got %v, want %v", msg, got, want) + } +} + +// mustDeepEqual keeps deep-structure assertions concise with consistent messages. +func mustDeepEqual(t *testing.T, got, want interface{}, msg string) { + t.Helper() + if !reflect.DeepEqual(got, want) { + t.Fatalf("%s: got %#v, want %#v", msg, got, want) + } +} + +// fakeExecutor captures inputs and returns preset output/error to observe delegation. 
+type fakeExecutor struct { + lastParams map[string]interface{} + lastCfg *k8sconfig.ConfigData + out string + err error +} + +var _ k8stools.CommandExecutor = (*fakeExecutor)(nil) + +func (f *fakeExecutor) Execute(params map[string]interface{}, cfg *k8sconfig.ConfigData) (string, error) { + f.lastParams = params + f.lastCfg = cfg + return f.out, f.err +} + +func TestConvertConfig_MapsFields(t *testing.T) { + t.Parallel() + + in := &config.ConfigData{ + Timeout: 600, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true, "cilium": false}, + AllowNamespaces: "default,platform", + OTLPEndpoint: "otel:4317", + } + + got := ConvertConfig(in) + if got == nil { + t.Fatal("ConvertConfig returned nil") + } + + mustEqual(t, got.Timeout, in.Timeout, "Timeout") + mustEqual(t, got.Transport, in.Transport, "Transport") + mustEqual(t, got.Host, in.Host, "Host") + mustEqual(t, got.Port, in.Port, "Port") + mustEqual(t, got.AccessLevel, in.AccessLevel, "AccessLevel") + mustEqual(t, got.OTLPEndpoint, in.OTLPEndpoint, "OTLPEndpoint") + mustDeepEqual(t, got.AdditionalTools, in.AdditionalTools, "AdditionalTools") + mustEqual(t, got.AllowNamespaces, in.AllowNamespaces, "AllowNamespaces") + + if got.SecurityConfig == nil { + t.Fatal("SecurityConfig is nil") + } + mustEqual(t, got.SecurityConfig.AccessLevel, k8ssecurity.AccessLevel(in.AccessLevel), "SecurityConfig.AccessLevel") +} + +func TestConvertConfig_DoesNotMutateInput(t *testing.T) { + t.Parallel() + + in := &config.ConfigData{ + Timeout: 42, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true}, + AllowNamespaces: "default", + OTLPEndpoint: "otel:4317", + } + + // Verify the “no input mutation” guarantee by comparing to a copy. 
+ orig := *in + orig.AdditionalTools = map[string]bool{} + for k, v := range in.AdditionalTools { + orig.AdditionalTools[k] = v + } + + out := ConvertConfig(in) + mustDeepEqual(t, in, &orig, "input should remain unchanged") + + if out == nil || out.SecurityConfig == nil { + t.Fatalf("expected non-nil output and SecurityConfig, got %#v", out) + } + + mustEqual(t, out.Timeout, in.Timeout, "Timeout") + mustEqual(t, out.Transport, in.Transport, "Transport") + mustEqual(t, out.Host, in.Host, "Host") + mustEqual(t, out.Port, in.Port, "Port") + mustEqual(t, out.AccessLevel, in.AccessLevel, "AccessLevel") + mustEqual(t, out.OTLPEndpoint, in.OTLPEndpoint, "OTLPEndpoint") + mustEqual(t, out.AllowNamespaces, in.AllowNamespaces, "AllowNamespaces") + mustDeepEqual(t, out.AdditionalTools, in.AdditionalTools, "AdditionalTools") + mustEqual(t, out.SecurityConfig.AccessLevel, k8ssecurity.AccessLevel(in.AccessLevel), "SecurityConfig.AccessLevel") +} + +func TestConvertConfig_ZeroValueCfg(t *testing.T) { + t.Parallel() + + // Document current behavior when callers pass an uninitialized config. 
+ in := &config.ConfigData{} + orig := *in + + out := ConvertConfig(in) + + mustDeepEqual(t, in, &orig, "input unchanged") + + if out == nil || out.SecurityConfig == nil { + t.Fatalf("non-nil out and SecurityConfig required, got %#v", out) + } + + mustEqual(t, out.Timeout, 0, "Timeout") + mustEqual(t, out.Transport, "", "Transport") + mustEqual(t, out.Host, "", "Host") + mustEqual(t, out.Port, 0, "Port") + mustEqual(t, out.AccessLevel, "", "AccessLevel") + mustEqual(t, out.OTLPEndpoint, "", "OTLPEndpoint") + mustEqual(t, out.AllowNamespaces, "", "AllowNamespaces") + mustDeepEqual(t, out.AdditionalTools, map[string]bool(nil), "AdditionalTools") + mustEqual(t, out.SecurityConfig.AccessLevel, k8ssecurity.AccessLevel(""), "SecurityConfig.AccessLevel") +} + +func TestExecutorAdapter_DelegatesAndForwards(t *testing.T) { + t.Parallel() + + fe := &fakeExecutor{out: "ok"} + adapter := WrapK8sExecutor(fe) + + params := map[string]interface{}{"k": "v"} + inCfg := &config.ConfigData{ + Timeout: 10, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true}, + AllowNamespaces: "default", + } + + got, err := adapter.Execute(params, inCfg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + mustEqual(t, got, "ok", "adapter output") + mustDeepEqual(t, fe.lastParams, params, "params forwarded") + + if fe.lastCfg == nil || fe.lastCfg.SecurityConfig == nil { + t.Fatalf("expected non-nil converted cfg + SecurityConfig, got %#v", fe.lastCfg) + } + + mustEqual(t, fe.lastCfg.Port, inCfg.Port, "Port") + mustEqual(t, fe.lastCfg.AccessLevel, inCfg.AccessLevel, "AccessLevel") + mustDeepEqual(t, fe.lastCfg.AdditionalTools, inCfg.AdditionalTools, "AdditionalTools") + mustEqual(t, fe.lastCfg.AllowNamespaces, inCfg.AllowNamespaces, "AllowNamespaces") + mustEqual(t, fe.lastCfg.SecurityConfig.AccessLevel, k8ssecurity.AccessLevel("readonly"), "SecurityConfig.AccessLevel") +} + +func 
TestExecutorAdapter_PropagatesError(t *testing.T) { + t.Parallel() + + fe := &fakeExecutor{err: errors.New("boom")} + adapter := WrapK8sExecutor(fe) + + _, err := adapter.Execute(map[string]interface{}{"x": 1}, &config.ConfigData{}) + if err == nil { + t.Fatalf("expected error, got nil") + } +} + +func TestExecutorAdapter_PanicsOnNilConfig_CurrentBehavior(t *testing.T) { + t.Parallel() + + // Document the current precondition: cfg must be non-nil. + defer func() { + if r := recover(); r == nil { + t.Fatalf("expected panic when cfg is nil") + } + }() + + fe := &fakeExecutor{} + adapter := WrapK8sExecutor(fe) + _, _ = adapter.Execute(map[string]interface{}{"x": 1}, nil) +} + +// BenchmarkConvertConfig tracks drift in allocation/time costs over time. +// Helps detect subtle regressions when config mapping logic evolves. +func BenchmarkConvertConfig(b *testing.B) { + in := &config.ConfigData{ + Timeout: 600, + Transport: "stdio", + Host: "127.0.0.1", + Port: 8000, + AccessLevel: "readonly", + AdditionalTools: map[string]bool{"helm": true, "cilium": false}, + AllowNamespaces: "default,platform", + OTLPEndpoint: "otel:4317", + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchOut = ConvertConfig(in) + } +} diff --git a/internal/prompts/health.go b/internal/prompts/health.go new file mode 100644 index 0000000..6a6a437 --- /dev/null +++ b/internal/prompts/health.go @@ -0,0 +1,90 @@ +package prompts + +import ( + "context" + + "github.com/Azure/aks-mcp/internal/config" + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" +) + +// RegisterHealthPrompts registers comprehensive AKS cluster health assessment prompts. 
+func RegisterHealthPrompts(s *server.MCPServer, cfg *config.ConfigData) { + // Prompt: check_cluster_health + s.AddPrompt(mcp.NewPrompt("check_cluster_health", + mcp.WithPromptDescription("Comprehensive AKS cluster health assessment including platform health, diagnostics, cluster detectors, node health, and connectivity analysis"), + ), func(ctx context.Context, request mcp.GetPromptRequest) (*mcp.GetPromptResult, error) { + promptContent := `# Comprehensive AKS Cluster Health Assessment + +This guide performs a thorough health evaluation of your AKS cluster across multiple dimensions: cluster metadata, platform health, diagnostics configuration, detector analysis, and provides actionable recommendations. + +## Steps + +### 1. Retrieve Control Plane FQDN +Invoke kubectl_cluster tool: +{ + "operation": "cluster-info", + "resource": "", + "args": "" +} +Extract the Kubernetes control plane endpoint URL (FQDN) for cluster identification. + +### 2. Identify AKS Cluster Metadata +Invoke az_aks_operations tool: +{ + "operation": "list", + "args": "--query \"[].{id:id, fqdn:fqdn, resourceGroup:resourceGroup, name:name}\" -o json" +} +Match the control plane FQDN from Step 1 with the cluster list to determine subscription ID, resource group, and cluster name. Extract the full AKS resource ID for subsequent steps. + +### 3. Check Azure Resource Health Status +Invoke az_monitoring tool: +{ + "operation": "resource_health", + "subscription_id": "", + "resource_group": "", + "cluster_name": "", + "parameters": "{\"start_time\":\"\",\"end_time\":\"\"}" +} +Analyze: Identify any Azure platform incidents, service health issues, or resource degradation events that may impact cluster availability. + +### 4. 
Run Cluster and Control Plane Availability Detectors +Invoke run_detectors_by_category tool: +{ + "cluster_resource_id": "", + "category": "Cluster and Control Plane Availability and Performance", + "start_time": "", + "end_time": "" +} +Analyze: Review API server responsiveness, control plane scaling issues, etcd health, and cluster networking performance problems. + +### 5. Run Node Health Detectors +Invoke run_detectors_by_category tool: +{ + "cluster_resource_id": "", + "category": "Node Health", + "start_time": "", + "end_time": "" +} +Analyze: Examine node readiness issues, kubelet problems, container runtime health, disk pressure, memory pressure, and node pool scaling issues. + +### 6. Run Connectivity Issue Detectors +Invoke run_detectors_by_category tool: +{ + "cluster_resource_id": "", + "category": "Connectivity Issues", + "start_time": "", + "end_time": "" +} +Analyze: Investigate DNS resolution problems, network policy conflicts, ingress/egress connectivity, load balancer issues, and service mesh problems. + +### 7. Generate Comprehensive Health Report + +Generate a comprehensive health report and recommendations based on the findings from the previous steps. + +Provide specific commands, configurations, or Azure portal links where applicable for implementing recommendations. 
+` + return &mcp.GetPromptResult{Description: "Comprehensive AKS cluster health assessment including platform health, diagnostics, availability detectors, node health, and connectivity analysis", Messages: []mcp.PromptMessage{{Role: mcp.RoleAssistant, Content: mcp.TextContent{Type: "text", Text: promptContent}}}}, nil + }) + +} diff --git a/internal/server/server.go b/internal/server/server.go index d69fbc3..f9942a5 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -1,8 +1,11 @@ package server import ( + "encoding/json" "fmt" "log" + "net/http" + "time" "github.com/Azure/aks-mcp/internal/azcli" "github.com/Azure/aks-mcp/internal/azureclient" @@ -28,16 +31,29 @@ import ( // Service represents the AKS MCP service type Service struct { - cfg *config.ConfigData - mcpServer *server.MCPServer - azClient *azureclient.AzureClient + cfg *config.ConfigData + mcpServer *server.MCPServer + azClient *azureclient.AzureClient + azcliProcFactory func(timeout int) azcli.Proc } -// NewService creates a new AKS MCP service -func NewService(cfg *config.ConfigData) *Service { - return &Service{ - cfg: cfg, +// ServiceOption defines a function that configures the AKS MCP service +type ServiceOption func(*Service) + +// WithAzCliProcFactory allows callers to inject a Proc factory for azcli execution. +// The factory returns an azcli.Proc which can be faked in tests. +func WithAzCliProcFactory(f func(timeout int) azcli.Proc) ServiceOption { + return func(s *Service) { s.azcliProcFactory = f } +} + +// NewService creates a new AKS MCP service with the provided configuration and options. +// Options can be used to inject dependencies like azcli execution factories. 
+func NewService(cfg *config.ConfigData, opts ...ServiceOption) *Service { + s := &Service{cfg: cfg} + for _, opt := range opts { + opt(s) } + return s } // Initialize initializes the service @@ -61,11 +77,28 @@ func (s *Service) initializeInfrastructure() error { // Create shared Azure client azClient, err := azureclient.NewAzureClient(s.cfg) if err != nil { - return fmt.Errorf("failed to create Azure client: %v", err) + return fmt.Errorf("failed to create Azure client: %w", err) } s.azClient = azClient log.Println("Azure client initialized successfully") + // Ensure Azure CLI exists and is logged in + if s.azcliProcFactory != nil { + // Use injected factory to create an azcli.Proc + proc := s.azcliProcFactory(s.cfg.Timeout) + if loginType, err := azcli.EnsureAzCliLoginWithProc(proc, s.cfg); err != nil { + return fmt.Errorf("azure cli authentication failed: %w", err) + } else { + log.Printf("Azure CLI initialized successfully (%s)", loginType) + } + } else { + if loginType, err := azcli.EnsureAzCliLogin(s.cfg); err != nil { + return fmt.Errorf("azure cli authentication failed: %w", err) + } else { + log.Printf("Azure CLI initialized successfully (%s)", loginType) + } + } + // Create MCP server s.mcpServer = server.NewMCPServer( "AKS MCP", @@ -96,8 +129,83 @@ func (s *Service) registerAllComponents() { func (s *Service) registerPrompts() { log.Println("Registering Prompts...") - log.Println("Registering prompt: query_aks_cluster_metadata_from_kubeconfig") + log.Println("Registering config prompts (query_aks_cluster_metadata_from_kubeconfig)") prompts.RegisterQueryAKSMetadataFromKubeconfigPrompt(s.mcpServer, s.cfg) + + log.Println("Registering health prompts (check_cluster_health)") + prompts.RegisterHealthPrompts(s.mcpServer, s.cfg) +} + +// createCustomHTTPServerWithHelp404 creates a custom HTTP server that provides +// helpful 404 responses for the MCP server +func (s *Service) createCustomHTTPServerWithHelp404(addr string) *http.Server { + mux := 
http.NewServeMux() + + // Handle all other paths with a helpful 404 response + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/mcp" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + + response := map[string]interface{}{ + "error": "Not Found", + "message": "This is an MCP (Model Context Protocol) server. Please send POST requests to /mcp to initialize a session and obtain an Mcp-Session-Id for subsequent requests.", + "endpoints": map[string]string{ + "initialize": "POST /mcp - Initialize MCP session", + "requests": "POST /mcp - Send MCP requests (requires Mcp-Session-Id header)", + "listen": "GET /mcp - Listen for notifications (requires Mcp-Session-Id header)", + "terminate": "DELETE /mcp - Terminate session (requires Mcp-Session-Id header)", + }, + } + + if err := json.NewEncoder(w).Encode(response); err != nil { + http.Error(w, "Failed to encode response", http.StatusInternalServerError) + } + } + }) + + return &http.Server{ + Addr: addr, + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, + } +} + +// createCustomSSEServerWithHelp404 creates a custom HTTP server for SSE that provides +// helpful 404 responses for non-MCP endpoints +func (s *Service) createCustomSSEServerWithHelp404(sseServer *server.SSEServer, addr string) *http.Server { + mux := http.NewServeMux() + + // Register SSE and Message handlers + mux.Handle("/sse", sseServer.SSEHandler()) + mux.Handle("/message", sseServer.MessageHandler()) + + // Handle all other paths with a helpful 404 response + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/sse" && r.URL.Path != "/message" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + + response := map[string]interface{}{ + "error": "Not Found", + "message": "This is an MCP (Model Context Protocol) server using SSE transport. 
Use the SSE endpoint to establish connections and the message endpoint to send requests.", + "endpoints": map[string]string{ + "sse": "GET /sse - Establish SSE connection for real-time notifications", + "message": "POST /message - Send MCP JSON-RPC messages", + }, + } + + if err := json.NewEncoder(w).Encode(response); err != nil { + http.Error(w, "Failed to encode response", http.StatusInternalServerError) + } + } + }) + + return &http.Server{ + Addr: addr, + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, + } } // Run starts the service with the specified transport @@ -107,19 +215,45 @@ func (s *Service) Run() error { // Start the server switch s.cfg.Transport { case "stdio": - log.Println("AKS MCP version:", version.GetVersion()) log.Println("Listening for requests on STDIO...") return server.ServeStdio(s.mcpServer) case "sse": - sse := server.NewSSEServer(s.mcpServer) addr := fmt.Sprintf("%s:%d", s.cfg.Host, s.cfg.Port) + + // Create SSE server first + sse := server.NewSSEServer(s.mcpServer) + + // Create custom HTTP server with helpful 404 responses + customServer := s.createCustomSSEServerWithHelp404(sse, addr) + log.Printf("SSE server listening on %s", addr) - return sse.Start(addr) + log.Printf("SSE endpoint available at: http://%s/sse", addr) + log.Printf("Message endpoint available at: http://%s/message", addr) + log.Printf("Connect to /sse for real-time events, send JSON-RPC to /message") + + return customServer.ListenAndServe() case "streamable-http": - streamableServer := server.NewStreamableHTTPServer(s.mcpServer) addr := fmt.Sprintf("%s:%d", s.cfg.Host, s.cfg.Port) + + // Create a custom HTTP server with helpful 404 responses + customServer := s.createCustomHTTPServerWithHelp404(addr) + + // Create the streamable HTTP server with the custom HTTP server + streamableServer := server.NewStreamableHTTPServer( + s.mcpServer, + server.WithStreamableHTTPServer(customServer), + ) + + // Update the mux to use the actual streamable server as the MCP 
handler + if mux, ok := customServer.Handler.(*http.ServeMux); ok { + mux.Handle("/mcp", streamableServer) + } + log.Printf("Streamable HTTP server listening on %s", addr) - return streamableServer.Start(addr) + log.Printf("MCP endpoint available at: http://%s/mcp", addr) + log.Printf("Send POST requests to /mcp to initialize session and obtain Mcp-Session-Id") + + return customServer.ListenAndServe() default: return fmt.Errorf("invalid transport type: %s (must be 'stdio', 'sse' or 'streamable-http')", s.cfg.Transport) } diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 4a6313c..6b863a7 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -1,12 +1,18 @@ package server import ( + "encoding/json" + "net/http" + "net/http/httptest" "os" + "strings" "testing" + "github.com/Azure/aks-mcp/internal/azcli" "github.com/Azure/aks-mcp/internal/components/azaks" "github.com/Azure/aks-mcp/internal/config" "github.com/Azure/mcp-kubernetes/pkg/kubectl" + "github.com/mark3labs/mcp-go/server" ) // MockToolCounter tracks registered tools for testing @@ -62,11 +68,12 @@ func (m *MockToolCounter) GetToolNames() []string { // TestService tests the service initialization and expected tool counts func TestService(t *testing.T) { - // Set environment variables for testing to avoid Azure auth issues - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + // Set environment variables for testing to avoid Azure auth issues. + // These are dummy values for tests only — do not commit real credentials here. 
+ _ = os.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + _ = os.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + _ = os.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") defer func() { _ = os.Unsetenv("AZURE_TENANT_ID") _ = os.Unsetenv("AZURE_CLIENT_ID") @@ -121,6 +128,12 @@ func TestService(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + // Create test configuration + cfg := createTestConfig(tt.accessLevel, tt.additionalTools) + + // Create service with injected fake Proc factory so Initialize doesn't call the real az binary + service := NewService(cfg, WithAzCliProcFactory(func(timeout int) azcli.Proc { return &fakeProc{} })) + // Calculate expected kubectl tools count kubectlTools := kubectl.RegisterKubectlTools(tt.accessLevel) expectedKubectlCount := len(kubectlTools) @@ -142,11 +155,10 @@ func TestService(t *testing.T) { t.Logf("Expected total K8s tools: %d", expectedTotalK8sTools) t.Logf("Expected Azure tools: %d", tt.expectedAzureTools) - // Create test configuration - cfg := createTestConfig(tt.accessLevel, tt.additionalTools) - - // Create service - service := NewService(cfg) + // If service wasn't created above (some test paths), create it with the fake factory + if service == nil { + service = NewService(cfg, WithAzCliProcFactory(func(timeout int) azcli.Proc { return &fakeProc{} })) + } // Verify service was created properly if service == nil { //nolint:staticcheck // False positive: t.Fatal prevents nil dereference @@ -171,6 +183,18 @@ func TestService(t *testing.T) { } } +// fakeProc is a minimal Proc implementation for tests. 
+type fakeProc struct{} + +func (f *fakeProc) Run(cmd string) (string, error) { + // For probing account show, return a non-error id + if cmd == "account show --query id -o tsv" { + return "00000000-0000-0000-0000-000000000000", nil + } + // For any other command, return empty output and nil error to simulate success + return "", nil +} + // TestComponentToolCounts tests individual component tool registration counts func TestComponentToolCounts(t *testing.T) { t.Run("AzureComponents", func(t *testing.T) { @@ -250,11 +274,11 @@ func TestAKSOperationsAccess(t *testing.T) { // TestServiceInitialization tests basic service initialization func TestServiceInitialization(t *testing.T) { - // Set test environment variables - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + // Set test environment variables (dummy values) + _ = os.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + _ = os.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + _ = os.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") defer func() { _ = os.Unsetenv("AZURE_TENANT_ID") _ = os.Unsetenv("AZURE_CLIENT_ID") @@ -263,7 +287,7 @@ func TestServiceInitialization(t *testing.T) { }() cfg := createTestConfig("readonly", map[string]bool{}) - service := NewService(cfg) + service := NewService(cfg, WithAzCliProcFactory(func(timeout int) azcli.Proc { return &fakeProc{} })) // Test service creation - must be non-nil if service == nil { //nolint:staticcheck // False positive: t.Fatal prevents nil dereference @@ -347,11 +371,11 @@ func containsPrefix(s string, prefix string) bool { // BenchmarkServiceInitialization benchmarks service initialization func BenchmarkServiceInitialization(b *testing.B) { - // Set test environment variables - _ = os.Setenv("AZURE_TENANT_ID", "test-tenant") - _ = 
os.Setenv("AZURE_CLIENT_ID", "test-client") - _ = os.Setenv("AZURE_CLIENT_SECRET", "test-secret") - _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "test-subscription") + // Set test environment variables for benchmark (dummy values) + _ = os.Setenv("AZURE_TENANT_ID", "dummy-tenant-id") + _ = os.Setenv("AZURE_CLIENT_ID", "dummy-client-id") + _ = os.Setenv("AZURE_CLIENT_SECRET", "dummy-client-secret") + _ = os.Setenv("AZURE_SUBSCRIPTION_ID", "dummy-subscription-id") defer func() { _ = os.Unsetenv("AZURE_TENANT_ID") _ = os.Unsetenv("AZURE_CLIENT_ID") @@ -363,10 +387,373 @@ func BenchmarkServiceInitialization(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - service := NewService(cfg) + service := NewService(cfg, WithAzCliProcFactory(func(timeout int) azcli.Proc { return &fakeProc{} })) err := service.Initialize() if err != nil { b.Fatalf("Initialize failed: %v", err) } } } + +// TestCreateCustomHTTPServerWithHelp404 tests the custom HTTP server creation for streamable-http transport +func TestCreateCustomHTTPServerWithHelp404(t *testing.T) { + cfg := createTestConfig("readonly", map[string]bool{}) + service := NewService(cfg) + err := service.Initialize() + if err != nil { + t.Fatalf("Failed to initialize service: %v", err) + } + + // Test server creation + addr := "localhost:8080" + customServer := service.createCustomHTTPServerWithHelp404(addr) + + if customServer == nil { + t.Fatal("Custom server should not be nil") + } + + if customServer.Addr != addr { + t.Errorf("Expected server address %s, got %s", addr, customServer.Addr) + } + + if customServer.Handler == nil { + t.Fatal("Custom server handler should not be nil") + } + + // Test the 404 response for non-MCP paths + testCases := []struct { + path string + method string + expectedStatusCode int + expectedContentType string + description string + }{ + {"/", "GET", http.StatusNotFound, "application/json", "root path should return helpful 404"}, + {"/invalid", "GET", http.StatusNotFound, "application/json", 
"invalid path should return helpful 404"}, + {"/api", "POST", http.StatusNotFound, "application/json", "non-MCP path should return helpful 404"}, + {"/health", "GET", http.StatusNotFound, "application/json", "health path should return helpful 404"}, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + req, err := http.NewRequest(tc.method, tc.path, nil) + if err != nil { + t.Fatalf("Failed to create request: %v", err) + } + + rr := httptest.NewRecorder() + customServer.Handler.ServeHTTP(rr, req) + + if rr.Code != tc.expectedStatusCode { + t.Errorf("Expected status code %d, got %d", tc.expectedStatusCode, rr.Code) + } + + contentType := rr.Header().Get("Content-Type") + if contentType != tc.expectedContentType { + t.Errorf("Expected content type %s, got %s", tc.expectedContentType, contentType) + } + + // Parse and validate JSON response + var response map[string]interface{} + err = json.Unmarshal(rr.Body.Bytes(), &response) + if err != nil { + t.Fatalf("Failed to parse JSON response: %v", err) + } + + // Check required fields + if response["error"] != "Not Found" { + t.Errorf("Expected error 'Not Found', got %v", response["error"]) + } + + message, ok := response["message"].(string) + if !ok { + t.Fatal("Message field should be a string") + } + if !strings.Contains(message, "MCP") { + t.Error("Message should mention MCP") + } + if !strings.Contains(message, "/mcp") { + t.Error("Message should mention /mcp endpoint") + } + + endpoints, ok := response["endpoints"].(map[string]interface{}) + if !ok { + t.Fatal("Endpoints field should be a map") + } + + expectedEndpoints := []string{"initialize", "requests", "listen", "terminate"} + for _, endpoint := range expectedEndpoints { + if _, exists := endpoints[endpoint]; !exists { + t.Errorf("Expected endpoint %s not found in response", endpoint) + } + } + }) + } +} + +// TestCreateCustomSSEServerWithHelp404 tests the custom HTTP server creation for SSE transport +func 
TestCreateCustomSSEServerWithHelp404(t *testing.T) { + cfg := createTestConfig("readonly", map[string]bool{}) + service := NewService(cfg) + err := service.Initialize() + if err != nil { + t.Fatalf("Failed to initialize service: %v", err) + } + + // Create SSE server + sseServer := server.NewSSEServer(service.mcpServer) + + // Test custom server creation + addr := "localhost:8081" + customServer := service.createCustomSSEServerWithHelp404(sseServer, addr) + + if customServer == nil { + t.Fatal("Custom SSE server should not be nil") + } + + if customServer.Addr != addr { + t.Errorf("Expected server address %s, got %s", addr, customServer.Addr) + } + + if customServer.Handler == nil { + t.Fatal("Custom SSE server handler should not be nil") + } + + // Test the 404 response for non-SSE paths + testCases := []struct { + path string + method string + expectedStatusCode int + expectedContentType string + description string + }{ + {"/", "GET", http.StatusNotFound, "application/json", "root path should return helpful 404"}, + {"/invalid", "GET", http.StatusNotFound, "application/json", "invalid path should return helpful 404"}, + {"/api", "POST", http.StatusNotFound, "application/json", "non-SSE path should return helpful 404"}, + {"/health", "GET", http.StatusNotFound, "application/json", "health path should return helpful 404"}, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + req, err := http.NewRequest(tc.method, tc.path, nil) + if err != nil { + t.Fatalf("Failed to create request: %v", err) + } + + rr := httptest.NewRecorder() + customServer.Handler.ServeHTTP(rr, req) + + if rr.Code != tc.expectedStatusCode { + t.Errorf("Expected status code %d, got %d", tc.expectedStatusCode, rr.Code) + } + + contentType := rr.Header().Get("Content-Type") + if contentType != tc.expectedContentType { + t.Errorf("Expected content type %s, got %s", tc.expectedContentType, contentType) + } + + // Parse and validate JSON response + var response 
map[string]interface{} + err = json.Unmarshal(rr.Body.Bytes(), &response) + if err != nil { + t.Fatalf("Failed to parse JSON response: %v", err) + } + + // Check required fields + if response["error"] != "Not Found" { + t.Errorf("Expected error 'Not Found', got %v", response["error"]) + } + + message, ok := response["message"].(string) + if !ok { + t.Fatal("Message field should be a string") + } + if !strings.Contains(message, "MCP") { + t.Error("Message should mention MCP") + } + if !strings.Contains(message, "SSE") { + t.Error("Message should mention SSE transport") + } + + endpoints, ok := response["endpoints"].(map[string]interface{}) + if !ok { + t.Fatal("Endpoints field should be a map") + } + + expectedEndpoints := []string{"sse", "message"} + for _, endpoint := range expectedEndpoints { + if _, exists := endpoints[endpoint]; !exists { + t.Errorf("Expected endpoint %s not found in response", endpoint) + } + } + + // Verify SSE-specific endpoint descriptions + sseEndpoint, ok := endpoints["sse"].(string) + if !ok { + t.Fatal("SSE endpoint description should be a string") + } + if !strings.Contains(sseEndpoint, "GET /sse") { + t.Error("SSE endpoint should mention GET /sse") + } + + messageEndpoint, ok := endpoints["message"].(string) + if !ok { + t.Fatal("Message endpoint description should be a string") + } + if !strings.Contains(messageEndpoint, "POST /message") { + t.Error("Message endpoint should mention POST /message") + } + }) + } +} + +// TestSSEServerEndpointsAccessible tests that SSE endpoints are still accessible +func TestSSEServerEndpointsAccessible(t *testing.T) { + cfg := createTestConfig("readonly", map[string]bool{}) + service := NewService(cfg) + err := service.Initialize() + if err != nil { + t.Fatalf("Failed to initialize service: %v", err) + } + + // Create SSE server + sseServer := server.NewSSEServer(service.mcpServer) + + // Test custom server creation + addr := "localhost:8082" + customServer := 
service.createCustomSSEServerWithHelp404(sseServer, addr) + + // Test that SSE endpoints are accessible (don't return our custom 404) + // Note: We only test that they don't return our custom 404 response, + // not the actual SSE functionality which would require persistent connections + testCases := []struct { + path string + method string + shouldBe404 bool + description string + }{ + {"/message", "POST", false, "Message endpoint should be handled by SSE server"}, + {"/message", "GET", false, "Message endpoint with GET should be handled by SSE server"}, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + req, err := http.NewRequest(tc.method, tc.path, nil) + if err != nil { + t.Fatalf("Failed to create request: %v", err) + } + + rr := httptest.NewRecorder() + customServer.Handler.ServeHTTP(rr, req) + + if tc.shouldBe404 { + if rr.Code != http.StatusNotFound { + t.Errorf("Expected 404 for %s %s, got %d", tc.method, tc.path, rr.Code) + } + } else { + if rr.Code == http.StatusNotFound { + t.Errorf("Should not get 404 for %s %s, but got %d", tc.method, tc.path, rr.Code) + } + // Additional check: ensure it's not our custom 404 JSON response + if rr.Header().Get("Content-Type") == "application/json" { + var response map[string]interface{} + if json.Unmarshal(rr.Body.Bytes(), &response) == nil { + if response["error"] == "Not Found" && strings.Contains(response["message"].(string), "SSE transport") { + t.Errorf("Got our custom 404 response for %s %s, should be handled by SSE server", tc.method, tc.path) + } + } + } + // Note: We don't check for specific success codes since the SSE server + // may return various codes based on the request content/headers + } + }) + } +} + +// TestJSONResponseFormat tests the format of JSON error responses +func TestJSONResponseFormat(t *testing.T) { + cfg := createTestConfig("readonly", map[string]bool{}) + service := NewService(cfg) + err := service.Initialize() + if err != nil { + t.Fatalf("Failed to 
initialize service: %v", err) + } + + tests := []struct { + name string + serverFunc func() *http.Server + expectedKeys []string + transportType string + }{ + { + name: "StreamableHTTP_JSONFormat", + serverFunc: func() *http.Server { + return service.createCustomHTTPServerWithHelp404("localhost:8080") + }, + expectedKeys: []string{"error", "message", "endpoints"}, + transportType: "streamable-http", + }, + { + name: "SSE_JSONFormat", + serverFunc: func() *http.Server { + sseServer := server.NewSSEServer(service.mcpServer) + return service.createCustomSSEServerWithHelp404(sseServer, "localhost:8081") + }, + expectedKeys: []string{"error", "message", "endpoints"}, + transportType: "sse", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + customServer := tt.serverFunc() + + req, err := http.NewRequest("GET", "/invalid", nil) + if err != nil { + t.Fatalf("Failed to create request: %v", err) + } + + rr := httptest.NewRecorder() + customServer.Handler.ServeHTTP(rr, req) + + // Verify it's valid JSON + var response map[string]interface{} + err = json.Unmarshal(rr.Body.Bytes(), &response) + if err != nil { + t.Fatalf("Response should be valid JSON: %v", err) + } + + // Verify all expected keys are present + for _, key := range tt.expectedKeys { + if _, exists := response[key]; !exists { + t.Errorf("Expected key '%s' not found in response", key) + } + } + + // Verify error field value + if response["error"] != "Not Found" { + t.Errorf("Expected error field to be 'Not Found', got %v", response["error"]) + } + + // Verify message is informative + message, ok := response["message"].(string) + if !ok { + t.Fatal("Message should be a string") + } + if len(message) < 20 { + t.Error("Message should be informative (at least 20 characters)") + } + + // Verify endpoints structure + endpoints, ok := response["endpoints"].(map[string]interface{}) + if !ok { + t.Fatal("Endpoints should be a map") + } + if len(endpoints) == 0 { + t.Error("Endpoints map should 
not be empty") + } + + t.Logf("Verified JSON format for %s transport", tt.transportType) + }) + } +} diff --git a/internal/version/version.go b/internal/version/version.go index 2768557..746e805 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -8,7 +8,7 @@ import ( // Version information var ( // GitVersion is the git tag version - GitVersion = "1" + GitVersion = "dev" // BuildMetadata is extra build time data BuildMetadata = "" // GitCommit is the git sha1