Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
for mulitple diags and azure-specific logs
  • Loading branch information
gossion committed Jul 25, 2025
commit ff6d6fd6a162af0d41fd4a80a870c5ca3a2cb59f
15 changes: 11 additions & 4 deletions internal/components/monitor/diagnostics/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,24 @@ func HandleControlPlaneLogs(params map[string]interface{}, cfg *config.ConfigDat
return "", err
}

// Get workspace GUID from diagnostic settings
workspaceGUID, err := ExtractWorkspaceGUIDFromDiagnosticSettings(subscriptionID, resourceGroup, clusterName, cfg)
// Find the diagnostic setting that has the requested log category enabled
// This handles cases where multiple diagnostic settings exist for the same cluster
workspaceResourceID, isResourceSpecific, err := FindDiagnosticSettingForCategory(subscriptionID, resourceGroup, clusterName, logCategory, cfg)
if err != nil {
return "", fmt.Errorf("failed to find diagnostic setting for log category %s in cluster %s: %w", logCategory, clusterName, err)
}

// Get workspace GUID from the workspace resource ID
workspaceGUID, err := getWorkspaceGUID(workspaceResourceID, cfg)
if err != nil {
return "", fmt.Errorf("failed to get workspace GUID for cluster %s: %w", clusterName, err)
}

// Build cluster resource ID for scoping using utility function
clusterResourceID := buildClusterResourceID(subscriptionID, resourceGroup, clusterName)

// Build safe KQL query scoped to this specific AKS cluster
kqlQuery := BuildSafeKQLQuery(logCategory, logLevel, maxRecords, clusterResourceID)
// Build safe KQL query scoped to this specific AKS cluster with appropriate table mode
kqlQuery := BuildSafeKQLQuery(logCategory, logLevel, maxRecords, clusterResourceID, isResourceSpecific)

// Calculate timespan for the query
timespan, err := CalculateTimespan(startTime, endTime)
Expand Down
86 changes: 75 additions & 11 deletions internal/components/monitor/diagnostics/kql.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,95 @@ var auditCategories = map[string]bool{
"kube-audit-admin": true,
}

// Resource-specific table mappings for AKS log categories
var resourceSpecificTables = map[string]string{
"kube-audit": "AKSAudit",
"kube-audit-admin": "AKSAuditAdmin",
"kube-apiserver": "AKSControlPlane",
"kube-controller-manager": "AKSControlPlane",
"kube-scheduler": "AKSControlPlane",
"cluster-autoscaler": "AKSControlPlane",
"cloud-controller-manager": "AKSControlPlane",
"guard": "AKSControlPlane",
"csi-azuredisk-controller": "AKSControlPlane",
"csi-azurefile-controller": "AKSControlPlane",
"csi-snapshot-controller": "AKSControlPlane",
}

// isAuditCategory checks if the given category is an audit log category
func isAuditCategory(category string) bool {
return auditCategories[category]
}

// BuildSafeKQLQuery builds pre-validated KQL queries to prevent injection, scoped to specific AKS cluster
func BuildSafeKQLQuery(category, logLevel string, maxRecords int, clusterResourceID string) string {
// Convert resource ID to uppercase as it's stored in uppercase in Log Analytics
upperResourceID := strings.ToUpper(clusterResourceID)
baseQuery := fmt.Sprintf("AzureDiagnostics | where Category == '%s' and ResourceId == '%s'", category, upperResourceID)
// Supports both Azure Diagnostics and Resource-specific destination tables
func BuildSafeKQLQuery(category, logLevel string, maxRecords int, clusterResourceID string, isResourceSpecific bool) string {
var baseQuery string

if isResourceSpecific {
// Use resource-specific table
if tableName, exists := resourceSpecificTables[category]; exists {
// For resource-specific tables, _ResourceId is stored in lowercase
// Convert the resource ID to lowercase to match Azure's storage format
lowerResourceID := strings.ToLower(clusterResourceID)
baseQuery = fmt.Sprintf("%s | where _ResourceId == '%s'", tableName, lowerResourceID)
} else {
// Fallback to Azure Diagnostics table if no resource-specific mapping found
// Azure Diagnostics uses uppercase ResourceId
upperResourceID := strings.ToUpper(clusterResourceID)
baseQuery = fmt.Sprintf("AzureDiagnostics | where Category == '%s' and ResourceId == '%s'", category, upperResourceID)
}
} else {
// Use Azure Diagnostics table (legacy mode)
// Azure Diagnostics ResourceId field is stored in uppercase
upperResourceID := strings.ToUpper(clusterResourceID)
baseQuery = fmt.Sprintf("AzureDiagnostics | where Category == '%s' and ResourceId == '%s'", category, upperResourceID)
}

// Add log level filtering for non-audit logs
if logLevel != "" && !isAuditCategory(category) {
// For Kubernetes component logs (not audit), use the log_s prefix pattern
// Kubernetes logs use format like "I0715" (Info), "W0715" (Warning), "E0715" (Error)
// Audit logs don't follow this pattern, so we skip log level filtering for them
if prefix, exists := logLevelPrefixes[strings.ToLower(logLevel)]; exists {
baseQuery += fmt.Sprintf(" | where log_s startswith '%s'", prefix)
if isResourceSpecific {
// In resource-specific tables, log level is stored in the Level field as "INFO", "WARNING", "ERROR"
// Convert the requested log level to the format used in resource-specific tables
switch strings.ToLower(logLevel) {
case "info":
baseQuery += " | where Level == 'INFO'"
case "warning":
baseQuery += " | where Level == 'WARNING'"
case "error":
baseQuery += " | where Level == 'ERROR'"
}
} else {
// For Azure Diagnostics, use the log_s prefix pattern
// Kubernetes logs use format like "I0715" (Info), "W0715" (Warning), "E0715" (Error)
if prefix, exists := logLevelPrefixes[strings.ToLower(logLevel)]; exists {
baseQuery += fmt.Sprintf(" | where log_s startswith '%s'", prefix)
}
}
}

baseQuery += " | order by TimeGenerated desc"
baseQuery += fmt.Sprintf(" | limit %d", maxRecords)

// Project only essential fields: log content, timestamp, and level
baseQuery += " | project TimeGenerated, Level, log_s"
// Project essential fields - adjust based on table type
if isResourceSpecific {
// Resource-specific tables have different field names based on the table type
if tableName, exists := resourceSpecificTables[category]; exists {
if tableName == "AKSAudit" || tableName == "AKSAuditAdmin" {
// Audit tables have structured fields, no single message field
baseQuery += " | project TimeGenerated, Level, AuditId, Stage, RequestUri, Verb, User"
} else {
// AKSControlPlane table has Message field
baseQuery += " | project TimeGenerated, Category, Level, Message, PodName"
}
} else {
// Fallback projection for unknown resource-specific tables
baseQuery += " | project TimeGenerated, Level, Message"
}
} else {
// Azure Diagnostics table fields
baseQuery += " | project TimeGenerated, Level, log_s"
}

return baseQuery
}
Expand Down
Loading