Skip to content

Commit d3fbd57

Browse files
author
Fly.io CI
committed
Add Sentry
1 parent b782594 commit d3fbd57

File tree

8 files changed

+214
-21
lines changed

8 files changed

+214
-21
lines changed

cmd/postgres-operator/main.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
"github.com/percona/percona-postgresql-operator/internal/initialize"
4848
"github.com/percona/percona-postgresql-operator/internal/logging"
4949
"github.com/percona/percona-postgresql-operator/internal/naming"
50+
"github.com/percona/percona-postgresql-operator/internal/sentry"
5051
"github.com/percona/percona-postgresql-operator/internal/upgradecheck"
5152
perconaController "github.com/percona/percona-postgresql-operator/percona/controller"
5253
"github.com/percona/percona-postgresql-operator/percona/controller/pgbackup"
@@ -74,6 +75,13 @@ func main() {
7475
assertNoError(err)
7576
defer otelFlush()
7677

78+
// Initialize Sentry if DSN is provided
79+
if dsn := os.Getenv("SENTRY_DSN"); dsn != "" {
80+
err := sentry.Initialize(dsn)
81+
assertNoError(err)
82+
defer sentry.Flush()
83+
}
84+
7785
opts := zap.Options{
7886
Encoder: getLogEncoder(),
7987
Level: getLogLevel(),

config/manager/default/manager.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,18 @@ spec:
3535
value: INFO
3636
- name: DISABLE_TELEMETRY
3737
value: "false"
38+
- name: SENTRY_DSN
39+
valueFrom:
40+
secretKeyRef:
41+
name: percona-postgresql-operator-sentry
42+
key: dsn
43+
optional: true
44+
- name: SENTRY_ENVIRONMENT
45+
valueFrom:
46+
fieldRef:
47+
fieldPath: metadata.namespace
48+
- name: SENTRY_DEBUG
49+
value: "false"
3850
ports:
3951
- containerPort: 8080
4052
name: metrics
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: v1
2+
kind: Secret
3+
metadata:
4+
name: percona-postgresql-operator-sentry
5+
namespace: default
6+
type: Opaque
7+
stringData:
8+
dsn: "" # Replace with your Sentry DSN

docs/content/monitoring.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Error Reporting with Sentry
2+
3+
The Percona PostgreSQL Operator supports error reporting through Sentry. This helps track and monitor errors that occur during the operation of your PostgreSQL clusters.
4+
5+
## Configuration
6+
7+
To enable Sentry error reporting:
8+
9+
1. Create a Sentry project and obtain your DSN (Data Source Name)
10+
11+
2. Create a secret containing your Sentry DSN in the namespace where the operator is deployed:
12+
```bash
13+
kubectl create secret generic percona-postgresql-operator-sentry \
14+
--from-literal=dsn=your-sentry-dsn
15+
```
16+
17+
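3. The operator deployment reads the DSN from this secret into the `SENTRY_DSN` environment variable when the operator pod starts. If you create or change the secret while the operator is already running, restart the operator pod so that the new value takes effect.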
18+
19+
## Environment Variables
20+
21+
The following environment variables can be configured:
22+
23+
- `SENTRY_DSN`: The Sentry DSN (configured via secret)
24+
- `SENTRY_ENVIRONMENT`: The environment name (defaults to namespace name)
25+
- `SENTRY_DEBUG`: Enable debug mode for Sentry (default: "false")
26+
27+
## Error Tracking
28+
29+
The operator reports various types of errors to Sentry:
30+
31+
- Reconciliation errors
32+
- Unexpected errors during cluster operations
33+
- Panics (which are captured and reported before re-panicking)
34+
35+
Each error report includes relevant context such as:
36+
- Namespace and name of the affected PostgreSQL cluster
37+
- Operation being performed
38+
- Controller name
39+
- Additional error context
40+
41+
## Disabling Error Reporting
42+
43+
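To disable Sentry error reporting, delete the Sentry secret and then restart the operator pod (the DSN is only read at startup, so a running operator keeps reporting until it is restarted):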
44+
45+
```bash
46+
kubectl delete secret percona-postgresql-operator-sentry
47+
```

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ go 1.22.6
55
require (
66
github.com/Percona-Lab/percona-version-service v0.0.0-20230404081016-ea25e30cdcbc
77
github.com/evanphx/json-patch/v5 v5.9.0
8+
github.com/getsentry/sentry-go v0.27.0
89
github.com/go-logr/logr v1.4.2
910
github.com/go-openapi/errors v0.22.0
1011
github.com/go-openapi/runtime v0.28.0

go.sum

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos
4444
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
4545
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
4646
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
47+
github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps=
48+
github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY=
49+
github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA=
50+
github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
4751
github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=
4852
github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
4953
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -136,10 +140,10 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
136140
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
137141
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
138142
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
139-
github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8=
140-
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
141-
github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
142-
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
143+
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
144+
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
145+
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
146+
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
143147
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
144148
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
145149
github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8=
@@ -168,6 +172,8 @@ github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+
168172
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
169173
github.com/pganalyze/pg_query_go/v5 v5.1.0 h1:MlxQqHZnvA3cbRQYyIrjxEjzo560P6MyTgtlaf3pmXg=
170174
github.com/pganalyze/pg_query_go/v5 v5.1.0/go.mod h1:FsglvxidZsVN+Ltw3Ai6nTgPVcK2BPukH3jCDEqc1Ug=
175+
github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
176+
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
171177
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
172178
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
173179
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=

internal/controller/postgrescluster/controller.go

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import (
3939
"sigs.k8s.io/controller-runtime/pkg/manager"
4040
"sigs.k8s.io/controller-runtime/pkg/reconcile"
4141

42+
sentrygo "github.com/getsentry/sentry-go"
4243
"github.com/percona/percona-postgresql-operator/internal/config"
4344
"github.com/percona/percona-postgresql-operator/internal/controller/runtime"
4445
"github.com/percona/percona-postgresql-operator/internal/initialize"
@@ -84,19 +85,35 @@ func (r *Reconciler) Reconcile(
8485
ctx context.Context, request reconcile.Request,
8586
) (reconcile.Result, error) {
8687
ctx, span := r.Tracer.Start(ctx, "Reconcile")
87-
log := logging.FromContext(ctx)
8888
defer span.End()
8989

90+
// Add Sentry context
91+
hub := sentrygo.GetHubFromContext(ctx)
92+
if hub == nil {
93+
hub = sentrygo.CurrentHub().Clone()
94+
}
95+
hub.ConfigureScope(func(scope *sentrygo.Scope) {
96+
scope.SetTag("namespace", request.Namespace)
97+
scope.SetTag("name", request.Name)
98+
})
99+
ctx = sentrygo.SetHubOnContext(ctx, hub)
100+
defer func() {
101+
if err := recover(); err != nil {
102+
hub.Recover(err)
103+
panic(err)
104+
}
105+
}()
106+
107+
log := logging.FromContext(ctx)
108+
90109
// get the postgrescluster from the cache
91110
cluster := &v1beta1.PostgresCluster{}
92-
if err := r.Client.Get(ctx, request.NamespacedName, cluster); err != nil {
93-
// NotFound cannot be fixed by requeuing so ignore it. During background
94-
// deletion, we receive delete events from cluster's dependents after
95-
// cluster is deleted.
96-
if err = client.IgnoreNotFound(err); err != nil {
97-
log.Error(err, "unable to fetch PostgresCluster")
98-
span.RecordError(err)
99-
}
111+
err := r.Client.Get(ctx, request.NamespacedName, cluster)
112+
if err = client.IgnoreNotFound(err); err != nil {
113+
log.Error(err, "unable to fetch PostgresCluster")
114+
span.RecordError(err)
115+
// Record unexpected errors
116+
hub.CaptureException(err)
100117
return runtime.ErrorWithBackoff(err)
101118
}
102119

@@ -116,8 +133,7 @@ func (r *Reconciler) Reconcile(
116133

117134
// Keep a copy of cluster prior to any manipulations.
118135
before := cluster.DeepCopy()
119-
var err error
120-
result := reconcile.Result{}
136+
var result reconcile.Result
121137
defer func() {
122138
if !equality.Semantic.DeepEqual(before.Status, cluster.Status) {
123139
statusErr := r.Client.Status().Patch(ctx, cluster, client.MergeFrom(before), r.Owner)
@@ -141,7 +157,7 @@ func (r *Reconciler) Reconcile(
141157
if deleteResult, deleteErr := r.handleDelete(ctx, cluster); deleteErr != nil {
142158
span.RecordError(deleteErr)
143159
log.Error(deleteErr, "deleting")
144-
return runtime.ErrorWithBackoff(deleteErr)
160+
return reconcile.Result{}, errors.Join(err, deleteErr)
145161

146162
} else if deleteResult != nil {
147163
if log := log.V(1); log.Enabled() {
@@ -162,7 +178,7 @@ func (r *Reconciler) Reconcile(
162178
// specifically allow reconciliation if the cluster is shutdown to
163179
// facilitate upgrades, otherwise return
164180
if !initialize.FromPointer(cluster.Spec.Shutdown) {
165-
return runtime.ErrorWithBackoff(err)
181+
return reconcile.Result{}, err
166182
}
167183
}
168184

@@ -176,7 +192,7 @@ func (r *Reconciler) Reconcile(
176192
path := field.NewPath("spec", "standby")
177193
err := field.Invalid(path, cluster.Name, "Standby requires a host or repoName to be enabled")
178194
r.Recorder.Event(cluster, corev1.EventTypeWarning, "InvalidStandbyConfiguration", err.Error())
179-
return runtime.ErrorWithBackoff(err)
195+
return reconcile.Result{}, err
180196
}
181197

182198
var (
@@ -226,7 +242,7 @@ func (r *Reconciler) Reconcile(
226242

227243
ObservedGeneration: cluster.GetGeneration(),
228244
})
229-
return runtime.ErrorWithBackoff(patchClusterStatus())
245+
return reconcile.Result{}, errors.Join(err, patchClusterStatus())
230246
} else {
231247
meta.RemoveStatusCondition(&cluster.Status.Conditions, v1beta1.PostgresClusterProgressing)
232248
}
@@ -269,7 +285,7 @@ func (r *Reconciler) Reconcile(
269285
returnEarly, err := r.reconcileDirMoveJobs(ctx, cluster)
270286
if err != nil || returnEarly {
271287
log.V(1).Info("waiting for directory move jobs", "cluster", cluster.Name, "error", err)
272-
return runtime.ErrorWithBackoff(errors.Join(err, patchClusterStatus()))
288+
return reconcile.Result{}, errors.Join(err, patchClusterStatus())
273289
}
274290
}
275291
if err == nil {
@@ -325,7 +341,7 @@ func (r *Reconciler) Reconcile(
325341
returnEarly, err := r.reconcileDataSource(ctx, cluster, instances, clusterVolumes, rootCA)
326342
if err != nil || returnEarly {
327343
log.V(1).Info("waiting for data source initialization", "cluster", cluster.Name, "error", err)
328-
return runtime.ErrorWithBackoff(errors.Join(err, patchClusterStatus()))
344+
return reconcile.Result{}, errors.Join(err, patchClusterStatus())
329345
}
330346
}
331347
if err == nil {
@@ -431,6 +447,12 @@ func (r *Reconciler) Reconcile(
431447
"requeue", result.Requeue,
432448
"requeueAfter", result.RequeueAfter)
433449

450+
// If we get here, reconciliation succeeded.
451+
// Record any errors that occurred during reconciliation.
452+
if err != nil {
453+
hub.CaptureException(err)
454+
}
455+
434456
return result, errors.Join(err, patchClusterStatus())
435457
}
436458

internal/sentry/sentry.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package sentry
2+
3+
import (
4+
"context"
5+
"os"
6+
"time"
7+
8+
"github.com/getsentry/sentry-go"
9+
"github.com/pkg/errors"
10+
"k8s.io/apimachinery/pkg/types"
11+
12+
"github.com/percona/percona-postgresql-operator/internal/logging"
13+
)
14+
15+
const (
16+
// DefaultTimeout is the default timeout for flushing events to Sentry
17+
DefaultTimeout = 2 * time.Second
18+
)
19+
20+
// Initialize sets up the Sentry client with the provided DSN
21+
func Initialize(dsn string) error {
22+
if dsn == "" {
23+
return nil // Sentry is disabled if no DSN is provided
24+
}
25+
26+
err := sentry.Init(sentry.ClientOptions{
27+
Dsn: dsn,
28+
EnableTracing: true,
29+
TracesSampleRate: 1.0,
30+
Debug: os.Getenv("SENTRY_DEBUG") == "true",
31+
Environment: os.Getenv("SENTRY_ENVIRONMENT"),
32+
})
33+
34+
if err != nil {
35+
return errors.Wrap(err, "failed to initialize Sentry")
36+
}
37+
38+
return nil
39+
}
40+
41+
// CaptureError reports an error to Sentry with additional context
42+
func CaptureError(ctx context.Context, err error, tags map[string]string) {
43+
if err == nil {
44+
return
45+
}
46+
47+
log := logging.FromContext(ctx)
48+
49+
hub := sentry.GetHubFromContext(ctx)
50+
if hub == nil {
51+
hub = sentry.CurrentHub()
52+
}
53+
54+
// Add any context from the logger
55+
if len(tags) > 0 {
56+
hub.ConfigureScope(func(scope *sentry.Scope) {
57+
for k, v := range tags {
58+
scope.SetTag(k, v)
59+
}
60+
})
61+
}
62+
63+
hub.CaptureException(err)
64+
log.Error(err, "error captured by Sentry")
65+
}
66+
67+
// RecoverPanic captures panics and reports them to Sentry
68+
func RecoverPanic() {
69+
if err := recover(); err != nil {
70+
sentry.CurrentHub().Recover(err)
71+
sentry.Flush(DefaultTimeout)
72+
panic(err) // Re-panic after reporting
73+
}
74+
}
75+
76+
// WithContext returns a new context with Sentry hub
77+
func WithContext(ctx context.Context, namespacedName types.NamespacedName) context.Context {
78+
hub := sentry.CurrentHub().Clone()
79+
hub.ConfigureScope(func(scope *sentry.Scope) {
80+
scope.SetTag("namespace", namespacedName.Namespace)
81+
scope.SetTag("name", namespacedName.Name)
82+
})
83+
return sentry.SetHubOnContext(ctx, hub)
84+
}
85+
86+
// Flush ensures all queued events are sent to Sentry
87+
func Flush() {
88+
sentry.Flush(DefaultTimeout)
89+
}

0 commit comments

Comments
 (0)