Skip to content

Commit 895d4c1

Browse files
committed
Merge branch '486-se-soft-failure' into 'master'
feat: introduce soft limits for billing errors (#486) Closes #486 See merge request postgres-ai/database-lab!718
2 parents 34cdb10 + 55710ab commit 895d4c1

File tree

10 files changed

+175
-105
lines changed

10 files changed

+175
-105
lines changed

engine/cmd/database-lab/main.go

+9-5
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ func main() {
183183
ctx,
184184
engProps,
185185
provisioner,
186+
billingSvc,
186187
retrievalSvc,
187188
pm,
188189
cloningSvc,
@@ -198,7 +199,7 @@ func main() {
198199
billingSvc, obs, pm, tm, tokenHolder, logFilter, embeddedUI, reloadConfigFn)
199200
shutdownCh := setShutdownListener()
200201

201-
go setReloadListener(ctx, engProps, provisioner,
202+
go setReloadListener(ctx, engProps, provisioner, billingSvc,
202203
retrievalSvc, pm, cloningSvc, platformSvc,
203204
embeddedUI, server,
204205
logCleaner, logFilter)
@@ -282,7 +283,7 @@ func getEngineProperties(ctx context.Context, docker *client.Client, cfg *config
282283
return engProps, nil
283284
}
284285

285-
func reloadConfig(ctx context.Context, engProp global.EngineProps, provisionSvc *provision.Provisioner,
286+
func reloadConfig(ctx context.Context, engProp global.EngineProps, provisionSvc *provision.Provisioner, billingSvc *billing.Billing,
286287
retrievalSvc *retrieval.Retrieval, pm *pool.Manager, cloningSvc *cloning.Base, platformSvc *platform.Service,
287288
embeddedUI *embeddedui.UIManager, server *srv.Server, cleaner *diagnostic.Cleaner, filtering *log.Filtering) error {
288289
cfg, err := config.LoadConfiguration()
@@ -328,12 +329,13 @@ func reloadConfig(ctx context.Context, engProp global.EngineProps, provisionSvc
328329
retrievalSvc.Reload(ctx, newRetrievalConfig)
329330
cloningSvc.Reload(cfg.Cloning)
330331
platformSvc.Reload(newPlatformSvc)
332+
billingSvc.Reload(newPlatformSvc.Client)
331333
server.Reload(cfg.Server)
332334

333335
return nil
334336
}
335337

336-
func setReloadListener(ctx context.Context, engProp global.EngineProps, provisionSvc *provision.Provisioner,
338+
func setReloadListener(ctx context.Context, engProp global.EngineProps, provisionSvc *provision.Provisioner, billingSvc *billing.Billing,
337339
retrievalSvc *retrieval.Retrieval, pm *pool.Manager, cloningSvc *cloning.Base, platformSvc *platform.Service,
338340
embeddedUI *embeddedui.UIManager, server *srv.Server, cleaner *diagnostic.Cleaner, logFilter *log.Filtering) {
339341
reloadCh := make(chan os.Signal, 1)
@@ -343,12 +345,14 @@ func setReloadListener(ctx context.Context, engProp global.EngineProps, provisio
343345
log.Msg("Reloading configuration")
344346

345347
if err := reloadConfig(ctx, engProp,
346-
provisionSvc, retrievalSvc,
348+
provisionSvc, billingSvc, retrievalSvc,
347349
pm, cloningSvc,
348350
platformSvc,
349351
embeddedUI, server,
350352
cleaner, logFilter); err != nil {
351-
log.Err("Failed to reload configuration", err)
353+
log.Err("Failed to reload configuration:", err)
354+
355+
continue
352356
}
353357

354358
log.Msg("Configuration has been reloaded")

engine/internal/billing/billing.go

+87-20
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"fmt"
88
"runtime"
9+
"sync"
910
"time"
1011

1112
"github.com/pbnjay/memory"
@@ -17,16 +18,56 @@ import (
1718
"gitlab.com/postgres-ai/database-lab/v3/pkg/models"
1819
)
1920

21+
const errorsSoftLimit = 2
22+
2023
// Billing manages the billing data.
2124
type Billing struct {
22-
platform *platform.Client
23-
props *global.EngineProps
24-
pm *pool.Manager
25+
platform *platform.Client
26+
props *global.EngineProps
27+
pm *pool.Manager
28+
mu *sync.Mutex
29+
softFails int
2530
}
2631

2732
// New creates a new Billing struct.
2833
func New(platform *platform.Client, props *global.EngineProps, pm *pool.Manager) *Billing {
29-
return &Billing{platform: platform, props: props, pm: pm}
34+
return &Billing{platform: platform, props: props, pm: pm, mu: &sync.Mutex{}}
35+
}
36+
37+
// Reload updates platform client.
38+
func (b *Billing) Reload(platformSvc *platform.Client) {
39+
b.platform = platformSvc
40+
}
41+
42+
func (b *Billing) increaseFailureCounter() int {
43+
b.mu.Lock()
44+
defer b.mu.Unlock()
45+
46+
b.softFails++
47+
48+
return b.softFails
49+
}
50+
51+
func (b *Billing) softLimitCounter() int {
52+
b.mu.Lock()
53+
defer b.mu.Unlock()
54+
55+
return b.softFails
56+
}
57+
58+
func (b *Billing) isSoftLimitExceeded() bool {
59+
b.mu.Lock()
60+
defer b.mu.Unlock()
61+
62+
return b.softFails > errorsSoftLimit
63+
}
64+
65+
// resetSoftFailureCounter sets the soft-failure counter back to zero while
// holding the mutex.
func (b *Billing) resetSoftFailureCounter() {
	b.mu.Lock()
	defer b.mu.Unlock()

	b.softFails = 0
}
3172

3273
// RegisterInstance registers instance on the Platform.
@@ -44,14 +85,9 @@ func (b *Billing) RegisterInstance(ctx context.Context, systemMetrics models.Sys
4485
return fmt.Errorf("cannot register instance: %w", err)
4586
}
4687

47-
if _, err := b.platform.SendUsage(ctx, b.props, platform.InstanceUsage{
48-
InstanceID: b.props.InstanceID,
49-
EventData: platform.DataUsage{
50-
CPU: systemMetrics.CPU,
51-
TotalMemory: systemMetrics.TotalMemory,
52-
DataSize: systemMetrics.DataUsed,
53-
}}); err != nil {
54-
return fmt.Errorf("cannot send the initial usage event: %w", err)
88+
// To check billing state immediately.
89+
if err := b.SendUsage(ctx, systemMetrics); err != nil {
90+
return err
5591
}
5692

5793
return nil
@@ -74,20 +110,51 @@ func (b *Billing) CollectUsage(ctx context.Context, system models.System) {
74110
case <-ticker.C:
75111
poolStat := b.pm.CollectPoolStat()
76112

77-
if _, err := b.platform.SendUsage(ctx, b.props, platform.InstanceUsage{
78-
InstanceID: b.props.InstanceID,
79-
EventData: platform.DataUsage{
80-
CPU: system.CPU,
81-
TotalMemory: system.TotalMemory,
82-
DataSize: poolStat.TotalUsed,
83-
},
113+
if err := b.SendUsage(ctx, models.System{
114+
CPU: system.CPU,
115+
TotalMemory: system.TotalMemory,
116+
DataUsed: poolStat.TotalUsed,
84117
}); err != nil {
85-
log.Err("failed to send usage event:", err)
118+
log.Err("collecting usage:", err)
86119
}
87120
}
88121
}
89122
}
90123

124+
// SendUsage sends usage events.
125+
func (b *Billing) SendUsage(ctx context.Context, systemMetrics models.System) error {
126+
respData, err := b.platform.SendUsage(ctx, b.props, platform.InstanceUsage{
127+
InstanceID: b.props.InstanceID,
128+
EventData: platform.DataUsage{
129+
CPU: systemMetrics.CPU,
130+
TotalMemory: systemMetrics.TotalMemory,
131+
DataSize: systemMetrics.DataUsed,
132+
}})
133+
134+
if err != nil {
135+
b.increaseFailureCounter()
136+
137+
if b.isSoftLimitExceeded() {
138+
log.Msg("Billing error threshold surpassed. Certain features have been temporarily disabled.")
139+
b.props.UpdateBilling(false)
140+
}
141+
142+
return fmt.Errorf("cannot send usage event: %w. Attempts: %d", err, b.softLimitCounter())
143+
}
144+
145+
if b.props.BillingActive != respData.BillingActive {
146+
b.props.UpdateBilling(respData.BillingActive)
147+
148+
log.Dbg("Instance state updated. Billing is active:", respData.BillingActive)
149+
}
150+
151+
if b.props.BillingActive {
152+
b.resetSoftFailureCounter()
153+
}
154+
155+
return nil
156+
}
157+
91158
func (b *Billing) shouldSendPlatformRequests() error {
92159
if b.props.Infrastructure == global.AWSInfrastructure {
93160
return errors.New("DLE infrastructure is AWS Marketplace")

engine/internal/srv/billing.go

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package srv
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
8+
"github.com/AlekSi/pointer"
9+
10+
"gitlab.com/postgres-ai/database-lab/v3/internal/billing"
11+
"gitlab.com/postgres-ai/database-lab/v3/internal/srv/api"
12+
"gitlab.com/postgres-ai/database-lab/v3/pkg/client/platform"
13+
"gitlab.com/postgres-ai/database-lab/v3/pkg/models"
14+
"gitlab.com/postgres-ai/database-lab/v3/version"
15+
)
16+
17+
func (s *Server) billingStatus(w http.ResponseWriter, r *http.Request) {
18+
usageResponse, err := s.billingUsage(r.Context())
19+
if err != nil {
20+
api.SendBadRequestError(w, r, err.Error())
21+
return
22+
}
23+
24+
if usageResponse.Code != "" {
25+
api.SendBadRequestError(w, r, fmt.Sprintf("Error code %s, message: %s", usageResponse.Code, usageResponse.Message))
26+
return
27+
}
28+
29+
if err := api.WriteJSON(w, http.StatusOK, usageResponse.BillingResponse); err != nil {
30+
api.SendError(w, r, err)
31+
return
32+
}
33+
}
34+
35+
func (s *Server) activate(w http.ResponseWriter, r *http.Request) {
36+
if _, err := s.billingUsage(r.Context()); err != nil {
37+
api.SendBadRequestError(w, r, err.Error())
38+
return
39+
}
40+
41+
engine := models.Engine{
42+
Version: version.GetVersion(),
43+
Edition: s.engProps.GetEdition(),
44+
BillingActive: pointer.ToBool(s.engProps.BillingActive),
45+
InstanceID: s.engProps.InstanceID,
46+
StartedAt: s.startedAt,
47+
Telemetry: pointer.ToBool(s.Platform.IsTelemetryEnabled()),
48+
DisableConfigModification: pointer.ToBool(s.Config.DisableConfigModification),
49+
}
50+
51+
if err := api.WriteJSON(w, http.StatusOK, engine); err != nil {
52+
api.SendError(w, r, err)
53+
return
54+
}
55+
}
56+
57+
func (s *Server) billingUsage(ctx context.Context) (*platform.EditionResponse, error) {
58+
systemMetrics := billing.GetSystemMetrics(s.pm)
59+
60+
instanceUsage := platform.InstanceUsage{
61+
InstanceID: s.engProps.InstanceID,
62+
EventData: platform.DataUsage{
63+
CPU: systemMetrics.CPU,
64+
TotalMemory: systemMetrics.TotalMemory,
65+
DataSize: systemMetrics.DataUsed,
66+
},
67+
}
68+
69+
return s.Platform.Client.SendUsage(ctx, s.engProps, instanceUsage)
70+
}

engine/internal/srv/config.go

-59
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,20 @@ import (
88
"regexp"
99
"time"
1010

11-
"github.com/AlekSi/pointer"
1211
"github.com/docker/docker/api/types"
1312
yamlv2 "gopkg.in/yaml.v2"
1413
"gopkg.in/yaml.v3"
1514

16-
"gitlab.com/postgres-ai/database-lab/v3/internal/billing"
1715
"gitlab.com/postgres-ai/database-lab/v3/internal/provision"
1816
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval"
1917
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/logical"
2018
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools/db"
2119
"gitlab.com/postgres-ai/database-lab/v3/internal/srv/api"
22-
"gitlab.com/postgres-ai/database-lab/v3/pkg/client/platform"
2320
"gitlab.com/postgres-ai/database-lab/v3/pkg/config"
2421
"gitlab.com/postgres-ai/database-lab/v3/pkg/log"
2522
"gitlab.com/postgres-ai/database-lab/v3/pkg/models"
2623
"gitlab.com/postgres-ai/database-lab/v3/pkg/util/projection"
2724
yamlUtils "gitlab.com/postgres-ai/database-lab/v3/pkg/util/yaml"
28-
"gitlab.com/postgres-ai/database-lab/v3/version"
2925
)
3026

3127
const (
@@ -386,58 +382,3 @@ func validateCustomOptions(customOptions []interface{}) error {
386382

387383
return nil
388384
}
389-
390-
func (s *Server) billingStatus(w http.ResponseWriter, r *http.Request) {
391-
systemMetrics := billing.GetSystemMetrics(s.pm)
392-
393-
instanceUsage := platform.InstanceUsage{
394-
InstanceID: s.engProps.InstanceID,
395-
EventData: platform.DataUsage{
396-
CPU: systemMetrics.CPU,
397-
TotalMemory: systemMetrics.TotalMemory,
398-
DataSize: systemMetrics.DataUsed,
399-
},
400-
}
401-
402-
usageResponse, err := s.Platform.Client.SendUsage(r.Context(), s.engProps, instanceUsage)
403-
if err != nil {
404-
api.SendBadRequestError(w, r, err.Error())
405-
return
406-
}
407-
408-
if usageResponse.Code != "" {
409-
api.SendBadRequestError(w, r, fmt.Sprintf("Error code %s, message: %s", usageResponse.Code, usageResponse.Message))
410-
return
411-
}
412-
413-
if err := api.WriteJSON(w, http.StatusOK, usageResponse.BillingResponse); err != nil {
414-
api.SendError(w, r, err)
415-
return
416-
}
417-
}
418-
419-
func (s *Server) activate(w http.ResponseWriter, r *http.Request) {
420-
systemMetrics := billing.GetSystemMetrics(s.pm)
421-
422-
if err := s.billingSvc.RegisterInstance(r.Context(), systemMetrics); err != nil {
423-
log.Msg("cannot register instance:", err)
424-
api.SendBadRequestError(w, r, err.Error())
425-
426-
return
427-
}
428-
429-
engine := models.Engine{
430-
Version: version.GetVersion(),
431-
Edition: s.engProps.GetEdition(),
432-
BillingActive: pointer.ToBool(s.engProps.BillingActive),
433-
InstanceID: s.engProps.InstanceID,
434-
StartedAt: s.startedAt,
435-
Telemetry: pointer.ToBool(s.Platform.IsTelemetryEnabled()),
436-
DisableConfigModification: pointer.ToBool(s.Config.DisableConfigModification),
437-
}
438-
439-
if err := api.WriteJSON(w, http.StatusOK, engine); err != nil {
440-
api.SendError(w, r, err)
441-
return
442-
}
443-
}

engine/pkg/config/global/config.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,12 @@ const (
5858
// CommunityEdition defines the community edition.
5959
CommunityEdition = "community"
6060

61-
// StandardEdition defines the community edition.
61+
// StandardEdition defines the standard edition.
6262
StandardEdition = "standard"
6363

64+
// EnterpriseEdition defines the enterprise edition.
65+
EnterpriseEdition = "enterprise"
66+
6467
// AWSInfrastructure marks instances running from AWS Marketplace.
6568
AWSInfrastructure = "AWS"
6669
)

engine/test/1.synthetic.sh

+1-4
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ curl https://gitlab.com/postgres-ai/database-lab/-/raw/"${TAG:-master}"/engine/c
8080
# Edit the following options
8181
yq eval -i '
8282
.global.debug = true |
83-
.global.telemetry.enabled = false |
83+
.platform.enableTelemetry = false |
8484
.embeddedUI.enabled = false |
8585
.server.port = env(DLE_SERVER_PORT) |
8686
.provision.portPool.from = env(DLE_PORT_POOL_FROM) |
@@ -119,9 +119,6 @@ sudo docker run \
119119
--volume ${DLE_TEST_MOUNT_DIR}:${DLE_TEST_MOUNT_DIR}/:rshared \
120120
--volume "${configDir}":/home/dblab/configs \
121121
--volume "${metaDir}":/home/dblab/meta \
122-
--volume /sys/kernel/debug:/sys/kernel/debug:rw \
123-
--volume /lib/modules:/lib/modules:ro \
124-
--volume /proc:/host_proc:ro \
125122
--env DOCKER_API_VERSION=1.39 \
126123
--detach \
127124
"${IMAGE2TEST}"

0 commit comments

Comments
 (0)