Skip to content

Commit 941bd0b

Browse files
committed
fix(engine): optimize snapshot list (#323)
* keep a list of available snapshots in filesystem managers (do not retrieve on each request) * refresh the state of a snapshot list on modifying events * add tests with a snapshot list and integration tests to check available snapshots This has to improve the performance of `/snapshot` API calls which now may take many seconds.
1 parent 42dc98f commit 941bd0b

File tree

10 files changed

+131
-32
lines changed

10 files changed

+131
-32
lines changed

engine/internal/cloning/base.go

+2
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ func (c *Base) Run(ctx context.Context) error {
7878
return errors.Wrap(err, "failed to run cloning service")
7979
}
8080

81+
c.provision.DiscoverSnapshots()
82+
8183
if _, err := c.GetSnapshots(); err != nil {
8284
log.Err("No available snapshots: ", err)
8385
}

engine/internal/provision/mode_local.go

+12-11
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/pool"
2828
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/resources"
2929
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/runners"
30-
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/thinclones/zfs"
3130
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/engine/postgres/tools"
3231
"gitlab.com/postgres-ai/database-lab/v3/pkg/log"
3332
"gitlab.com/postgres-ai/database-lab/v3/pkg/models"
@@ -299,20 +298,22 @@ func (p *Provisioner) ResetSession(session *resources.Session, snapshotID string
299298
return snapshotModel, nil
300299
}
301300

301+
// DiscoverSnapshots discovers snapshots from active pools.
302+
func (p *Provisioner) DiscoverSnapshots() {
303+
for _, fsManager := range p.pm.GetAvailableFSManagers() {
304+
fsManager.RefreshSnapshotList()
305+
}
306+
}
307+
302308
// GetSnapshots provides a snapshot list from active pools.
303309
func (p *Provisioner) GetSnapshots() ([]resources.Snapshot, error) {
304310
snapshots := []resources.Snapshot{}
305311

306-
for _, activeFSManager := range p.pm.GetActiveFSManagers() {
307-
poolSnapshots, err := activeFSManager.GetSnapshots()
308-
if err != nil {
309-
var emptyErr *zfs.EmptyPoolError
310-
if errors.As(err, &emptyErr) {
311-
log.Msg(emptyErr.Error())
312-
continue
313-
}
314-
315-
log.Err(fmt.Errorf("failed to get snapshots for pool %s: %w", activeFSManager.Pool().Name, err))
312+
for _, activeFSManager := range p.pm.GetAvailableFSManagers() {
313+
poolSnapshots := activeFSManager.SnapshotList()
314+
if len(poolSnapshots) == 0 {
315+
log.Msg(fmt.Sprintf("no snapshots for pool %s", activeFSManager.Pool().Name))
316+
continue
316317
}
317318

318319
snapshots = append(snapshots, poolSnapshots...)

engine/internal/provision/mode_local_test.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,11 @@ func (m mockFSManager) CleanupSnapshots(retentionLimit int) ([]string, error) {
8484
return nil, nil
8585
}
8686

87-
func (m mockFSManager) GetSnapshots() ([]resources.Snapshot, error) {
88-
return nil, nil
87+
func (m mockFSManager) SnapshotList() []resources.Snapshot {
88+
return nil
89+
}
90+
91+
func (m mockFSManager) RefreshSnapshotList() {
8992
}
9093

9194
func (m mockFSManager) GetSessionState(name string) (*resources.SessionState, error) {

engine/internal/provision/pool/manager.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ type Snapshotter interface {
4545
CreateSnapshot(poolSuffix, dataStateAt string) (snapshotName string, err error)
4646
DestroySnapshot(snapshotName string) (err error)
4747
CleanupSnapshots(retentionLimit int) ([]string, error)
48-
GetSnapshots() ([]resources.Snapshot, error)
48+
SnapshotList() []resources.Snapshot
49+
RefreshSnapshotList()
4950
}
5051

5152
// Pooler describes methods for Pool providing.

engine/internal/provision/pool/pool_manager.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -193,14 +193,14 @@ func (pm *Manager) CollectPoolStat() telemetry.PoolStat {
193193
return ps
194194
}
195195

196-
// GetActiveFSManagers returns a list of active filesystem managers.
197-
func (pm *Manager) GetActiveFSManagers() []FSManager {
196+
// GetAvailableFSManagers returns a list of non-empty (active or refreshing) filesystem managers.
197+
func (pm *Manager) GetAvailableFSManagers() []FSManager {
198198
fs := []FSManager{}
199199

200200
pm.mu.Lock()
201201

202202
for _, fsManager := range pm.fsManagerPool {
203-
if pool := fsManager.Pool(); pool != nil && pool.Status() == resources.ActivePool {
203+
if pool := fsManager.Pool(); pool != nil && pool.Status() != resources.EmptyPool {
204204
fs = append(fs, fsManager)
205205
}
206206
}

engine/internal/provision/thinclones/lvm/lvmanager.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,8 @@ func (m *LVManager) CleanupSnapshots(_ int) ([]string, error) {
106106
return nil, nil
107107
}
108108

109-
// GetSnapshots is not implemented.
110-
func (m *LVManager) GetSnapshots() ([]resources.Snapshot, error) {
109+
// SnapshotList is not implemented.
110+
func (m *LVManager) SnapshotList() []resources.Snapshot {
111111
// TODO(anatoly): Not supported in LVM mode warning.
112112
return []resources.Snapshot{
113113
{
@@ -116,7 +116,12 @@ func (m *LVManager) GetSnapshots() ([]resources.Snapshot, error) {
116116
DataStateAt: time.Now(),
117117
Pool: m.pool.Name,
118118
},
119-
}, nil
119+
}
120+
}
121+
122+
// RefreshSnapshotList is not supported in LVM mode.
123+
func (m *LVManager) RefreshSnapshotList() {
124+
log.Msg("RefreshSnapshotList is not supported for LVM. Skip the operation")
120125
}
121126

122127
// GetSessionState is not implemented.

engine/internal/provision/thinclones/zfs/zfs.go

+62-4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"path"
1111
"strconv"
1212
"strings"
13+
"sync"
1314
"time"
1415
"unicode"
1516

@@ -142,8 +143,10 @@ func (e *EmptyPoolError) Error() string {
142143

143144
// Manager describes a filesystem manager for ZFS.
144145
type Manager struct {
145-
runner runners.Runner
146-
config Config
146+
runner runners.Runner
147+
config Config
148+
mu *sync.Mutex
149+
snapshots []resources.Snapshot
147150
}
148151

149152
// Config defines configuration for ZFS filesystem manager.
@@ -158,6 +161,7 @@ func NewFSManager(runner runners.Runner, config Config) *Manager {
158161
m := Manager{
159162
runner: runner,
160163
config: config,
164+
mu: &sync.Mutex{},
161165
}
162166

163167
return &m
@@ -307,6 +311,20 @@ func (m *Manager) CreateSnapshot(poolSuffix, dataStateAt string) (string, error)
307311
}
308312
}
309313

314+
dataStateTime, err := util.ParseCustomTime(strings.TrimSuffix(dataStateAt, m.config.PreSnapshotSuffix))
315+
if err != nil {
316+
return "", fmt.Errorf("failed to parse dataStateAt: %w", err)
317+
}
318+
319+
m.addSnapshotToList(resources.Snapshot{
320+
ID: snapshotName,
321+
CreatedAt: time.Now(),
322+
DataStateAt: dataStateTime,
323+
Pool: m.config.Pool.Name,
324+
})
325+
326+
go m.RefreshSnapshotList()
327+
310328
return snapshotName, nil
311329
}
312330

@@ -334,6 +352,8 @@ func (m *Manager) DestroySnapshot(snapshotName string) error {
334352
return errors.Wrap(err, "failed to run command")
335353
}
336354

355+
m.removeSnapshotFromList(snapshotName)
356+
337357
return nil
338358
}
339359

@@ -360,6 +380,8 @@ func (m *Manager) CleanupSnapshots(retentionLimit int) ([]string, error) {
360380

361381
lines := strings.Split(out, "\n")
362382

383+
m.RefreshSnapshotList()
384+
363385
return lines, nil
364386
}
365387

@@ -487,8 +509,26 @@ func (m *Manager) GetFilesystemState() (models.FileSystem, error) {
487509
return fileSystem, nil
488510
}
489511

490-
// GetSnapshots returns a snapshot list.
491-
func (m *Manager) GetSnapshots() ([]resources.Snapshot, error) {
512+
// SnapshotList returns a list of snapshots.
513+
func (m *Manager) SnapshotList() []resources.Snapshot {
514+
snapshotList := m.snapshots
515+
return snapshotList
516+
}
517+
518+
// RefreshSnapshotList updates the list of snapshots.
519+
func (m *Manager) RefreshSnapshotList() {
520+
snapshots, err := m.getSnapshots()
521+
if err != nil {
522+
log.Err("Failed to refresh snapshot list: ", err)
523+
return
524+
}
525+
526+
m.mu.Lock()
527+
m.snapshots = snapshots
528+
m.mu.Unlock()
529+
}
530+
531+
func (m *Manager) getSnapshots() ([]resources.Snapshot, error) {
492532
entries, err := m.listSnapshots(m.config.Pool.Name)
493533
if err != nil {
494534
return nil, fmt.Errorf("failed to list snapshots: %w", err)
@@ -517,6 +557,24 @@ func (m *Manager) GetSnapshots() ([]resources.Snapshot, error) {
517557
return snapshots, nil
518558
}
519559

560+
func (m *Manager) addSnapshotToList(snapshot resources.Snapshot) {
561+
m.mu.Lock()
562+
m.snapshots = append(m.snapshots, snapshot)
563+
m.mu.Unlock()
564+
}
565+
566+
func (m *Manager) removeSnapshotFromList(snapshotName string) {
567+
for i, snapshot := range m.snapshots {
568+
if snapshot.ID == snapshotName {
569+
m.mu.Lock()
570+
m.snapshots = append(m.snapshots[:i], m.snapshots[i+1:]...)
571+
m.mu.Unlock()
572+
573+
break
574+
}
575+
}
576+
}
577+
520578
// ListFilesystems lists ZFS file systems (clones, pools).
521579
func (m *Manager) listFilesystems(pool string) ([]*ListEntry, error) {
522580
filter := snapshotFilter{

engine/internal/provision/thinclones/zfs/zfs_test.go

+28
Original file line numberDiff line numberDiff line change
@@ -196,3 +196,31 @@ func TestBuildingCommands(t *testing.T) {
196196
require.Equal(t, "zfs get -H -p -o value used testSnapshot", command)
197197
})
198198
}
199+
200+
func TestSnapshotList(t *testing.T) {
201+
t.Run("Snapshot list", func(t *testing.T) {
202+
fsManager := NewFSManager(runnerMock{}, Config{})
203+
204+
require.Equal(t, 0, len(fsManager.SnapshotList()))
205+
206+
snapshot := resources.Snapshot{ID: "test1"}
207+
fsManager.addSnapshotToList(snapshot)
208+
209+
require.Equal(t, 1, len(fsManager.SnapshotList()))
210+
require.Equal(t, []resources.Snapshot{{ID: "test1"}}, fsManager.SnapshotList())
211+
212+
snapshot2 := resources.Snapshot{ID: "test2"}
213+
fsManager.addSnapshotToList(snapshot2)
214+
215+
snapshot3 := resources.Snapshot{ID: "test3"}
216+
fsManager.addSnapshotToList(snapshot3)
217+
218+
require.Equal(t, 3, len(fsManager.SnapshotList()))
219+
require.Equal(t, []resources.Snapshot{{ID: "test1"}, {ID: "test2"}, {ID: "test3"}}, fsManager.SnapshotList())
220+
221+
fsManager.removeSnapshotFromList("test2")
222+
223+
require.Equal(t, 2, len(fsManager.SnapshotList()))
224+
require.Equal(t, []resources.Snapshot{{ID: "test1"}, {ID: "test3"}}, fsManager.SnapshotList())
225+
})
226+
}

engine/internal/retrieval/retrieval.go

+5-8
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/pool"
2323
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/resources"
2424
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/runners"
25-
"gitlab.com/postgres-ai/database-lab/v3/internal/provision/thinclones/zfs"
2625
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/components"
2726
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/config"
2827
"gitlab.com/postgres-ai/database-lab/v3/internal/retrieval/dbmarker"
@@ -525,14 +524,12 @@ func preparePoolToRefresh(poolToUpdate pool.FSManager) error {
525524
strings.Join(cloneList, " "))
526525
}
527526

528-
snapshots, err := poolToUpdate.GetSnapshots()
529-
if err != nil {
530-
var emptyErr *zfs.EmptyPoolError
531-
if !errors.As(err, &emptyErr) {
532-
return errors.Wrap(err, "failed to check existing snapshots")
533-
}
527+
poolToUpdate.RefreshSnapshotList()
534528

535-
log.Msg(emptyErr.Error())
529+
snapshots := poolToUpdate.SnapshotList()
530+
if len(snapshots) == 0 {
531+
log.Msg(fmt.Sprintf("no snapshots for pool %s", poolToUpdate.Pool().Name))
532+
return nil
536533
}
537534

538535
for _, snapshotEntry := range snapshots {

engine/test/1.synthetic.sh

+4
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ dblab init \
151151
# Check the configuration by fetching the status of the instance:
152152
dblab instance status
153153

154+
# Check the snapshot list
155+
if [[ $(dblab snapshot list | jq length) -eq 0 ]] ; then
156+
echo "No snapshot found" && exit 1
157+
fi
154158

155159
## Create a clone
156160
dblab clone create \

0 commit comments

Comments
 (0)