@@ -98,8 +98,11 @@ type FileReplica struct {
98
98
walIndexGauge prometheus.Gauge
99
99
walOffsetGauge prometheus.Gauge
100
100
101
+ // Frequency to create new snapshots.
102
+ SnapshotInterval time.Duration
103
+
101
104
// Time to keep snapshots and related WAL files.
102
- // Database is snapshotted after interval and older WAL files are discarded.
105
+ // Database is snapshotted after interval, if needed, and older WAL files are discarded.
103
106
Retention time.Duration
104
107
105
108
// Time between checks for retention.
@@ -402,9 +405,10 @@ func (r *FileReplica) Start(ctx context.Context) {
402
405
ctx , r .cancel = context .WithCancel (ctx )
403
406
404
407
// Start goroutine to replicate data.
405
- r .wg .Add (3 )
408
+ r .wg .Add (4 )
406
409
go func () { defer r .wg .Done (); r .monitor (ctx ) }()
407
410
go func () { defer r .wg .Done (); r .retainer (ctx ) }()
411
+ go func () { defer r .wg .Done (); r .snapshotter (ctx ) }()
408
412
go func () { defer r .wg .Done (); r .validator (ctx ) }()
409
413
}
410
414
@@ -446,7 +450,18 @@ func (r *FileReplica) monitor(ctx context.Context) {
446
450
447
451
// retainer runs in a separate goroutine and handles retention.
448
452
func (r * FileReplica ) retainer (ctx context.Context ) {
449
- ticker := time .NewTicker (r .RetentionCheckInterval )
453
+ // Disable retention enforcement if retention period is non-positive.
454
+ if r .Retention <= 0 {
455
+ return
456
+ }
457
+
458
+ // Ensure check interval is not longer than retention period.
459
+ checkInterval := r .RetentionCheckInterval
460
+ if checkInterval > r .Retention {
461
+ checkInterval = r .Retention
462
+ }
463
+
464
+ ticker := time .NewTicker (checkInterval )
450
465
defer ticker .Stop ()
451
466
452
467
for {
@@ -462,6 +477,28 @@ func (r *FileReplica) retainer(ctx context.Context) {
462
477
}
463
478
}
464
479
480
+ // snapshotter runs in a separate goroutine and handles snapshotting.
481
+ func (r * FileReplica ) snapshotter (ctx context.Context ) {
482
+ if r .SnapshotInterval <= 0 {
483
+ return
484
+ }
485
+
486
+ ticker := time .NewTicker (r .SnapshotInterval )
487
+ defer ticker .Stop ()
488
+
489
+ for {
490
+ select {
491
+ case <- ctx .Done ():
492
+ return
493
+ case <- ticker .C :
494
+ if err := r .Snapshot (ctx ); err != nil && err != ErrNoGeneration {
495
+ log .Printf ("%s(%s): snapshotter error: %s" , r .db .Path (), r .Name (), err )
496
+ continue
497
+ }
498
+ }
499
+ }
500
+ }
501
+
465
502
// validator runs in a separate goroutine and handles periodic validation.
466
503
func (r * FileReplica ) validator (ctx context.Context ) {
467
504
// Initialize counters since validation occurs infrequently.
@@ -531,6 +568,18 @@ func (r *FileReplica) CalcPos(ctx context.Context, generation string) (pos Pos,
531
568
return pos , nil
532
569
}
533
570
571
+ // Snapshot copies the entire database to the replica path.
572
+ func (r * FileReplica ) Snapshot (ctx context.Context ) error {
573
+ // Find current position of database.
574
+ pos , err := r .db .Pos ()
575
+ if err != nil {
576
+ return fmt .Errorf ("cannot determine current db generation: %w" , err )
577
+ } else if pos .IsZero () {
578
+ return ErrNoGeneration
579
+ }
580
+ return r .snapshot (ctx , pos .Generation , pos .Index )
581
+ }
582
+
534
583
// snapshot copies the entire database to the replica path.
535
584
func (r * FileReplica ) snapshot (ctx context.Context , generation string , index int ) error {
536
585
// Acquire a read lock on the database during snapshot to prevent checkpoints.
@@ -557,7 +606,7 @@ func (r *FileReplica) snapshot(ctx context.Context, generation string, index int
557
606
return err
558
607
}
559
608
560
- log .Printf ("%s(%s): snapshot: creating %s/%08x t=%s" , r .db .Path (), r .Name (), generation , index , time .Since (startTime ))
609
+ log .Printf ("%s(%s): snapshot: creating %s/%08x t=%s" , r .db .Path (), r .Name (), generation , index , time .Since (startTime ). Truncate ( time . Millisecond ) )
561
610
return nil
562
611
}
563
612
0 commit comments