Skip to content

Commit c3022c8

Browse files
committed
Fix release of non-OFD locks
This commit removes short-lived `os.Open()` calls on the database file because this can cause locks to be released when `os.File.Close()` is later called if the operating system does not support OFD (Open File Descriptor) locks.
1 parent f652186 commit c3022c8

File tree

4 files changed

+146
-79
lines changed

4 files changed

+146
-79
lines changed

db.go

Lines changed: 28 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ type DB struct {
4545
mu sync.RWMutex
4646
path string // part to database
4747
db *sql.DB // target database
48+
f *os.File // long-running db file descriptor
4849
rtx *sql.Tx // long running read transaction
4950
pageSize int // page size, in bytes
5051
notify chan struct{} // closes on WAL change
@@ -285,15 +286,23 @@ func (db *DB) Open() (err error) {
285286
// Close releases the read lock & closes the database. This method should only
286287
// be called by tests as it causes the underlying database to be checkpointed.
287288
func (db *DB) Close() (err error) {
288-
if e := db.SoftClose(); e != nil && err == nil {
289-
err = e
289+
// Ensure replicas all stop replicating.
290+
for _, r := range db.Replicas {
291+
r.Stop(true)
292+
}
293+
294+
if db.rtx != nil {
295+
if e := db.releaseReadLock(); e != nil && err == nil {
296+
err = e
297+
}
290298
}
291299

292300
if db.db != nil {
293301
if e := db.db.Close(); e != nil && err == nil {
294302
err = e
295303
}
296304
}
305+
297306
return err
298307
}
299308

@@ -386,13 +395,19 @@ func (db *DB) init() (err error) {
386395
return err
387396
}
388397

398+
// Open long-running database file descriptor. Required for non-OFD locks.
399+
if db.f, err = os.Open(db.path); err != nil {
400+
return fmt.Errorf("open db file descriptor: %w", err)
401+
}
402+
389403
// Ensure database is closed if init fails.
390404
// Initialization can retry on next sync.
391405
defer func() {
392406
if err != nil {
393-
db.releaseReadLock()
407+
_ = db.releaseReadLock()
394408
db.db.Close()
395-
db.db = nil
409+
db.f.Close()
410+
db.db, db.f = nil, nil
396411
}
397412
}()
398413

@@ -586,7 +601,7 @@ func (db *DB) SoftClose() (err error) {
586601

587602
// Ensure replicas all stop replicating.
588603
for _, r := range db.Replicas {
589-
r.Stop()
604+
r.Stop(false)
590605
}
591606

592607
if db.rtx != nil {
@@ -917,9 +932,9 @@ func (db *DB) verify() (info syncInfo, err error) {
917932
// Verify last page synced still matches.
918933
if info.shadowWALSize > WALHeaderSize {
919934
offset := info.shadowWALSize - int64(db.pageSize+WALFrameHeaderSize)
920-
if buf0, err := readFileAt(db.WALPath(), offset, int64(db.pageSize+WALFrameHeaderSize)); err != nil {
935+
if buf0, err := readWALFileAt(db.WALPath(), offset, int64(db.pageSize+WALFrameHeaderSize)); err != nil {
921936
return info, fmt.Errorf("cannot read last synced wal page: %w", err)
922-
} else if buf1, err := readFileAt(info.shadowWALPath, offset, int64(db.pageSize+WALFrameHeaderSize)); err != nil {
937+
} else if buf1, err := readWALFileAt(info.shadowWALPath, offset, int64(db.pageSize+WALFrameHeaderSize)); err != nil {
923938
return info, fmt.Errorf("cannot read last synced shadow wal page: %w", err)
924939
} else if !bytes.Equal(buf0, buf1) {
925940
info.reason = "wal overwritten by another process"
@@ -1466,20 +1481,6 @@ func RestoreReplica(ctx context.Context, r Replica, opt RestoreOptions) error {
14661481
return nil
14671482
}
14681483

1469-
func checksumFile(filename string) (uint64, error) {
1470-
f, err := os.Open(filename)
1471-
if err != nil {
1472-
return 0, err
1473-
}
1474-
defer f.Close()
1475-
1476-
h := crc64.New(crc64.MakeTable(crc64.ISO))
1477-
if _, err := io.Copy(h, f); err != nil {
1478-
return 0, err
1479-
}
1480-
return h.Sum64(), nil
1481-
}
1482-
14831484
// CalcRestoreTarget returns a replica & generation to restore from based on opt criteria.
14841485
func (db *DB) CalcRestoreTarget(ctx context.Context, opt RestoreOptions) (Replica, string, error) {
14851486
var target struct {
@@ -1672,11 +1673,14 @@ func (db *DB) CRC64() (uint64, Pos, error) {
16721673
}
16731674
pos.Offset = 0
16741675

1675-
chksum, err := checksumFile(db.Path())
1676-
if err != nil {
1676+
// Seek to the beginning of the db file descriptor and checksum whole file.
1677+
h := crc64.New(crc64.MakeTable(crc64.ISO))
1678+
if _, err := db.f.Seek(0, io.SeekStart); err != nil {
1679+
return 0, pos, err
1680+
} else if _, err := io.Copy(h, db.f); err != nil {
16771681
return 0, pos, err
16781682
}
1679-
return chksum, pos, nil
1683+
return h.Sum64(), pos, nil
16801684
}
16811685

16821686
// RestoreOptions represents options for DB.Restore().
@@ -1808,24 +1812,3 @@ func headerByteOrder(hdr []byte) (binary.ByteOrder, error) {
18081812
return nil, fmt.Errorf("invalid wal header magic: %x", magic)
18091813
}
18101814
}
1811-
1812-
func copyFile(dst, src string) error {
1813-
r, err := os.Open(src)
1814-
if err != nil {
1815-
return err
1816-
}
1817-
defer r.Close()
1818-
1819-
w, err := os.Create(dst)
1820-
if err != nil {
1821-
return err
1822-
}
1823-
defer w.Close()
1824-
1825-
if _, err := io.Copy(w, r); err != nil {
1826-
return err
1827-
} else if err := w.Sync(); err != nil {
1828-
return err
1829-
}
1830-
return nil
1831-
}

litestream.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,9 @@ func readWALHeader(filename string) ([]byte, error) {
152152
return buf[:n], err
153153
}
154154

155-
// readFileAt reads a slice from a file.
156-
func readFileAt(filename string, offset, n int64) ([]byte, error) {
155+
// readWALFileAt reads a slice from a file. Do not use this with database files
156+
// as it causes problems with non-OFD locks.
157+
func readWALFileAt(filename string, offset, n int64) ([]byte, error) {
157158
f, err := os.Open(filename)
158159
if err != nil {
159160
return nil, err

replica.go

Lines changed: 80 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/binary"
66
"fmt"
7+
"hash/crc64"
78
"io"
89
"io/ioutil"
910
"log"
@@ -31,10 +32,10 @@ type Replica interface {
3132
DB() *DB
3233

3334
// Starts replicating in a background goroutine.
34-
Start(ctx context.Context)
35+
Start(ctx context.Context) error
3536

3637
// Stops all replication processing. Blocks until processing stopped.
37-
Stop()
38+
Stop(hard bool) error
3839

3940
// Returns the last replication position.
4041
LastPos() Pos
@@ -90,6 +91,9 @@ type FileReplica struct {
9091
mu sync.RWMutex
9192
pos Pos // last position
9293

94+
muf sync.Mutex
95+
f *os.File // long-running file descriptor to avoid non-OFD lock issues
96+
9397
wg sync.WaitGroup
9498
cancel func()
9599

@@ -392,14 +396,19 @@ func (r *FileReplica) WALs(ctx context.Context) ([]*WALInfo, error) {
392396
}
393397

394398
// Start starts replication for a given generation.
395-
func (r *FileReplica) Start(ctx context.Context) {
399+
func (r *FileReplica) Start(ctx context.Context) (err error) {
396400
// Ignore if replica is being used sychronously.
397401
if !r.MonitorEnabled {
398-
return
402+
return nil
399403
}
400404

401405
// Stop previous replication.
402-
r.Stop()
406+
r.Stop(false)
407+
408+
// Open db file descriptor.
409+
if r.f, err = os.Open(r.db.Path()); err != nil {
410+
return err
411+
}
403412

404413
// Wrap context with cancelation.
405414
ctx, r.cancel = context.WithCancel(ctx)
@@ -410,12 +419,27 @@ func (r *FileReplica) Start(ctx context.Context) {
410419
go func() { defer r.wg.Done(); r.retainer(ctx) }()
411420
go func() { defer r.wg.Done(); r.snapshotter(ctx) }()
412421
go func() { defer r.wg.Done(); r.validator(ctx) }()
422+
423+
return nil
413424
}
414425

415426
// Stop cancels any outstanding replication and blocks until finished.
416-
func (r *FileReplica) Stop() {
427+
//
428+
// Performing a hard stop will close the DB file descriptor which could release
429+
// locks on per-process locks. Hard stops should only be performed when
430+
// stopping the entire process.
431+
func (r *FileReplica) Stop(hard bool) (err error) {
417432
r.cancel()
418433
r.wg.Wait()
434+
435+
r.muf.Lock()
436+
defer r.muf.Unlock()
437+
if hard && r.f != nil {
438+
if e := r.f.Close(); e != nil && err == nil {
439+
err = e
440+
}
441+
}
442+
return err
419443
}
420444

421445
// monitor runs in a separate goroutine and continuously replicates the DB.
@@ -582,6 +606,9 @@ func (r *FileReplica) Snapshot(ctx context.Context) error {
582606

583607
// snapshot copies the entire database to the replica path.
584608
func (r *FileReplica) snapshot(ctx context.Context, generation string, index int) error {
609+
r.muf.Lock()
610+
defer r.muf.Unlock()
611+
585612
// Acquire a read lock on the database during snapshot to prevent checkpoints.
586613
tx, err := r.db.db.Begin()
587614
if err != nil {
@@ -602,7 +629,39 @@ func (r *FileReplica) snapshot(ctx context.Context, generation string, index int
602629

603630
if err := mkdirAll(filepath.Dir(snapshotPath), r.db.dirmode, r.db.diruid, r.db.dirgid); err != nil {
604631
return err
605-
} else if err := compressFile(r.db.Path(), snapshotPath, r.db.uid, r.db.gid); err != nil {
632+
}
633+
634+
if _, err := r.f.Seek(0, io.SeekStart); err != nil {
635+
return err
636+
}
637+
638+
fi, err := r.f.Stat()
639+
if err != nil {
640+
return err
641+
}
642+
643+
w, err := createFile(snapshotPath+".tmp", fi.Mode(), r.db.uid, r.db.gid)
644+
if err != nil {
645+
return err
646+
}
647+
defer w.Close()
648+
649+
zr := lz4.NewWriter(w)
650+
defer zr.Close()
651+
652+
// Copy & compress file contents to temporary file.
653+
if _, err := io.Copy(zr, r.f); err != nil {
654+
return err
655+
} else if err := zr.Close(); err != nil {
656+
return err
657+
} else if err := w.Sync(); err != nil {
658+
return err
659+
} else if err := w.Close(); err != nil {
660+
return err
661+
}
662+
663+
// Move compressed file to final location.
664+
if err := os.Rename(snapshotPath+".tmp", snapshotPath); err != nil {
606665
return err
607666
}
608667

@@ -805,7 +864,7 @@ func (r *FileReplica) compress(ctx context.Context, generation string) error {
805864
}
806865

807866
dst := filename + ".lz4"
808-
if err := compressFile(filename, dst, r.db.uid, r.db.gid); err != nil {
867+
if err := compressWALFile(filename, dst, r.db.uid, r.db.gid); err != nil {
809868
return err
810869
} else if err := os.Remove(filename); err != nil {
811870
return err
@@ -1051,8 +1110,9 @@ func WALIndexAt(ctx context.Context, r Replica, generation string, maxIndex int,
10511110
return index, nil
10521111
}
10531112

1054-
// compressFile compresses a file and replaces it with a new file with a .lz4 extension.
1055-
func compressFile(src, dst string, uid, gid int) error {
1113+
// compressWALFile compresses a file and replaces it with a new file with a .lz4 extension.
1114+
// Do not use this on database files because of issues with non-OFD locks.
1115+
func compressWALFile(src, dst string, uid, gid int) error {
10561116
r, err := os.Open(src)
10571117
if err != nil {
10581118
return err
@@ -1102,7 +1162,6 @@ func ValidateReplica(ctx context.Context, r Replica) error {
11021162

11031163
// Compute checksum of primary database under lock. This prevents a
11041164
// sync from occurring and the database will not be written.
1105-
primaryPath := filepath.Join(tmpdir, "primary")
11061165
chksum0, pos, err := db.CRC64()
11071166
if err != nil {
11081167
return fmt.Errorf("cannot compute checksum: %w", err)
@@ -1125,10 +1184,19 @@ func ValidateReplica(ctx context.Context, r Replica) error {
11251184
}
11261185

11271186
// Open file handle for restored database.
1128-
chksum1, err := checksumFile(restorePath)
1187+
// NOTE: This open is ok as the restored database is not managed by litestream.
1188+
f, err := os.Open(restorePath)
11291189
if err != nil {
11301190
return err
11311191
}
1192+
defer f.Close()
1193+
1194+
// Read entire file into checksum.
1195+
h := crc64.New(crc64.MakeTable(crc64.ISO))
1196+
if _, err := io.Copy(h, f); err != nil {
1197+
return err
1198+
}
1199+
chksum1 := h.Sum64()
11321200

11331201
status := "ok"
11341202
mismatch := chksum0 != chksum1
@@ -1140,15 +1208,6 @@ func ValidateReplica(ctx context.Context, r Replica) error {
11401208
// Validate checksums match.
11411209
if mismatch {
11421210
internal.ReplicaValidationTotalCounterVec.WithLabelValues(db.Path(), r.Name(), "error").Inc()
1143-
1144-
// Compress mismatched databases and report temporary path for investigation.
1145-
if err := compressFile(primaryPath, primaryPath+".lz4", db.uid, db.gid); err != nil {
1146-
return fmt.Errorf("cannot compress primary db: %w", err)
1147-
} else if err := compressFile(restorePath, restorePath+".lz4", db.uid, db.gid); err != nil {
1148-
return fmt.Errorf("cannot compress replica db: %w", err)
1149-
}
1150-
log.Printf("%s(%s): validator: mismatch files @ %s", db.Path(), r.Name(), tmpdir)
1151-
11521211
return ErrChecksumMismatch
11531212
}
11541213

0 commit comments

Comments
 (0)