// Copyright 2017 The File Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package file // import "modernc.org/file" import ( "bytes" "encoding/binary" "fmt" "io" "os" "time" "unsafe" "modernc.org/internal/buffer" ) const ( bit63 = -1 << 63 szNfo = unsafe.Sizeof(nfo{}) ) var ( _ File = (*WAL)(nil) _ io.ReaderAt = (*WAL)(nil) _ io.WriterAt = (*WAL)(nil) _ os.FileInfo = (*fileInfo)(nil) crash bool // Test hook: simulate (*WAL).Commit crash. tag = []byte{0x80, 0x19, 0xcb, 0x57, 0xa7, 0xfd, 0x1a, 0xb4, 0xda, 0xcd, 0xc4, 0x4d, 0x1a, 0x63, 0xfc, 0x37} ) type nfo struct { PageSize int64 Pages int64 Size int64 Skip int64 // Keep last Tag [16]byte } type fileInfo WAL func (f *fileInfo) IsDir() bool { return false } func (f *fileInfo) ModTime() time.Time { return f.modTime } func (f *fileInfo) Mode() os.FileMode { return f.fileMode } func (f *fileInfo) Name() string { return f.name } func (f *fileInfo) Size() int64 { return f.size } func (f *fileInfo) Sys() interface{} { return f.sys } // WAL implements a write ahead log of F using W. Call wal.F.ReadAt to perform // 'read-committed' reads. Call wal.ReadAt to perform 'read uncommitted' reads. // Call wal.W.ReadAt to read the write ahead log itself. // // Concurrency // // wal.ReadAt (read uncommitted) is safe for concurrent use by multiple // goroutines so multiple readers are fine, but multiple readers and a single // writer is not. However, wal.F.ReadAt (read committed) is safe to run // concurrently with any WAL method except Commit. In a typical DB isolation // scenario the setup is something like // // var wal *file.WAL // var mu sync.RWMutex // The mutex associated with wal. // // // in another goroutine // mu.RLock() // read isolated, concurrently with other readers // n, err := wal.F.ReadAt(buf, off) // note the F // ... // // reads are isolated only until the next RUnlock.
// mu.RUnlock() // // wal.Commit and mutating of F is now possible. // ... // // // in another goroutine (writers serialization not considered in this example) // n, err := wal.WriteAt(buf, off) // ... // // // and eventually somewhere // mu.Lock() // err := wal.Commit() // ... // mu.Unlock() // ... // // No other WAL method is safe for concurrent use by multiple goroutines or // concurrently with ReadAt. Callers must provide their own synchronization // when used concurrently by multiple goroutines. // // Logical page // // The WAL is divided in logical pages. Every page is 1<= 0 { zf = first z := buffer.CGet(w.pageSize - int(po)) if _, err := w.W.WriteAt(*z, wp+po+szInt64); err != nil { buffer.Put(z) return err } buffer.Put(z) } first++ } last := w.size &^ w.pageMask if po := w.size & w.pageMask; po != 0 && last != zf { last++ } for ; first <= last; first++ { if wp, ok := w.m[first]; ok && wp >= 0 { w.m[first] = bit63 | wp // Invalidate the page but keep it allocated. } } w.size = size if size < w.size0 { w.size0 = size } return nil } // ReadAt performs a read-uncommitted operation on w. The semantics are those // of io.ReaderAt.ReadAt. Call w.F.ReadAt to perform a read-committed // operation. // // ReadAt implements File. 
func (w *WAL) ReadAt(b []byte, off int64) (n int, err error) { if off < 0 { return 0, fmt.Errorf("%T.ReadAt(): invalid offset %#x", w, off) } avail := w.size - off for len(b) != 0 && avail != 0 { p := off &^ w.pageMask o := off & w.pageMask rq := w.pageSize - int(o) if rq > len(b) { rq = len(b) } if int64(rq) > avail { rq = int(avail) } var nr int switch wp, ok := w.m[p]; { case wp < 0: if off >= w.size { return n, io.EOF } nr = w.pageSize - int(o) if nr > len(b) { nr = len(b) } z := b[:nr] for i := range z { z[i] = 0 } case ok: if nr, err = w.W.ReadAt(b[:rq], wp+o+szInt64); err != nil { if err != io.EOF { return n, err } err = nil } default: if avail0 := w.size0 - off; avail0 > 0 { rq0 := w.pageSize - int(o) if rq0 > len(b) { rq0 = len(b) } if rq0 > rq { rq0 = rq } if int64(rq0) > avail0 { rq0 = int(avail0) } if nr, err = w.F.ReadAt(b[:rq0], p+o); err != nil { if err != io.EOF { return n, err } err = nil } z := b[rq0:rq] for i := range z { z[i] = 0 } nr += len(z) break } z := b[:rq] for i := range z { z[i] = 0 } nr = len(z) } n += nr b = b[nr:] off += int64(nr) avail -= int64(nr) } if avail == 0 { err = io.EOF } return n, err } // WriteAt performs a write operation on w. The semantics are those of // io.WriteAT.WriteAt. WriteAt does not write to w.F. Instead the writes are // collected in w.W until committed. // // WriteAt implements File. 
func (w *WAL) WriteAt(b []byte, off int64) (n int, err error) { if off < 0 { return 0, fmt.Errorf("%T.ReadAt(): invalid offset %#x", w, off) } if len(b) == 0 { return 0, nil } var buf []byte for len(b) != 0 { p := off &^ w.pageMask o := off & w.pageMask var nw int switch wp, ok := w.m[p]; { case wp < 0: if buf == nil { p := buffer.Get(w.pageSize + szInt64) defer buffer.Put(p) buf = *p } buf := buf[szInt64:] z := buf[:o] for i := range z { z[i] = 0 } wp &^= bit63 nw = copy(buf[o:], b) z = buf[int(o)+nw:] for i := range z { z[i] = 0 } if _, err := w.W.WriteAt(buf, wp+szInt64); err != nil { return n, err } w.m[p] = wp case ok: rq := w.pageSize - int(o) if rq > len(b) { rq = len(b) } if nw, err = w.W.WriteAt(b[:rq], wp+o+szInt64); err != nil { return n, err } default: if buf == nil { p := buffer.Get(w.pageSize + szInt64) defer buffer.Put(p) buf = *p } binary.BigEndian.PutUint64(buf, uint64(p)) rq := 0 if p < w.size0 { avail := w.size0 - p rq = w.pageSize if avail < int64(w.pageSize) { rq = int(avail) } nr, err := w.F.ReadAt(buf[szInt64:rq+szInt64], p) if err != nil && err != io.EOF { return n, err } if nr == 0 { return n, io.EOF } } z := buf[rq+szInt64:] for i := range z { z[i] = 0 } nw = copy(buf[o+szInt64:], b) wp = w.skip + int64(len(w.m))*int64(len(buf)) if _, err := w.W.WriteAt(buf, wp); err != nil { return n, err } w.m[p] = wp } n += nw b = b[nw:] off += int64(nw) } if off > w.size { w.size = off } return n, nil } // Commit transfers all writes to w collected so far into w.F and empties w and // w.W or returns an error, if any. If the program crashes during committing a // subsequent NewWAL call with the same files f and w will re-initiate the // commit operation. // // The WAL is ready for reuse if Commit returns nil. 
func (w *WAL) Commit() error { for p, wp := range w.m { if p < w.size && wp < 0 { binary.BigEndian.PutUint64(w.b8[:], uint64(bit63|p)) if _, err := w.W.WriteAt(w.b8[:], wp&^bit63); err != nil { return err } } } binary.BigEndian.PutUint64(w.nfo[unsafe.Offsetof(nfo{}.Pages):], uint64(len(w.m))) binary.BigEndian.PutUint64(w.nfo[unsafe.Offsetof(nfo{}.Size):], uint64(w.size)) h := w.skip + int64(len(w.m))*int64(w.pageSize+szInt64) if _, err := w.W.WriteAt(w.nfo[:], h); err != nil { return fmt.Errorf("%T.Commit: write WAL metadata: %v", w, err) } if w.DoSync { if err := w.Sync(); err != nil { return err } } if crash { return nil } if err := w.F.Truncate(w.size0); err != nil { return fmt.Errorf("%T.commit: truncate: %v", w, err) } return w.commit(h) } func (w *WAL) commit(h int64) error { bufSz := w.pageSize + szInt64 p := buffer.Get(bufSz) defer buffer.Put(p) buf := *p for o := w.skip; o < h; o += int64(bufSz) { if _, err := w.W.ReadAt(buf, o); err != nil { return fmt.Errorf("%T.commit: read WAL: %v", w, err) } p := int64(binary.BigEndian.Uint64(buf)) q := p &^ bit63 if q >= w.size { continue } rq := w.pageSize avail := w.size - q if int64(rq) > avail { rq = int(avail) } if p < 0 { z := buf[szInt64 : szInt64+rq] for i := range z { z[i] = 0 } } if _, err := w.F.WriteAt(buf[szInt64:szInt64+rq], q); err != nil { return fmt.Errorf("%T.commit: write: %v", w, err) } } if err := w.F.Truncate(w.size); err != nil { return fmt.Errorf("%T.commit: truncate: %v", w, err) } if w.DoSync { if err := w.F.Sync(); err != nil { return err } } if err := w.W.Truncate(w.skip); err != nil { return fmt.Errorf("%T.commit: truncate WAL: %v", w, err) } switch { case len(w.m) <= 100: for k := range w.m { delete(w.m, k) } default: w.m = map[int64]int64{} } w.size0 = w.size w.rollbakcSize = w.size return nil } // Rollback empties the WAL journal without transferring it to F. 
func (w *WAL) Rollback() error { if err := w.W.Truncate(w.skip); err != nil { return err } for k := range w.m { delete(w.m, k) } w.size0 = w.rollbakcSize w.size = w.rollbakcSize return nil }