mirror of
https://github.com/taigrr/bitcask
synced 2025-01-18 04:03:17 -08:00
new merge approach (#191)
* new merge approach * code refactor * comment added * isMerging flag added to allow 1 merge operation at a time * get api modified. merge updated (no recursive read locks) Co-authored-by: yash <yash.chandra@grabpay.com> Co-authored-by: James Mills <prologic@shortcircuit.net.au>
This commit is contained in:
parent
80c06a3572
commit
e1cdffd8f1
116
bitcask.go
116
bitcask.go
@ -44,6 +44,10 @@ var (
|
||||
// ErrDatabaseLocked is the error returned if the database is locked
|
||||
// (typically opened by another process)
|
||||
ErrDatabaseLocked = errors.New("error: database locked")
|
||||
|
||||
// ErrMergeInProgress is the error returned if merge is called when already a merge
|
||||
// is in progress
|
||||
ErrMergeInProgress = errors.New("error: merge already in progress")
|
||||
)
|
||||
|
||||
// Bitcask is a struct that represents a on-disk LSM and WAL data structure
|
||||
@ -62,6 +66,7 @@ type Bitcask struct {
|
||||
trie art.Tree
|
||||
indexer index.Indexer
|
||||
metadata *metadata.MetaData
|
||||
isMerging bool
|
||||
}
|
||||
|
||||
// Stats is a struct returned by Stats() on an open Bitcask instance
|
||||
@ -123,15 +128,21 @@ func (b *Bitcask) Sync() error {
|
||||
return b.curr.Sync()
|
||||
}
|
||||
|
||||
// Get retrieves the value of the given key. If the key is not found or an/I/O
|
||||
// error occurs a null byte slice is returned along with the error.
|
||||
// Get fetches value for given key
|
||||
func (b *Bitcask) Get(key []byte) ([]byte, error) {
|
||||
b.mu.RLock()
|
||||
defer b.mu.RUnlock()
|
||||
|
||||
return b.get(key)
|
||||
}
|
||||
|
||||
// get retrieves the value of the given key. If the key is not found or an/I/O
|
||||
// error occurs a null byte slice is returned along with the error.
|
||||
func (b *Bitcask) get(key []byte) ([]byte, error) {
|
||||
var df data.Datafile
|
||||
|
||||
b.mu.RLock()
|
||||
value, found := b.trie.Search(key)
|
||||
if !found {
|
||||
b.mu.RUnlock()
|
||||
return nil, ErrKeyNotFound
|
||||
}
|
||||
|
||||
@ -144,7 +155,6 @@ func (b *Bitcask) Get(key []byte) ([]byte, error) {
|
||||
}
|
||||
|
||||
e, err := df.ReadAt(item.Offset, item.Size)
|
||||
b.mu.RUnlock()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -338,6 +348,33 @@ func (b *Bitcask) put(key, value []byte) (int64, int64, error) {
|
||||
return b.curr.Write(e)
|
||||
}
|
||||
|
||||
// closeCurrentFile closes current datafile and makes it read only.
|
||||
func (b *Bitcask) closeCurrentFile() error {
|
||||
err := b.curr.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
id := b.curr.FileID()
|
||||
df, err := data.NewDatafile(b.path, id, true, b.config.MaxKeySize, b.config.MaxValueSize, b.config.FileFileModeBeforeUmask)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
b.datafiles[id] = df
|
||||
return nil
|
||||
}
|
||||
|
||||
// openNewWritableFile opens new datafile for writing data
|
||||
func (b *Bitcask) openNewWritableFile() error {
|
||||
id := b.curr.FileID() + 1
|
||||
curr, err := data.NewDatafile(b.path, id, false, b.config.MaxKeySize, b.config.MaxValueSize, b.config.FileFileModeBeforeUmask)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.curr = curr
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Bitcask) Reopen() error {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
@ -367,6 +404,34 @@ func (b *Bitcask) Reopen() error {
|
||||
// and deleted keys removes. Duplicate key/value pairs are also removed.
|
||||
// Call this function periodically to reclaim disk space.
|
||||
func (b *Bitcask) Merge() error {
|
||||
b.mu.Lock()
|
||||
if b.isMerging {
|
||||
b.mu.Unlock()
|
||||
return ErrMergeInProgress
|
||||
}
|
||||
b.isMerging = true
|
||||
b.mu.Unlock()
|
||||
defer func() {
|
||||
b.isMerging = false
|
||||
}()
|
||||
b.mu.RLock()
|
||||
err := b.closeCurrentFile()
|
||||
if err != nil {
|
||||
b.mu.RUnlock()
|
||||
return err
|
||||
}
|
||||
filesToMerge := make([]int, 0, len(b.datafiles))
|
||||
for k := range b.datafiles {
|
||||
filesToMerge = append(filesToMerge, k)
|
||||
}
|
||||
err = b.openNewWritableFile()
|
||||
if err != nil {
|
||||
b.mu.RUnlock()
|
||||
return err
|
||||
}
|
||||
b.mu.RUnlock()
|
||||
sort.Ints(filesToMerge)
|
||||
|
||||
// Temporary merged database path
|
||||
temp, err := ioutil.TempDir(b.path, "merge")
|
||||
if err != nil {
|
||||
@ -384,7 +449,12 @@ func (b *Bitcask) Merge() error {
|
||||
// Doing this automatically strips deleted keys and
|
||||
// old key/value pairs
|
||||
err = b.Fold(func(key []byte) error {
|
||||
value, err := b.Get(key)
|
||||
item, _ := b.trie.Search(key)
|
||||
// if key was updated after start of merge operation, nothing to do
|
||||
if item.(internal.Item).FileID > filesToMerge[len(filesToMerge)-1] {
|
||||
return nil
|
||||
}
|
||||
value, err := b.get(key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -398,29 +468,33 @@ func (b *Bitcask) Merge() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = mdb.Close()
|
||||
if err != nil {
|
||||
if err = mdb.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err = b.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Close the database
|
||||
err = b.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Remove all data files
|
||||
// Remove data files
|
||||
files, err := ioutil.ReadDir(b.path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, file := range files {
|
||||
if !file.IsDir() {
|
||||
err := os.RemoveAll(path.Join([]string{b.path, file.Name()}...))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if file.IsDir() {
|
||||
continue
|
||||
}
|
||||
ids, err := internal.ParseIds([]string{file.Name()})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// if datafile was created after start of merge, skip
|
||||
if len(ids) > 0 && ids[0] > filesToMerge[len(filesToMerge)-1] {
|
||||
continue
|
||||
}
|
||||
err = os.RemoveAll(path.Join(b.path, file.Name()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1034,7 +1034,7 @@ func TestMerge(t *testing.T) {
|
||||
|
||||
s3, err := db.Stats()
|
||||
assert.NoError(err)
|
||||
assert.Equal(1, s3.Datafiles)
|
||||
assert.Equal(2, s3.Datafiles)
|
||||
assert.Equal(1, s3.Keys)
|
||||
assert.True(s3.Size > s1.Size)
|
||||
assert.True(s3.Size < s2.Size)
|
||||
@ -1364,18 +1364,19 @@ func TestMergeErrors(t *testing.T) {
|
||||
assert.NoError(err)
|
||||
defer os.RemoveAll(testdir)
|
||||
|
||||
db, err := Open(testdir)
|
||||
db, err := Open(testdir, WithMaxDatafileSize(22))
|
||||
assert.NoError(err)
|
||||
|
||||
assert.NoError(db.Put([]byte("foo"), []byte("bar")))
|
||||
assert.NoError(db.Put([]byte("bar"), []byte("baz")))
|
||||
|
||||
mockDatafile := new(mocks.Datafile)
|
||||
mockDatafile.On("FileID").Return(0)
|
||||
mockDatafile.On("Close").Return(nil)
|
||||
mockDatafile.On("ReadAt", int64(0), int64(22)).Return(
|
||||
internal.Entry{},
|
||||
ErrMockError,
|
||||
)
|
||||
db.curr = mockDatafile
|
||||
db.datafiles[0] = mockDatafile
|
||||
|
||||
err = db.Merge()
|
||||
assert.Error(err)
|
||||
|
Loading…
x
Reference in New Issue
Block a user