Compare commits

...

3 Commits

9 changed files with 255 additions and 73 deletions

View File

@@ -97,31 +97,33 @@ Benchmarks run on a 11" Macbook with a 1.4Ghz Intel Core i7:
``` ```
$ make bench $ make bench
... ...
BenchmarkGet/128B-4 200000 5780 ns/op 400 B/op 5 allocs/op BenchmarkGet/128B-4 300000 5178 ns/op 400 B/op 5 allocs/op
BenchmarkGet/256B-4 200000 6138 ns/op 656 B/op 5 allocs/op BenchmarkGet/256B-4 300000 5273 ns/op 656 B/op 5 allocs/op
BenchmarkGet/512B-4 200000 5967 ns/op 1200 B/op 5 allocs/op BenchmarkGet/512B-4 200000 5368 ns/op 1200 B/op 5 allocs/op
BenchmarkGet/1K-4 200000 6290 ns/op 2288 B/op 5 allocs/op BenchmarkGet/1K-4 200000 5800 ns/op 2288 B/op 5 allocs/op
BenchmarkGet/2K-4 200000 6293 ns/op 4464 B/op 5 allocs/op BenchmarkGet/2K-4 200000 6766 ns/op 4464 B/op 5 allocs/op
BenchmarkGet/4K-4 200000 7673 ns/op 9072 B/op 5 allocs/op BenchmarkGet/4K-4 200000 7857 ns/op 9072 B/op 5 allocs/op
BenchmarkGet/8K-4 200000 10373 ns/op 17776 B/op 5 allocs/op BenchmarkGet/8K-4 200000 9538 ns/op 17776 B/op 5 allocs/op
BenchmarkGet/16K-4 100000 14227 ns/op 34928 B/op 5 allocs/op BenchmarkGet/16K-4 100000 13188 ns/op 34928 B/op 5 allocs/op
BenchmarkGet/32K-4 100000 25953 ns/op 73840 B/op 5 allocs/op BenchmarkGet/32K-4 100000 21620 ns/op 73840 B/op 5 allocs/op
BenchmarkPut/128B-4 100000 17353 ns/op 680 B/op 5 allocs/op BenchmarkPut/128B-4 200000 7875 ns/op 409 B/op 6 allocs/op
BenchmarkPut/256B-4 100000 18620 ns/op 808 B/op 5 allocs/op BenchmarkPut/256B-4 200000 8712 ns/op 538 B/op 6 allocs/op
BenchmarkPut/512B-4 100000 19068 ns/op 1096 B/op 5 allocs/op BenchmarkPut/512B-4 200000 9832 ns/op 829 B/op 6 allocs/op
BenchmarkPut/1K-4 100000 23738 ns/op 1673 B/op 5 allocs/op BenchmarkPut/1K-4 100000 13105 ns/op 1410 B/op 6 allocs/op
BenchmarkPut/2K-4 50000 25118 ns/op 2826 B/op 5 allocs/op BenchmarkPut/2K-4 100000 18601 ns/op 2572 B/op 6 allocs/op
BenchmarkPut/4K-4 50000 44605 ns/op 5389 B/op 5 allocs/op BenchmarkPut/4K-4 50000 36631 ns/op 5151 B/op 6 allocs/op
BenchmarkPut/8K-4 30000 55237 ns/op 10001 B/op 5 allocs/op BenchmarkPut/8K-4 30000 56128 ns/op 9798 B/op 6 allocs/op
BenchmarkPut/16K-4 20000 78966 ns/op 18972 B/op 5 allocs/op BenchmarkPut/16K-4 20000 83209 ns/op 18834 B/op 6 allocs/op
BenchmarkPut/32K-4 10000 116253 ns/op 41520 B/op 5 allocs/op BenchmarkPut/32K-4 10000 135899 ns/op 41517 B/op 6 allocs/op
BenchmarkScan-4 1000000 1851 ns/op 493 B/op 25 allocs/op
``` ```
For 128B values: For 128B values:
* ~180,000 reads/sec * ~200,000 reads/sec
* ~60,000 writes/sec * ~130,000 writes/sec
The full benchmark above shows linear performance as you increase key/value sizes. The full benchmark above shows linear performance as you increase key/value sizes.

View File

@@ -9,6 +9,7 @@ import (
"strings" "strings"
"time" "time"
"github.com/derekparker/trie"
"github.com/gofrs/flock" "github.com/gofrs/flock"
) )
@@ -27,6 +28,7 @@ type Bitcask struct {
curr *Datafile curr *Datafile
keydir *Keydir keydir *Keydir
datafiles []*Datafile datafiles []*Datafile
trie *trie.Trie
maxDatafileSize int64 maxDatafileSize int64
} }
@@ -82,7 +84,8 @@ func (b *Bitcask) Put(key string, value []byte) error {
return err return err
} }
b.keydir.Add(key, b.curr.id, index, time.Now().Unix()) item := b.keydir.Add(key, b.curr.id, index, time.Now().Unix())
b.trie.Add(key, item)
return nil return nil
} }
@@ -94,10 +97,21 @@ func (b *Bitcask) Delete(key string) error {
} }
b.keydir.Delete(key) b.keydir.Delete(key)
b.trie.Remove(key)
return nil return nil
} }
// Scan invokes f once for every key in the datastore that begins with
// prefix, using the in-memory trie for the prefix lookup. Iteration
// stops at the first error returned by f, which is propagated to the
// caller; otherwise Scan returns nil.
//
// NOTE(review): the trie is read here without synchronization while
// Put/Delete mutate it — confirm callers serialize access.
func (b *Bitcask) Scan(prefix string, f func(key string) error) error {
	matches := b.trie.PrefixSearch(prefix)
	for i := 0; i < len(matches); i++ {
		if err := f(matches[i]); err != nil {
			return err
		}
	}
	return nil
}
func (b *Bitcask) Fold(f func(key string) error) error { func (b *Bitcask) Fold(f func(key string) error) error {
for key := range b.keydir.Keys() { for key := range b.keydir.Keys() {
if err := f(key); err != nil { if err := f(key); err != nil {
@@ -265,9 +279,11 @@ func Open(path string, options ...func(*Bitcask) error) (*Bitcask, error) {
return nil, err return nil, err
} }
keydir := NewKeydir()
var datafiles []*Datafile var datafiles []*Datafile
keydir := NewKeydir()
trie := trie.New()
for i, fn := range fns { for i, fn := range fns {
df, err := NewDatafile(path, ids[i], true) df, err := NewDatafile(path, ids[i], true)
if err != nil { if err != nil {
@@ -289,7 +305,8 @@ func Open(path string, options ...func(*Bitcask) error) (*Bitcask, error) {
for key := range hint.Keys() { for key := range hint.Keys() {
item, _ := hint.Get(key) item, _ := hint.Get(key)
keydir.Add(key, item.FileID, item.Index, item.Timestamp) _ = keydir.Add(key, item.FileID, item.Index, item.Timestamp)
trie.Add(key, item)
} }
} else { } else {
for { for {
@@ -307,7 +324,8 @@ func Open(path string, options ...func(*Bitcask) error) (*Bitcask, error) {
continue continue
} }
keydir.Add(e.Key, ids[i], e.Index, e.Timestamp) item := keydir.Add(e.Key, ids[i], e.Index, e.Timestamp)
trie.Add(e.Key, item)
} }
} }
} }
@@ -329,6 +347,7 @@ func Open(path string, options ...func(*Bitcask) error) (*Bitcask, error) {
curr: curr, curr: curr,
keydir: keydir, keydir: keydir,
datafiles: datafiles, datafiles: datafiles,
trie: trie,
maxDatafileSize: DefaultMaxDatafileSize, maxDatafileSize: DefaultMaxDatafileSize,
} }

View File

@@ -3,6 +3,8 @@ package bitcask
import ( import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"reflect"
"sort"
"strings" "strings"
"sync" "sync"
"testing" "testing"
@@ -327,6 +329,58 @@ func TestConcurrent(t *testing.T) {
}) })
} }
// TestScan exercises prefix scanning over a small fixture database:
// keys sharing the "fo" prefix are visited and their values collected;
// all other keys must be skipped.
func TestScan(t *testing.T) {
	assert := assert.New(t)

	testdir, err := ioutil.TempDir("", "bitcask")
	assert.NoError(err)

	var db *Bitcask

	t.Run("Setup", func(t *testing.T) {
		t.Run("Open", func(t *testing.T) {
			db, err = Open(testdir)
			assert.NoError(err)
		})

		t.Run("Put", func(t *testing.T) {
			var items = map[string][]byte{
				"1":     []byte("1"),
				"2":     []byte("2"),
				"3":     []byte("3"),
				"food":  []byte("pizza"),
				"foo":   []byte("foo"),
				"fooz":  []byte("fooz ball"),
				"hello": []byte("world"),
			}
			for k, v := range items {
				err = db.Put(k, v)
				assert.NoError(err)
			}
		})
	})

	t.Run("Scan", func(t *testing.T) {
		var (
			vals     []string
			expected = []string{
				"foo",
				"fooz ball",
				"pizza",
			}
		)
		err = db.Scan("fo", func(key string) error {
			val, err := db.Get(key)
			assert.NoError(err)
			vals = append(vals, string(val))
			return nil
		})
		// Fix: the error returned by Scan itself was previously dropped,
		// so a failing prefix search could pass silently.
		assert.NoError(err)
		sort.Strings(vals)
		assert.Equal(expected, vals)
	})
}
func TestLocking(t *testing.T) { func TestLocking(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
@@ -433,3 +487,47 @@ func BenchmarkPut(b *testing.B) {
}) })
} }
} }
// BenchmarkScan measures the cost of a prefix scan over a small fixture
// database, verifying on every iteration that exactly the expected keys
// are returned.
func BenchmarkScan(b *testing.B) {
	testdir, err := ioutil.TempDir("", "bitcask")
	if err != nil {
		b.Fatal(err)
	}

	db, err := Open(testdir)
	if err != nil {
		b.Fatal(err)
	}
	defer db.Close()

	var items = map[string][]byte{
		"1":     []byte("1"),
		"2":     []byte("2"),
		"3":     []byte("3"),
		"food":  []byte("pizza"),
		"foo":   []byte("foo"),
		"fooz":  []byte("fooz ball"),
		"hello": []byte("world"),
	}
	for k, v := range items {
		if err := db.Put(k, v); err != nil {
			b.Fatal(err)
		}
	}

	var expected = []string{"foo", "food", "fooz"}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var keys []string
		err = db.Scan("fo", func(key string) error {
			keys = append(keys, key)
			return nil
		})
		// Fix: the error returned by Scan was previously ignored.
		if err != nil {
			b.Fatal(err)
		}
		sort.Strings(keys)
		if !reflect.DeepEqual(expected, keys) {
			// Fix: format string used the broken verb "#%v"; also use
			// Fatalf directly instead of wrapping with fmt.Errorf.
			b.Fatalf("expected keys=%#v got=%#v", expected, keys)
		}
	}
}

60
cmd/bitcask/scan.go Normal file
View File

@@ -0,0 +1,60 @@
package main
import (
"fmt"
"os"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"github.com/prologic/bitcask"
)
// scanCmd implements the `bitcask scan <prefix>` subcommand, printing
// the value of every key that matches the given prefix.
var scanCmd = &cobra.Command{
	Use:     "scan <prefix>",
	Aliases: []string{"search", "find"},
	// Fix: user-facing help text had typos ("prefis", "performa").
	Short: "Perform a prefix scan for keys",
	Long: `This performs a prefix scan for keys starting with the given
prefix. This uses a Trie to search for matching keys and returns all matched
keys.`,
	Args: cobra.ExactArgs(1),
	Run: func(cmd *cobra.Command, args []string) {
		path := viper.GetString("path")
		prefix := args[0]

		// Exit with the status code computed by scan (0 ok, 1 error).
		os.Exit(scan(path, prefix))
	},
}
// init registers the scan subcommand with the root bitcask command.
func init() {
	RootCmd.AddCommand(scanCmd)
}
// scan opens the database at path, prints the value of every key that
// begins with prefix, and returns a process exit status: 0 on success,
// 1 if the database cannot be opened or the scan fails.
func scan(path, prefix string) int {
	db, err := bitcask.Open(path)
	if err != nil {
		log.WithError(err).Error("error opening database")
		return 1
	}
	defer db.Close()

	if err = db.Scan(prefix, func(key string) error {
		value, err := db.Get(key)
		if err != nil {
			log.WithError(err).Error("error reading key")
			return err
		}

		fmt.Printf("%s\n", string(value))
		log.WithField("key", key).WithField("value", value).Debug("key/value")

		return nil
	}); err != nil {
		log.WithError(err).Error("error scanning keys")
		return 1
	}

	return 0
}

View File

@@ -1,13 +1,14 @@
package bitcask package bitcask
import ( import (
"errors"
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"sync" "sync"
"time" "time"
"github.com/pkg/errors"
pb "github.com/prologic/bitcask/proto" pb "github.com/prologic/bitcask/proto"
"github.com/prologic/bitcask/streampb" "github.com/prologic/bitcask/streampb"
) )
@@ -23,11 +24,12 @@ var (
type Datafile struct { type Datafile struct {
sync.RWMutex sync.RWMutex
id int id int
r *os.File r *os.File
w *os.File w *os.File
dec *streampb.Decoder offset int64
enc *streampb.Encoder dec *streampb.Decoder
enc *streampb.Encoder
} }
func NewDatafile(path string, id int, readonly bool) (*Datafile, error) { func NewDatafile(path string, id int, readonly bool) (*Datafile, error) {
@@ -50,16 +52,23 @@ func NewDatafile(path string, id int, readonly bool) (*Datafile, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
stat, err := r.Stat()
if err != nil {
return nil, errors.Wrap(err, "error calling Stat()")
}
offset := stat.Size()
dec := streampb.NewDecoder(r) dec := streampb.NewDecoder(r)
enc := streampb.NewEncoder(w) enc := streampb.NewEncoder(w)
return &Datafile{ return &Datafile{
id: id, id: id,
r: r, r: r,
w: w, w: w,
dec: dec, offset: offset,
enc: enc, dec: dec,
enc: enc,
}, nil }, nil
} }
@@ -87,22 +96,7 @@ func (df *Datafile) Sync() error {
} }
func (df *Datafile) Size() (int64, error) { func (df *Datafile) Size() (int64, error) {
var ( return df.offset, nil
stat os.FileInfo
err error
)
if df.w == nil {
stat, err = df.r.Stat()
} else {
stat, err = df.w.Stat()
}
if err != nil {
return -1, err
}
return stat.Size(), nil
} }
func (df *Datafile) Read() (e pb.Entry, err error) { func (df *Datafile) Read() (e pb.Entry, err error) {
@@ -129,23 +123,17 @@ func (df *Datafile) Write(e pb.Entry) (int64, error) {
return -1, ErrReadonly return -1, ErrReadonly
} }
stat, err := df.w.Stat() e.Index = df.offset
if err != nil {
return -1, err
}
index := stat.Size()
e.Index = index
e.Timestamp = time.Now().Unix() e.Timestamp = time.Now().Unix()
df.Lock() df.Lock()
err = df.enc.Encode(&e) n, err := df.enc.Encode(&e)
df.Unlock() df.Unlock()
if err != nil { if err != nil {
return -1, err return -1, err
} }
df.offset += n
return index, nil return e.Index, nil
} }

1
go.mod
View File

@@ -1,6 +1,7 @@
module github.com/prologic/bitcask module github.com/prologic/bitcask
require ( require (
github.com/derekparker/trie v0.0.0-20180212171413-e608c2733dc7
github.com/gofrs/flock v0.7.1 github.com/gofrs/flock v0.7.1
github.com/gogo/protobuf v1.2.1 github.com/gogo/protobuf v1.2.1
github.com/golang/protobuf v1.2.0 github.com/golang/protobuf v1.2.0

2
go.sum
View File

@@ -8,6 +8,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/derekparker/trie v0.0.0-20180212171413-e608c2733dc7 h1:Cab9yoTQh1TxObKfis1DzZ6vFLK5kbeenMjRES/UE3o=
github.com/derekparker/trie v0.0.0-20180212171413-e608c2733dc7/go.mod h1:D6ICZm05D9VN1n/8iOtBxLpXtoGp6HDFUJ1RNVieOSE=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/gofrs/flock v0.7.1 h1:DP+LD/t0njgoPBvT5MJLeliUIVQR03hiKR6vezdwHlc= github.com/gofrs/flock v0.7.1 h1:DP+LD/t0njgoPBvT5MJLeliUIVQR03hiKR6vezdwHlc=

View File

@@ -25,15 +25,18 @@ func NewKeydir() *Keydir {
} }
} }
func (k *Keydir) Add(key string, fileid int, index, timestamp int64) { func (k *Keydir) Add(key string, fileid int, index, timestamp int64) Item {
k.Lock() item := Item{
defer k.Unlock()
k.kv[key] = Item{
FileID: fileid, FileID: fileid,
Index: index, Index: index,
Timestamp: timestamp, Timestamp: timestamp,
} }
k.Lock()
k.kv[key] = item
k.Unlock()
return item
} }
func (k *Keydir) Get(key string) (Item, bool) { func (k *Keydir) Get(key string) (Item, bool) {

View File

@@ -1,6 +1,7 @@
package streampb package streampb
import ( import (
"bufio"
"encoding/binary" "encoding/binary"
"io" "io"
@@ -16,32 +17,40 @@ const (
// NewEncoder creates a streaming protobuf encoder. // NewEncoder creates a streaming protobuf encoder.
func NewEncoder(w io.Writer) *Encoder { func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w} return &Encoder{w: bufio.NewWriter(w)}
} }
// Encoder wraps an underlying io.Writer and allows you to stream // Encoder wraps an underlying io.Writer and allows you to stream
// proto encodings on it. // proto encodings on it.
type Encoder struct { type Encoder struct {
w io.Writer w *bufio.Writer
} }
// Encode takes any proto.Message and streams it to the underlying writer. // Encode takes any proto.Message and streams it to the underlying writer.
// Messages are framed with a length prefix. // Messages are framed with a length prefix.
func (e *Encoder) Encode(msg proto.Message) error { func (e *Encoder) Encode(msg proto.Message) (int64, error) {
prefixBuf := make([]byte, prefixSize) prefixBuf := make([]byte, prefixSize)
buf, err := proto.Marshal(msg) buf, err := proto.Marshal(msg)
if err != nil { if err != nil {
return err return 0, err
} }
binary.BigEndian.PutUint64(prefixBuf, uint64(len(buf))) binary.BigEndian.PutUint64(prefixBuf, uint64(len(buf)))
if _, err := e.w.Write(prefixBuf); err != nil { if _, err := e.w.Write(prefixBuf); err != nil {
return errors.Wrap(err, "failed writing length prefix") return 0, errors.Wrap(err, "failed writing length prefix")
} }
_, err = e.w.Write(buf) n, err := e.w.Write(buf)
return errors.Wrap(err, "failed writing marshaled data") if err != nil {
return 0, errors.Wrap(err, "failed writing marshaled data")
}
if err = e.w.Flush(); err != nil {
return 0, errors.Wrap(err, "failed flushing data")
}
return int64(n + prefixSize), nil
} }
// NewDecoder creates a streaming protobuf decoder. // NewDecoder creates a streaming protobuf decoder.