Compare commits

..

10 Commits

Author  SHA1  Message  Date
James Mills  52b6c74a21  Fixed compile error in CLI  2019-03-14 21:33:40 +10:00
James Mills  d24a01797a  Added WithMaxKeySize() and WithMaxValueSize() options  2019-03-14 21:31:23 +10:00
James Mills  bc8f6c6718  Change locking error message  2019-03-14 21:31:01 +10:00
James Mills  b6c212d60c  Refactored option handling  2019-03-14 21:24:31 +10:00
James Mills  3f1b90eb23  Update README.md  2019-03-14 18:18:57 +10:00
James Mills  71a42800fe  Improved benchmark test suite for various key/value sizes  2019-03-14 18:17:20 +10:00
James Mills  3b9627aeb8  Fix concurrent read bug  2019-03-14 17:58:06 +10:00
James Mills  e0c4c4fdae  Fix concurrent write bug with multiple goroutines writing to the active datafile  2019-03-14 17:58:06 +10:00
James Mills  fb50eb2f82  Update README.md  2019-03-14 15:36:37 +10:00
James Mills  fb2335e3c1  Fixed tests  2019-03-14 07:46:59 +10:00
8 changed files with 337 additions and 84 deletions


@@ -12,6 +12,7 @@ A Bitcask (LSM+WAL) Key/Value Store written in Go.
* Embeddable
* Builtin CLI
* Builtin Redis-compatible server
* Predictable read/write performance
* Low latency
* High throughput (See: [Performance](README.md#Performance))
@@ -96,12 +97,33 @@ Benchmarks run on an 11" MacBook with a 1.4GHz Intel Core i7:
```
$ make bench
...
BenchmarkGet-4 300000 5065 ns/op 144 B/op 4 allocs/op
BenchmarkPut-4 100000 14640 ns/op 699 B/op 7 allocs/op
BenchmarkGet/128B-4 200000 5780 ns/op 400 B/op 5 allocs/op
BenchmarkGet/256B-4 200000 6138 ns/op 656 B/op 5 allocs/op
BenchmarkGet/512B-4 200000 5967 ns/op 1200 B/op 5 allocs/op
BenchmarkGet/1K-4 200000 6290 ns/op 2288 B/op 5 allocs/op
BenchmarkGet/2K-4 200000 6293 ns/op 4464 B/op 5 allocs/op
BenchmarkGet/4K-4 200000 7673 ns/op 9072 B/op 5 allocs/op
BenchmarkGet/8K-4 200000 10373 ns/op 17776 B/op 5 allocs/op
BenchmarkGet/16K-4 100000 14227 ns/op 34928 B/op 5 allocs/op
BenchmarkGet/32K-4 100000 25953 ns/op 73840 B/op 5 allocs/op
BenchmarkPut/128B-4 100000 17353 ns/op 680 B/op 5 allocs/op
BenchmarkPut/256B-4 100000 18620 ns/op 808 B/op 5 allocs/op
BenchmarkPut/512B-4 100000 19068 ns/op 1096 B/op 5 allocs/op
BenchmarkPut/1K-4 100000 23738 ns/op 1673 B/op 5 allocs/op
BenchmarkPut/2K-4 50000 25118 ns/op 2826 B/op 5 allocs/op
BenchmarkPut/4K-4 50000 44605 ns/op 5389 B/op 5 allocs/op
BenchmarkPut/8K-4 30000 55237 ns/op 10001 B/op 5 allocs/op
BenchmarkPut/16K-4 20000 78966 ns/op 18972 B/op 5 allocs/op
BenchmarkPut/32K-4 10000 116253 ns/op 41520 B/op 5 allocs/op
```
For 128B values:
* ~180,000 reads/sec
* ~60,000 writes/sec
The full benchmark above shows read and write latency scaling roughly linearly as key/value sizes increase.
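The headline numbers are simply the inverse of the 128B ns/op figures; a minimal sketch of that conversion, using the benchmark output quoted above:

```
package main

import "fmt"

func main() {
	// Throughput is the inverse of per-operation latency.
	// ns/op figures taken from the 128B rows of the output above.
	getNsPerOp := 5780.0  // BenchmarkGet/128B-4
	putNsPerOp := 17353.0 // BenchmarkPut/128B-4

	fmt.Printf("reads/sec:  %.0f\n", 1e9/getNsPerOp) // ~173,000, quoted as ~180,000 above
	fmt.Printf("writes/sec: %.0f\n", 1e9/putNsPerOp) // ~57,600, quoted as ~60,000 above
}
```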
## License


@@ -2,31 +2,27 @@ package bitcask
import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
"github.com/gofrs/flock"
)
const (
DefaultMaxDatafileSize = 1 << 20 // 1MB
)
var (
ErrKeyNotFound = errors.New("error: key not found")
ErrCannotAcquireLock = errors.New("error: cannot acquire lock")
ErrKeyNotFound = errors.New("error: key not found")
ErrKeyTooLarge = errors.New("error: key too large")
ErrValueTooLarge = errors.New("error: value too large")
ErrDatabaseLocked = errors.New("error: database locked")
)
type Bitcask struct {
*flock.Flock
opts Options
path string
curr *Datafile
keydir *Keydir
@@ -73,6 +69,13 @@ func (b *Bitcask) Get(key string) ([]byte, error) {
}
func (b *Bitcask) Put(key string, value []byte) error {
if len(key) > b.opts.MaxKeySize {
return ErrKeyTooLarge
}
if len(value) > b.opts.MaxValueSize {
return ErrValueTooLarge
}
index, err := b.put(key, value)
if err != nil {
return err
@@ -135,39 +138,6 @@ func (b *Bitcask) setMaxDatafileSize(size int64) error {
return nil
}
func WithMaxDatafileSize(size int64) func(*Bitcask) error {
return func(b *Bitcask) error {
return b.setMaxDatafileSize(size)
}
}
func getDatafiles(path string) ([]string, error) {
fns, err := filepath.Glob(fmt.Sprintf("%s/*.data", path))
if err != nil {
return nil, err
}
sort.Strings(fns)
return fns, nil
}
func parseIds(fns []string) ([]int, error) {
var ids []int
for _, fn := range fns {
fn = filepath.Base(fn)
ext := filepath.Ext(fn)
if ext != ".data" {
continue
}
id, err := strconv.ParseInt(strings.TrimSuffix(fn, ext), 10, 32)
if err != nil {
return nil, err
}
ids = append(ids, int(id))
}
sort.Ints(ids)
return ids, nil
}
func Merge(path string, force bool) error {
fns, err := getDatafiles(path)
if err != nil {
@@ -353,6 +323,7 @@ func Open(path string, options ...func(*Bitcask) error) (*Bitcask, error) {
bitcask := &Bitcask{
Flock: flock.New(filepath.Join(path, "lock")),
opts: NewDefaultOptions(),
path: path,
curr: curr,
keydir: keydir,
@@ -374,7 +345,7 @@ func Open(path string, options ...func(*Bitcask) error) (*Bitcask, error) {
}
if !locked {
return nil, ErrCannotAcquireLock
return nil, ErrDatabaseLocked
}
return bitcask, nil
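
With this change Put rejects oversized keys and values up front, and Open reports a held lock as ErrDatabaseLocked rather than ErrCannotAcquireLock. A minimal caller-side sketch of handling these errors (the path and limits are illustrative, not taken from the diff):

```
package main

import (
	"log"

	"github.com/prologic/bitcask"
)

func main() {
	// Hypothetical path and limits, chosen only for illustration.
	db, err := bitcask.Open(
		"/tmp/db",
		bitcask.WithMaxKeySize(32),
		bitcask.WithMaxValueSize(1024),
	)
	if err == bitcask.ErrDatabaseLocked {
		log.Fatal("another process already has this database open")
	} else if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Oversized values are rejected before anything is written.
	big := make([]byte, 2048)
	if err := db.Put("some-key", big); err == bitcask.ErrValueTooLarge {
		log.Println("value rejected:", err)
	}
}
```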


@@ -4,6 +4,7 @@ import (
"fmt"
"io/ioutil"
"strings"
"sync"
"testing"
"github.com/stretchr/testify/assert"
@@ -127,6 +128,54 @@ func TestDeletedKeys(t *testing.T) {
})
}
func TestMaxKeySize(t *testing.T) {
assert := assert.New(t)
testdir, err := ioutil.TempDir("", "bitcask")
assert.NoError(err)
var db *Bitcask
size := 16
t.Run("Open", func(t *testing.T) {
db, err = Open(testdir, WithMaxKeySize(size))
assert.NoError(err)
})
t.Run("Put", func(t *testing.T) {
key := strings.Repeat(" ", size+1)
value := []byte("foobar")
err = db.Put(key, value)
assert.Error(err)
assert.Equal("error: key too large", err.Error())
})
}
func TestMaxValueSize(t *testing.T) {
assert := assert.New(t)
testdir, err := ioutil.TempDir("", "bitcask")
assert.NoError(err)
var db *Bitcask
size := 16
t.Run("Open", func(t *testing.T) {
db, err = Open(testdir, WithMaxValueSize(size))
assert.NoError(err)
})
t.Run("Put", func(t *testing.T) {
key := "foo"
value := []byte(strings.Repeat(" ", size+1))
err = db.Put(key, value)
assert.Error(err)
assert.Equal("error: value too large", err.Error())
})
}
func TestMerge(t *testing.T) {
assert := assert.New(t)
@@ -140,7 +189,7 @@ func TestMerge(t *testing.T) {
)
t.Run("Open", func(t *testing.T) {
db, err = Open(testdir, MaxDatafileSize(1024))
db, err = Open(testdir, WithMaxDatafileSize(1024))
assert.NoError(err)
})
@@ -198,6 +247,86 @@ func TestMerge(t *testing.T) {
})
}
func TestConcurrent(t *testing.T) {
var (
db *Bitcask
err error
)
assert := assert.New(t)
testdir, err := ioutil.TempDir("", "bitcask")
assert.NoError(err)
t.Run("Setup", func(t *testing.T) {
t.Run("Open", func(t *testing.T) {
db, err = Open(testdir)
assert.NoError(err)
})
t.Run("Put", func(t *testing.T) {
err = db.Put("foo", []byte("bar"))
assert.NoError(err)
})
})
t.Run("Concurrent", func(t *testing.T) {
t.Run("Put", func(t *testing.T) {
f := func(wg *sync.WaitGroup, x int) {
defer func() {
wg.Done()
}()
for i := 0; i <= 100; i++ {
if i%x == 0 {
key := fmt.Sprintf("k%d", i)
value := []byte(fmt.Sprintf("v%d", i))
err := db.Put(key, value)
assert.NoError(err)
}
}
}
wg := &sync.WaitGroup{}
go f(wg, 2)
wg.Add(1)
go f(wg, 3)
wg.Add(1)
wg.Wait()
})
t.Run("Get", func(t *testing.T) {
f := func(wg *sync.WaitGroup, N int) {
defer func() {
wg.Done()
}()
for i := 0; i <= N; i++ {
value, err := db.Get("foo")
assert.NoError(err)
assert.Equal([]byte("bar"), value)
}
}
wg := &sync.WaitGroup{}
go f(wg, 100)
wg.Add(1)
go f(wg, 100)
wg.Add(1)
wg.Wait()
})
t.Run("Close", func(t *testing.T) {
err = db.Close()
assert.NoError(err)
})
})
}
func TestLocking(t *testing.T) {
assert := assert.New(t)
@@ -210,7 +339,12 @@ func TestLocking(t *testing.T) {
_, err = Open(testdir)
assert.Error(err)
assert.Equal("error: cannot acquire lock", err.Error())
assert.Equal("error: database locked", err.Error())
}
type benchmarkTestCase struct {
name string
size int
}
func BenchmarkGet(b *testing.B) {
@@ -225,20 +359,39 @@ func BenchmarkGet(b *testing.B) {
}
defer db.Close()
err = db.Put("foo", []byte("bar"))
if err != nil {
b.Fatal(err)
tests := []benchmarkTestCase{
{"128B", 128},
{"256B", 256},
{"512B", 512},
{"1K", 1024},
{"2K", 2048},
{"4K", 4096},
{"8K", 8192},
{"16K", 16384},
{"32K", 32768},
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
val, err := db.Get("foo")
if err != nil {
b.Fatal(err)
}
if string(val) != "bar" {
b.Errorf("expected val=bar got=%s", val)
}
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
key := "foo"
value := []byte(strings.Repeat(" ", tt.size))
err = db.Put(key, value)
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
val, err := db.Get(key)
if err != nil {
b.Fatal(err)
}
if string(val) != string(value) {
b.Errorf("unexpected value")
}
}
})
}
}
@@ -254,11 +407,29 @@ func BenchmarkPut(b *testing.B) {
}
defer db.Close()
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := db.Put(fmt.Sprintf("key%d", i), []byte("bar"))
if err != nil {
b.Fatal(err)
}
tests := []benchmarkTestCase{
{"128B", 128},
{"256B", 256},
{"512B", 512},
{"1K", 1024},
{"2K", 2048},
{"4K", 4096},
{"8K", 8192},
{"16K", 16384},
{"32K", 32768},
}
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
key := "foo"
value := []byte(strings.Repeat(" ", tt.size))
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := db.Put(key, value)
if err != nil {
b.Fatal(err)
}
}
})
}
}


@@ -16,7 +16,7 @@ var (
bind string
debug bool
version bool
maxDatafileSize int64
maxDatafileSize int
)
func init() {
@@ -30,7 +30,7 @@ func init() {
flag.StringVarP(&bind, "bind", "b", ":6379", "interface and port to bind to")
flag.Int64Var(&maxDatafileSize, "max-datafile-size", 1<<20, "maximum datafile size in bytes")
flag.IntVar(&maxDatafileSize, "max-datafile-size", 1<<20, "maximum datafile size in bytes")
}
func main() {


@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"sync"
"time"
pb "github.com/prologic/bitcask/proto"
@@ -20,6 +21,8 @@ var (
)
type Datafile struct {
sync.RWMutex
id int
r *os.File
w *os.File
@@ -102,17 +105,23 @@ func (df *Datafile) Size() (int64, error) {
return stat.Size(), nil
}
func (df *Datafile) Read() (pb.Entry, error) {
var e pb.Entry
func (df *Datafile) Read() (e pb.Entry, err error) {
df.Lock()
defer df.Unlock()
return e, df.dec.Decode(&e)
}
func (df *Datafile) ReadAt(index int64) (e pb.Entry, err error) {
df.Lock()
defer df.Unlock()
_, err = df.r.Seek(index, os.SEEK_SET)
if err != nil {
return
}
return df.Read()
return e, df.dec.Decode(&e)
}
func (df *Datafile) Write(e pb.Entry) (int64, error) {
@@ -130,7 +139,10 @@ func (df *Datafile) Write(e pb.Entry) (int64, error) {
e.Index = index
e.Timestamp = time.Now().Unix()
df.Lock()
err = df.enc.Encode(&e)
df.Unlock()
if err != nil {
return -1, err
}
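
The fix for the concurrent read/write bugs in this range is to hold the Datafile's embedded mutex across the whole Seek-then-Decode (and Encode) sequence, so interleaved goroutines cannot corrupt each other's position in the file. A stripped-down sketch of the same pattern, using a plain *os.File rather than the actual Datafile type:

```
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"sync"
)

// guardedFile mirrors the Datafile fix: one mutex is held across the
// Seek and the Read so concurrent callers cannot interleave them and
// end up reading from each other's offsets.
type guardedFile struct {
	mu sync.Mutex
	f  *os.File
}

func (g *guardedFile) readAt(offset int64, buf []byte) (int, error) {
	g.mu.Lock()
	defer g.mu.Unlock()

	if _, err := g.f.Seek(offset, os.SEEK_SET); err != nil {
		return 0, err
	}
	return g.f.Read(buf)
}

func main() {
	tmp, _ := ioutil.TempFile("", "example")
	defer os.Remove(tmp.Name())
	tmp.WriteString("hello world")

	g := &guardedFile{f: tmp}
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			buf := make([]byte, 5)
			n, _ := g.readAt(6, buf) // always reads "world", never a torn offset
			fmt.Println(string(buf[:n]))
		}()
	}
	wg.Wait()
}
```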

options.go (new file, 42 lines)

@@ -0,0 +1,42 @@
package bitcask
const (
DefaultMaxDatafileSize = 1 << 20 // 1MB
DefaultMaxKeySize = 64 // 64 bytes
DefaultMaxValueSize = 1 << 16 // 65KB
)
type Options struct {
MaxDatafileSize int
MaxKeySize int
MaxValueSize int
}
func NewDefaultOptions() Options {
return Options{
MaxDatafileSize: DefaultMaxDatafileSize,
MaxKeySize: DefaultMaxKeySize,
MaxValueSize: DefaultMaxValueSize,
}
}
func WithMaxDatafileSize(size int) func(*Bitcask) error {
return func(b *Bitcask) error {
b.opts.MaxDatafileSize = size
return nil
}
}
func WithMaxKeySize(size int) func(*Bitcask) error {
return func(b *Bitcask) error {
b.opts.MaxKeySize = size
return nil
}
}
func WithMaxValueSize(size int) func(*Bitcask) error {
return func(b *Bitcask) error {
b.opts.MaxValueSize = size
return nil
}
}
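
options.go follows the functional-options pattern: each With* helper closes over a value and returns a func(*Bitcask) error that mutates b.opts, and Open (whose option-applying loop falls outside the hunks shown here) presumably applies each supplied option after installing the defaults from NewDefaultOptions. A minimal self-contained sketch of that mechanism, with simplified stand-in types rather than the library's internals:

```
package main

import "fmt"

// Simplified stand-ins for the real Options / Bitcask types.
type options struct{ maxKeySize int }
type store struct{ opts options }

// withMaxKeySize mirrors WithMaxKeySize: it captures size and returns
// a configuration function applied later by open.
func withMaxKeySize(size int) func(*store) error {
	return func(s *store) error {
		s.opts.maxKeySize = size
		return nil
	}
}

// open mirrors the shape of bitcask.Open: defaults first, then options.
func open(opts ...func(*store) error) (*store, error) {
	s := &store{opts: options{maxKeySize: 64}} // default
	for _, opt := range opts {
		if err := opt(s); err != nil {
			return nil, err
		}
	}
	return s, nil
}

func main() {
	s, _ := open(withMaxKeySize(128))
	fmt.Println(s.opts.maxKeySize) // 128
}
```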


@@ -16,26 +16,27 @@ const (
// NewEncoder creates a streaming protobuf encoder.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w: w, prefixBuf: make([]byte, prefixSize)}
return &Encoder{w}
}
// Encoder wraps an underlying io.Writer and allows you to stream
// proto encodings on it.
type Encoder struct {
w io.Writer
prefixBuf []byte
w io.Writer
}
// Encode takes any proto.Message and streams it to the underlying writer.
// Messages are framed with a length prefix.
func (e *Encoder) Encode(msg proto.Message) error {
prefixBuf := make([]byte, prefixSize)
buf, err := proto.Marshal(msg)
if err != nil {
return err
}
binary.BigEndian.PutUint64(e.prefixBuf, uint64(len(buf)))
binary.BigEndian.PutUint64(prefixBuf, uint64(len(buf)))
if _, err := e.w.Write(e.prefixBuf); err != nil {
if _, err := e.w.Write(prefixBuf); err != nil {
return errors.Wrap(err, "failed writing length prefix")
}
@@ -45,28 +46,26 @@ func (e *Encoder) Encode(msg proto.Message) error {
// NewDecoder creates a streaming protobuf decoder.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{
r: r,
prefixBuf: make([]byte, prefixSize),
}
return &Decoder{r: r}
}
// Decoder wraps an underlying io.Reader and allows you to stream
// proto decodings on it.
type Decoder struct {
r io.Reader
prefixBuf []byte
r io.Reader
}
// Decode takes a proto.Message and unmarshals the next payload in the
// underlying io.Reader. It returns an EOF when it's done.
func (d *Decoder) Decode(v proto.Message) error {
_, err := io.ReadFull(d.r, d.prefixBuf)
prefixBuf := make([]byte, prefixSize)
_, err := io.ReadFull(d.r, prefixBuf)
if err != nil {
return err
}
n := binary.BigEndian.Uint64(d.prefixBuf)
n := binary.BigEndian.Uint64(prefixBuf)
buf := make([]byte, n)
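
The encoder/decoder change drops the shared prefixBuf field in favour of a per-call buffer, presumably as part of the same concurrency fixes: two goroutines sharing one Encoder can no longer overwrite each other's length prefix. The framing itself is unchanged: a big-endian length (prefixSize bytes; assumed to be 8 given the PutUint64/Uint64 calls) followed by the marshalled protobuf entry. A minimal sketch of that framing, using raw bytes instead of proto.Message:

```
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

const prefixSize = 8 // assumption: a uint64 length prefix, as PutUint64/Uint64 imply

// writeFrame frames a payload the way Encoder.Encode does:
// big-endian length prefix, then the payload bytes.
func writeFrame(w io.Writer, payload []byte) error {
	prefix := make([]byte, prefixSize)
	binary.BigEndian.PutUint64(prefix, uint64(len(payload)))
	if _, err := w.Write(prefix); err != nil {
		return err
	}
	_, err := w.Write(payload)
	return err
}

// readFrame reverses it, as Decoder.Decode does before unmarshalling.
func readFrame(r io.Reader) ([]byte, error) {
	prefix := make([]byte, prefixSize)
	if _, err := io.ReadFull(r, prefix); err != nil {
		return nil, err
	}
	payload := make([]byte, binary.BigEndian.Uint64(prefix))
	_, err := io.ReadFull(r, payload)
	return payload, err
}

func main() {
	var buf bytes.Buffer
	_ = writeFrame(&buf, []byte("hello"))
	out, _ := readFrame(&buf)
	fmt.Printf("%s\n", out) // hello
}
```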

utils.go (new file, 36 lines)

@@ -0,0 +1,36 @@
package bitcask
import (
"fmt"
"path/filepath"
"sort"
"strconv"
"strings"
)
func getDatafiles(path string) ([]string, error) {
fns, err := filepath.Glob(fmt.Sprintf("%s/*.data", path))
if err != nil {
return nil, err
}
sort.Strings(fns)
return fns, nil
}
func parseIds(fns []string) ([]int, error) {
var ids []int
for _, fn := range fns {
fn = filepath.Base(fn)
ext := filepath.Ext(fn)
if ext != ".data" {
continue
}
id, err := strconv.ParseInt(strings.TrimSuffix(fn, ext), 10, 32)
if err != nil {
return nil, err
}
ids = append(ids, int(id))
}
sort.Ints(ids)
return ids, nil
}
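
utils.go holds the datafile-discovery helpers moved out of bitcask.go: datafiles are named <id>.data, getDatafiles globs and string-sorts the paths, and parseIds extracts the numeric ids and sorts them numerically. A small self-contained illustration of why the numeric sort matters (the file names are hypothetical):

```
package main

import (
	"fmt"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
)

// parseIds, as in utils.go: numeric ids from "<id>.data" file names.
func parseIds(fns []string) ([]int, error) {
	var ids []int
	for _, fn := range fns {
		fn = filepath.Base(fn)
		ext := filepath.Ext(fn)
		if ext != ".data" {
			continue
		}
		id, err := strconv.ParseInt(strings.TrimSuffix(fn, ext), 10, 32)
		if err != nil {
			return nil, err
		}
		ids = append(ids, int(id))
	}
	sort.Ints(ids)
	return ids, nil
}

func main() {
	// Hypothetical datafile names; string order puts "10" before "2".
	fns := []string{"0.data", "10.data", "2.data"}
	sort.Strings(fns)
	fmt.Println(fns) // [0.data 10.data 2.data]

	ids, _ := parseIds(fns)
	fmt.Println(ids) // [0 2 10]
}
```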