Compare commits

...

6 Commits

Author SHA1 Message Date
Ignacio Hagopian
a407905ae2 Improve Get/Put performance with optional mempooling (#36)
* avoid unnecessary use of encoder/decoder to decrease memory allocations

* add an optional configurable mempool to avoid extra allocs

* add doc.go with examples
2019-08-05 07:23:07 +10:00
James Mills
6ceeccfd64 Update README.md 2019-08-03 19:49:15 +10:00
James Mills
35dc7e70d2 Update README.md 2019-08-03 19:47:23 +10:00
James Mills
6cc1154611 Update README.md 2019-08-03 19:46:18 +10:00
Ignacio Hagopian
8aa66c66da keydir: avoid defers (#34) 2019-08-01 19:18:05 +10:00
Ignacio Hagopian
e3242c8426 README: typos (#35) 2019-08-01 13:48:36 +10:00
10 changed files with 148 additions and 54 deletions

View File

@@ -4,14 +4,9 @@
[![CodeCov](https://codecov.io/gh/prologic/bitcask/branch/master/graph/badge.svg)](https://codecov.io/gh/prologic/bitcask)
[![Go Report Card](https://goreportcard.com/badge/prologic/bitcask)](https://goreportcard.com/report/prologic/bitcask)
[![GoDoc](https://godoc.org/github.com/prologic/bitcask?status.svg)](https://godoc.org/github.com/prologic/bitcask)
[![Sourcegraph](https://sourcegraph.com/github.com/prologic/bitcask/-/badge.svg)](https://sourcegraph.com/github.com/prologic/bitcask?badge)
[![Github all releases](https://img.shields.io/github/downloads/prologic/bitcask/total.svg)](https://github.com/prologic/bitcask/releases)
[![GitHub license](https://img.shields.io/github/license/prologic/bitcask.svg)](https://github.com/prologic/bitcask)
[![](https://images.microbadger.com/badges/version/prologic/bitcask.svg)](https://microbadger.com/images/prologic/bitcask)
[![](https://images.microbadger.com/badges/image/prologic/bitcask.svg)](https://microbadger.com/images/prologic/bitcask)
A high performance Key/Value store written in [Go](https://golang.org) with a predictable read/write performance and high throughput. Uses a [Bitcask](https://en.wikipedia.org/wiki/Bitcask) on-disk layout (LSM+WAL) similar to [Riak](https://riak.com/). 🗃️
A high performance Key/Value store written in [Go](https://golang.org) with a predictable read/write performance and high throughput. Uses a [Bitcask](https://en.wikipedia.org/wiki/Bitcask) on-disk layout (LSM+WAL) similar to [Riak](https://riak.com/)
For a more feature-complete Redis-compatible server, distributed key/value store have a look at [Bitraft](https://github.com/prologic/bitraft) which uses this library as its backend. Use [Bitcask](https://github.com/prologic/bitcask) as a starting point or if you want to embed in your application, use [Bitraft](https://github.com/prologic/bitraft) if you need a complete server/client solution with high availability with a Redis-compatible API.
@@ -21,14 +16,14 @@ For a more feature-complete Redis-compatible server, distributed key/value store
* Builtin CLI (`bitcask`)
* Builtin Redis-compatible server (`bitcaskd`)
* Predictable read/write performance
* Low latecny
* Low latency
* High throughput (See: [Performance](README.md#Performance) )
## Development
1. Get the source
```#!bash
```#!sh
$ git clone https://github.com/prologic/bitcask.git
```
@@ -37,20 +32,19 @@ $ git clone https://github.com/prologic/bitcask.git
This library uses [Protobuf](https://github.com/protocolbuffers/protobuf) to serialize data on disk. Please follow the
instructions for installing `protobuf` on your system. You will also need the
following Go libraries/tools to generate Go code from Protobuf defs:
- [protoc-gen-go](https://github.com/golang/protobuf)
3. Build the project
```#!bash
```#!sh
$ make
```
This will invoke `go generate` and `go build`.
- [protoc-gen-go](https://github.com/golang/protobuf)
## Install
```#!bash
```#!sh
$ go get github.com/prologic/bitcask
```
@@ -58,7 +52,7 @@ $ go get github.com/prologic/bitcask
Install the package into your project:
```#!bash
```#!sh
$ go get github.com/prologic/bitcask
```
@@ -80,7 +74,7 @@ documentation and other examples.
## Usage (tool)
```#!bash
```#!sh
$ bitcask -p /tmp/db set Hello World
$ bitcask -p /tmp/db get Hello
World
@@ -90,14 +84,14 @@ World
There is also a builtin very simple Redis-compatible server called `bitcaskd`:
```#!bash
```#!sh
$ ./bitcaskd ./tmp
INFO[0000] starting bitcaskd v0.0.7@146f777 bind=":6379" path=./tmp
```
Example session:
```
```#!sh
$ telnet localhost 6379
Trying ::1...
Connected to localhost.
@@ -122,7 +116,7 @@ Connection closed by foreign host.
You can also use the [Bitcask Docker Image](https://cloud.docker.com/u/prologic/repository/docker/prologic/bitcask):
```#!bash
```#!sh
$ docker pull prologic/bitcask
$ docker run -d -p 6379:6379 prologic/bitcask
```
@@ -131,7 +125,7 @@ $ docker run -d -p 6379:6379 prologic/bitcask
Benchmarks run on a 11" Macbook with a 1.4Ghz Intel Core i7:
```
```#!sh
$ make bench
...
BenchmarkGet/128B-4 300000 4071 ns/op 31.43 MB/s 608 B/op 7 allocs/op

View File

@@ -35,6 +35,10 @@ var (
// ErrDatabaseLocked is the error returned if the database is locked
// (typically opened by another process)
ErrDatabaseLocked = errors.New("error: database locked")
// ErrCreatingMemPool is the error returned when trying to configurate
// the mempool fails
ErrCreatingMemPool = errors.New("error: creating the mempool failed")
)
// Bitcask is a struct that represents a on-disk LSM and WAL data structure
@@ -420,6 +424,8 @@ func Open(path string, options ...Option) (*Bitcask, error) {
}
}
internal.ConfigureMemPool(bitcask.config.maxConcurrency)
locked, err := bitcask.Flock.TryLock()
if err != nil {
return nil, err

View File

@@ -468,8 +468,9 @@ func TestLocking(t *testing.T) {
}
type benchmarkTestCase struct {
name string
size int
name string
size int
withPool bool
}
func BenchmarkGet(b *testing.B) {
@@ -484,22 +485,25 @@ func BenchmarkGet(b *testing.B) {
}
defer os.RemoveAll(testdir)
db, err := Open(testdir)
if err != nil {
b.Fatal(err)
}
defer db.Close()
tests := []benchmarkTestCase{
{"128B", 128},
{"256B", 256},
{"512B", 512},
{"1K", 1024},
{"2K", 2048},
{"4K", 4096},
{"8K", 8192},
{"16K", 16384},
{"32K", 32768},
{"128B", 128, false},
{"128BWithPool", 128, true},
{"256B", 256, false},
{"256BWithPool", 256, true},
{"512B", 512, false},
{"512BWithPool", 512, true},
{"1K", 1024, false},
{"1KWithPool", 1024, true},
{"2K", 2048, false},
{"2KWithPool", 2048, true},
{"4K", 4096, false},
{"4KWithPool", 4096, true},
{"8K", 8192, false},
{"8KWithPool", 8192, true},
{"16K", 16384, false},
{"16KWithPool", 16384, true},
{"32K", 32768, false},
{"32KWithPool", 32768, true},
}
for _, tt := range tests {
@@ -509,6 +513,18 @@ func BenchmarkGet(b *testing.B) {
key := "foo"
value := []byte(strings.Repeat(" ", tt.size))
options := []Option{
WithMaxKeySize(len(key)),
WithMaxValueSize(tt.size),
}
if tt.withPool {
options = append(options, WithMemPool(1))
}
db, err := Open(testdir, options...)
if err != nil {
b.Fatal(err)
}
err = db.Put(key, value)
if err != nil {
b.Fatal(err)
@@ -524,6 +540,8 @@ func BenchmarkGet(b *testing.B) {
b.Errorf("unexpected value")
}
}
b.StopTimer()
db.Close()
})
}
}
@@ -547,15 +565,15 @@ func BenchmarkPut(b *testing.B) {
defer db.Close()
tests := []benchmarkTestCase{
{"128B", 128},
{"256B", 256},
{"512B", 512},
{"1K", 1024},
{"2K", 2048},
{"4K", 4096},
{"8K", 8192},
{"16K", 16384},
{"32K", 32768},
{"128B", 128, false},
{"256B", 256, false},
{"512B", 512, false},
{"1K", 1024, false},
{"2K", 2048, false},
{"4K", 4096, false},
{"8K", 8192, false},
{"16K", 16384, false},
{"32K", 32768, false},
}
for _, tt := range tests {

13
doc.go Normal file
View File

@@ -0,0 +1,13 @@
// Package bitcask implements a high-performance key-value store based on a
// WAL and LSM.
//
// By default, the client assumes a default configuration regarding maximum key size,
// maximum value size, maximum datafile size, and memory pools to avoid allocations.
// Refer to Constants section to know default values.
//
// For extra performance, configure the memory pool option properly. This option
// requires to specify the maximum number of concurrent use of the package. Failing to
// set a high-enough value would impact latency and throughput. Likewise, overestimating
// would yield in an unnecessary big memory footprint.
// The default configuration doesn't use a memory pool.
package bitcask

14
doc_test.go Normal file
View File

@@ -0,0 +1,14 @@
package bitcask
func Example() {
_, _ = Open("path/to/db")
}
func Example_withOptions() {
opts := []Option{
WithMaxKeySize(1024),
WithMaxValueSize(4096),
WithMemPool(10),
}
_, _ = Open("path/to/db", opts...)
}

1
go.mod
View File

@@ -6,6 +6,7 @@ require (
github.com/golang/protobuf v1.3.2
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
github.com/magiconair/properties v1.8.1 // indirect
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c
github.com/pelletier/go-toml v1.4.0 // indirect
github.com/pkg/errors v0.8.1
github.com/prologic/trie v0.0.0-20190322091023-3972df81f9b5

2
go.sum
View File

@@ -77,6 +77,8 @@ github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQz
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c h1:rp5dCmg/yLR3mgFuSOe4oEnDDmGLROTvMragMUXpTQw=
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c/go.mod h1:X07ZCGwUbLaax7L0S3Tw4hpejzu63ZrrQiUe6W0hcy0=
github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pelletier/go-toml v1.4.0 h1:u3Z1r+oOXJIkxqw34zVhyPgjBsm6X2wn21NWs/HfSeg=

View File

@@ -1,26 +1,31 @@
package internal
import (
"bytes"
"fmt"
"os"
"path/filepath"
"sync"
"github.com/oxtoacart/bpool"
"github.com/pkg/errors"
"golang.org/x/exp/mmap"
"github.com/gogo/protobuf/proto"
pb "github.com/prologic/bitcask/internal/proto"
"github.com/prologic/bitcask/internal/streampb"
)
const (
DefaultDatafileFilename = "%09d.data"
prefixSize = 8
)
var (
ErrReadonly = errors.New("error: read only datafile")
ErrReadError = errors.New("error: read error")
memPool *bpool.BufferPool
mxMemPool sync.RWMutex
)
type Datafile struct {
@@ -136,7 +141,17 @@ func (df *Datafile) Read() (e pb.Entry, n int64, err error) {
func (df *Datafile) ReadAt(index, size int64) (e pb.Entry, err error) {
var n int
b := make([]byte, size)
var b []byte
if memPool == nil {
b = make([]byte, size)
} else {
poolSlice := memPool.Get()
if poolSlice.Cap() < int(size) {
poolSlice.Grow(int(size) - poolSlice.Cap())
}
defer memPool.Put(poolSlice)
b = poolSlice.Bytes()[:size]
}
if df.w == nil {
n, err = df.ra.ReadAt(b, index)
@@ -151,9 +166,10 @@ func (df *Datafile) ReadAt(index, size int64) (e pb.Entry, err error) {
return
}
buf := bytes.NewBuffer(b)
dec := streampb.NewDecoder(buf)
_, err = dec.Decode(&e)
err = proto.Unmarshal(b[prefixSize:], &e)
if err != nil {
return
}
return
}
@@ -175,3 +191,15 @@ func (df *Datafile) Write(e pb.Entry) (int64, int64, error) {
return e.Offset, n, nil
}
// ConfigureMemPool configurate the mempool accordingly
func ConfigureMemPool(maxConcurrency *int) {
mxMemPool.Lock()
defer mxMemPool.Unlock()
if maxConcurrency == nil {
memPool = nil
} else {
memPool = bpool.NewBufferPool(*maxConcurrency)
}
return
}

View File

@@ -42,17 +42,15 @@ func (k *Keydir) Add(key string, fileid int, offset, size int64) Item {
func (k *Keydir) Get(key string) (Item, bool) {
k.RLock()
defer k.RUnlock()
item, ok := k.kv[key]
k.RUnlock()
return item, ok
}
func (k *Keydir) Delete(key string) {
k.Lock()
defer k.Unlock()
delete(k.kv, key)
k.Unlock()
}
func (k *Keydir) Len() int {
@@ -63,11 +61,11 @@ func (k *Keydir) Keys() chan string {
ch := make(chan string)
go func() {
k.RLock()
defer k.RUnlock()
for key := range k.kv {
ch <- key
}
close(ch)
k.RUnlock()
}()
return ch
}

View File

@@ -1,5 +1,7 @@
package bitcask
import "errors"
const (
// DefaultMaxDatafileSize is the default maximum datafile size in bytes
DefaultMaxDatafileSize = 1 << 20 // 1MB
@@ -11,6 +13,12 @@ const (
DefaultMaxValueSize = 1 << 16 // 65KB
)
var (
// ErrMaxConcurrencyLowerEqZero is the error returned for
// maxConcurrency option not greater than zero
ErrMaxConcurrencyLowerEqZero = errors.New("error: maxConcurrency must be greater than zero")
)
// Option is a function that takes a config struct and modifies it
type Option func(*config) error
@@ -18,6 +26,7 @@ type config struct {
maxDatafileSize int
maxKeySize int
maxValueSize int
maxConcurrency *int
}
func newDefaultConfig() *config {
@@ -51,3 +60,14 @@ func WithMaxValueSize(size int) Option {
return nil
}
}
// WithMemPool configures usage of a memory pool to avoid allocations
func WithMemPool(maxConcurrency int) Option {
return func(cfg *config) error {
if maxConcurrency <= 0 {
return ErrMaxConcurrencyLowerEqZero
}
cfg.maxConcurrency = &maxConcurrency
return nil
}
}