1
0
mirror of https://github.com/taigrr/bitcask synced 2025-01-18 04:03:17 -08:00

Increased read performance by ~3-4x by removing another unnecessary I/O operation (Seek)

This commit is contained in:
James Mills 2019-03-23 12:14:15 +10:00
parent c3b1a02371
commit 711d08ce91
No known key found for this signature in database
GPG Key ID: AC4C014F1440EBD6
7 changed files with 80 additions and 59 deletions

View File

@ -5,7 +5,7 @@ steps:
- name: build
image: golang:latest
commands:
- go test -v -short -cover -coverprofile=coverage.txt -coverpkg=$(go list) ./...
- go test -v -short -cover -coverprofile=coverage.txt -coverpkg=$(go list) .
- name: coverage
image: plugins/codecov

View File

@ -34,13 +34,17 @@ release:
@./tools/release.sh
profile: build
@go test -cpuprofile cpu.prof -memprofile mem.prof -v -bench ./...
@go test -cpuprofile cpu.prof -memprofile mem.prof -v -bench .
bench: build
@go test -v -benchmem -bench=. ./...
@go test -v -benchmem -bench=. .
test: build
@go test -v -cover -coverprofile=coverage.txt -covermode=atomic -coverpkg=$(shell go list) -race ./...
@go test -v \
-cover -coverprofile=coverage.txt -covermode=atomic \
-coverpkg=$(shell go list) \
-race \
.
clean:
@git clean -f -d -X

View File

@ -94,32 +94,32 @@ Benchmarks run on a 11" Macbook with a 1.4Ghz Intel Core i7:
```
$ make bench
...
BenchmarkGet/128B-4 300000 5178 ns/op 400 B/op 5 allocs/op
BenchmarkGet/256B-4 300000 5273 ns/op 656 B/op 5 allocs/op
BenchmarkGet/512B-4 200000 5368 ns/op 1200 B/op 5 allocs/op
BenchmarkGet/1K-4 200000 5800 ns/op 2288 B/op 5 allocs/op
BenchmarkGet/2K-4 200000 6766 ns/op 4464 B/op 5 allocs/op
BenchmarkGet/4K-4 200000 7857 ns/op 9072 B/op 5 allocs/op
BenchmarkGet/8K-4 200000 9538 ns/op 17776 B/op 5 allocs/op
BenchmarkGet/16K-4 100000 13188 ns/op 34928 B/op 5 allocs/op
BenchmarkGet/32K-4 100000 21620 ns/op 73840 B/op 5 allocs/op
BenchmarkGet/128B-4 300000 3737 ns/op 1632 B/op 16 allocs/op
BenchmarkGet/256B-4 300000 4183 ns/op 2016 B/op 16 allocs/op
BenchmarkGet/512B-4 300000 4295 ns/op 2848 B/op 16 allocs/op
BenchmarkGet/1K-4 300000 4455 ns/op 4512 B/op 16 allocs/op
BenchmarkGet/2K-4 300000 5536 ns/op 7841 B/op 16 allocs/op
BenchmarkGet/4K-4 200000 7101 ns/op 15010 B/op 16 allocs/op
BenchmarkGet/8K-4 200000 10664 ns/op 28325 B/op 16 allocs/op
BenchmarkGet/16K-4 100000 18173 ns/op 54442 B/op 16 allocs/op
BenchmarkGet/32K-4 50000 33081 ns/op 115893 B/op 16 allocs/op
BenchmarkPut/128B-4 200000 7875 ns/op 409 B/op 6 allocs/op
BenchmarkPut/256B-4 200000 8712 ns/op 538 B/op 6 allocs/op
BenchmarkPut/512B-4 200000 9832 ns/op 829 B/op 6 allocs/op
BenchmarkPut/1K-4 100000 13105 ns/op 1410 B/op 6 allocs/op
BenchmarkPut/2K-4 100000 18601 ns/op 2572 B/op 6 allocs/op
BenchmarkPut/4K-4 50000 36631 ns/op 5151 B/op 6 allocs/op
BenchmarkPut/8K-4 30000 56128 ns/op 9798 B/op 6 allocs/op
BenchmarkPut/16K-4 20000 83209 ns/op 18834 B/op 6 allocs/op
BenchmarkPut/32K-4 10000 135899 ns/op 41517 B/op 6 allocs/op
BenchmarkPut/128B-4 200000 7967 ns/op 409 B/op 6 allocs/op
BenchmarkPut/256B-4 200000 8563 ns/op 538 B/op 6 allocs/op
BenchmarkPut/512B-4 200000 9678 ns/op 829 B/op 6 allocs/op
BenchmarkPut/1K-4 200000 12786 ns/op 1410 B/op 6 allocs/op
BenchmarkPut/2K-4 100000 18582 ns/op 2572 B/op 6 allocs/op
BenchmarkPut/4K-4 50000 35335 ns/op 5151 B/op 6 allocs/op
BenchmarkPut/8K-4 30000 56047 ns/op 9797 B/op 6 allocs/op
BenchmarkPut/16K-4 20000 86137 ns/op 18834 B/op 6 allocs/op
BenchmarkPut/32K-4 10000 140162 ns/op 41517 B/op 6 allocs/op
BenchmarkScan-4 1000000 1851 ns/op 493 B/op 25 allocs/op
BenchmarkScan-4 1000000 1885 ns/op 493 B/op 25 allocs/op
```
For 128B values:
* ~200,000 reads/sec
* ~270,000 reads/sec
* ~130,000 writes/sec
The full benchmark above shows linear performance as you increase key/value sizes.

View File

@ -89,7 +89,7 @@ func (b *Bitcask) Get(key string) ([]byte, error) {
df = b.datafiles[item.FileID]
}
e, err := df.ReadAt(item.Offset)
e, err := df.ReadAt(item.Offset, item.Size)
if err != nil {
return nil, err
}
@ -117,12 +117,12 @@ func (b *Bitcask) Put(key string, value []byte) error {
return ErrValueTooLarge
}
offset, err := b.put(key, value)
offset, n, err := b.put(key, value)
if err != nil {
return err
}
item := b.keydir.Add(key, b.curr.FileID(), offset)
item := b.keydir.Add(key, b.curr.FileID(), offset, n)
b.trie.Add(key, item)
return nil
@ -131,7 +131,7 @@ func (b *Bitcask) Put(key string, value []byte) error {
// Delete deletes the named key. If the key doesn't exist or an I/O error
// occurs the error is returned.
func (b *Bitcask) Delete(key string) error {
_, err := b.put(key, []byte{})
_, _, err := b.put(key, []byte{})
if err != nil {
return err
}
@ -177,7 +177,7 @@ func (b *Bitcask) Fold(f func(key string) error) error {
return nil
}
func (b *Bitcask) put(key string, value []byte) (int64, error) {
func (b *Bitcask) put(key string, value []byte) (int64, int64, error) {
b.mu.Lock()
defer b.mu.Unlock()
@ -185,12 +185,12 @@ func (b *Bitcask) put(key string, value []byte) (int64, error) {
if size >= int64(b.config.maxDatafileSize) {
err := b.curr.Close()
if err != nil {
return -1, err
return -1, 0, err
}
df, err := internal.NewDatafile(b.path, b.curr.FileID(), true)
if err != nil {
return -1, err
return -1, 0, err
}
b.datafiles = append(b.datafiles, df)
@ -198,7 +198,7 @@ func (b *Bitcask) put(key string, value []byte) (int64, error) {
id := b.curr.FileID() + 1
curr, err := internal.NewDatafile(b.path, id, false)
if err != nil {
return -1, err
return -1, 0, err
}
b.curr = curr
}
@ -255,7 +255,7 @@ func Merge(path string, force bool) error {
defer df.Close()
for {
e, err := df.Read()
e, n, err := df.Read()
if err != nil {
if err == io.EOF {
break
@ -269,7 +269,7 @@ func Merge(path string, force bool) error {
continue
}
keydir.Add(e.Key, ids[i], e.Offset)
keydir.Add(e.Key, ids[i], e.Offset, n)
}
tempdf, err := internal.NewDatafile(temp, id, false)
@ -280,12 +280,12 @@ func Merge(path string, force bool) error {
for key := range keydir.Keys() {
item, _ := keydir.Get(key)
e, err := df.ReadAt(item.Offset)
e, err := df.ReadAt(item.Offset, item.Size)
if err != nil {
return err
}
_, err = tempdf.Write(e)
_, _, err = tempdf.Write(e)
if err != nil {
return err
}
@ -365,12 +365,12 @@ func Open(path string, options ...Option) (*Bitcask, error) {
for key := range hint.Keys() {
item, _ := hint.Get(key)
_ = keydir.Add(key, item.FileID, item.Offset)
_ = keydir.Add(key, item.FileID, item.Offset, item.Size)
trie.Add(key, item)
}
} else {
for {
e, err := df.Read()
e, n, err := df.Read()
if err != nil {
if err == io.EOF {
break
@ -384,7 +384,7 @@ func Open(path string, options ...Option) (*Bitcask, error) {
continue
}
item := keydir.Add(e.Key, ids[i], e.Offset)
item := keydir.Add(e.Key, ids[i], e.Offset, n)
trie.Add(e.Key, item)
}
}

View File

@ -1,12 +1,14 @@
package internal
import (
"bytes"
"fmt"
"os"
"path/filepath"
"sync"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
pb "github.com/prologic/bitcask/internal/proto"
"github.com/prologic/bitcask/internal/streampb"
@ -17,7 +19,8 @@ const (
)
var (
ErrReadonly = errors.New("error: read only datafile")
ErrReadonly = errors.New("error: read only datafile")
ErrReadError = errors.New("error: read error")
)
type Datafile struct {
@ -104,28 +107,40 @@ func (df *Datafile) Size() int64 {
return df.offset
}
func (df *Datafile) Read() (e pb.Entry, err error) {
func (df *Datafile) Read() (e pb.Entry, n int64, err error) {
df.Lock()
defer df.Unlock()
return e, df.dec.Decode(&e)
}
func (df *Datafile) ReadAt(index int64) (e pb.Entry, err error) {
df.Lock()
defer df.Unlock()
_, err = df.r.Seek(index, os.SEEK_SET)
n, err = df.dec.Decode(&e)
if err != nil {
return
}
return e, df.dec.Decode(&e)
return
}
func (df *Datafile) Write(e pb.Entry) (int64, error) {
// ReadAt reads and decodes the entry stored at byte offset `index` that
// occupies exactly `size` bytes in the datafile. It issues a single
// positional read (os.File-style ReadAt) instead of Seek+Decode, then
// decodes the protobuf entry from an in-memory buffer. It returns
// ErrReadError when fewer than `size` bytes were read.
//
// NOTE(review): unlike Read, this method takes no df.Lock(). That is only
// safe if df.r supports concurrent positional reads (e.g. *os.File.ReadAt);
// confirm df.r's concrete type before relying on concurrent callers.
func (df *Datafile) ReadAt(index, size int64) (e pb.Entry, err error) {
log.WithField("index", index).WithField("size", size).Debug("ReadAt")
// Allocate a buffer for the full entry and read it in one call.
b := make([]byte, size)
n, err := df.r.ReadAt(b, index)
if err != nil {
return
}
// Defensive short-read check: io.ReaderAt already guarantees a non-nil
// error when n < len(b), so this mainly guards a misbehaving reader or
// a stale keydir item.Size.
if int64(n) != size {
err = ErrReadError
return
}
// Decode the protobuf-encoded entry from the buffered bytes; no further
// file I/O happens past this point.
buf := bytes.NewBuffer(b)
dec := streampb.NewDecoder(buf)
_, err = dec.Decode(&e)
return
}
func (df *Datafile) Write(e pb.Entry) (int64, int64, error) {
if df.w == nil {
return -1, ErrReadonly
return -1, 0, ErrReadonly
}
df.Lock()
@ -135,9 +150,9 @@ func (df *Datafile) Write(e pb.Entry) (int64, error) {
n, err := df.enc.Encode(&e)
if err != nil {
return -1, err
return -1, 0, err
}
df.offset += n
return e.Offset, nil
return e.Offset, n, nil
}

View File

@ -11,6 +11,7 @@ import (
type Item struct {
FileID int
Offset int64
Size int64
}
type Keydir struct {
@ -24,10 +25,11 @@ func NewKeydir() *Keydir {
}
}
func (k *Keydir) Add(key string, fileid int, offset int64) Item {
func (k *Keydir) Add(key string, fileid int, offset, size int64) Item {
item := Item{
FileID: fileid,
Offset: offset,
Size: size,
}
k.Lock()

View File

@ -66,12 +66,12 @@ type Decoder struct {
// Decode takes a proto.Message and unmarshals the next payload in the
// underlying io.Reader. It returns an EOF when it's done.
func (d *Decoder) Decode(v proto.Message) error {
func (d *Decoder) Decode(v proto.Message) (int64, error) {
prefixBuf := make([]byte, prefixSize)
_, err := io.ReadFull(d.r, prefixBuf)
if err != nil {
return err
return 0, err
}
n := binary.BigEndian.Uint64(prefixBuf)
@ -82,11 +82,11 @@ func (d *Decoder) Decode(v proto.Message) error {
for idx < n {
m, err := d.r.Read(buf[idx:n])
if err != nil {
return errors.Wrap(translateError(err), "failed reading marshaled data")
return 0, errors.Wrap(translateError(err), "failed reading marshaled data")
}
idx += uint64(m)
}
return proto.Unmarshal(buf[:n], v)
return int64(idx + prefixSize), proto.Unmarshal(buf[:n], v)
}
func translateError(err error) error {