From 5be114adabe482e7e15dc50d32f214fb3df2b539 Mon Sep 17 00:00:00 2001 From: Ignacio Hagopian Date: Thu, 12 Sep 2019 10:44:26 -0300 Subject: [PATCH] Makefile setup & key/value coherent datatypes & refactoring (#98) * internal/data: comment exported functions * internal/data: make smaller codec exported api surface * make key and value sizes serializing bubble up to everything * Makefile setup & go mod tidy --- Makefile | 5 ++- bitcask.go | 12 ++--- bitcask_test.go | 4 +- cmd/bitcask/export.go | 4 +- cmd/bitcask/initdb.go | 8 ++-- go.sum | 1 + internal/config/config.go | 8 ++-- internal/data/codec.go | 71 ++++++++++++++++++++++-------- internal/data/datafile.go | 42 ++++++++++-------- internal/index/codec_index.go | 4 +- internal/index/codec_index_test.go | 2 +- internal/index/index.go | 4 +- internal/mocks/indexer.go | 8 ++-- options.go | 8 ++-- 14 files changed, 112 insertions(+), 69 deletions(-) diff --git a/Makefile b/Makefile index 37a258c..909f0d6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: dev build generate install image release profile bench test clean +.PHONY: dev build generate install image release profile bench test clean setup CGO_ENABLED=0 VERSION=$(shell git describe --abbrev=0 --tags) @@ -49,5 +49,8 @@ test: build -race \ . +setup: + @go install github.com/vektra/mockery/.../ + clean: @git clean -f -d -X diff --git a/bitcask.go b/bitcask.go index 95e00dd..8129fbe 100644 --- a/bitcask.go +++ b/bitcask.go @@ -149,10 +149,10 @@ func (b *Bitcask) Has(key []byte) bool { // Put stores the key and value in the database. func (b *Bitcask) Put(key, value []byte) error { - if len(key) > b.config.MaxKeySize { + if uint32(len(key)) > b.config.MaxKeySize { return ErrKeyTooLarge } - if len(value) > b.config.MaxValueSize { + if uint64(len(value)) > b.config.MaxValueSize { return ErrValueTooLarge } @@ -261,7 +261,7 @@ func (b *Bitcask) put(key, value []byte) (int64, int64, error) { id := b.curr.FileID() - df, err := data.NewDatafile(b.path, id, true) + df, err := data.NewDatafile(b.path, id, true, b.config.MaxKeySize, b.config.MaxValueSize) if err != nil { return -1, 0, err } @@ -269,7 +269,7 @@ func (b *Bitcask) put(key, value []byte) (int64, int64, error) { b.datafiles[id] = df id = b.curr.FileID() + 1 - curr, err := data.NewDatafile(b.path, id, false) + curr, err := data.NewDatafile(b.path, id, false, b.config.MaxKeySize, b.config.MaxValueSize) if err != nil { return -1, 0, err } @@ -297,7 +297,7 @@ func (b *Bitcask) reopen() error { datafiles := make(map[int]data.Datafile, len(ids)) for _, id := range ids { - df, err := data.NewDatafile(b.path, id, true) + df, err := data.NewDatafile(b.path, id, true, b.config.MaxKeySize, b.config.MaxValueSize) if err != nil { return err } @@ -338,7 +338,7 @@ func (b *Bitcask) reopen() error { id = ids[(len(ids) - 1)] } - curr, err := data.NewDatafile(b.path, id, false) + curr, err := data.NewDatafile(b.path, id, false, b.config.MaxKeySize, b.config.MaxValueSize) if err != nil { return err } diff --git a/bitcask_test.go b/bitcask_test.go index 0562dc2..aa9280a 100644 --- a/bitcask_test.go +++ b/bitcask_test.go @@ -1126,8 +1126,8 @@ func BenchmarkGet(b *testing.B) { value := []byte(strings.Repeat(" ", tt.size)) options := []Option{ - WithMaxKeySize(len(key)), - WithMaxValueSize(tt.size), + WithMaxKeySize(uint32(len(key))), + WithMaxValueSize(uint64(tt.size)), } db, err := Open(testdir, options...) if err != nil { diff --git a/cmd/bitcask/export.go b/cmd/bitcask/export.go index 290ca03..39c83f2 100644 --- a/cmd/bitcask/export.go +++ b/cmd/bitcask/export.go @@ -50,11 +50,11 @@ func init() { "with-max-datafile-size", "", bitcask.DefaultMaxDatafileSize, "Maximum size of each datafile", ) - exportCmd.PersistentFlags().IntP( + exportCmd.PersistentFlags().Uint32P( "with-max-key-size", "", bitcask.DefaultMaxKeySize, "Maximum size of each key", ) - exportCmd.PersistentFlags().IntP( + exportCmd.PersistentFlags().Uint64P( "with-max-value-size", "", bitcask.DefaultMaxValueSize, "Maximum size of each value", ) diff --git a/cmd/bitcask/initdb.go b/cmd/bitcask/initdb.go index 2b105e1..1e014c7 100644 --- a/cmd/bitcask/initdb.go +++ b/cmd/bitcask/initdb.go @@ -30,8 +30,8 @@ var initdbCmd = &cobra.Command{ path := viper.GetString("path") maxDatafileSize := viper.GetInt("with-max-datafile-size") - maxKeySize := viper.GetInt("with-max-key-size") - maxValueSize := viper.GetInt("with-max-value-size") + maxKeySize := viper.GetUint32("with-max-key-size") + maxValueSize := viper.GetUint64("with-max-value-size") db, err := bitcask.Open( path, @@ -56,11 +56,11 @@ func init() { "with-max-datafile-size", "", bitcask.DefaultMaxDatafileSize, "Maximum size of each datafile", ) - initdbCmd.PersistentFlags().IntP( + initdbCmd.PersistentFlags().Uint32P( "with-max-key-size", "", bitcask.DefaultMaxKeySize, "Maximum size of each key", ) - initdbCmd.PersistentFlags().IntP( + initdbCmd.PersistentFlags().Uint64P( "with-max-value-size", "", bitcask.DefaultMaxValueSize, "Maximum size of each value", ) diff --git a/go.sum b/go.sum index b93e4dd..bf97196 100644 --- a/go.sum +++ b/go.sum @@ -173,6 +173,7 @@ golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846 h1:0oJP+9s5Z3MT6dym56c4f7nVeujVpL1QyD2Vp/bTql0= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= diff --git a/internal/config/config.go b/internal/config/config.go index 1997eac..76a8d0b 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -8,10 +8,10 @@ import ( // Config contains the bitcask configuration parameters type Config struct { - MaxDatafileSize int `json:"max_datafile_size"` - MaxKeySize int `json:"max_key_size"` - MaxValueSize int `json:"max_value_size"` - Sync bool `json:"sync"` + MaxDatafileSize int `json:"max_datafile_size"` + MaxKeySize uint32 `json:"max_key_size"` + MaxValueSize uint64 `json:"max_value_size"` + Sync bool `json:"sync"` } // Load loads a configuration from the given path diff --git a/internal/data/codec.go b/internal/data/codec.go index a49f45c..52d969e 100644 --- a/internal/data/codec.go +++ b/internal/data/codec.go @@ -10,11 +10,17 @@ import ( ) const ( - KeySize = 4 - ValueSize = 8 + keySize = 4 + valueSize = 8 checksumSize = 4 ) +var ( + // ErrInvalidKeyOrValueSize indicates a serialized key/value size + // which is greater than specified limit + ErrInvalidKeyOrValueSize = errors.New("key/value size is invalid") +) + // NewEncoder creates a streaming Entry encoder. func NewEncoder(w io.Writer) *Encoder { return &Encoder{w: bufio.NewWriter(w)} @@ -29,9 +35,9 @@ type Encoder struct { // Encode takes any Entry and streams it to the underlying writer. // Messages are framed with a key-length and value-length prefix. func (e *Encoder) Encode(msg internal.Entry) (int64, error) { - var bufKeyValue = make([]byte, KeySize+ValueSize) - binary.BigEndian.PutUint32(bufKeyValue[:KeySize], uint32(len(msg.Key))) - binary.BigEndian.PutUint64(bufKeyValue[KeySize:KeySize+ValueSize], uint64(len(msg.Value))) + var bufKeyValue = make([]byte, keySize+valueSize) + binary.BigEndian.PutUint32(bufKeyValue[:keySize], uint32(len(msg.Key))) + binary.BigEndian.PutUint64(bufKeyValue[keySize:keySize+valueSize], uint64(len(msg.Value))) if _, err := e.w.Write(bufKeyValue); err != nil { return 0, errors.Wrap(err, "failed writing key & value length prefix") } @@ -53,46 +59,73 @@ func (e *Encoder) Encode(msg internal.Entry) (int64, error) { return 0, errors.Wrap(err, "failed flushing data") } - return int64(KeySize + ValueSize + len(msg.Key) + len(msg.Value) + checksumSize), nil + return int64(keySize + valueSize + len(msg.Key) + len(msg.Value) + checksumSize), nil } // NewDecoder creates a streaming Entry decoder. -func NewDecoder(r io.Reader) *Decoder { - return &Decoder{r: r} +func NewDecoder(r io.Reader, maxKeySize uint32, maxValueSize uint64) *Decoder { + return &Decoder{ + r: r, + maxKeySize: maxKeySize, + maxValueSize: maxValueSize, + } } // Decoder wraps an underlying io.Reader and allows you to stream // Entry decodings on it. type Decoder struct { - r io.Reader + r io.Reader + maxKeySize uint32 + maxValueSize uint64 } +// Decode decodes the next Entry from the current stream func (d *Decoder) Decode(v *internal.Entry) (int64, error) { - prefixBuf := make([]byte, KeySize+ValueSize) + prefixBuf := make([]byte, keySize+valueSize) _, err := io.ReadFull(d.r, prefixBuf) if err != nil { return 0, err } - actualKeySize, actualValueSize := GetKeyValueSizes(prefixBuf) - buf := make([]byte, actualKeySize+actualValueSize+checksumSize) + actualKeySize, actualValueSize, err := getKeyValueSizes(prefixBuf, d.maxKeySize, d.maxValueSize) + if err != nil { + return 0, errors.Wrap(err, "error while getting key/value serialized sizes") + } + + buf := make([]byte, uint64(actualKeySize)+actualValueSize+checksumSize) if _, err = io.ReadFull(d.r, buf); err != nil { return 0, errors.Wrap(translateError(err), "failed reading saved data") } - DecodeWithoutPrefix(buf, actualKeySize, v) - return int64(KeySize + ValueSize + actualKeySize + actualValueSize + checksumSize), nil + decodeWithoutPrefix(buf, actualKeySize, v) + return int64(keySize + valueSize + uint64(actualKeySize) + actualValueSize + checksumSize), nil } -func GetKeyValueSizes(buf []byte) (uint64, uint64) { - actualKeySize := binary.BigEndian.Uint32(buf[:KeySize]) - actualValueSize := binary.BigEndian.Uint64(buf[KeySize:]) +// DecodeEntry decodes a serialized entry +func DecodeEntry(b []byte, e *internal.Entry, maxKeySize uint32, maxValueSize uint64) error { + valueOffset, _, err := getKeyValueSizes(b, maxKeySize, maxValueSize) + if err != nil { + return errors.Wrap(err, "key/value sizes are invalid") + } - return uint64(actualKeySize), actualValueSize + decodeWithoutPrefix(b[keySize+valueSize:], valueOffset, e) + + return nil } -func DecodeWithoutPrefix(buf []byte, valueOffset uint64, v *internal.Entry) { +func getKeyValueSizes(buf []byte, maxKeySize uint32, maxValueSize uint64) (uint32, uint64, error) { + actualKeySize := binary.BigEndian.Uint32(buf[:keySize]) + actualValueSize := binary.BigEndian.Uint64(buf[keySize:]) + + if actualKeySize > maxKeySize || actualValueSize > maxValueSize { + return 0, 0, ErrInvalidKeyOrValueSize + } + + return actualKeySize, actualValueSize, nil +} + +func decodeWithoutPrefix(buf []byte, valueOffset uint32, v *internal.Entry) { v.Key = buf[:valueOffset] v.Value = buf[valueOffset : len(buf)-checksumSize] v.Checksum = binary.BigEndian.Uint32(buf[len(buf)-checksumSize:]) diff --git a/internal/data/datafile.go b/internal/data/datafile.go index 46d7789..1ccc038 100644 --- a/internal/data/datafile.go +++ b/internal/data/datafile.go @@ -36,16 +36,19 @@ type Datafile interface { type datafile struct { sync.RWMutex - id int - r *os.File - ra *mmap.ReaderAt - w *os.File - offset int64 - dec *Decoder - enc *Encoder + id int + r *os.File + ra *mmap.ReaderAt + w *os.File + offset int64 + dec *Decoder + enc *Encoder + maxKeySize uint32 + maxValueSize uint64 } -func NewDatafile(path string, id int, readonly bool) (Datafile, error) { +// NewDatafile opens an existing datafile +func NewDatafile(path string, id int, readonly bool, maxKeySize uint32, maxValueSize uint64) (Datafile, error) { var ( r *os.File ra *mmap.ReaderAt @@ -78,17 +81,19 @@ func NewDatafile(path string, id int, readonly bool) (Datafile, error) { offset := stat.Size() - dec := NewDecoder(r) + dec := NewDecoder(r, maxKeySize, maxValueSize) enc := NewEncoder(w) return &datafile{ - id: id, - r: r, - ra: ra, - w: w, - offset: offset, - dec: dec, - enc: enc, + id: id, + r: r, + ra: ra, + w: w, + offset: offset, + dec: dec, + enc: enc, + maxKeySize: maxKeySize, + maxValueSize: maxValueSize, }, nil } @@ -131,6 +136,7 @@ func (df *datafile) Size() int64 { return df.offset } +// Read reads the next entry from the datafile func (df *datafile) Read() (e internal.Entry, n int64, err error) { df.Lock() defer df.Unlock() @@ -143,6 +149,7 @@ func (df *datafile) Read() (e internal.Entry, n int64, err error) { return } +// ReadAt the entry located at index offset with expected serialized size func (df *datafile) ReadAt(index, size int64) (e internal.Entry, err error) { var n int @@ -161,8 +168,7 @@ func (df *datafile) ReadAt(index, size int64) (e internal.Entry, err error) { return } - valueOffset, _ := GetKeyValueSizes(b) - DecodeWithoutPrefix(b[KeySize+ValueSize:], valueOffset, &e) + DecodeEntry(b, &e, df.maxKeySize, df.maxValueSize) return } diff --git a/internal/index/codec_index.go b/internal/index/codec_index.go index e779424..0914575 100644 --- a/internal/index/codec_index.go +++ b/internal/index/codec_index.go @@ -24,7 +24,7 @@ const ( sizeSize = int64Size ) -func readKeyBytes(r io.Reader, maxKeySize int) ([]byte, error) { +func readKeyBytes(r io.Reader, maxKeySize uint32) ([]byte, error) { s := make([]byte, int32Size) _, err := io.ReadFull(r, s) if err != nil { @@ -87,7 +87,7 @@ func writeItem(item internal.Item, w io.Writer) error { } // ReadIndex reads a persisted from a io.Reader into a Tree -func readIndex(r io.Reader, t art.Tree, maxKeySize int) error { +func readIndex(r io.Reader, t art.Tree, maxKeySize uint32) error { for { key, err := readKeyBytes(r, maxKeySize) if err != nil { diff --git a/internal/index/codec_index_test.go b/internal/index/codec_index_test.go index c2a3890..ab42a55 100644 --- a/internal/index/codec_index_test.go +++ b/internal/index/codec_index_test.go @@ -94,7 +94,7 @@ func TestReadCorruptedData(t *testing.T) { table := []struct { name string err error - maxKeySize int + maxKeySize uint32 data []byte }{ {name: "key-data-overflow", err: errKeySizeTooLarge, maxKeySize: 1024, data: overflowKeySize}, diff --git a/internal/index/index.go b/internal/index/index.go index 7991f7a..a48143c 100644 --- a/internal/index/index.go +++ b/internal/index/index.go @@ -8,7 +8,7 @@ import ( ) type Indexer interface { - Load(path string, maxkeySize int) (art.Tree, bool, error) + Load(path string, maxkeySize uint32) (art.Tree, bool, error) Save(t art.Tree, path string) error } @@ -18,7 +18,7 @@ func NewIndexer() Indexer { type indexer struct{} -func (i *indexer) Load(path string, maxKeySize int) (art.Tree, bool, error) { +func (i *indexer) Load(path string, maxKeySize uint32) (art.Tree, bool, error) { t := art.New() if !internal.Exists(path) { diff --git a/internal/mocks/indexer.go b/internal/mocks/indexer.go index 1a3c0a5..7680aac 100644 --- a/internal/mocks/indexer.go +++ b/internal/mocks/indexer.go @@ -12,11 +12,11 @@ type Indexer struct { } // Load provides a mock function with given fields: path, maxkeySize -func (_m *Indexer) Load(path string, maxkeySize int) (art.Tree, bool, error) { +func (_m *Indexer) Load(path string, maxkeySize uint32) (art.Tree, bool, error) { ret := _m.Called(path, maxkeySize) var r0 art.Tree - if rf, ok := ret.Get(0).(func(string, int) art.Tree); ok { + if rf, ok := ret.Get(0).(func(string, uint32) art.Tree); ok { r0 = rf(path, maxkeySize) } else { if ret.Get(0) != nil { @@ -25,14 +25,14 @@ func (_m *Indexer) Load(path string, maxkeySize int) (art.Tree, bool, error) { } var r1 bool - if rf, ok := ret.Get(1).(func(string, int) bool); ok { + if rf, ok := ret.Get(1).(func(string, uint32) bool); ok { r1 = rf(path, maxkeySize) } else { r1 = ret.Get(1).(bool) } var r2 error - if rf, ok := ret.Get(2).(func(string, int) error); ok { + if rf, ok := ret.Get(2).(func(string, uint32) error); ok { r2 = rf(path, maxkeySize) } else { r2 = ret.Error(2) diff --git a/options.go b/options.go index 1c32400..14c66d8 100644 --- a/options.go +++ b/options.go @@ -7,10 +7,10 @@ const ( DefaultMaxDatafileSize = 1 << 20 // 1MB // DefaultMaxKeySize is the default maximum key size in bytes - DefaultMaxKeySize = 64 // 64 bytes + DefaultMaxKeySize = uint32(64) // 64 bytes // DefaultMaxValueSize is the default value size in bytes - DefaultMaxValueSize = 1 << 16 // 65KB + DefaultMaxValueSize = uint64(1 << 16) // 65KB // DefaultSync is the default file synchronization action DefaultSync = false @@ -28,7 +28,7 @@ func WithMaxDatafileSize(size int) Option { } // WithMaxKeySize sets the maximum key size option -func WithMaxKeySize(size int) Option { +func WithMaxKeySize(size uint32) Option { return func(cfg *config.Config) error { cfg.MaxKeySize = size return nil @@ -36,7 +36,7 @@ func WithMaxKeySize(size int) Option { } // WithMaxValueSize sets the maximum value size option -func WithMaxValueSize(size int) Option { +func WithMaxValueSize(size uint64) Option { return func(cfg *config.Config) error { cfg.MaxValueSize = size return nil