Merge pull request #152 from coinbase/patrick/more-storage-tuning
[storage] Overhaul Badger Defaults
patrick-ogrady authored Sep 14, 2020
2 parents f7d13f5 + 6b6c1bc commit 019fc1f
Showing 5 changed files with 61 additions and 71 deletions.
93 changes: 45 additions & 48 deletions storage/badger_storage.go
@@ -47,9 +47,17 @@ const (
 
 	// DefaultMaxTableSize is 256 MB. The larger
 	// this value is, the larger database transactions
-	// storage can handle.
+	// storage can handle (~15% of the max table size
+	// == max commit size).
 	DefaultMaxTableSize = 256 << 20
 
+	// DefaultLogValueSize is 64 MB.
+	DefaultLogValueSize = 64 << 20
+
+	// DefaultCompressionMode is the default block
+	// compression setting.
+	DefaultCompressionMode = options.None
+
 	// logModulo determines how often we should print
 	// logs while scanning data.
 	logModulo = 5000
@@ -64,10 +72,8 @@ const (
 // BadgerStorage is a wrapper around Badger DB
 // that implements the Database interface.
 type BadgerStorage struct {
-	limitMemory           bool
-	indexCacheSize        int64
-	fileIOValueLogLoading bool
-	compressorEntries     []*CompressorEntry
+	badgerOptions     badger.Options
+	compressorEntries []*CompressorEntry
 
 	pool *BufferPool
 	db   *badger.DB
@@ -80,13 +86,32 @@ type BadgerStorage struct {
 	closed chan struct{}
 }
 
-func defaultBadgerOptions(dir string) badger.Options {
+// DefaultBadgerOptions are the default options used to initialize
+// a new BadgerDB. These settings override many of the default BadgerDB
+// settings to restrict memory usage to ~6 GB. If constraining memory
+// usage is not desired for your use case, you can provide your own
+// BadgerDB settings with the configuration option WithCustomSettings.
+//
+// There are many threads about optimizing memory usage in Badger (which
+// can grow to many GBs if left untuned). Our own research indicates
+// that each MB increase in MaxTableSize and/or ValueLogFileSize corresponds
+// to a 10 MB increase in RAM usage (all other settings equal). Our primary
+// concern is large database transaction size, so we configure MaxTableSize
+// to be 4 times the size of ValueLogFileSize (if we skewed any further to
+// MaxTableSize, we would quickly hit the default open file limit on many OSes).
+func DefaultBadgerOptions(dir string) badger.Options {
 	opts := badger.DefaultOptions(dir)
 	opts.Logger = nil
 
-	// We increase the MaxTableSize to support larger database
-	// transactions (which are capped at 20% of MaxTableSize).
+	// By default, we do not compress the table at all. Doing so can
+	// significantly increase memory usage.
+	opts.Compression = DefaultCompressionMode
+
 	opts.MaxTableSize = DefaultMaxTableSize
+	opts.ValueLogFileSize = DefaultLogValueSize
+
+	// Don't load tables into memory.
+	opts.TableLoadingMode = options.FileIO
+	opts.ValueLogLoadingMode = options.FileIO
 
 	// To allow writes at a faster speed, we create a new memtable as soon as
 	// an existing memtable is filled up. This option determines how many
@@ -98,12 +123,6 @@ func defaultBadgerOptions(dir string) badger.Options {
 	opts.NumLevelZeroTables = 1
 	opts.NumLevelZeroTablesStall = 2
 
-	// By default, we set TableLoadingMode and ValueLogLoadingMode to use
-	// MemoryMap because it uses much less memory than RAM but is much faster than
-	// FileIO.
-	opts.TableLoadingMode = options.MemoryMap
-	opts.ValueLogLoadingMode = options.MemoryMap
-
 	// This option will have a significant effect the memory. If the level is kept
 	// in-memory, read are faster but the tables will be kept in memory. By default,
 	// this is set to false.
@@ -112,6 +131,11 @@
 	// We don't compact L0 on close as this can greatly delay shutdown time.
 	opts.CompactL0OnClose = false
 
+	// LoadBloomsOnOpen=false will improve the db startup speed. This is also
+	// a waste to enable with a limited index cache size (as many of the loaded bloom
+	// filters will be immediately discarded from the cache).
+	opts.LoadBloomsOnOpen = false
+
 	// This value specifies how much memory should be used by table indices. These
 	// indices include the block offsets and the bloomfilters. Badger uses bloom
 	// filters to speed up lookups. Each table has its own bloom
@@ -125,51 +149,24 @@
 	return opts
 }
 
-// lowMemoryOptions returns a set of BadgerDB configuration
-// options that significantly reduce memory usage.
-//
-// Inspired by: https://github.com/dgraph-io/badger/issues/1304
-func lowMemoryOptions(dir string) badger.Options {
-	opts := defaultBadgerOptions(dir)
-
-	// LoadBloomsOnOpen=false will improve the db startup speed
-	opts.LoadBloomsOnOpen = false
-
-	// Don't load tables into memory.
-	opts.TableLoadingMode = options.FileIO
-	opts.ValueLogLoadingMode = options.FileIO
-
-	return opts
-}
-
 // NewBadgerStorage creates a new BadgerStorage.
 func NewBadgerStorage(
 	ctx context.Context,
 	dir string,
 	storageOptions ...BadgerOption,
 ) (Database, error) {
+	dir = path.Clean(dir)
+
 	b := &BadgerStorage{
-		indexCacheSize: DefaultIndexCacheSize,
-		closed:         make(chan struct{}),
-		pool:           NewBufferPool(),
+		badgerOptions: DefaultBadgerOptions(dir),
+		closed:        make(chan struct{}),
+		pool:          NewBufferPool(),
 	}
 	for _, opt := range storageOptions {
 		opt(b)
 	}
 
-	dir = path.Clean(dir)
-	dbOpts := defaultBadgerOptions(dir)
-
-	// Override dbOpts with provided options
-	if b.limitMemory {
-		dbOpts = lowMemoryOptions(dir)
-	}
-	dbOpts.IndexCacheSize = b.indexCacheSize
-	if b.fileIOValueLogLoading {
-		dbOpts.ValueLogLoadingMode = options.FileIO
-	}
-
-	db, err := badger.Open(dbOpts)
+	db, err := badger.Open(b.badgerOptions)
 	if err != nil {
 		return nil, fmt.Errorf("%w: %v", ErrDatabaseOpenFailed, err)
 	}
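With this change, the ad-hoc memory flags (WithMemoryLimit, WithFileIOValueLogLoading) give way to a single escape hatch: start from the exported DefaultBadgerOptions and hand the tweaked struct to WithCustomSettings. Under the heuristic stated in the new doc comment (~10 MB of RAM per configured MB), the 256 MB MaxTableSize plus 64 MB ValueLogFileSize defaults alone account for roughly 3.2 GB of the ~6 GB target. A minimal sketch of the new call pattern (assuming the import path github.com/coinbase/rosetta-sdk-go/storage and that the returned Database exposes Close(ctx); treat it as illustrative, not verbatim):

```go
package main

import (
	"context"
	"log"

	"github.com/coinbase/rosetta-sdk-go/storage"
)

func main() {
	ctx := context.Background()
	dir := "/tmp/badger" // hypothetical data directory

	// Start from the tuned defaults and adjust a single knob.
	opts := storage.DefaultBadgerOptions(dir)
	opts.ValueLogFileSize = 128 << 20 // illustrative override of the 64 MB default

	db, err := storage.NewBadgerStorage(
		ctx,
		dir,
		storage.WithCustomSettings(opts),
	)
	if err != nil {
		log.Fatalf("unable to open storage: %v", err)
	}
	defer db.Close(ctx) // assumes the Database interface exposes Close

	// db now runs with FileIO loading modes, no block compression, and
	// LoadBloomsOnOpen disabled, except for the value log override above.
}
```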
28 changes: 11 additions & 17 deletions storage/badger_storage_configuration.go
@@ -14,19 +14,15 @@
 
 package storage
 
+import (
+	"github.com/dgraph-io/badger/v2"
+)
+
 // BadgerOption is used to overwrite default values in
 // BadgerStorage construction. Any Option not provided
 // falls back to the default value.
 type BadgerOption func(b *BadgerStorage)
 
-// WithMemoryLimit sets BadgerDB to use
-// settings that limit memory.
-func WithMemoryLimit() BadgerOption {
-	return func(b *BadgerStorage) {
-		b.limitMemory = true
-	}
-}
-
 // WithCompressorEntries provides zstd dictionaries
 // for given namespaces.
 func WithCompressorEntries(entries []*CompressorEntry) BadgerOption {
@@ -37,20 +33,18 @@ func WithCompressorEntries(entries []*CompressorEntry) BadgerOption {
 
 // WithIndexCacheSize override the DefaultIndexCacheSize
 // setting for the BadgerDB. The size here is in bytes.
+// If you provide custom BadgerDB settings, do not use this
+// config as it will be overridden by your custom settings.
 func WithIndexCacheSize(size int64) BadgerOption {
 	return func(b *BadgerStorage) {
-		b.indexCacheSize = size
+		b.badgerOptions.IndexCacheSize = size
 	}
 }
 
-// WithFileIOValueLogLoading overrides the BadgerDB database
-// options to use options.FileIO for ValueLogLoading. This
-// incurs some performance penalty but massively reduces memory
-// usage. This is a separate setting because it is often used
-// in tandem with the default options (instead of using
-// WithMemoryLimit).
-func WithFileIOValueLogLoading() BadgerOption {
+// WithCustomSettings allows for overriding all default BadgerDB
+// options with custom settings.
+func WithCustomSettings(settings badger.Options) BadgerOption {
 	return func(b *BadgerStorage) {
-		b.fileIOValueLogLoading = true
+		b.badgerOptions = settings
 	}
 }
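Note that each BadgerOption mutates b.badgerOptions in the order it is passed, so WithCustomSettings replaces the entire options struct and silently discards anything an earlier WithIndexCacheSize set (hence the warning added to its doc comment). A sketch of the footgun, under the same import assumptions as above:

```go
// customOpts is assumed to be built by the caller, e.g. from
// storage.DefaultBadgerOptions(dir).
db, err := storage.NewBadgerStorage(
	ctx,
	dir,
	storage.WithIndexCacheSize(512<<20),    // applied first...
	storage.WithCustomSettings(customOpts), // ...then overwritten wholesale
)
```

To combine the two, set IndexCacheSize on customOpts directly before passing it to WithCustomSettings.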
1 change: 0 additions & 1 deletion storage/badger_storage_test.go
@@ -31,7 +31,6 @@ func newTestBadgerStorage(ctx context.Context, dir string) (Database, error) {
 		ctx,
 		dir,
 		WithIndexCacheSize(TinyIndexCacheSize),
-		WithFileIOValueLogLoading(),
 	)
 }

4 changes: 2 additions & 2 deletions storage/compressor.go
@@ -246,9 +246,9 @@ func (c *Compressor) decodeMap(input []byte) (map[string]interface{}, error) {
 // subAccountMetadata|amountMetadata|currencyMetadata
 //
 // In both cases, the | character is represented by the unicodeRecordSeparator rune.
-func (c *Compressor) EncodeAccountCoin(
+func (c *Compressor) EncodeAccountCoin( // nolint:gocognit
 	accountCoin *AccountCoin,
-) ([]byte, error) { // nolint:gocognit
+) ([]byte, error) {
 	output := c.pool.Get()
 	if _, err := output.WriteString(accountCoin.Account.Address); err != nil {
 		return nil, fmt.Errorf("%w: %s", ErrObjectEncodeFailed, err.Error())
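The context above documents the coin encoding: fields are laid out flat and joined by unicodeRecordSeparator rather than run through a generic codec. A rough sketch of that packing style (the rune value and field list here are assumptions for illustration, not the repo's actual unicodeRecordSeparator or layout):

```go
package main

import "strings"

// recordSeparator stands in for the repo's unicodeRecordSeparator;
// the actual rune value is defined in storage/compressor.go.
const recordSeparator = '\u001E'

// encodeFields joins fields in a fixed order, mirroring the
// address|amount|currency layout described in the comment above.
func encodeFields(fields ...string) []byte {
	var b strings.Builder
	for i, f := range fields {
		if i > 0 {
			b.WriteRune(recordSeparator)
		}
		b.WriteString(f)
	}
	return []byte(b.String())
}
```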
6 changes: 3 additions & 3 deletions syncer/syncer.go
@@ -64,16 +64,16 @@ const (
 
 	// defaultTrailingWindow is the size of the trailing window
 	// of block sizes to keep when adjusting concurrency.
-	defaultTrailingWindow = 100
+	defaultTrailingWindow = 1000
 
 	// defaultAdjustmentWindow is how frequently we will
 	// consider increasing our concurrency.
 	defaultAdjustmentWindow = 10
 
 	// DefaultSizeMultiplier is used to pad our average size adjustment.
 	// This can be used to account for the overhead associated with processing
-	// a particular block with increased concurrency.
-	DefaultSizeMultiplier = float64(1.2) // nolint:gomnd
+	// a particular block (i.e. balance adjustments, coins created, etc).
+	DefaultSizeMultiplier = float64(10) // nolint:gomnd
 
 	// defaultSyncSleep is the amount of time to sleep
 	// when we are at tip but want to keep syncing.
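These constants drive the syncer's adaptive concurrency: it keeps a trailing window of recent block sizes (now 1,000 blocks instead of 100) and pads the average by DefaultSizeMultiplier (now 10x instead of 1.2x) before deciding whether another concurrent fetch fits in the cache budget. A simplified sketch of that arithmetic, not the syncer's actual code:

```go
// safeConcurrency estimates how many blocks can be in flight without
// exceeding cacheBytes, padding the trailing average block size by
// multiplier to cover processing overhead (balance adjustments, coins
// created, etc.). The real syncer also caps this with a configured max
// and only revisits the estimate every adjustment window.
func safeConcurrency(recentSizes []int64, multiplier float64, cacheBytes int64) int64 {
	if len(recentSizes) == 0 {
		return 1
	}
	var sum int64
	for _, s := range recentSizes {
		sum += s
	}
	padded := float64(sum) / float64(len(recentSizes)) * multiplier
	if padded <= 0 {
		return 1
	}
	c := int64(float64(cacheBytes) / padded)
	if c < 1 {
		c = 1
	}
	return c
}
```

With the new values, a 1 MB average block is budgeted as 10 MB, so a 1 GB cache admits roughly 100 in-flight blocks where the old 1.2x padding would have allowed about 850.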
