From 4edd8ec2d4749a8cec37b83d5bb39703a4f3656a Mon Sep 17 00:00:00 2001 From: Patrick O'Grady Date: Sun, 13 Sep 2020 16:47:41 -0700 Subject: [PATCH 1/4] Overhaul badger settings --- storage/badger_storage.go | 81 ++++++++++--------------- storage/badger_storage_configuration.go | 28 ++++----- storage/badger_storage_test.go | 1 - storage/compressor.go | 4 +- 4 files changed, 46 insertions(+), 68 deletions(-) diff --git a/storage/badger_storage.go b/storage/badger_storage.go index b6881433..270dafe0 100644 --- a/storage/badger_storage.go +++ b/storage/badger_storage.go @@ -47,9 +47,17 @@ const ( // DefaultMaxTableSize is 256 MB. The larger // this value is, the larger database transactions - // storage can handle. + // storage can handle (~15% of the max table size + // == max commit size). DefaultMaxTableSize = 256 << 20 + // DefaultLogValueSize is 64 MB. + DefaultLogValueSize = 64 << 20 + + // DefaultCompressionMode is the default block + // compression setting. + DefaultCompressionMode = options.None + // logModulo determines how often we should print // logs while scanning data. logModulo = 5000 @@ -64,10 +72,8 @@ const ( // BadgerStorage is a wrapper around Badger DB // that implements the Database interface. type BadgerStorage struct { - limitMemory bool - indexCacheSize int64 - fileIOValueLogLoading bool - compressorEntries []*CompressorEntry + badgerOptions badger.Options + compressorEntries []*CompressorEntry pool *BufferPool db *badger.DB @@ -80,13 +86,22 @@ type BadgerStorage struct { closed chan struct{} } -func defaultBadgerOptions(dir string) badger.Options { +// DefaultBadgerOptions are the default options used to initialized +// a new BadgerDB. These settings override many of the default settings +// to minimize memory usage. 
+func DefaultBadgerOptions(dir string) badger.Options { opts := badger.DefaultOptions(dir) - opts.Logger = nil - // We increase the MaxTableSize to support larger database - // transactions (which are capped at 20% of MaxTableSize). + // By default, we do not compress the table at all. Doing so can + // significantly increase memory usage. + opts.Compression = DefaultCompressionMode + opts.MaxTableSize = DefaultMaxTableSize + opts.ValueLogFileSize = DefaultLogValueSize + + // Don't load tables into memory. + opts.TableLoadingMode = options.FileIO + opts.ValueLogLoadingMode = options.FileIO // To allow writes at a faster speed, we create a new memtable as soon as // an existing memtable is filled up. This option determines how many @@ -98,12 +113,6 @@ func defaultBadgerOptions(dir string) badger.Options { opts.NumLevelZeroTables = 1 opts.NumLevelZeroTablesStall = 2 - // By default, we set TableLoadingMode and ValueLogLoadingMode to use - // MemoryMap because it uses much less memory than RAM but is much faster than - // FileIO. - opts.TableLoadingMode = options.MemoryMap - opts.ValueLogLoadingMode = options.MemoryMap - // This option will have a significant effect the memory. If the level is kept // in-memory, read are faster but the tables will be kept in memory. By default, // this is set to false. @@ -112,6 +121,9 @@ func defaultBadgerOptions(dir string) badger.Options { // We don't compact L0 on close as this can greatly delay shutdown time. opts.CompactL0OnClose = false + // LoadBloomsOnOpen=false will improve the db startup speed + opts.LoadBloomsOnOpen = false + // This value specifies how much memory should be used by table indices. These // indices include the block offsets and the bloomfilters. Badger uses bloom // filters to speed up lookups. 
Each table has its own bloom @@ -125,51 +137,24 @@ func defaultBadgerOptions(dir string) badger.Options { return opts } -// lowMemoryOptions returns a set of BadgerDB configuration -// options that significantly reduce memory usage. -// -// Inspired by: https://github.com/dgraph-io/badger/issues/1304 -func lowMemoryOptions(dir string) badger.Options { - opts := defaultBadgerOptions(dir) - - // LoadBloomsOnOpen=false will improve the db startup speed - opts.LoadBloomsOnOpen = false - - // Don't load tables into memory. - opts.TableLoadingMode = options.FileIO - opts.ValueLogLoadingMode = options.FileIO - - return opts -} - // NewBadgerStorage creates a new BadgerStorage. func NewBadgerStorage( ctx context.Context, dir string, storageOptions ...BadgerOption, ) (Database, error) { + dir = path.Clean(dir) + b := &BadgerStorage{ - indexCacheSize: DefaultIndexCacheSize, - closed: make(chan struct{}), - pool: NewBufferPool(), + badgerOptions: DefaultBadgerOptions(dir), + closed: make(chan struct{}), + pool: NewBufferPool(), } for _, opt := range storageOptions { opt(b) } - dir = path.Clean(dir) - dbOpts := defaultBadgerOptions(dir) - - // Override dbOpts with provided options - if b.limitMemory { - dbOpts = lowMemoryOptions(dir) - } - dbOpts.IndexCacheSize = b.indexCacheSize - if b.fileIOValueLogLoading { - dbOpts.ValueLogLoadingMode = options.FileIO - } - - db, err := badger.Open(dbOpts) + db, err := badger.Open(b.badgerOptions) if err != nil { return nil, fmt.Errorf("%w: %v", ErrDatabaseOpenFailed, err) } diff --git a/storage/badger_storage_configuration.go b/storage/badger_storage_configuration.go index 1882974d..70106af8 100644 --- a/storage/badger_storage_configuration.go +++ b/storage/badger_storage_configuration.go @@ -14,19 +14,15 @@ package storage +import ( + "github.com/dgraph-io/badger/v2" +) + // BadgerOption is used to overwrite default values in // BadgerStorage construction. Any Option not provided // falls back to the default value. 
type BadgerOption func(b *BadgerStorage) -// WithMemoryLimit sets BadgerDB to use -// settings that limit memory. -func WithMemoryLimit() BadgerOption { - return func(b *BadgerStorage) { - b.limitMemory = true - } -} - // WithCompressorEntries provides zstd dictionaries // for given namespaces. func WithCompressorEntries(entries []*CompressorEntry) BadgerOption { @@ -37,20 +33,18 @@ func WithCompressorEntries(entries []*CompressorEntry) BadgerOption { // WithIndexCacheSize override the DefaultIndexCacheSize // setting for the BadgerDB. The size here is in bytes. +// If you provide custom BadgerDB settings, do not use this +// config as it will be overridden by your custom settings. func WithIndexCacheSize(size int64) BadgerOption { return func(b *BadgerStorage) { - b.indexCacheSize = size + b.badgerOptions.IndexCacheSize = size } } -// WithFileIOValueLogLoading overrides the BadgerDB database -// options to use options.FileIO for ValueLogLoading. This -// incurs some performance penalty but massively reduces memory -// usage. This is a separate setting because it is often used -// in tandem with the default options (instead of using -// WithMemoryLimit). -func WithFileIOValueLogLoading() BadgerOption { +// WithCustomSettings allows for overriding all default BadgerDB +// options with custom settings. 
+func WithCustomSettings(settings badger.Options) BadgerOption { return func(b *BadgerStorage) { - b.fileIOValueLogLoading = true + b.badgerOptions = settings } } diff --git a/storage/badger_storage_test.go b/storage/badger_storage_test.go index eea26f7e..63a96e8a 100644 --- a/storage/badger_storage_test.go +++ b/storage/badger_storage_test.go @@ -31,7 +31,6 @@ func newTestBadgerStorage(ctx context.Context, dir string) (Database, error) { ctx, dir, WithIndexCacheSize(TinyIndexCacheSize), - WithFileIOValueLogLoading(), ) } diff --git a/storage/compressor.go b/storage/compressor.go index aa110902..987e9b40 100644 --- a/storage/compressor.go +++ b/storage/compressor.go @@ -246,9 +246,9 @@ func (c *Compressor) decodeMap(input []byte) (map[string]interface{}, error) { // subAccountMetadata|amountMetadata|currencyMetadata // // In both cases, the | character is represented by the unicodeRecordSeparator rune. -func (c *Compressor) EncodeAccountCoin( +func (c *Compressor) EncodeAccountCoin( // nolint:gocognit accountCoin *AccountCoin, -) ([]byte, error) { // nolint:gocognit +) ([]byte, error) { output := c.pool.Get() if _, err := output.WriteString(accountCoin.Account.Address); err != nil { return nil, fmt.Errorf("%w: %s", ErrObjectEncodeFailed, err.Error()) From 36f1eff2814128ca345755b8ecaf7a943494874e Mon Sep 17 00:00:00 2001 From: Patrick O'Grady Date: Sun, 13 Sep 2020 21:30:09 -0700 Subject: [PATCH 2/4] Add more documentation --- storage/badger_storage.go | 14 ++++++++++++-- syncer/syncer.go | 4 ++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/storage/badger_storage.go b/storage/badger_storage.go index 270dafe0..22749901 100644 --- a/storage/badger_storage.go +++ b/storage/badger_storage.go @@ -87,8 +87,18 @@ type BadgerStorage struct { } // DefaultBadgerOptions are the default options used to initialized -// a new BadgerDB. These settings override many of the default settings -// to minimize memory usage. +// a new BadgerDB. 
These settings override many of the default BadgerDB +// settings to restrict memory usage to ~8 GB. If constraining memory +// usage is not desired for your use case, you can provide your own +// BadgerDB settings with the configuration option WithCustomSettings. +// +// There are many threads about optimizing memory usage in Badger (which +// can grow to many GBs if left untuned). Our own research indicates +// that each MB increase in MaxTableSize and/or ValueLogFileSize corresponds +// to a 10 MB increase in RAM usage (all other settings equal). Our primary +// concern is large database transaction size, so we configure MaxTableSize +// to be 4 times the size of ValueLogFileSize (if we skewed any further to +// MaxTableSize, we would quickly hit the default open file limit on many OS's). func DefaultBadgerOptions(dir string) badger.Options { opts := badger.DefaultOptions(dir) diff --git a/syncer/syncer.go b/syncer/syncer.go index 71236f89..aeae6138 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -72,8 +72,8 @@ const ( // DefaultSizeMultiplier is used to pad our average size adjustment. // This can be used to account for the overhead associated with processing - // a particular block with increased concurrency. - DefaultSizeMultiplier = float64(1.2) // nolint:gomnd + // a particular block (i.e. balance adjustments, coins created, etc). + DefaultSizeMultiplier = float64(10) // nolint:gomnd // defaultSyncSleep is the amount of time to sleep // when we are at tip but want to keep syncing. 
From 2c1720e6f508dc58b0f71081eeca158d413a9e24 Mon Sep 17 00:00:00 2001 From: Patrick O'Grady Date: Sun, 13 Sep 2020 21:50:58 -0700 Subject: [PATCH 3/4] Increase trailing window to protect against large blocks --- syncer/syncer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncer/syncer.go b/syncer/syncer.go index aeae6138..e967e8cb 100644 --- a/syncer/syncer.go +++ b/syncer/syncer.go @@ -64,7 +64,7 @@ const ( // defaultTrailingWindow is the size of the trailing window // of block sizes to keep when adjusting concurrency. - defaultTrailingWindow = 100 + defaultTrailingWindow = 1000 // defaultAdjustmentWindow is how frequently we will // consider increasing our concurrency. From 6b6c1bc9c9ec41cf9d2af7da07494ef0095b8a1e Mon Sep 17 00:00:00 2001 From: Patrick O'Grady Date: Sun, 13 Sep 2020 22:04:08 -0700 Subject: [PATCH 4/4] nits --- storage/badger_storage.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/storage/badger_storage.go b/storage/badger_storage.go index 22749901..0890cc80 100644 --- a/storage/badger_storage.go +++ b/storage/badger_storage.go @@ -88,7 +88,7 @@ type BadgerStorage struct { // DefaultBadgerOptions are the default options used to initialized // a new BadgerDB. These settings override many of the default BadgerDB -// settings to restrict memory usage to ~8 GB. If constraining memory +// settings to restrict memory usage to ~6 GB. If constraining memory // usage is not desired for your use case, you can provide your own // BadgerDB settings with the configuration option WithCustomSettings. // @@ -98,7 +98,7 @@ type BadgerStorage struct { // to a 10 MB increase in RAM usage (all other settings equal). Our primary // concern is large database transaction size, so we configure MaxTableSize // to be 4 times the size of ValueLogFileSize (if we skewed any further to -// MaxTableSize, we would quickly hit the default open file limit on many OS's). 
+// MaxTableSize, we would quickly hit the default open file limit on many OSes). func DefaultBadgerOptions(dir string) badger.Options { opts := badger.DefaultOptions(dir) @@ -131,7 +131,9 @@ func DefaultBadgerOptions(dir string) badger.Options { // We don't compact L0 on close as this can greatly delay shutdown time. opts.CompactL0OnClose = false - // LoadBloomsOnOpen=false will improve the db startup speed + // LoadBloomsOnOpen=false will improve the db startup speed. Loading bloom + // filters on open is also wasteful with a limited index cache size (as many of + // the loaded filters will be immediately discarded from the cache). opts.LoadBloomsOnOpen = false // This value specifies how much memory should be used by table indices. These