Skip to content

Commit

Permalink
Restructure the static B plus Tree, finally! :-D
Browse files Browse the repository at this point in the history
  • Loading branch information
kkli08 committed Nov 1, 2024
1 parent c88ae00 commit 8ad187d
Show file tree
Hide file tree
Showing 8 changed files with 515 additions and 353 deletions.
596 changes: 351 additions & 245 deletions Storage/DiskBTree/DiskBTree.cpp

Large diffs are not rendered by default.

101 changes: 53 additions & 48 deletions Storage/DiskBTree/DiskBTree.h
Original file line number Diff line number Diff line change
@@ -1,97 +1,102 @@
//
// Created by damian on 9/24/24.
//

//
// DiskBTree.h
//

#ifndef DISK_BTREE_H
#define DISK_BTREE_H

#include "PageManager.h"
#include "Page.h"
#include "KeyValue.h"
#include <string>
#include <vector>
#include <memory>
#include <unordered_map>
#include <cstdint>
#include <cmath>
#include "KeyValue.h"
#include "PageManager.h"
#include "BloomFilter.h"

class DiskBTree {
public:
// Constructor for building a new B+ tree from memtable data
DiskBTree(const std::string& sstFileName, int degree, const std::vector<KeyValueWrapper>& keyValues);
DiskBTree(const std::string& sstFileName, const std::vector<KeyValueWrapper>& keyValues, size_t pageSize = 4096);

// Constructor for opening an existing SST file
DiskBTree(const std::string& sstFileName, int degree);
DiskBTree(const std::string& sstFileName);

// Destructor
~DiskBTree();

// Search for a key in the B+ tree
KeyValueWrapper* search(const KeyValueWrapper& kv);

// Scan keys within a range
void scan(const KeyValueWrapper& startKey, const KeyValueWrapper& endKey, std::vector<KeyValueWrapper>& result);

// Get the SST file name
std::string getFileName() const;

// set degree
void setDegree(int _degree) {degree = _degree;};

// Set buffer pool parameters
void setBufferPoolParameters(size_t capacity, EvictionPolicy policy);
long long getCacheHit() const {return pageManager.getCacheHit();};

private:
// B+ tree degree
int degree;
// Get cache hit count
long long getCacheHit() const;

// Search for a key in the B+ tree
KeyValueWrapper* search(const KeyValueWrapper& kv);

// Scan keys within a range
void scan(const KeyValueWrapper& startKey, const KeyValueWrapper& endKey, std::vector<KeyValueWrapper>& result);

private:
// PageManager for disk I/O
PageManager pageManager;

// Offset of the root node
uint64_t rootOffset;

// Leaf node begin and end offsets
uint64_t leafBeginOffset;
uint64_t leafEndOffset;

// File name of the SST file
std::string sstFileName;

// In-memory representation of a node during construction
// Page size
size_t pageSize;

// Degree and height of the B+ tree
size_t degree;
size_t height;

// Fields used during SST building
// BTreeNode struct representing different types of nodes
struct BTreeNode {
bool isLeaf;
std::vector<KeyValueWrapper> keys;
std::vector<std::shared_ptr<BTreeNode>> children; // For internal nodes
uint64_t selfOffset; // Offset of this node on disk
std::vector<BTreeNode*> children; // For internal nodes
std::vector<size_t> leafPageIndices; // For nodes pointing to leaf pages

// Constructor
BTreeNode(bool isLeaf);
uint64_t offset; // Offset of the node in the SST file

// Methods used during tree construction
void insertNonFull(const KeyValueWrapper& kv, int degree);
void splitChild(int idx, int degree);

// Method to write the node and its children to disk
void writeNode(DiskBTree* tree);

// Set the offset after writing to disk
void setOffset(uint64_t offset);
BTreeNode(bool leaf) : isLeaf(leaf), offset(0) {}
};

// Build B+ tree from sorted key-values
std::shared_ptr<BTreeNode> buildTree(const std::vector<KeyValueWrapper>& keyValues);
// Vector of leaf pages
std::vector<Page> leafPages;

// Methods for search and scan
KeyValueWrapper* searchInNode(uint64_t nodeOffset, const KeyValueWrapper& kv);
void scanInNode(uint64_t nodeOffset, const KeyValueWrapper& startKey, const KeyValueWrapper& endKey, std::vector<KeyValueWrapper>& result);
// Vector of smallest keys from each leaf page
std::vector<KeyValueWrapper> leafPageSmallestKeys;

};

#endif // DISK_BTREE_H
// Root node of the tree
BTreeNode* root;

// For memory management
std::vector<BTreeNode*> allNodes; // To keep track of all nodes for deletion

// Levels of the tree, from leaf level upwards
std::vector<std::vector<BTreeNode*>> levels;

// Method to split input keyValues into leaf pages
void splitInputPairs(const std::vector<KeyValueWrapper>& keyValues);

// Method to compute degree and height
void computeDegreeAndHeight();

// Method to build the tree
void buildTree();

// Method to write the tree into the SST file
void writeTreeToSST();
};

#endif // DISK_BTREE_H
96 changes: 79 additions & 17 deletions Storage/Page/Page.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
//
// Created by damian on 9/24/24.
//
//
// Page.cpp
//

#include "Page.h"
#include <cstring>
Expand All @@ -17,6 +12,8 @@ Page::Page(PageType type) : pageType(type), numEntries(0) {
}
}

// Default constructor: creates a page with zero entries.
// NOTE(review): pageType is not set in this initializer list; unless Page.h gives it a
// default member initializer, it is indeterminate until assigned — confirm against Page.h.
Page::Page(): numEntries(0) {}

// Add a key to the internal node
void Page::addKey(const KeyValueWrapper& key) {
if (pageType != PageType::INTERNAL_NODE) {
Expand Down Expand Up @@ -53,8 +50,24 @@ void Page::addLeafEntry(const KeyValueWrapper& kv) {
numEntries++;
}

// Drop the most recently appended key-value pair from a leaf page.
// Throws std::logic_error when the page is not a leaf page, and
// std::runtime_error when the leaf currently holds no entries.
void Page::removeLastLeafEntry() {
    if (pageType != PageType::LEAF_NODE) {
        throw std::logic_error("Attempting to remove leaf entry from non-leaf page");
    }

    auto& entries = leafNodeData.keyValues;
    if (entries.empty()) {
        throw std::runtime_error("No leaf entries to remove");
    }

    entries.pop_back();
    --numEntries;  // keep the page-level counter in step with the entry vector
}

// Read-only access to the key-value pairs stored in a leaf page.
// Throws std::logic_error when called on a page that is not a leaf page.
const std::vector<KeyValueWrapper>& Page::getLeafEntries() const {
    const bool onLeafPage = (pageType == PageType::LEAF_NODE);
    if (!onLeafPage) {
        throw std::logic_error("Attempting to get leaf entries from non-leaf page");
    }

    return leafNodeData.keyValues;
}

Expand Down Expand Up @@ -145,13 +158,22 @@ std::vector<char> Page::serialize() const {
}

// After serializing the page
// At the end of Page::serialize()
if (buffer.size() > DEFAULT_PAGE_SIZE) {
std::cerr << "Serialized page size: " << buffer.size() << " bytes\n";
std::cerr << "Serialized page type: "
<< [](PageType type) {
switch (type) {
case PageType::INTERNAL_NODE: return "INTERNAL_NODE";
case PageType::LEAF_NODE: return "LEAF_NODE";
case PageType::SST_METADATA: return "SST_METADATA";
default: return "UNKNOWN";
}
}(pageType)
<< std::endl;

throw std::runtime_error("Page::serialize() --> Serialized page exceeds the maximum page size");
}


return buffer;
}

Expand Down Expand Up @@ -270,18 +292,17 @@ void Page::serializeLeafNode(std::vector<char>& buffer) const {

if (leafNodeData.hasBloomFilter) {
// Before serializing the Bloom filter
if (leafNodeData.hasBloomFilter) {
size_t bloomFilterSize = leafNodeData.bloomFilter.getSerializedSize();
std::cout << "Leaf Bloom filter size: " << bloomFilterSize << " bytes\n";
}
size_t bloomFilterSize = leafNodeData.bloomFilter.getSerializedSize();
// debug information
// std::cout << "Leaf Bloom filter size: " << bloomFilterSize << " bytes\n";

// Serialize Bloom filter
std::vector<char> bloomFilterData = leafNodeData.bloomFilter.serialize();
uint32_t bloomFilterSize = static_cast<uint32_t>(bloomFilterData.size());
uint32_t bloomFilterSize32 = static_cast<uint32_t>(bloomFilterData.size());

// Serialize bloomFilterSize
buffer.insert(buffer.end(), reinterpret_cast<const char*>(&bloomFilterSize),
reinterpret_cast<const char*>(&bloomFilterSize) + sizeof(bloomFilterSize));
buffer.insert(buffer.end(), reinterpret_cast<const char*>(&bloomFilterSize32),
reinterpret_cast<const char*>(&bloomFilterSize32) + sizeof(bloomFilterSize32));

// Serialize Bloom filter data
buffer.insert(buffer.end(), bloomFilterData.begin(), bloomFilterData.end());
Expand Down Expand Up @@ -447,12 +468,20 @@ void Page::buildLeafBloomFilter(size_t m, size_t n) {
throw std::logic_error("Attempting to build Bloom filter on non-leaf page");
}
leafNodeData.bloomFilter = BloomFilter(m, n);
for (const auto& kv : leafNodeData.keyValues) {
leafNodeData.bloomFilter.add(kv);
}
leafNodeData.hasBloomFilter = true;
}

// Insert a single key-value pair into this leaf page's Bloom filter.
// Throws std::logic_error on a non-leaf page, and std::runtime_error when the
// leaf's hasBloomFilter flag is not set.
void Page::addToLeafBloomFilter(const KeyValueWrapper& kv) {
    const bool onLeafPage = (pageType == PageType::LEAF_NODE);
    if (!onLeafPage) {
        throw std::logic_error("Attempting to add to Bloom filter on non-leaf page");
    }

    if (leafNodeData.hasBloomFilter) {
        leafNodeData.bloomFilter.add(kv);
        return;
    }

    throw std::runtime_error("Bloom filter has not been initialized");
}

// Check if a key possibly exists in the leaf node
bool Page::leafBloomFilterContains(const KeyValueWrapper& kv) const {
if (pageType != PageType::LEAF_NODE) {
Expand All @@ -464,3 +493,36 @@ bool Page::leafBloomFilterContains(const KeyValueWrapper& kv) const {
}
return leafNodeData.bloomFilter.possiblyContains(kv);
}

// Estimate how many bytes the page's fixed fields occupy when serialized.
// The total covers the common header plus the per-type bookkeeping fields, the SST
// file name, and the Bloom filter when one is present. Nothing is added here for
// keys, child offsets, or key-value entries.
size_t Page::getBaseSize() const {
    // Header shared by every page type: pageType and numEntries.
    size_t total = sizeof(PageType) + sizeof(uint16_t);

    if (pageType == PageType::INTERNAL_NODE) {
        // numKeys and numChildOffsets counters.
        total += 2 * sizeof(uint16_t);
    } else if (pageType == PageType::LEAF_NODE) {
        // numPairs, nextLeafOffset and the hasBloomFilter flag.
        total += sizeof(uint16_t) + sizeof(uint64_t) + sizeof(uint8_t);
        if (leafNodeData.hasBloomFilter) {
            // bloomFilterSize prefix followed by the filter bytes.
            total += sizeof(uint32_t);
            total += leafNodeData.bloomFilter.getSerializedSize();
        }
    } else if (pageType == PageType::SST_METADATA) {
        // rootPageOffset, leafNodeBeginOffset, leafNodeEndOffset.
        total += 3 * sizeof(uint64_t);
        // nameSize prefix followed by the file name characters.
        total += sizeof(uint32_t);
        total += sstMetadata.fileName.size();
        // hasBloomFilter flag.
        total += sizeof(uint8_t);
        if (sstMetadata.hasBloomFilter) {
            // bloomFilterSize prefix followed by the filter bytes.
            total += sizeof(uint32_t);
            total += sstMetadata.bloomFilter.getSerializedSize();
        }
    }

    return total;
}
13 changes: 7 additions & 6 deletions Storage/Page/Page.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
//
// Created by damian on 9/24/24.
//
//
// Page.h
//

#ifndef PAGE_H
#define PAGE_H

Expand All @@ -25,6 +19,8 @@ class Page {

// Constructor for different page types
Page(PageType type);
Page();


// Serialize the page to a byte buffer
std::vector<char> serialize() const;
Expand All @@ -43,12 +39,14 @@ class Page {

// Leaf Node specific methods
void addLeafEntry(const KeyValueWrapper& kv);
void removeLastLeafEntry();
const std::vector<KeyValueWrapper>& getLeafEntries() const;
void setNextLeafOffset(uint64_t offset);
uint64_t getNextLeafOffset() const;

// Build and use Bloom filter for leaf nodes
void buildLeafBloomFilter(size_t m, size_t n);
void addToLeafBloomFilter(const KeyValueWrapper& kv);
bool leafBloomFilterContains(const KeyValueWrapper& kv) const;

// SST Metadata specific methods
Expand All @@ -59,6 +57,9 @@ class Page {
void setSSTBloomFilter(const std::vector<char>& bloomFilterData);
bool getSSTBloomFilter(std::vector<char>& bloomFilterData) const;

// Estimate the base size of the page for serialization
size_t getBaseSize() const;

private:
const size_t DEFAULT_PAGE_SIZE = 4096;
// Common attributes
Expand Down
4 changes: 2 additions & 2 deletions Storage/SstFileManager/SstFileManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ SSTFileManager::SSTFileManager(const std::string& dbDirectory, int degree)
// Initialize by loading existing SST files if any
for (const auto& entry : std::filesystem::directory_iterator(dbDirectory)) {
if (entry.path().extension() == ".sst") {
auto sst = std::make_shared<DiskBTree>(entry.path().string(), degree);
auto sst = std::make_shared<DiskBTree>(entry.path().string());
sstFiles.push_back(sst);
}
}
Expand All @@ -44,7 +44,7 @@ void SSTFileManager::flushMemtable(const std::vector<KeyValueWrapper>& keyValues
std::string sstFileName = generateSSTFileName();

// Create a new DiskBTree instance for the SST file
auto sst = std::make_shared<DiskBTree>(sstFileName, degree, keyValues);
auto sst = std::make_shared<DiskBTree>(sstFileName, keyValues);

// Add the new SST to the list
sstFiles.push_back(sst);
Expand Down
3 changes: 3 additions & 0 deletions kv/KeyValue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ std::string KeyValueWrapper::keyValueTypeToString(KeyValue::KeyValueType type) c
}
}

// Size figure for one wrapped key-value pair, built from the sizes of the
// key/value case tags, the key/value type tags, and one size_t.
// NOTE(review): sizeof does not evaluate its operand, so every term is the size of an
// accessor's return type and the result does not vary with the stored key or value —
// confirm this is the intended estimate for page-capacity calculations.
size_t KeyValueWrapper::getSerializedSize() const {
    const size_t caseTagBytes = sizeof(kv.key_case()) + sizeof(kv.value_case());
    const size_t typeTagBytes = sizeof(kv.key_type()) + sizeof(kv.value_type());
    return caseTagBytes + typeTagBytes + sizeof(size_t);
}



Expand Down
2 changes: 1 addition & 1 deletion kv/KeyValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class KeyValueWrapper {
return kv.key_case() == KeyValue::KEY_NOT_SET && kv.value_case() == KeyValue::VALUE_NOT_SET;
}


size_t getSerializedSize() const;

private:

Expand Down
Loading

0 comments on commit 8ad187d

Please sign in to comment.