Skip to content

Commit c4f96a1

Browse files
unw9527 and lanlou1554 authored
refactor(p2): fall 2024 (#754)
* refactor(p2): remove transaction from b plus tree * sync b_plus_tree.h function param * sync b plus tree function header * rm txn in btree bench * nits * decouple b plus tree debugging tool * split array into two * fix make error * fix debugging tool function param * nits * fix linting * fix linting * fix file path * linting * linting * Modify P2 tests * fix concurrent test * fix clang-tidy * Modify header size * Update comments * sync with private repo * rm comments for task 2.a * fix style of debug functions * Apply comment suggestions * change val arr name; enlarge bench bpm size --------- Co-authored-by: Lan Lou <lanlou1554@gmail.com> Co-authored-by: Lan Lou <62441979+lanlou1554@users.noreply.github.com>
1 parent bd6c48e commit c4f96a1

22 files changed

+820
-698
lines changed

CMakeLists.txt

+11-10
Original file line numberDiff line numberDiff line change
@@ -291,16 +291,17 @@ add_custom_target(submit-p1
291291
)
292292

293293
set(P2_FILES
294-
"src/include/storage/page/page_guard.h"
295-
"src/storage/page/page_guard.cpp"
296-
"src/include/storage/page/extendible_htable_bucket_page.h"
297-
"src/storage/page/extendible_htable_bucket_page.cpp"
298-
"src/include/storage/page/extendible_htable_directory_page.h"
299-
"src/storage/page/extendible_htable_directory_page.cpp"
300-
"src/include/storage/page/extendible_htable_header_page.h"
301-
"src/storage/page/extendible_htable_header_page.cpp"
302-
"src/include/container/disk/hash/disk_extendible_hash_table.h"
303-
"src/container/disk/hash/disk_extendible_hash_table.cpp"
294+
"src/include/storage/page/b_plus_tree_page.h"
295+
"src/storage/page/b_plus_tree_page.cpp"
296+
"src/include/storage/page/b_plus_tree_internal_page.h"
297+
"src/storage/page/b_plus_tree_internal_page.cpp"
298+
"src/include/storage/page/b_plus_tree_leaf_page.h"
299+
"src/storage/page/b_plus_tree_leaf_page.cpp"
300+
"src/include/storage/index/index_iterator.h"
301+
"src/storage/index/index_iterator.cpp"
302+
"src/include/storage/index/b_plus_tree.h"
303+
"src/include/storage/index/b_plus_tree_debug.h"
304+
"src/storage/index/b_plus_tree.cpp"
304305
${P1_FILES}
305306
)
306307
add_custom_target(check-clang-tidy-p2

src/container/disk/hash/disk_extendible_hash_table_utils.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//
77
// Identification: src/container/disk/hash/disk_extendible_hash_table_utils.cpp
88
//
9-
// Copyright (c) 2015-2023, Carnegie Mellon University Database Group
9+
// Copyright (c) 2015-2024, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
1212

src/include/storage/index/b_plus_tree.h

+8-9
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323
#include "common/config.h"
2424
#include "common/macros.h"
25-
#include "concurrency/transaction.h"
2625
#include "storage/index/index_iterator.h"
2726
#include "storage/page/b_plus_tree_header_page.h"
2827
#include "storage/page/b_plus_tree_internal_page.h"
@@ -67,20 +66,20 @@ class BPlusTree {
6766

6867
public:
6968
explicit BPlusTree(std::string name, page_id_t header_page_id, BufferPoolManager *buffer_pool_manager,
70-
const KeyComparator &comparator, int leaf_max_size = LEAF_PAGE_SIZE,
71-
int internal_max_size = INTERNAL_PAGE_SIZE);
69+
const KeyComparator &comparator, int leaf_max_size = LEAF_PAGE_SLOT_CNT,
70+
int internal_max_size = INTERNAL_PAGE_SLOT_CNT);
7271

7372
// Returns true if this B+ tree has no keys and values.
7473
auto IsEmpty() const -> bool;
7574

7675
// Insert a key-value pair into this B+ tree.
77-
auto Insert(const KeyType &key, const ValueType &value, Transaction *txn = nullptr) -> bool;
76+
auto Insert(const KeyType &key, const ValueType &value) -> bool;
7877

7978
// Remove a key and its value from this B+ tree.
80-
void Remove(const KeyType &key, Transaction *txn);
79+
void Remove(const KeyType &key);
8180

8281
// Return the value associated with a given key
83-
auto GetValue(const KeyType &key, std::vector<ValueType> *result, Transaction *txn = nullptr) -> bool;
82+
auto GetValue(const KeyType &key, std::vector<ValueType> *result) -> bool;
8483

8584
// Return the page id of the root node
8685
auto GetRootPageId() -> page_id_t;
@@ -112,10 +111,10 @@ class BPlusTree {
112111
auto DrawBPlusTree() -> std::string;
113112

114113
// read data from file and insert one by one
115-
void InsertFromFile(const std::filesystem::path &file_name, Transaction *txn = nullptr);
114+
void InsertFromFile(const std::filesystem::path &file_name);
116115

117116
// read data from file and remove one by one
118-
void RemoveFromFile(const std::filesystem::path &file_name, Transaction *txn = nullptr);
117+
void RemoveFromFile(const std::filesystem::path &file_name);
119118

120119
/**
121120
* @brief Read batch operations from input file, below is a sample file format
@@ -126,7 +125,7 @@ class BPlusTree {
126125
* (3) (7)
127126
* (1,2) (3,4) (5,6) (7,10,30) // The output tree example
128127
*/
129-
void BatchOpsFromFile(const std::filesystem::path &file_name, Transaction *txn = nullptr);
128+
void BatchOpsFromFile(const std::filesystem::path &file_name);
130129

131130
private:
132131
/* Debug Routines for FREE!! */
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// BusTub
4+
//
5+
// b_plus_tree_debug.h
6+
//
7+
// Identification: bustub/src/include/storage/index/b_plus_tree_debug.h
8+
//
9+
// Copyright (c) 2024, Carnegie Mellon University Database Group
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include <sstream>
14+
#include <string>
15+
16+
#include "common/exception.h"
17+
#include "common/logger.h"
18+
#include "common/rid.h"
19+
#include "storage/index/b_plus_tree.h"
20+
#include "storage/page/b_plus_tree_page.h"
21+
22+
namespace bustub {
23+
24+
/*
25+
* All the methods in this file are used for test and debug only.
26+
* You don't need to modify them.
27+
*/
28+
29+
/*****************************************************************************
30+
* UTILITIES AND DEBUG
31+
*****************************************************************************/
32+
33+
/*
34+
* This method is used for test only
35+
* Read data from file and insert one by one
36+
*/
37+
INDEX_TEMPLATE_ARGUMENTS
38+
void BPLUSTREE_TYPE::InsertFromFile(const std::filesystem::path &file_name) {
39+
int64_t key;
40+
std::ifstream input(file_name);
41+
while (input) {
42+
input >> key;
43+
44+
KeyType index_key;
45+
index_key.SetFromInteger(key);
46+
RID rid(key);
47+
Insert(index_key, rid);
48+
}
49+
}
50+
/*
51+
* This method is used for test only
52+
* Read data from file and remove one by one
53+
*/
54+
INDEX_TEMPLATE_ARGUMENTS
55+
void BPLUSTREE_TYPE::RemoveFromFile(const std::filesystem::path &file_name) {
56+
int64_t key;
57+
std::ifstream input(file_name);
58+
while (input) {
59+
input >> key;
60+
KeyType index_key;
61+
index_key.SetFromInteger(key);
62+
Remove(index_key);
63+
}
64+
}
65+
66+
/*
 * Debug helper: read the root page through the given buffer pool manager and
 * pass it to PrintTree. Does nothing when the root page id is INVALID_PAGE_ID.
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::Print(BufferPoolManager *bpm) {
  const auto root_id = GetRootPageId();
  if (root_id == INVALID_PAGE_ID) {
    return;
  }
  auto root_guard = bpm->ReadPage(root_id);
  PrintTree(root_guard.GetPageId(), root_guard.template As<BPlusTreePage>());
}
74+
75+
/*
 * Debug helper: write the page with id `page_id` to std::cout and, for an
 * internal page, recurse into every child in slot order.
 *
 * A leaf is printed as its page id, its next page id and its keys. An internal
 * page is printed as its page id followed by its entries: the first entry shows
 * only the child page id, later entries show "key: child page id".
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::PrintTree(page_id_t page_id, const BPlusTreePage *page) {
  if (page->IsLeafPage()) {
    const auto *leaf_node = reinterpret_cast<const LeafPage *>(page);
    std::cout << "Leaf Page: " << page_id << "\tNext: " << leaf_node->GetNextPageId() << std::endl;

    // Keys of the leaf, comma separated.
    std::cout << "Contents: ";
    for (int slot = 0; slot < leaf_node->GetSize(); slot++) {
      if (slot != 0) {
        std::cout << ", ";
      }
      std::cout << leaf_node->KeyAt(slot);
    }
    std::cout << std::endl;
    std::cout << std::endl;
    return;
  }

  const auto *internal_node = reinterpret_cast<const InternalPage *>(page);
  std::cout << "Internal Page: " << page_id << std::endl;

  // Entries of the internal page, comma separated; slot 0 is printed without a key.
  std::cout << "Contents: ";
  for (int slot = 0; slot < internal_node->GetSize(); slot++) {
    if (slot != 0) {
      std::cout << ", ";
      std::cout << internal_node->KeyAt(slot) << ": ";
    }
    std::cout << internal_node->ValueAt(slot);
  }
  std::cout << std::endl;
  std::cout << std::endl;

  // Descend into each child after the whole page has been printed.
  for (int slot = 0; slot < internal_node->GetSize(); slot++) {
    auto child_guard = bpm_->ReadPage(internal_node->ValueAt(slot));
    PrintTree(child_guard.GetPageId(), child_guard.template As<BPlusTreePage>());
  }
}
116+
117+
/*
 * Debug helper: write the tree to the file `outf` as a "digraph G { ... }"
 * description produced by ToGraph. Logs a warning and writes nothing when the
 * tree is empty.
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::Draw(BufferPoolManager *bpm, const std::filesystem::path &outf) {
  if (IsEmpty()) {
    LOG_WARN("Drawing an empty tree");
    return;
  }

  std::ofstream dot_file(outf);
  dot_file << "digraph G {" << std::endl;

  // The root is read through the caller-supplied buffer pool manager.
  auto root_guard = bpm->ReadPage(GetRootPageId());
  ToGraph(root_guard.GetPageId(), root_guard.template As<BPlusTreePage>(), dot_file);

  dot_file << "}" << std::endl;
  dot_file.close();
}
132+
133+
/*
 * Debug helper: emit the graph description of the subtree rooted at `page`
 * (whose id is `page_id`) into `out`.
 *
 * Each page becomes one node named "LEAF_<id>" or "INT_<id>" whose label is an
 * HTML-like table: a row with the page id, a row with max/min/current size and
 * a row with the keys. Leaves are linked to their next leaf; internal pages are
 * linked to each child through a port named after the child page id.
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::ToGraph(page_id_t page_id, const BPlusTreePage *page, std::ofstream &out) {
  const std::string leaf_tag("LEAF_");
  const std::string internal_tag("INT_");

  if (page->IsLeafPage()) {
    const auto *leaf_node = reinterpret_cast<const LeafPage *>(page);
    const auto leaf_size = leaf_node->GetSize();

    // Node name, node properties and start of the label table.
    out << leaf_tag << page_id;
    out << "[shape=plain color=green ";
    out << "label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\" CELLPADDING=\"4\">\n";
    // Header rows: page id, then size information.
    out << "<TR><TD COLSPAN=\"" << leaf_size << "\">P=" << page_id << "</TD></TR>\n";
    out << "<TR><TD COLSPAN=\"" << leaf_size << "\">"
        << "max_size=" << leaf_node->GetMaxSize() << ",min_size=" << leaf_node->GetMinSize()
        << ",size=" << leaf_size << "</TD></TR>\n";
    // One cell per key.
    out << "<TR>";
    for (int slot = 0; slot < leaf_size; slot++) {
      out << "<TD>" << leaf_node->KeyAt(slot) << "</TD>\n";
    }
    out << "</TR>";
    out << "</TABLE>>];\n";

    // Link to the next leaf, if any, and keep both on the same rank.
    const auto next_id = leaf_node->GetNextPageId();
    if (next_id != INVALID_PAGE_ID) {
      out << leaf_tag << page_id << " -> " << leaf_tag << next_id << ";\n";
      out << "{rank=same " << leaf_tag << page_id << " " << leaf_tag << next_id << "};\n";
    }
    return;
  }

  const auto *internal_node = reinterpret_cast<const InternalPage *>(page);
  const auto internal_size = internal_node->GetSize();

  // Node name, node properties and start of the label table.
  out << internal_tag << page_id;
  out << "[shape=plain color=pink ";
  out << "label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\" CELLPADDING=\"4\">\n";
  // Header rows: page id, then size information.
  out << "<TR><TD COLSPAN=\"" << internal_size << "\">P=" << page_id << "</TD></TR>\n";
  out << "<TR><TD COLSPAN=\"" << internal_size << "\">"
      << "max_size=" << internal_node->GetMaxSize() << ",min_size=" << internal_node->GetMinSize()
      << ",size=" << internal_size << "</TD></TR>\n";
  // One cell per entry; the port is named after the child page id and the
  // first cell carries a blank instead of a key.
  out << "<TR>";
  for (int slot = 0; slot < internal_size; slot++) {
    out << "<TD PORT=\"p" << internal_node->ValueAt(slot) << "\">";
    if (slot == 0) {
      out << " ";
    } else {
      out << internal_node->KeyAt(slot);
    }
    out << "</TD>\n";
  }
  out << "</TR>";
  out << "</TABLE>>];\n";

  // Emit every child subtree, then the edge from this page to the child.
  for (int slot = 0; slot < internal_size; slot++) {
    auto child_guard = bpm_->ReadPage(internal_node->ValueAt(slot));
    const auto *child = child_guard.template As<BPlusTreePage>();
    ToGraph(child_guard.GetPageId(), child, out);

    if (slot > 0) {
      // Adjacent internal children are pinned to the same rank.
      auto prev_guard = bpm_->ReadPage(internal_node->ValueAt(slot - 1));
      const auto *prev = prev_guard.template As<BPlusTreePage>();
      if (!(prev->IsLeafPage() || child->IsLeafPage())) {
        out << "{rank=same " << internal_tag << prev_guard.GetPageId() << " " << internal_tag
            << child_guard.GetPageId() << "};\n";
      }
    }

    out << internal_tag << page_id << ":p" << child_guard.GetPageId() << " -> ";
    const std::string &child_tag = child->IsLeafPage() ? leaf_tag : internal_tag;
    out << child_tag << child_guard.GetPageId() << ";\n";
  }
}
210+
211+
/*
 * Debug helper: return the text that PrintableBPlusTree::Print writes for this
 * tree, or "()" when the tree is empty.
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::DrawBPlusTree() -> std::string {
  if (!IsEmpty()) {
    PrintableBPlusTree printable_root = ToPrintableBPlusTree(GetRootPageId());
    std::ostringstream rendered;
    printable_root.Print(rendered);
    return rendered.str();
  }

  return "()";
}
223+
224+
/*
225+
* This method is used for test only
226+
* Read data from file and insert/remove one by one
227+
*/
228+
INDEX_TEMPLATE_ARGUMENTS
229+
void BPLUSTREE_TYPE::BatchOpsFromFile(const std::filesystem::path &file_name) {
230+
int64_t key;
231+
char instruction;
232+
std::ifstream input(file_name);
233+
while (input) {
234+
input >> instruction >> key;
235+
RID rid(key);
236+
KeyType index_key;
237+
index_key.SetFromInteger(key);
238+
switch (instruction) {
239+
case 'i':
240+
Insert(index_key, rid);
241+
break;
242+
case 'd':
243+
Remove(index_key);
244+
break;
245+
default:
246+
break;
247+
}
248+
}
249+
}
250+
251+
/*
 * Debug helper: build the PrintableBPlusTree for the subtree rooted at the
 * page with id `root_id`.
 *
 * For a leaf, the node holds the page's ToString() text and a size equal to
 * that text's length plus 4. For an internal page, the node holds the page's
 * ToString() text, one child per slot (built recursively), and a size equal to
 * the sum of the children's sizes.
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::ToPrintableBPlusTree(page_id_t root_id) -> PrintableBPlusTree {
  auto page_guard = bpm_->ReadPage(root_id);
  const auto *generic_page = page_guard.template As<BPlusTreePage>();
  PrintableBPlusTree node;

  if (!generic_page->IsLeafPage()) {
    // Internal page: accumulate the children and their total width.
    const auto *internal_node = page_guard.template As<InternalPage>();
    node.keys_ = internal_node->ToString();
    node.size_ = 0;
    for (int slot = 0; slot < internal_node->GetSize(); slot++) {
      PrintableBPlusTree subtree = ToPrintableBPlusTree(internal_node->ValueAt(slot));
      node.size_ += subtree.size_;
      node.children_.push_back(subtree);
    }
    return node;
  }

  // Leaf page: no children, width is the key text plus padding.
  const auto *leaf_node = page_guard.template As<LeafPage>();
  node.keys_ = leaf_node->ToString();
  node.size_ = node.keys_.size() + 4;  // 4 more spaces for indent
  return node;
}
278+
} // namespace bustub

src/include/storage/index/index_iterator.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* For range scan of b+ tree
1414
*/
1515
#pragma once
16+
#include <utility>
1617
#include "storage/page/b_plus_tree_leaf_page.h"
1718

1819
namespace bustub {
@@ -28,7 +29,7 @@ class IndexIterator {
2829

2930
auto IsEnd() -> bool;
3031

31-
auto operator*() -> const MappingType &;
32+
auto operator*() -> std::pair<const KeyType &, const ValueType &>;
3233

3334
auto operator++() -> IndexIterator &;
3435

0 commit comments

Comments
 (0)