Skip to content

Commit

Permalink
[#12] vdfs: improve VDF lookup times drastically
Browse files Browse the repository at this point in the history
A basic benchmark shows that using a sorted `std::set` instead
of a raw, unsorted `std::vector` improves entry lookup times
as well as VDF merge times by up to 90x.

While this commit does introduce a bunch of `const_cast`s, they
are mostly guarded by deprecation warnings and unless
`vdf_entry::name` is touched, they should still be safe.
  • Loading branch information
lmichaelis committed Nov 7, 2022
1 parent 32770f8 commit 604b61b
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 92 deletions.
9 changes: 9 additions & 0 deletions include/phoenix/phoenix.hh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ namespace phoenix {
/// \return ``true`` if both strings are equal when ignoring case.
bool iequals(std::string_view a, std::string_view b);

/// \brief Tests whether \p a is lexicographically less than \p b.
///
/// Internally, uses std::tolower to compare the strings character by character.
///
/// \param a A string.
/// \param b Another string.
/// \return ``true`` if \p a is lexicographically less than \p b.
bool icompare(std::string_view a, std::string_view b);

/// \brief A basic datetime structure used by the *ZenGin*.
struct date {
/// \brief Parses a date from a buffer.
Expand Down
21 changes: 16 additions & 5 deletions include/phoenix/vdfs.hh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

#include <cstdint>
#include <ctime>
#include <set>
#include <string>
#include <string_view>
#include <vector>

namespace phoenix {
static constexpr std::string_view VDF_SIGNATURE_G1 = "PSVDSC_V2.00\r\n\r\n";
Expand Down Expand Up @@ -79,6 +79,17 @@ namespace phoenix {
std::uint32_t version {VDF_VERSION};
};

class vdf_entry;

struct vdf_entry_comparator {
public:
using is_transparent = std::true_type;

bool operator()(const vdf_entry& a, const vdf_entry& b) const;
bool operator()(const vdf_entry& a, std::string_view b) const;
bool operator()(std::string_view a, const vdf_entry& b) const;
};

/// \brief Represents an entry of a VDF.
class vdf_entry {
public:
Expand Down Expand Up @@ -108,7 +119,7 @@ namespace phoenix {
/// \brief Searches the entry for the first child with the given name.
/// \param name The name of the child to search for.
/// \return The child with the give name or `nullptr` if no entry was found.
vdf_entry* find_child(std::string_view name);
[[deprecated("mutating vdf_entry children is broken!")]] vdf_entry* find_child(std::string_view name);

/// \brief Merges the given VDF entry into this one.
/// \param itm The entry to merge.
Expand Down Expand Up @@ -142,7 +153,7 @@ namespace phoenix {
std::string name;

/// \brief A list of child entries of the entry.
std::vector<vdf_entry> children {};
std::set<vdf_entry, vdf_entry_comparator> children {};

/// \brief The offset of the entry's data in the VDF.
std::uint32_t offset {0};
Expand Down Expand Up @@ -191,7 +202,7 @@ namespace phoenix {
/// \brief Searches the VDF file for the first entry with the given name.
/// \param name The name of the entry to search for.
/// \return The entry with the give name or `nullptr` if no entry was found.
vdf_entry* find_entry(std::string_view name);
[[deprecated("mutating vdf_entry children is broken!")]] vdf_entry* find_entry(std::string_view name);

/// \brief Merges the given VDF file into this one.
/// \param itm The file to merge.
Expand All @@ -200,7 +211,7 @@ namespace phoenix {

public:
/// \brief A list of root entries in the VDF file.
std::vector<vdf_entry> entries;
std::set<vdf_entry, vdf_entry_comparator> entries;

/// \brief The header data of the VDF file.
vdf_header header;
Expand Down
6 changes: 6 additions & 0 deletions source/phoenix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ namespace phoenix {
});
}

bool icompare(std::string_view a, std::string_view b) {
return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end(), [](char a, char b) {
return std::tolower(a) < std::tolower(b);
});
}

date date::parse(buffer& buf) {
auto dt = date {buf.get_uint(),
buf.get_ushort(),
Expand Down
200 changes: 113 additions & 87 deletions source/vdfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ namespace phoenix {
return dos;
}

bool vdf_entry_comparator::operator()(const vdf_entry& a, const vdf_entry& b) const {
return icompare(a.name, b.name);
}

bool vdf_entry_comparator::operator()(const vdf_entry& a, std::string_view b) const {
return icompare(a.name, b);
}

bool vdf_entry_comparator::operator()(std::string_view a, const vdf_entry& b) const {
return icompare(a, b.name);
}

vdf_header::vdf_header(std::string_view comment, std::time_t timestamp) : comment(comment), timestamp(timestamp) {}

vdf_header vdf_header::read(buffer& in) {
Expand Down Expand Up @@ -58,41 +70,52 @@ namespace phoenix {
auto it = path.find('/');
auto name = path.substr(0, it);

for (const auto& child : children) {
if (iequals(child.name, name)) {
if (it != std::string_view::npos) {
return child.resolve_path(path.substr(it + 1));
}
auto result = this->children.find(name);
if (result == this->children.end()) {
return nullptr;
}

return &child;
}
if (it != std::string_view::npos) {
return (*result).resolve_path(path.substr(it + 1));
}

return nullptr;
return &*result;
}

const vdf_entry* vdf_entry::find_child(std::string_view name) const {
for (const auto& child : children) {
if (iequals(child.name, name)) {
return &child;
} else if (const auto* v = child.find_child(name); v != nullptr) {
return v;
auto result = this->children.find(name);
if (result == this->children.end()) {
// recurse the search
const vdf_entry* child;

for (const auto& entry : children) {
if ((child = entry.find_child(name), child != nullptr)) {
return child;
}
}

return nullptr;
}

return nullptr;
return &*result;
}

vdf_entry* vdf_entry::find_child(std::string_view name) {
for (auto& child : children) {
if (iequals(child.name, name)) {
return &child;
} else if (auto* v = child.find_child(name); v != nullptr) {
return v;
auto result = this->children.find(name);
if (result == this->children.end()) {
// recurse the search
vdf_entry* child;

for (const auto& entry : children) {
if ((child = const_cast<vdf_entry*>(entry.find_child(name)), child != nullptr)) {
return child;
}
}

return nullptr;
}

return nullptr;
return const_cast<vdf_entry*>(&*result);
}

vdf_entry vdf_entry::read(buffer& in, std::uint32_t catalog_offset) {
Expand All @@ -112,9 +135,9 @@ namespace phoenix {
auto self_offset = in.position();
in.position(catalog_offset + entry.offset * vdf_entry::packed_size);

vdf_entry* child = nullptr;
const vdf_entry* child = nullptr;
do {
child = &entry.children.emplace_back(read(in, catalog_offset));
child = &*std::get<0>(entry.children.insert(read(in, catalog_offset)));
} while (!child->is_last());

in.position(self_offset);
Expand All @@ -126,73 +149,82 @@ namespace phoenix {
}

void vdf_entry::merge(const vdf_entry& itm, bool override_existing) {
for (auto it = children.begin(); it != children.end(); ++it) {
if (phoenix::iequals(it->name, itm.name)) {
if (itm.is_file() || it->is_file()) {
if (!override_existing) {
return;
}
auto result = this->children.find(name);
if (result == this->children.end()) {
// If no matching entry was found, this is a new one.
// Just add it to the children of this entry.
children.insert(itm);
} else {
if (itm.is_file() || (*result).is_file()) {
if (!override_existing) {
return;
}

// If an entry with the same name is found and either is a file,
// replace the entry with the new one.
children.erase(it);
children.push_back(itm);
} else {
// Otherwise, the entry is a directory, so we just continue the merge.
for (const auto& child : itm.children) {
it->merge(child, override_existing);
}
// If an entry with the same name is found and either is a file,
// replace the entry with the new one.
children.erase(result);
children.insert(itm);
} else {
// Otherwise, the entry is a directory, so we just continue the merge.
for (const auto& child : itm.children) {
const_cast<vdf_entry&>(*result).merge(child, override_existing);
}
return;
}
}

// If no matching entry was found, this is a new one.
// Just add it to the children of this entry.
children.push_back(itm);
}

vdf_file::vdf_file(std::string_view comment, std::time_t timestamp) : header(comment, timestamp) {}

const vdf_entry* vdf_file::find_entry(std::string_view name) const {
for (const auto& entry : entries) {
if (iequals(entry.name, name)) {
return &entry;
} else if (const auto* v = entry.find_child(name); v != nullptr) {
return v;
auto result = this->entries.find(name);
if (result == this->entries.end()) {
// recurse the search
vdf_entry* child;

for (const auto& entry : entries) {
if ((child = const_cast<vdf_entry*>(entry.find_child(name)), child != nullptr)) {
return child;
}
}

return nullptr;
}

return nullptr;
return &*result;
}

const vdf_entry* vdf_file::resolve_path(std::string_view path) const {
auto it = path.find('/');
auto name = path.substr(0, it);

for (const auto& child : entries) {
if (iequals(child.name, name)) {
if (it != std::string_view::npos) {
return child.resolve_path(path.substr(it + 1));
}
auto result = this->entries.find(name);
if (result == this->entries.end()) {
return nullptr;
}

return &child;
}
if (it != std::string_view::npos) {
return (*result).resolve_path(path.substr(it + 1));
}

return nullptr;
return &*result;
}

vdf_entry* vdf_file::find_entry(std::string_view name) {
for (auto& entry : entries) {
if (iequals(entry.name, name)) {
return &entry;
} else if (auto* v = entry.find_child(name); v != nullptr) {
return v;
auto result = this->entries.find(name);
if (result == this->entries.end()) {
// recurse the search
vdf_entry* child;

for (const auto& entry : entries) {
if ((child = const_cast<vdf_entry*>(entry.find_child(name)), child != nullptr)) {
return child;
}
}

return nullptr;
}

return nullptr;
return const_cast<vdf_entry*>(&*result);
}

vdf_file vdf_file::open(const std::filesystem::path& path) {
Expand All @@ -206,42 +238,36 @@ namespace phoenix {
vdf.header = vdf_header::read(buf);
buf.position(vdf.header.catalog_offset);

vdf_entry* entry = nullptr;
const vdf_entry* entry = nullptr;
do {
entry = &vdf.entries.emplace_back(vdf_entry::read(buf, vdf.header.catalog_offset));
entry = &*std::get<0>(vdf.entries.insert(vdf_entry::read(buf, vdf.header.catalog_offset)));
} while (!entry->is_last());

return vdf;
}

void vdf_file::merge(const vdf_file& file, bool override_existing) {
for (const auto& child : file.entries) {
bool child_found = false;

for (auto it = entries.begin(); it != entries.end(); ++it) {
if (phoenix::iequals(it->name, child.name)) {
if (child.is_file() || it->is_file()) {
// If an entry with the same name is found and either is a file,
// replace the entry with the new one.
if (override_existing) {
entries.erase(it);
entries.push_back(child);
}
} else {
// Otherwise, the entry is a directory, so we just continue the merge.
for (const auto& sub_child : child.children) {
it->merge(sub_child, override_existing);
}
auto result = this->entries.find(child.name);
if (result == this->entries.end()) {
// If no matching entry was found, this is a new one.
// Just add it to the children of this entry.
entries.insert(child);
} else {
if (child.is_file() || (*result).is_file()) {
// If an entry with the same name is found and either is a file,
// replace the entry with the new one.
if (override_existing) {
entries.erase(result);
entries.insert(child);
}
} else {
// Otherwise, the entry is a directory, so we just continue the merge.
for (const auto& sub_child : child.children) {
const_cast<vdf_entry&>(*result).merge(sub_child, override_existing);
}

child_found = true;
}
}

if (!child_found) {
entries.push_back(child);
}
}
}

} // namespace phoenix

0 comments on commit 604b61b

Please sign in to comment.