From 604b61bd889fa8330d9d30c2149d21a6bf32cb25 Mon Sep 17 00:00:00 2001 From: Luis Michaelis Date: Mon, 7 Nov 2022 17:15:21 +0100 Subject: [PATCH] [#12] vdfs: improve VDF lookup times drastically A basic benchmark shows that using a sorted `std::set` instead of a raw, unsorted `std::vector` improves entry lookup times as well as VDF merge times by up to 90x. While this commit does introduce a bunch of `const_cast`s, they are mostly guarded by deprecation warnings and unless `vdf_entry::name` is touched, they should still be safe. --- include/phoenix/phoenix.hh | 9 ++ include/phoenix/vdfs.hh | 21 +++- source/phoenix.cc | 6 ++ source/vdfs.cc | 200 +++++++++++++++++++++---------------- 4 files changed, 144 insertions(+), 92 deletions(-) diff --git a/include/phoenix/phoenix.hh b/include/phoenix/phoenix.hh index fb1ab0a2..ee0200e2 100644 --- a/include/phoenix/phoenix.hh +++ b/include/phoenix/phoenix.hh @@ -53,6 +53,15 @@ namespace phoenix { /// \return ``true`` if both strings are equal when ignoring case. bool iequals(std::string_view a, std::string_view b); + /// \brief Tests whether \p a is lexicographically less than \p b. + /// + /// Internally, uses std::tolower to compare the strings character by character. + /// + /// \param a A string. + /// \param b Another string. + /// \return ``true`` if \p a is lexicographically less than \p b. + bool icompare(std::string_view a, std::string_view b); + /// \brief A basic datetime structure used by the *ZenGin*. struct date { /// \brief Parses a date from a buffer. diff --git a/include/phoenix/vdfs.hh b/include/phoenix/vdfs.hh index 244f7e70..69120b44 100644 --- a/include/phoenix/vdfs.hh +++ b/include/phoenix/vdfs.hh @@ -5,9 +5,9 @@ #include #include +#include #include #include -#include namespace phoenix { static constexpr std::string_view VDF_SIGNATURE_G1 = "PSVDSC_V2.00\r\n\r\n"; @@ -79,6 +79,17 @@ namespace phoenix { std::uint32_t version {VDF_VERSION}; }; + class vdf_entry; + + struct vdf_entry_comparator { + public: + using is_transparent = std::true_type; + + bool operator()(const vdf_entry& a, const vdf_entry& b) const; + bool operator()(const vdf_entry& a, std::string_view b) const; + bool operator()(std::string_view a, const vdf_entry& b) const; + }; + /// \brief Represents an entry of a VDF. class vdf_entry { public: @@ -108,7 +119,7 @@ namespace phoenix { /// \brief Searches the entry for the first child with the given name. /// \param name The name of the child to search for. /// \return The child with the give name or `nullptr` if no entry was found. - vdf_entry* find_child(std::string_view name); + [[deprecated("mutating vdf_entry children is broken!")]] vdf_entry* find_child(std::string_view name); /// \brief Merges the given VDF entry into this one. /// \param itm The entry to merge. @@ -142,7 +153,7 @@ namespace phoenix { std::string name; /// \brief A list of child entries of the entry. - std::vector children {}; + std::set children {}; /// \brief The offset of the entry's data in the VDF. std::uint32_t offset {0}; @@ -191,7 +202,7 @@ namespace phoenix { /// \brief Searches the VDF file for the first entry with the given name. /// \param name The name of the entry to search for. /// \return The entry with the give name or `nullptr` if no entry was found. - vdf_entry* find_entry(std::string_view name); + [[deprecated("mutating vdf_entry children is broken!")]] vdf_entry* find_entry(std::string_view name); /// \brief Merges the given VDF file into this one. /// \param itm The file to merge. @@ -200,7 +211,7 @@ namespace phoenix { public: /// \brief A list of root entries in the VDF file. - std::vector entries; + std::set entries; /// \brief The header data of the VDF file. vdf_header header; diff --git a/source/phoenix.cc b/source/phoenix.cc index 6d538d1a..7e427da4 100644 --- a/source/phoenix.cc +++ b/source/phoenix.cc @@ -59,6 +59,12 @@ namespace phoenix { }); } + bool icompare(std::string_view a, std::string_view b) { + return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end(), [](char a, char b) { + return std::tolower(a) < std::tolower(b); + }); + } + date date::parse(buffer& buf) { auto dt = date {buf.get_uint(), buf.get_ushort(), diff --git a/source/vdfs.cc b/source/vdfs.cc index 1818305f..c6d217f0 100644 --- a/source/vdfs.cc +++ b/source/vdfs.cc @@ -30,6 +30,18 @@ namespace phoenix { return dos; } + bool vdf_entry_comparator::operator()(const vdf_entry& a, const vdf_entry& b) const { + return icompare(a.name, b.name); + } + + bool vdf_entry_comparator::operator()(const vdf_entry& a, std::string_view b) const { + return icompare(a.name, b); + } + + bool vdf_entry_comparator::operator()(std::string_view a, const vdf_entry& b) const { + return icompare(a, b.name); + } + vdf_header::vdf_header(std::string_view comment, std::time_t timestamp) : comment(comment), timestamp(timestamp) {} vdf_header vdf_header::read(buffer& in) { @@ -58,41 +70,52 @@ namespace phoenix { auto it = path.find('/'); auto name = path.substr(0, it); - for (const auto& child : children) { - if (iequals(child.name, name)) { - if (it != std::string_view::npos) { - return child.resolve_path(path.substr(it + 1)); - } + auto result = this->children.find(name); + if (result == this->children.end()) { + return nullptr; + } - return &child; - } + if (it != std::string_view::npos) { + return (*result).resolve_path(path.substr(it + 1)); } - return nullptr; + return &*result; } const vdf_entry* vdf_entry::find_child(std::string_view name) const { - for (const auto& child : children) { - if (iequals(child.name, name)) { - return &child; - } else if (const auto* v = child.find_child(name); v != nullptr) { - return v; + auto result = this->children.find(name); + if (result == this->children.end()) { + // recurse the search + const vdf_entry* child; + + for (const auto& entry : children) { + if ((child = entry.find_child(name), child != nullptr)) { + return child; + } } + + return nullptr; } - return nullptr; + return &*result; } vdf_entry* vdf_entry::find_child(std::string_view name) { - for (auto& child : children) { - if (iequals(child.name, name)) { - return &child; - } else if (auto* v = child.find_child(name); v != nullptr) { - return v; + auto result = this->children.find(name); + if (result == this->children.end()) { + // recurse the search + vdf_entry* child; + + for (const auto& entry : children) { + if ((child = const_cast(entry.find_child(name)), child != nullptr)) { + return child; + } } + + return nullptr; } - return nullptr; + return const_cast(&*result); } vdf_entry vdf_entry::read(buffer& in, std::uint32_t catalog_offset) { @@ -112,9 +135,9 @@ namespace phoenix { auto self_offset = in.position(); in.position(catalog_offset + entry.offset * vdf_entry::packed_size); - vdf_entry* child = nullptr; + const vdf_entry* child = nullptr; do { - child = &entry.children.emplace_back(read(in, catalog_offset)); + child = &*std::get<0>(entry.children.insert(read(in, catalog_offset))); } while (!child->is_last()); in.position(self_offset); @@ -126,73 +149,82 @@ namespace phoenix { } void vdf_entry::merge(const vdf_entry& itm, bool override_existing) { - for (auto it = children.begin(); it != children.end(); ++it) { - if (phoenix::iequals(it->name, itm.name)) { - if (itm.is_file() || it->is_file()) { - if (!override_existing) { - return; - } + auto result = this->children.find(name); + if (result == this->children.end()) { + // If no matching entry was found, this is a new one. + // Just add it to the children of this entry. + children.insert(itm); + } else { + if (itm.is_file() || (*result).is_file()) { + if (!override_existing) { + return; + } - // If an entry with the same name is found and either is a file, - // replace the entry with the new one. - children.erase(it); - children.push_back(itm); - } else { - // Otherwise, the entry is a directory, so we just continue the merge. - for (const auto& child : itm.children) { - it->merge(child, override_existing); - } + // If an entry with the same name is found and either is a file, + // replace the entry with the new one. + children.erase(result); + children.insert(itm); + } else { + // Otherwise, the entry is a directory, so we just continue the merge. + for (const auto& child : itm.children) { + const_cast(*result).merge(child, override_existing); } - return; } } - - // If no matching entry was found, this is a new one. - // Just add it to the children of this entry. - children.push_back(itm); } vdf_file::vdf_file(std::string_view comment, std::time_t timestamp) : header(comment, timestamp) {} const vdf_entry* vdf_file::find_entry(std::string_view name) const { - for (const auto& entry : entries) { - if (iequals(entry.name, name)) { - return &entry; - } else if (const auto* v = entry.find_child(name); v != nullptr) { - return v; + auto result = this->entries.find(name); + if (result == this->entries.end()) { + // recurse the search + vdf_entry* child; + + for (const auto& entry : entries) { + if ((child = const_cast(entry.find_child(name)), child != nullptr)) { + return child; + } } + + return nullptr; } - return nullptr; + return &*result; } const vdf_entry* vdf_file::resolve_path(std::string_view path) const { auto it = path.find('/'); auto name = path.substr(0, it); - for (const auto& child : entries) { - if (iequals(child.name, name)) { - if (it != std::string_view::npos) { - return child.resolve_path(path.substr(it + 1)); - } + auto result = this->entries.find(name); + if (result == this->entries.end()) { + return nullptr; + } - return &child; - } + if (it != std::string_view::npos) { + return (*result).resolve_path(path.substr(it + 1)); } - return nullptr; + return &*result; } vdf_entry* vdf_file::find_entry(std::string_view name) { - for (auto& entry : entries) { - if (iequals(entry.name, name)) { - return &entry; - } else if (auto* v = entry.find_child(name); v != nullptr) { - return v; + auto result = this->entries.find(name); + if (result == this->entries.end()) { + // recurse the search + vdf_entry* child; + + for (const auto& entry : entries) { + if ((child = const_cast(entry.find_child(name)), child != nullptr)) { + return child; + } } + + return nullptr; } - return nullptr; + return const_cast(&*result); } vdf_file vdf_file::open(const std::filesystem::path& path) { @@ -206,9 +238,9 @@ namespace phoenix { vdf.header = vdf_header::read(buf); buf.position(vdf.header.catalog_offset); - vdf_entry* entry = nullptr; + const vdf_entry* entry = nullptr; do { - entry = &vdf.entries.emplace_back(vdf_entry::read(buf, vdf.header.catalog_offset)); + entry = &*std::get<0>(vdf.entries.insert(vdf_entry::read(buf, vdf.header.catalog_offset))); } while (!entry->is_last()); return vdf; @@ -216,32 +248,26 @@ namespace phoenix { void vdf_file::merge(const vdf_file& file, bool override_existing) { for (const auto& child : file.entries) { - bool child_found = false; - - for (auto it = entries.begin(); it != entries.end(); ++it) { - if (phoenix::iequals(it->name, child.name)) { - if (child.is_file() || it->is_file()) { - // If an entry with the same name is found and either is a file, - // replace the entry with the new one. - if (override_existing) { - entries.erase(it); - entries.push_back(child); - } - } else { - // Otherwise, the entry is a directory, so we just continue the merge. - for (const auto& sub_child : child.children) { - it->merge(sub_child, override_existing); - } + auto result = this->entries.find(child.name); + if (result == this->entries.end()) { + // If no matching entry was found, this is a new one. + // Just add it to the children of this entry. + entries.insert(child); + } else { + if (child.is_file() || (*result).is_file()) { + // If an entry with the same name is found and either is a file, + // replace the entry with the new one. + if (override_existing) { + entries.erase(result); + entries.insert(child); + } + } else { + // Otherwise, the entry is a directory, so we just continue the merge. + for (const auto& sub_child : child.children) { + const_cast(*result).merge(sub_child, override_existing); } - - child_found = true; } } - - if (!child_found) { - entries.push_back(child); - } } } - } // namespace phoenix