Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Memory stats #2162

Merged
merged 6 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion helio
75 changes: 75 additions & 0 deletions src/core/size_tracking_channel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//

#pragma once

#include <atomic>

#include "core/fibers.h"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i used core fibers because I migrated from boost fibers. let's just reference the correct include file directly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would mean using util::fb2::SimpleChannel instead of dfly, I thought that was the purpose of that file?


namespace dfly {

// Channel with a SimpleQueue-like interface that additionally tracks the total
// heap size of the Ts it currently owns.
// Its TryPush() API is slightly less efficient than a variadic emplace-style one,
// because a T must be fully constructed even when it ends up not being pushed.
// T must expose a .size() method returning the heap-allocated size of T, excluding
// anything counted in sizeof(T). This could be generalized in the future.
template <typename T, typename Queue = folly::ProducerConsumerQueue<T>> class SizeTrackingChannel {
 public:
  SizeTrackingChannel(size_t n, unsigned num_producers = 1) : queue_(n, num_producers) {
  }

  // Here and below we take a fully built T (rather than variadic args) because its
  // size must be known in case the element is actually added.
  void Push(T t) noexcept {
    // Capture the size before the move; t is unusable afterwards.
    const size_t heap_size = t.size();
    size_.fetch_add(heap_size, std::memory_order_relaxed);
    queue_.Push(std::move(t));
  }

  bool TryPush(T t) noexcept {
    const size_t heap_size = t.size();
    if (!queue_.TryPush(std::move(t))) {
      return false;
    }
    size_.fetch_add(heap_size, std::memory_order_relaxed);
    return true;
  }

  bool Pop(T& dest) {
    if (!queue_.Pop(dest)) {
      return false;
    }
    size_.fetch_sub(dest.size(), std::memory_order_relaxed);
    return true;
  }

  bool TryPop(T& dest) {
    if (!queue_.TryPop(dest)) {
      return false;
    }
    size_.fetch_sub(dest.size(), std::memory_order_relaxed);
    return true;
  }

  void StartClosing() {
    queue_.StartClosing();
  }

  bool IsClosing() const {
    return queue_.IsClosing();
  }

  // Fixed queue storage plus the tracked heap size of all owned elements.
  size_t GetSize() const {
    return queue_.Capacity() * sizeof(T) + size_.load(std::memory_order_relaxed);
  }

 private:
  SimpleChannel<T, Queue> queue_;
  std::atomic<size_t> size_{0};
};

}  // namespace dfly
4 changes: 4 additions & 0 deletions src/facade/dragonfly_connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ class Connection : public util::Connection {
return name_;
}

// Returns the memory accounting of this connection's I/O buffer, as reported
// by base::IoBuf::MemoryUsage.
base::IoBuf::MemoryUsage GetMemoryUsage() const {
return io_buf_.GetMemoryUsage();
}

ConnectionContext* cntx();

// Requests that at some point, this connection will be migrated to `dest` thread.
Expand Down
106 changes: 105 additions & 1 deletion src/server/memory_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
#include <absl/strings/str_cat.h>
#include <mimalloc.h>

#include "base/io_buf.h"
#include "facade/dragonfly_connection.h"
#include "facade/error.h"
#include "server/engine_shard_set.h"
#include "server/server_family.h"
#include "server/server_state.h"
#include "server/snapshot.h"

using namespace std;
using namespace facade;
Expand Down Expand Up @@ -75,7 +79,7 @@ size_t MemoryUsage(PrimeIterator it) {

} // namespace

// `owner` is retained so subcommands (e.g. STATS) can reach server-wide state;
// `cntx` is the connection the reply is sent to.
MemoryCmd::MemoryCmd(ServerFamily* owner, ConnectionContext* cntx) : cntx_(cntx), owner_(owner) {
}

void MemoryCmd::Run(CmdArgList args) {
Expand All @@ -84,6 +88,8 @@ void MemoryCmd::Run(CmdArgList args) {
if (sub_cmd == "HELP") {
string_view help_arr[] = {
"MEMORY <subcommand> [<arg> ...]. Subcommands are:",
"STATS",
" Shows breakdown of memory.",
"MALLOC-STATS [BACKING] [thread-id]",
" Show malloc stats for a heap residing in specified thread-id. 0 by default.",
" If BACKING is specified, show stats for the backing heap.",
Expand All @@ -95,6 +101,10 @@ void MemoryCmd::Run(CmdArgList args) {
return (*cntx_)->SendSimpleStrArr(help_arr);
};

if (sub_cmd == "STATS") {
return Stats();
}

if (sub_cmd == "USAGE" && args.size() > 1) {
string_view key = ArgS(args, 1);
return Usage(key);
Expand Down Expand Up @@ -143,6 +153,100 @@ void MemoryCmd::Run(CmdArgList args) {
return (*cntx_)->SendError(err, kSyntaxErrType);
}

namespace {

// Aggregated per-server connection memory breakdown, split between regular
// client connections and replication connections.
struct ConnectionMemoryUsage {
size_t connection_count = 0;
// Heap bytes held by queued pipeline commands (exec_info body).
size_t pipelined_bytes = 0;
base::IoBuf::MemoryUsage connections_memory;

size_t replication_connection_count = 0;
base::IoBuf::MemoryUsage replication_memory;
};

// Walks the connections of all listeners and aggregates their memory usage.
// The callback runs on the connections' threads, hence the mutex guarding the
// shared accumulator.
ConnectionMemoryUsage GetConnectionMemoryUsage(ServerFamily* server) {
  Mutex mu;
  ConnectionMemoryUsage mem ABSL_GUARDED_BY(mu);

  for (auto* listener : server->GetListeners()) {
    listener->TraverseConnections([&](unsigned thread_index, util::Connection* conn) {
      auto* dfly_conn = static_cast<facade::Connection*>(conn);
      auto* cntx = static_cast<ConnectionContext*>(dfly_conn->cntx());
      lock_guard lock(mu);

      // cntx may be null; it must be checked before any dereference. A
      // connection without a context is counted as a regular connection.
      if (cntx == nullptr || cntx->replication_flow == nullptr) {
        mem.connection_count++;
        mem.connections_memory += dfly_conn->GetMemoryUsage();
      } else {
        mem.replication_connection_count++;
        mem.replication_memory += dfly_conn->GetMemoryUsage();
      }

      if (cntx != nullptr) {
        // Account both the vector capacity and the heap memory of each stored command.
        mem.pipelined_bytes += cntx->conn_state.exec_info.body.capacity() * sizeof(StoredCmd);
        for (const auto& pipeline : cntx->conn_state.exec_info.body) {
          mem.pipelined_bytes += pipeline.UsedHeapMemory();
        }
      }
    });
  }

  return mem;
}

// Appends the standard per-category IoBuf memory metrics to `stats`, prefixing
// each metric name with `prefix`. `total` is reported as "<prefix>.total_bytes".
void PushMemoryUsageStats(const base::IoBuf::MemoryUsage& mem, string_view prefix, size_t total,
                          vector<pair<string, size_t>>* stats) {
  stats->emplace_back(absl::StrCat(prefix, ".total_bytes"), total);
  stats->emplace_back(absl::StrCat(prefix, ".consumed_bytes"), mem.consumed);
  stats->emplace_back(absl::StrCat(prefix, ".pending_input_bytes"), mem.input_length);
  stats->emplace_back(absl::StrCat(prefix, ".pending_output_bytes"), mem.append_length);
}

} // namespace

// Implements `MEMORY STATS`: replies with a map of memory-related metrics
// covering RSS, data, connection, replication and serialization memory.
void MemoryCmd::Stats() {
  vector<pair<string, size_t>> stats;
  stats.reserve(25);

  // RSS
  stats.push_back({"rss_bytes", rss_mem_current.load(memory_order_relaxed)});
  stats.push_back({"rss_peak_bytes", rss_mem_peak.load(memory_order_relaxed)});

  // Used by DbShards and DashTable
  stats.push_back({"data_bytes", used_mem_current.load(memory_order_relaxed)});
  stats.push_back({"data_peak_bytes", used_mem_peak.load(memory_order_relaxed)});

  ConnectionMemoryUsage connection_memory = GetConnectionMemoryUsage(owner_);

  // Connection stats, excluding replication connections
  stats.push_back({"connections.count", connection_memory.connection_count});
  PushMemoryUsageStats(
      connection_memory.connections_memory, "connections",
      connection_memory.connections_memory.GetTotalSize() + connection_memory.pipelined_bytes,
      &stats);
  stats.push_back({"connections.pipeline_bytes", connection_memory.pipelined_bytes});

  // Replication connection stats
  stats.push_back(
      {"replication.connections_count", connection_memory.replication_connection_count});
  PushMemoryUsageStats(connection_memory.replication_memory, "replication",
                       connection_memory.replication_memory.GetTotalSize(), &stats);

  // Sum the thread-local serialization buffers across all shard threads.
  atomic<size_t> serialization_memory = 0;
  shard_set->pool()->AwaitFiberOnAll(
      [&](auto*) { serialization_memory.fetch_add(SliceSnapshot::GetThreadLocalMemoryUsage()); });

  // Serialization stats, including both replication-related serialization and saving to RDB files.
  stats.push_back({"serialization", serialization_memory.load()});

  // Reply as a flat MAP of metric-name -> value.
  (*cntx_)->StartCollection(stats.size(), RedisReplyBuilder::MAP);
  for (const auto& [k, v] : stats) {
    (*cntx_)->SendBulkString(k);
    (*cntx_)->SendLong(v);
  }
}

void MemoryCmd::Usage(std::string_view key) {
ShardId sid = Shard(key, shard_set->size());
ssize_t memory_usage = shard_set->pool()->at(sid)->AwaitBrief([key, this]() -> ssize_t {
Expand Down
2 changes: 2 additions & 0 deletions src/server/memory_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ class MemoryCmd {
void Run(CmdArgList args);

private:
void Stats();
void Usage(std::string_view key);

ConnectionContext* cntx_;
ServerFamily* owner_;
};

} // namespace dfly
30 changes: 6 additions & 24 deletions src/server/rdb_save.cc
Original file line number Diff line number Diff line change
Expand Up @@ -948,10 +948,6 @@ class RdbSaver::Impl {
// Multi entry compression is available only on df snapshot, this will
// make snapshot size smaller and operation faster.
CompressionMode compression_mode_;

struct Stats {
std::atomic<size_t> pulled_bytes{0};
} stats_;
};

// We pass K=sz to say how many producers are pushing data in order to maintain
Expand Down Expand Up @@ -1041,26 +1037,19 @@ error_code RdbSaver::Impl::ConsumeChannel(const Cancellation* cll) {
continue;

DVLOG(2) << "Pulled " << record->id;
stats_.pulled_bytes.fetch_add(record->value.size(), memory_order_relaxed);

io_error = sink_->Write(io::Buffer(record->value));
if (io_error) {
break;
}
} while ((record = records_popper.TryPop()));
} // while (records_popper.Pop())

size_t pushed_bytes = 0;
for (auto& ptr : shard_snapshots_) {
ptr->Join();
pushed_bytes += ptr->pushed_bytes();
}

DCHECK(!record.has_value() || !channel_.TryPop(*record));

VLOG(1) << "Channel pulled bytes: " << stats_.pulled_bytes.load(memory_order_relaxed)
<< " pushed bytes: " << pushed_bytes;

return io_error;
}

Expand Down Expand Up @@ -1103,31 +1092,24 @@ void RdbSaver::Impl::Cancel() {
// This function is called from connection thread when info command is invoked.
// All accessed variables must be thread safe, as they are fetched outside the rdb saver thread.
size_t RdbSaver::Impl::GetTotalBuffersSize() const {
std::atomic<size_t> pushed_bytes{0};
std::atomic<size_t> channel_bytes{0};
std::atomic<size_t> serializer_bytes{0};
size_t pulled_bytes = stats_.pulled_bytes.load(memory_order_relaxed);

auto cb = [this, &pushed_bytes, &serializer_bytes](ShardId sid) {
auto cb = [this, &channel_bytes, &serializer_bytes](ShardId sid) {
auto& snapshot = shard_snapshots_[sid];
pushed_bytes.fetch_add(snapshot->pushed_bytes(), memory_order_relaxed);
channel_bytes.fetch_add(snapshot->GetTotalChannelCapacity(), memory_order_relaxed);
serializer_bytes.store(snapshot->GetTotalBufferCapacity(), memory_order_relaxed);
Comment on lines +1098 to 1101
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think we can do it without hops now (if the state doesn't change in-between) 🤔 But I think it doesn't matter for a monitoring command

};

if (shard_snapshots_.size() == 1) {
cb(0);
} else {
shard_set->RunBriefInParallel([&](EngineShard* es) { cb(es->shard_id()); });
// Note that pushed bytes and pulled bytes values are fetched at different times, as we need to
// calc the pushed bytes using RunBriefInParallel.
// pulled bytes might be higher untill we return here from RunBriefInParallel.
}
size_t total_bytes = pushed_bytes.load(memory_order_relaxed) +
serializer_bytes.load(memory_order_relaxed) - pulled_bytes;
VLOG(2) << "pushed_bytes:" << pushed_bytes.load(memory_order_relaxed)
<< " serializer_bytes: " << serializer_bytes.load(memory_order_relaxed)
<< " pulled_bytes: " << pulled_bytes << " total_bytes:" << total_bytes;

return total_bytes;
VLOG(2) << "channel_bytes:" << channel_bytes.load(memory_order_relaxed)
<< " serializer_bytes: " << serializer_bytes.load(memory_order_relaxed);
return channel_bytes.load(memory_order_relaxed) + serializer_bytes.load(memory_order_relaxed);
}

void RdbSaver::Impl::FillFreqMap(RdbTypeFreqMap* dest) const {
Expand Down
4 changes: 4 additions & 0 deletions src/server/server_family.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ class ServerFamily {
return dfly_cmd_.get();
}

// Read-only access to the server's listeners; used e.g. by MEMORY STATS to
// traverse all connections.
const std::vector<facade::Listener*>& GetListeners() const {
return listeners_;
}

bool HasReplica() const;
std::optional<Replica::Info> GetReplicaInfo() const;
std::string GetReplicaMasterId() const;
Expand Down
20 changes: 18 additions & 2 deletions src/server/snapshot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,26 @@ using namespace std;
using namespace util;
using namespace chrono_literals;

namespace {
// Per-thread registry of live SliceSnapshots, maintained by the SliceSnapshot
// ctor/dtor so memory usage can be summed per thread.
thread_local absl::flat_hash_set<SliceSnapshot*> tl_slice_snapshots;
} // namespace

SliceSnapshot::SliceSnapshot(DbSlice* slice, RecordChannel* dest, CompressionMode compression_mode)
: db_slice_(slice), dest_(dest), compression_mode_(compression_mode) {
db_array_ = slice->databases();
// Register with the per-thread registry for memory accounting.
tl_slice_snapshots.insert(this);
}

SliceSnapshot::~SliceSnapshot() {
// Unregister from the per-thread memory-accounting registry.
tl_slice_snapshots.erase(this);
}

size_t SliceSnapshot::GetThreadLocalMemoryUsage() {
size_t mem = 0;
for (SliceSnapshot* snapshot : tl_slice_snapshots) {
mem += snapshot->GetTotalBufferCapacity() + snapshot->GetTotalChannelCapacity();
}
return mem;
}

void SliceSnapshot::Start(bool stream_journal, const Cancellation* cll) {
Expand Down Expand Up @@ -274,8 +288,6 @@ bool SliceSnapshot::PushSerializedToChannel(bool force) {
if (serialized == 0)
return 0;

stats_.pushed_bytes += serialized;

auto id = rec_id_++;
DVLOG(2) << "Pushed " << id;
DbRecord db_rec{.id = id, .value = std::move(sfile.val)};
Expand Down Expand Up @@ -335,4 +347,8 @@ size_t SliceSnapshot::GetTotalBufferCapacity() const {
return serializer_->GetTotalBufferCapacity();
}

// Memory used by the destination record channel, as reported by its GetSize().
size_t SliceSnapshot::GetTotalChannelCapacity() const {
return dest_->GetSize();
}

} // namespace dfly
Loading