Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Memory stats #2162

Merged
merged 6 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion helio
75 changes: 75 additions & 0 deletions src/core/size_tracking_channel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//

#pragma once

#include <atomic>

#include "core/fibers.h"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i used core fibers because I migrated from boost fibers. let's just reference the correct include file directly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would mean using util::fb2::SimpleChannel instead of dfly, I thought that was the purpose of that file?


namespace dfly {

// Channel with a SimpleQueue-like interface that additionally tracks the total
// heap size of the Ts it currently owns.
// Its TryPush() API is slightly less efficient than a variadic emplace-style one,
// because a T must be fully constructed even when it ends up not being pushed.
// T must expose a .size() method returning the heap-allocated size of T, excluding
// anything counted in sizeof(T). This could be generalized in the future.
template <typename T, typename Queue = folly::ProducerConsumerQueue<T>> class SizeTrackingChannel {
 public:
  SizeTrackingChannel(size_t n, unsigned num_producers = 1) : queue_(n, num_producers) {
  }

  // Here and below we take a fully built T (rather than variadic args) because its
  // size must be known in case the element is actually added.
  void Push(T t) noexcept {
    // Capture the size before the move; t is unusable afterwards.
    const size_t heap_size = t.size();
    size_.fetch_add(heap_size, std::memory_order_relaxed);
    queue_.Push(std::move(t));
  }

  bool TryPush(T t) noexcept {
    const size_t heap_size = t.size();
    if (!queue_.TryPush(std::move(t))) {
      return false;
    }
    size_.fetch_add(heap_size, std::memory_order_relaxed);
    return true;
  }

  bool Pop(T& dest) {
    if (!queue_.Pop(dest)) {
      return false;
    }
    size_.fetch_sub(dest.size(), std::memory_order_relaxed);
    return true;
  }

  bool TryPop(T& dest) {
    if (!queue_.TryPop(dest)) {
      return false;
    }
    size_.fetch_sub(dest.size(), std::memory_order_relaxed);
    return true;
  }

  void StartClosing() {
    queue_.StartClosing();
  }

  bool IsClosing() const {
    return queue_.IsClosing();
  }

  // Fixed queue storage plus the tracked heap size of all owned elements.
  size_t GetSize() const {
    return queue_.Capacity() * sizeof(T) + size_.load(std::memory_order_relaxed);
  }

 private:
  SimpleChannel<T, Queue> queue_;
  std::atomic<size_t> size_{0};
};

}  // namespace dfly
4 changes: 4 additions & 0 deletions src/facade/dragonfly_connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ class Connection : public util::Connection {
return name_;
}

// Returns the memory accounting of this connection's I/O buffer, as reported
// by base::IoBuf::MemoryUsage.
base::IoBuf::MemoryUsage GetMemoryUsage() const {
return io_buf_.GetMemoryUsage();
}

ConnectionContext* cntx();

// Requests that at some point, this connection will be migrated to `dest` thread.
Expand Down
106 changes: 105 additions & 1 deletion src/server/memory_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
#include <absl/strings/str_cat.h>
#include <mimalloc.h>

#include "base/io_buf.h"
#include "facade/dragonfly_connection.h"
#include "facade/error.h"
#include "server/engine_shard_set.h"
#include "server/server_family.h"
#include "server/server_state.h"
#include "server/snapshot.h"

using namespace std;
using namespace facade;
Expand Down Expand Up @@ -75,7 +79,7 @@ size_t MemoryUsage(PrimeIterator it) {

} // namespace

// `owner` is retained so subcommands (e.g. STATS) can reach server-wide state;
// `cntx` is the connection the reply is sent to.
MemoryCmd::MemoryCmd(ServerFamily* owner, ConnectionContext* cntx) : cntx_(cntx), owner_(owner) {
}

void MemoryCmd::Run(CmdArgList args) {
Expand All @@ -84,6 +88,8 @@ void MemoryCmd::Run(CmdArgList args) {
if (sub_cmd == "HELP") {
string_view help_arr[] = {
"MEMORY <subcommand> [<arg> ...]. Subcommands are:",
"STATS",
" Shows breakdown of memory.",
"MALLOC-STATS [BACKING] [thread-id]",
" Show malloc stats for a heap residing in specified thread-id. 0 by default.",
" If BACKING is specified, show stats for the backing heap.",
Expand All @@ -95,6 +101,10 @@ void MemoryCmd::Run(CmdArgList args) {
return (*cntx_)->SendSimpleStrArr(help_arr);
};

if (sub_cmd == "STATS") {
return Stats();
}

if (sub_cmd == "USAGE" && args.size() > 1) {
string_view key = ArgS(args, 1);
return Usage(key);
Expand Down Expand Up @@ -143,6 +153,100 @@ void MemoryCmd::Run(CmdArgList args) {
return (*cntx_)->SendError(err, kSyntaxErrType);
}

namespace {

// Aggregated per-server connection memory breakdown, split between regular
// client connections and replication connections.
struct ConnectionMemoryUsage {
size_t connection_count = 0;
// Heap bytes held by queued pipeline commands (exec_info body).
size_t pipelined_bytes = 0;
base::IoBuf::MemoryUsage connections_memory;

size_t replication_connection_count = 0;
base::IoBuf::MemoryUsage replication_memory;
};

// Walks the connections of all listeners and aggregates their memory usage.
// The callback runs on the connections' threads, hence the mutex guarding the
// shared accumulator.
ConnectionMemoryUsage GetConnectionMemoryUsage(ServerFamily* server) {
  Mutex mu;
  ConnectionMemoryUsage mem ABSL_GUARDED_BY(mu);

  for (auto* listener : server->GetListeners()) {
    listener->TraverseConnections([&](unsigned thread_index, util::Connection* conn) {
      auto* dfly_conn = static_cast<facade::Connection*>(conn);
      auto* cntx = static_cast<ConnectionContext*>(dfly_conn->cntx());
      lock_guard lock(mu);

      // cntx may be null; it must be checked before any dereference. A
      // connection without a context is counted as a regular connection.
      if (cntx == nullptr || cntx->replication_flow == nullptr) {
        mem.connection_count++;
        mem.connections_memory += dfly_conn->GetMemoryUsage();
      } else {
        mem.replication_connection_count++;
        mem.replication_memory += dfly_conn->GetMemoryUsage();
      }

      if (cntx != nullptr) {
        // Account both the vector capacity and the heap memory of each stored command.
        mem.pipelined_bytes += cntx->conn_state.exec_info.body.capacity() * sizeof(StoredCmd);
        for (const auto& pipeline : cntx->conn_state.exec_info.body) {
          mem.pipelined_bytes += pipeline.UsedHeapMemory();
        }
      }
    });
  }

  return mem;
}

// Appends the standard per-category IoBuf memory metrics to `stats`, prefixing
// each metric name with `prefix`. `total` is reported as "<prefix>.total_bytes".
void PushMemoryUsageStats(const base::IoBuf::MemoryUsage& mem, string_view prefix, size_t total,
                          vector<pair<string, size_t>>* stats) {
  stats->emplace_back(absl::StrCat(prefix, ".total_bytes"), total);
  stats->emplace_back(absl::StrCat(prefix, ".consumed_bytes"), mem.consumed);
  stats->emplace_back(absl::StrCat(prefix, ".pending_input_bytes"), mem.input_length);
  stats->emplace_back(absl::StrCat(prefix, ".pending_output_bytes"), mem.append_length);
}

} // namespace

// Implements `MEMORY STATS`: replies with a map of memory-related metrics
// covering RSS, data, connection, replication and serialization memory.
void MemoryCmd::Stats() {
  vector<pair<string, size_t>> stats;
  stats.reserve(25);

  // RSS
  stats.push_back({"rss_bytes", rss_mem_current.load(memory_order_relaxed)});
  stats.push_back({"rss_peak_bytes", rss_mem_peak.load(memory_order_relaxed)});

  // Used by DbShards and DashTable
  stats.push_back({"data_bytes", used_mem_current.load(memory_order_relaxed)});
  stats.push_back({"data_peak_bytes", used_mem_peak.load(memory_order_relaxed)});

  ConnectionMemoryUsage connection_memory = GetConnectionMemoryUsage(owner_);

  // Connection stats, excluding replication connections
  stats.push_back({"connections.count", connection_memory.connection_count});
  PushMemoryUsageStats(
      connection_memory.connections_memory, "connections",
      connection_memory.connections_memory.GetTotalSize() + connection_memory.pipelined_bytes,
      &stats);
  stats.push_back({"connections.pipeline_bytes", connection_memory.pipelined_bytes});

  // Replication connection stats
  stats.push_back(
      {"replication.connections_count", connection_memory.replication_connection_count});
  PushMemoryUsageStats(connection_memory.replication_memory, "replication",
                       connection_memory.replication_memory.GetTotalSize(), &stats);

  // Sum the thread-local serialization buffers across all shard threads.
  atomic<size_t> serialization_memory = 0;
  shard_set->pool()->AwaitFiberOnAll(
      [&](auto*) { serialization_memory.fetch_add(SliceSnapshot::GetThreadLocalMemoryUsage()); });

  // Serialization stats, including both replication-related serialization and saving to RDB files.
  stats.push_back({"serialization", serialization_memory.load()});

  // Reply as a flat MAP of metric-name -> value.
  (*cntx_)->StartCollection(stats.size(), RedisReplyBuilder::MAP);
  for (const auto& [k, v] : stats) {
    (*cntx_)->SendBulkString(k);
    (*cntx_)->SendLong(v);
  }
}

void MemoryCmd::Usage(std::string_view key) {
ShardId sid = Shard(key, shard_set->size());
ssize_t memory_usage = shard_set->pool()->at(sid)->AwaitBrief([key, this]() -> ssize_t {
Expand Down
2 changes: 2 additions & 0 deletions src/server/memory_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ class MemoryCmd {
void Run(CmdArgList args);

private:
void Stats();
void Usage(std::string_view key);

ConnectionContext* cntx_;
ServerFamily* owner_;
};

} // namespace dfly
30 changes: 6 additions & 24 deletions src/server/rdb_save.cc
Original file line number Diff line number Diff line change
Expand Up @@ -948,10 +948,6 @@ class RdbSaver::Impl {
// Multi entry compression is available only on df snapshot, this will
// make snapshot size smaller and operation faster.
CompressionMode compression_mode_;

struct Stats {
std::atomic<size_t> pulled_bytes{0};
} stats_;
};

// We pass K=sz to say how many producers are pushing data in order to maintain
Expand Down Expand Up @@ -1041,26 +1037,19 @@ error_code RdbSaver::Impl::ConsumeChannel(const Cancellation* cll) {
continue;

DVLOG(2) << "Pulled " << record->id;
stats_.pulled_bytes.fetch_add(record->value.size(), memory_order_relaxed);

io_error = sink_->Write(io::Buffer(record->value));
if (io_error) {
break;
}
} while ((record = records_popper.TryPop()));
} // while (records_popper.Pop())

size_t pushed_bytes = 0;
for (auto& ptr : shard_snapshots_) {
ptr->Join();
pushed_bytes += ptr->pushed_bytes();
}

DCHECK(!record.has_value() || !channel_.TryPop(*record));

VLOG(1) << "Channel pulled bytes: " << stats_.pulled_bytes.load(memory_order_relaxed)
<< " pushed bytes: " << pushed_bytes;

return io_error;
}

Expand Down Expand Up @@ -1103,31 +1092,24 @@ void RdbSaver::Impl::Cancel() {
// This function is called from connection thread when info command is invoked.
// All accessed variables must be thread safe, as they are fetched outside the rdb saver thread.
size_t RdbSaver::Impl::GetTotalBuffersSize() const {
std::atomic<size_t> pushed_bytes{0};
std::atomic<size_t> channel_bytes{0};
std::atomic<size_t> serializer_bytes{0};
size_t pulled_bytes = stats_.pulled_bytes.load(memory_order_relaxed);

auto cb = [this, &pushed_bytes, &serializer_bytes](ShardId sid) {
auto cb = [this, &channel_bytes, &serializer_bytes](ShardId sid) {
auto& snapshot = shard_snapshots_[sid];
pushed_bytes.fetch_add(snapshot->pushed_bytes(), memory_order_relaxed);
channel_bytes.fetch_add(snapshot->GetTotalChannelCapacity(), memory_order_relaxed);
serializer_bytes.store(snapshot->GetTotalBufferCapacity(), memory_order_relaxed);
Comment on lines +1098 to 1101
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think we can do it without hops now (if the state doesn't change in-between) 🤔 But I think it doesn't matter for a monitoring command

};

if (shard_snapshots_.size() == 1) {
cb(0);
} else {
shard_set->RunBriefInParallel([&](EngineShard* es) { cb(es->shard_id()); });
// Note that pushed bytes and pulled bytes values are fetched at different times, as we need to
// calc the pushed bytes using RunBriefInParallel.
// pulled bytes might be higher untill we return here from RunBriefInParallel.
}
size_t total_bytes = pushed_bytes.load(memory_order_relaxed) +
serializer_bytes.load(memory_order_relaxed) - pulled_bytes;
VLOG(2) << "pushed_bytes:" << pushed_bytes.load(memory_order_relaxed)
<< " serializer_bytes: " << serializer_bytes.load(memory_order_relaxed)
<< " pulled_bytes: " << pulled_bytes << " total_bytes:" << total_bytes;

return total_bytes;
VLOG(2) << "channel_bytes:" << channel_bytes.load(memory_order_relaxed)
<< " serializer_bytes: " << serializer_bytes.load(memory_order_relaxed);
return channel_bytes.load(memory_order_relaxed) + serializer_bytes.load(memory_order_relaxed);
}

void RdbSaver::Impl::FillFreqMap(RdbTypeFreqMap* dest) const {
Expand Down
4 changes: 4 additions & 0 deletions src/server/server_family.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ class ServerFamily {
return dfly_cmd_.get();
}

// Read-only access to the server's listeners; used e.g. by MEMORY STATS to
// traverse all connections.
const std::vector<facade::Listener*>& GetListeners() const {
return listeners_;
}

bool HasReplica() const;
std::optional<Replica::Info> GetReplicaInfo() const;
std::string GetReplicaMasterId() const;
Expand Down
20 changes: 18 additions & 2 deletions src/server/snapshot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,26 @@ using namespace std;
using namespace util;
using namespace chrono_literals;

namespace {
// Per-thread registry of live SliceSnapshots, maintained by the SliceSnapshot
// ctor/dtor so memory usage can be summed per thread.
thread_local absl::flat_hash_set<SliceSnapshot*> tl_slice_snapshots;
} // namespace

SliceSnapshot::SliceSnapshot(DbSlice* slice, RecordChannel* dest, CompressionMode compression_mode)
: db_slice_(slice), dest_(dest), compression_mode_(compression_mode) {
db_array_ = slice->databases();
// Register with the per-thread registry for memory accounting.
tl_slice_snapshots.insert(this);
}

SliceSnapshot::~SliceSnapshot() {
// Unregister from the per-thread memory-accounting registry.
tl_slice_snapshots.erase(this);
}

size_t SliceSnapshot::GetThreadLocalMemoryUsage() {
size_t mem = 0;
for (SliceSnapshot* snapshot : tl_slice_snapshots) {
mem += snapshot->GetTotalBufferCapacity() + snapshot->GetTotalChannelCapacity();
}
return mem;
}

void SliceSnapshot::Start(bool stream_journal, const Cancellation* cll) {
Expand Down Expand Up @@ -274,8 +288,6 @@ bool SliceSnapshot::PushSerializedToChannel(bool force) {
if (serialized == 0)
return 0;

stats_.pushed_bytes += serialized;

auto id = rec_id_++;
DVLOG(2) << "Pushed " << id;
DbRecord db_rec{.id = id, .value = std::move(sfile.val)};
Expand Down Expand Up @@ -335,4 +347,8 @@ size_t SliceSnapshot::GetTotalBufferCapacity() const {
return serializer_->GetTotalBufferCapacity();
}

// Memory used by the destination record channel, as reported by its GetSize().
size_t SliceSnapshot::GetTotalChannelCapacity() const {
return dest_->GetSize();
}

} // namespace dfly
Loading