@@ -1086,3 +1086,61 @@ TEST(replication, newTermWhileAppending, setUp, tearDown, 0, NULL)
1086
1086
1087
1087
return MUNIT_OK ;
1088
1088
}
1089
+
1090
+ /* A leader with slow disk commits an entry that it hasn't persisted yet,
1091
+ * because enough followers to have a majority have acknowledged that they have
1092
+ * appended the entry. The leader's last_stored field hence lags behind its
1093
+ * commit_index. A new leader gets elected, with a higher commit index and sends
1094
+ * first a new entry and then a heartbeat to the old leader, which needs to update
1095
+ * its commit_index taking into account its lagging last_stored. */
1096
+ TEST (replication , lastStoredLaggingBehindCommitIndex , setUp , tearDown , 0 , NULL )
1097
+ {
1098
+ struct fixture * f = data ;
1099
+ CLUSTER_GROW ; /* NOTE(review): presumably adds one more server to the cluster built by setUp -- confirm against the fixture macros. */
1100
+
1101
+ /* Give server 0 a high disk latency, so that it takes a long time to
   * persist entry 2 (the barrier). */
1102
+ CLUSTER_SET_DISK_LATENCY (0 , 10000 );
1103
+
1104
+ /* Server 0 gets elected and creates a barrier entry at index 2 */
1105
+ BOOTSTRAP_START_AND_ELECT ;
1106
+
1107
+ /* Server 0 commits and applies barrier entry 2 even though it has not
1108
+ * persisted it yet: its last_stored is still 1 while commit_index and
   * last_applied are already 2. */
1109
+ CLUSTER_STEP_UNTIL_APPLIED (0 , 2 , 2000 );
1110
+
1111
+ munit_assert_int (CLUSTER_RAFT (0 )-> last_stored , = = , 1 );
1112
+ munit_assert_int (CLUSTER_RAFT (0 )-> commit_index , = = , 2 );
1113
+ munit_assert_int (CLUSTER_RAFT (0 )-> last_applied , = = , 2 );
1114
+
1115
+ /* Server 1 stored barrier entry 2, but did not yet receive a notification
1116
+ * from server 0 about the new commit index. */
1117
+ munit_assert_int (CLUSTER_RAFT (1 )-> last_stored , = = , 2 );
1118
+ munit_assert_int (CLUSTER_RAFT (1 )-> commit_index , = = , 1 );
1119
+ munit_assert_int (CLUSTER_RAFT (1 )-> last_applied , = = , 1 );
1120
+
1121
+ /* Disconnect server 0 from servers 1 and 2. */
1122
+ CLUSTER_DISCONNECT (0 , 1 );
1123
+ CLUSTER_DISCONNECT (0 , 2 );
1124
+
1125
+ /* Set a very high election timeout on server 0, so it won't step down for a
1126
+ * while, even if disconnected. Both the fixture's randomized timeout and
   * the raft instance's own election timeout are raised. */
1127
+ raft_fixture_set_randomized_election_timeout (& f -> cluster , 0 , 10000 );
1128
+ raft_set_election_timeout (CLUSTER_RAFT (0 ), 10000 );
1129
+
1130
+ /* Servers 1 and 2 eventually time out and start an election; server 1
1131
+ * wins. */
1132
+ CLUSTER_STEP_UNTIL_HAS_NO_LEADER (4000 );
1133
+ CLUSTER_STEP_UNTIL_HAS_LEADER (2000 );
1134
+ munit_assert_int (CLUSTER_LEADER , = = , 1 );
1135
+
1136
+ /* Server 1 commits the barrier entry at index 3 that it created at the
1137
+ * start of its term. */
1138
+ CLUSTER_STEP_UNTIL_APPLIED (1 , 3 , 2000 );
1139
+
1140
+ /* Reconnect server 0 to server 1, which will start replicating entry 3 to
1141
+ * it. The test then waits for server 0 to apply entry 3 as well, i.e. to
   * advance its commit index although its last_stored was lagging behind. */
1142
+ CLUSTER_RECONNECT (0 , 1 );
1143
+ CLUSTER_STEP_UNTIL_APPLIED (0 , 3 , 20000 );
1144
+
1145
+ return MUNIT_OK ;
1146
+ }
0 commit comments