Skip to content

Commit 50527ed

Browse files
[FAB-10012] Reset attempt counter on connect success
If broadcastClient succeeds in connecting to the orderer, we reset the attempt counter. Change-Id: I0489c73b62b27c9acf960b74644152f03bc11276 Signed-off-by: gennady <gennady@il.ibm.com>
1 parent 08e5958 commit 50527ed

File tree

2 files changed

+87
-3
lines changed

2 files changed

+87
-3
lines changed

core/deliverservice/client.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,14 @@ func (bc *broadcastClient) try(action func() (interface{}, error)) (interface{},
8383
var totalRetryTime time.Duration
8484
var backoffDuration time.Duration
8585
retry := true
86+
resetAttemptCounter := func() {
87+
attempt = 0
88+
totalRetryTime = 0
89+
}
8690
for retry && !bc.shouldStop() {
87-
attempt++
88-
resp, err := bc.doAction(action)
91+
resp, err := bc.doAction(action, resetAttemptCounter)
8992
if err != nil {
93+
attempt++
9094
backoffDuration, retry = bc.shouldRetry(attempt, totalRetryTime)
9195
if !retry {
9296
logger.Warning("Got error:", err, "at", attempt, "attempt. Ceasing to retry")
@@ -105,12 +109,13 @@ func (bc *broadcastClient) try(action func() (interface{}, error)) (interface{},
105109
return nil, fmt.Errorf("attempts (%d) or elapsed time (%v) exhausted", attempt, totalRetryTime)
106110
}
107111

108-
func (bc *broadcastClient) doAction(action func() (interface{}, error)) (interface{}, error) {
112+
func (bc *broadcastClient) doAction(action func() (interface{}, error), actionOnNewConnection func()) (interface{}, error) {
109113
if bc.conn == nil {
110114
err := bc.connect()
111115
if err != nil {
112116
return nil, err
113117
}
118+
actionOnNewConnection()
114119
}
115120
resp, err := action()
116121
if err != nil {

core/deliverservice/deliveryclient_test.go

+79
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/hyperledger/fabric/core/deliverservice/mocks"
2222
"github.com/hyperledger/fabric/gossip/api"
2323
"github.com/hyperledger/fabric/gossip/common"
24+
"github.com/hyperledger/fabric/gossip/util"
2425
"github.com/hyperledger/fabric/msp/mgmt/testtools"
2526
"github.com/hyperledger/fabric/protos/orderer"
2627
"github.com/spf13/viper"
@@ -547,6 +548,68 @@ func TestDeliverServiceShutdownRespawn(t *testing.T) {
547548
osn2.Shutdown()
548549
}
549550

551+
// TestDeliverServiceDisconnectReconnect checks that the delivery client still
// receives a new block after the orderer has been shut down and recreated
// five times in a row.
func TestDeliverServiceDisconnectReconnect(t *testing.T) {
552+
// Scenario: launch an ordering service node and let the client pull some blocks.
553+
// Stop the ordering service and wait for a while to simulate a disconnect, then restart it.
554+
// Wait for some time without sending blocks, to simulate a recv wait on an empty channel.
555+
// Repeat the stop/start sequence multiple times, to make sure the total retry time exceeds
556+
// the value returned by getReConnectTotalTimeThreshold - in this test it is set to 2 seconds
557+
// (0.5s + 1s + 2s + 4s) > 2s.
558+
// Send a new block and check that the delivery client got it.
559+
// So we can see that waiting on recv in an empty channel does reset the total time spent in reconnection.
560+
orgReconnectTotalTimeThreshold := util.GetDurationOrDefault("peer.deliveryclient.reconnectTotalTimeThreshold", defaultReConnectTotalTimeThreshold)
561+
viper.Set("peer.deliveryclient.reconnectTotalTimeThreshold", time.Second*2)
562+
defer func() {
563+
viper.Set("peer.deliveryclient.reconnectTotalTimeThreshold", orgReconnectTotalTimeThreshold)
564+
}()
565+
defer ensureNoGoroutineLeak(t)()
566+
567+
osn := mocks.NewOrderer(5614, t)
568+
569+
time.Sleep(time.Second)
570+
gossipServiceAdapter := &mocks.MockGossipServiceAdapter{GossipBlockDisseminations: make(chan uint64)}
571+
572+
service, err := NewDeliverService(&Config{
573+
Endpoints: []string{"localhost:5614"},
574+
Gossip: gossipServiceAdapter,
575+
CryptoSvc: &mockMCS{},
576+
ABCFactory: DefaultABCFactory,
577+
ConnFactory: DefaultConnectionFactory,
578+
})
579+
assert.NoError(t, err)
580+
581+
li := &mocks.MockLedgerInfo{Height: uint64(100)}
582+
osn.SetNextExpectedSeek(uint64(100))
583+
err = service.StartDeliverForChannel("TEST_CHAINID", li, func() {})
584+
assert.NoError(t, err, "can't start delivery")
585+
586+
// Check that the delivery service requests blocks in order
587+
go osn.SendBlock(uint64(100))
588+
assertBlockDissemination(100, gossipServiceAdapter.GossipBlockDisseminations, t)
589+
go osn.SendBlock(uint64(101))
590+
assertBlockDissemination(101, gossipServiceAdapter.GossipBlockDisseminations, t)
591+
atomic.StoreUint64(&li.Height, uint64(102))
592+
593+
for i := 0; i < 5; i += 1 {
594+
// Shut down the orderer to simulate a network disconnect
595+
osn.Shutdown()
596+
// Now wait for the disconnect to be discovered
597+
assert.True(t, waitForConnectionCount(osn, 0), "deliverService can't disconnect from orderer")
598+
// Recreate the orderer, simulating the network coming back
599+
osn = mocks.NewOrderer(5614, t)
600+
osn.SetNextExpectedSeek(atomic.LoadUint64(&li.Height))
601+
// Now wait for the client to connect back; no block is sent, simulating an empty channel
602+
assert.True(t, waitForConnectionCount(osn, 1), "deliverService can't reconnect to orderer")
603+
}
604+
605+
// Send a block from the orderer
606+
go osn.SendBlock(uint64(102))
607+
// Ensure it is received
608+
assertBlockDissemination(102, gossipServiceAdapter.GossipBlockDisseminations, t)
609+
service.Stop()
610+
osn.Shutdown()
611+
}
612+
550613
func TestDeliverServiceBadConfig(t *testing.T) {
551614
// Empty endpoints
552615
service, err := NewDeliverService(&Config{
@@ -647,3 +710,19 @@ func getStackTrace() string {
647710
runtime.Stack(buf, true)
648711
return string(buf)
649712
}
713+
714+
// waitForConnectionCount checks orderer.ConnCount() every 100 milliseconds and
// returns true as soon as it equals connCount. It returns false if the
// 5-second context timeout expires before a matching count is observed.
func waitForConnectionCount(orderer *mocks.Orderer, connCount int) bool {
715+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
716+
defer cancel()
717+
718+
for {
719+
select {
720+
case <-time.After(time.Millisecond * 100):
721+
if orderer.ConnCount() == connCount {
722+
return true
723+
}
724+
case <-ctx.Done():
725+
return false
726+
}
727+
}
728+
}

0 commit comments

Comments
 (0)