Skip to content

Commit 86cb647

Browse files
authored
GODRIVER-3145 Don't retry on context timeout or cancellation. (#1598)
1 parent e9a633c commit 86cb647

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

mongo/integration/client_test.go

+71
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,77 @@ func TestClient(t *testing.T) {
768768
"expected 'OP_MSG' OpCode in wire message, got %q", pair.Sent.OpCode.String())
769769
}
770770
})
771+
772+
opts := mtest.NewOptions().
773+
// Blocking failpoints don't work on pre-4.2 and sharded clusters.
774+
Topologies(mtest.Single, mtest.ReplicaSet).
775+
MinServerVersion("4.2").
776+
// Expliticly enable retryable reads and retryable writes.
777+
ClientOptions(options.Client().SetRetryReads(true).SetRetryWrites(true))
778+
mt.RunOpts("operations don't retry after a context timeout", opts, func(mt *mtest.T) {
779+
testCases := []struct {
780+
desc string
781+
operation func(context.Context, *mongo.Collection) error
782+
}{
783+
{
784+
desc: "read op",
785+
operation: func(ctx context.Context, coll *mongo.Collection) error {
786+
return coll.FindOne(ctx, bson.D{}).Err()
787+
},
788+
},
789+
{
790+
desc: "write op",
791+
operation: func(ctx context.Context, coll *mongo.Collection) error {
792+
_, err := coll.InsertOne(ctx, bson.D{})
793+
return err
794+
},
795+
},
796+
}
797+
798+
for _, tc := range testCases {
799+
mt.Run(tc.desc, func(mt *mtest.T) {
800+
_, err := mt.Coll.InsertOne(context.Background(), bson.D{})
801+
require.NoError(mt, err)
802+
803+
mt.SetFailPoint(mtest.FailPoint{
804+
ConfigureFailPoint: "failCommand",
805+
Mode: "alwaysOn",
806+
Data: mtest.FailPointData{
807+
FailCommands: []string{"find", "insert"},
808+
BlockConnection: true,
809+
BlockTimeMS: 500,
810+
},
811+
})
812+
813+
mt.ClearEvents()
814+
815+
for i := 0; i < 50; i++ {
816+
// Run 50 operations, each with a timeout of 50ms. Expect
817+
// them to all return a timeout error because the failpoint
818+
// blocks find operations for 500ms. Run 50 to increase the
819+
// probability that an operation will time out in a way that
820+
// can cause a retry.
821+
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
822+
err = tc.operation(ctx, mt.Coll)
823+
cancel()
824+
assert.ErrorIs(mt, err, context.DeadlineExceeded)
825+
assert.True(mt, mongo.IsTimeout(err), "expected mongo.IsTimeout(err) to be true")
826+
827+
// Assert that each operation reported exactly one command
828+
// started events, which means the operation did not retry
829+
// after the context timeout.
830+
evts := mt.GetAllStartedEvents()
831+
require.Len(mt,
832+
mt.GetAllStartedEvents(),
833+
1,
834+
"expected exactly 1 command started event per operation, but got %d after %d iterations",
835+
len(evts),
836+
i)
837+
mt.ClearEvents()
838+
}
839+
})
840+
}
841+
})
771842
}
772843

773844
func TestClient_BSONOptions(t *testing.T) {

x/mongo/driver/operation.go

+7
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,13 @@ func (op Operation) Execute(ctx context.Context) error {
622622
}
623623
}()
624624
for {
625+
// If we're starting a retry and the the error from the previous try was
626+
// a context canceled or deadline exceeded error, stop retrying and
627+
// return that error.
628+
if errors.Is(prevErr, context.Canceled) || errors.Is(prevErr, context.DeadlineExceeded) {
629+
return prevErr
630+
}
631+
625632
requestID := wiremessage.NextRequestID()
626633

627634
// If the server or connection are nil, try to select a new server and get a new connection.

0 commit comments

Comments
 (0)