Skip to content

Commit 8850d1d

Browse files
[Release 2.1] Fix | Rework timer to ensure guaranteed execution (#929)
1 parent 046a58f commit 8850d1d

File tree

12 files changed

+522
-60
lines changed

12 files changed

+522
-60
lines changed

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/SqlBulkCopy.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2783,7 +2783,7 @@ private void CleanUpStateObject(bool isCancelRequested = true)
27832783
{
27842784
_stateObj.CancelRequest();
27852785
}
2786-
_stateObj._internalTimeout = false;
2786+
_stateObj.SetTimeoutStateStopped();
27872787
_stateObj.CloseSession();
27882788
_stateObj._bulkCopyOpperationInProgress = false;
27892789
_stateObj._bulkCopyWriteTimeout = false;

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/SqlDataReader.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -956,7 +956,7 @@ private bool TryCloseInternal(bool closeReader)
956956
{
957957
_sharedState._dataReady = true; // set _sharedState._dataReady to not confuse CleanPartialRead
958958
}
959-
_stateObj._internalTimeout = false;
959+
_stateObj.SetTimeoutStateStopped();
960960
if (_sharedState._dataReady)
961961
{
962962
cleanDataFailed = true;

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1896,7 +1896,7 @@ internal bool TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataRead
18961896
// If there is data ready, but we didn't exit the loop, then something is wrong
18971897
Debug.Assert(!dataReady, "dataReady not expected - did we forget to skip the row?");
18981898

1899-
if (stateObj._internalTimeout)
1899+
if (stateObj.IsTimeoutStateExpired)
19001900
{
19011901
runBehavior = RunBehavior.Attention;
19021902
}
@@ -2520,7 +2520,7 @@ internal bool TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataRead
25202520
stateObj._attentionSent = false;
25212521
stateObj.HasReceivedAttention = false;
25222522

2523-
if (RunBehavior.Clean != (RunBehavior.Clean & runBehavior) && !stateObj._internalTimeout)
2523+
if (RunBehavior.Clean != (RunBehavior.Clean & runBehavior) && !stateObj.IsTimeoutStateExpired)
25242524
{
25252525
// Add attention error to collection - if not RunBehavior.Clean!
25262526
stateObj.AddError(new SqlError(0, 0, TdsEnums.MIN_ERROR_CLASS, _server, SQLMessage.OperationCancelled(), "", 0));

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParserStateObject.cs

+133-22
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,23 @@ internal enum SnapshottedStateFlags : byte
3636
AttentionReceived = 1 << 5 // NOTE: Received is not volatile as it is only ever accessed\modified by TryRun its callees (i.e. single threaded access)
3737
}
3838

39+
private sealed class TimeoutState
40+
{
41+
public const int Stopped = 0;
42+
public const int Running = 1;
43+
public const int ExpiredAsync = 2;
44+
public const int ExpiredSync = 3;
45+
46+
private readonly int _value;
47+
48+
public TimeoutState(int value)
49+
{
50+
_value = value;
51+
}
52+
53+
public int IdentityValue => _value;
54+
}
55+
3956
private const int AttentionTimeoutSeconds = 5;
4057

4158
private static readonly ContextCallback s_readAdyncCallbackComplete = ReadAsyncCallbackComplete;
@@ -113,9 +130,17 @@ internal enum SnapshottedStateFlags : byte
113130
// Timeout variables
114131
private long _timeoutMilliseconds;
115132
private long _timeoutTime; // variable used for timeout computations, holds the value of the hi-res performance counter at which this request should expire
133+
private int _timeoutState; // expected to be one of the constant values TimeoutStopped, TimeoutRunning, TimeoutExpiredAsync, TimeoutExpiredSync
134+
private int _timeoutIdentitySource;
135+
private volatile int _timeoutIdentityValue;
116136
internal volatile bool _attentionSent; // true if we sent an Attention to the server
117137
internal volatile bool _attentionSending;
118-
internal bool _internalTimeout; // an internal timeout occurred
138+
139+
// Below 2 properties are used to enforce timeout delays in code to
140+
// reproduce issues related to theadpool starvation and timeout delay.
141+
// It should always be set to false by default, and only be enabled during testing.
142+
internal bool _enforceTimeoutDelay = false;
143+
internal int _enforcedTimeoutDelayInMilliSeconds = 5000;
119144

120145
private readonly LastIOTimer _lastSuccessfulIOTimer;
121146

@@ -760,7 +785,7 @@ private void ResetCancelAndProcessAttention()
760785
// operations.
761786
Parser.ProcessPendingAck(this);
762787
}
763-
_internalTimeout = false;
788+
SetTimeoutStateStopped();
764789
}
765790
}
766791

@@ -1042,7 +1067,7 @@ internal bool TryProcessHeader()
10421067
return false;
10431068
}
10441069

1045-
if (_internalTimeout)
1070+
if (IsTimeoutStateExpired)
10461071
{
10471072
ThrowExceptionAndWarning();
10481073
return true;
@@ -1447,7 +1472,7 @@ internal bool TryReadInt16(out short value)
14471472
{
14481473
// The entire int16 is in the packet and in the buffer, so just return it
14491474
// and take care of the counters.
1450-
buffer = _inBuff.AsSpan(_inBytesUsed,2);
1475+
buffer = _inBuff.AsSpan(_inBytesUsed, 2);
14511476
_inBytesUsed += 2;
14521477
_inBytesPacket -= 2;
14531478
}
@@ -1481,7 +1506,7 @@ internal bool TryReadInt32(out int value)
14811506
}
14821507

14831508
AssertValidState();
1484-
value = (buffer[3] << 24) + (buffer[2] <<16) + (buffer[1] << 8) + buffer[0];
1509+
value = (buffer[3] << 24) + (buffer[2] << 16) + (buffer[1] << 8) + buffer[0];
14851510
return true;
14861511

14871512
}
@@ -2247,11 +2272,62 @@ internal void OnConnectionClosed()
22472272
}
22482273
}
22492274

2250-
private void OnTimeout(object state)
2275+
public void SetTimeoutStateStopped()
2276+
{
2277+
Interlocked.Exchange(ref _timeoutState, TimeoutState.Stopped);
2278+
_timeoutIdentityValue = 0;
2279+
}
2280+
2281+
public bool IsTimeoutStateExpired
2282+
{
2283+
get
2284+
{
2285+
int state = _timeoutState;
2286+
return state == TimeoutState.ExpiredAsync || state == TimeoutState.ExpiredSync;
2287+
}
2288+
}
2289+
2290+
private void OnTimeoutAsync(object state)
22512291
{
2252-
if (!_internalTimeout)
2292+
if (_enforceTimeoutDelay)
22532293
{
2254-
_internalTimeout = true;
2294+
Thread.Sleep(_enforcedTimeoutDelayInMilliSeconds);
2295+
}
2296+
2297+
int currentIdentityValue = _timeoutIdentityValue;
2298+
TimeoutState timeoutState = (TimeoutState)state;
2299+
if (timeoutState.IdentityValue == _timeoutIdentityValue)
2300+
{
2301+
// the return value is not useful here because no choice is going to be made using it
2302+
// we only want to make this call to set the state knowing that it will be seen later
2303+
OnTimeoutCore(TimeoutState.Running, TimeoutState.ExpiredAsync);
2304+
}
2305+
else
2306+
{
2307+
Debug.WriteLine($"OnTimeoutAsync called with identity state={timeoutState.IdentityValue} but current identity is {currentIdentityValue} so it is being ignored");
2308+
}
2309+
}
2310+
2311+
private bool OnTimeoutSync()
2312+
{
2313+
return OnTimeoutCore(TimeoutState.Running, TimeoutState.ExpiredSync);
2314+
}
2315+
2316+
/// <summary>
2317+
/// attempts to change the timout state from the expected state to the target state and if it succeeds
2318+
/// will setup the the stateobject into the timeout expired state
2319+
/// </summary>
2320+
/// <param name="expectedState">the state that is the expected current state, state will change only if this is correct</param>
2321+
/// <param name="targetState">the state that will be changed to if the expected state is correct</param>
2322+
/// <returns>boolean value indicating whether the call changed the timeout state</returns>
2323+
private bool OnTimeoutCore(int expectedState, int targetState)
2324+
{
2325+
Debug.Assert(targetState == TimeoutState.ExpiredAsync || targetState == TimeoutState.ExpiredSync, "OnTimeoutCore must have an expiry state as the targetState");
2326+
2327+
bool retval = false;
2328+
if (Interlocked.CompareExchange(ref _timeoutState, targetState, expectedState) == expectedState)
2329+
{
2330+
retval = true;
22552331
// lock protects against Close and Cancel
22562332
lock (this)
22572333
{
@@ -2349,6 +2425,7 @@ private void OnTimeout(object state)
23492425
}
23502426
}
23512427
}
2428+
return retval;
23522429
}
23532430

23542431
internal void ReadSni(TaskCompletionSource<object> completion)
@@ -2383,19 +2460,32 @@ internal void ReadSni(TaskCompletionSource<object> completion)
23832460
{
23842461
Debug.Assert(completion != null, "Async on but null asyncResult passed");
23852462

2386-
if (_networkPacketTimeout == null)
2463+
// if the state is currently stopped then change it to running and allocate a new identity value from
2464+
// the identity source. The identity value is used to correlate timer callback events to the currently
2465+
// running timeout and prevents a late timer callback affecting a result it does not relate to
2466+
int previousTimeoutState = Interlocked.CompareExchange(ref _timeoutState, TimeoutState.Running, TimeoutState.Stopped);
2467+
if (previousTimeoutState == TimeoutState.Stopped)
23872468
{
2388-
_networkPacketTimeout = ADP.UnsafeCreateTimer(
2389-
new TimerCallback(OnTimeout),
2390-
null,
2391-
Timeout.Infinite,
2392-
Timeout.Infinite);
2469+
Debug.Assert(_timeoutIdentityValue == 0, "timer was previously stopped without resetting the _identityValue");
2470+
_timeoutIdentityValue = Interlocked.Increment(ref _timeoutIdentitySource);
23932471
}
23942472

2473+
_networkPacketTimeout?.Dispose();
2474+
2475+
_networkPacketTimeout = ADP.UnsafeCreateTimer(
2476+
new TimerCallback(OnTimeoutAsync),
2477+
new TimeoutState(_timeoutIdentityValue),
2478+
Timeout.Infinite,
2479+
Timeout.Infinite
2480+
);
2481+
2482+
23952483
// -1 == Infinite
23962484
// 0 == Already timed out (NOTE: To simulate the same behavior as sync we will only timeout on 0 if we receive an IO Pending from SNI)
23972485
// >0 == Actual timeout remaining
23982486
int msecsRemaining = GetTimeoutRemaining();
2487+
2488+
Debug.Assert(previousTimeoutState == TimeoutState.Stopped, "previous timeout state was not Stopped");
23992489
if (msecsRemaining > 0)
24002490
{
24012491
ChangeNetworkPacketTimeout(msecsRemaining, Timeout.Infinite);
@@ -2445,12 +2535,15 @@ internal void ReadSni(TaskCompletionSource<object> completion)
24452535
_networkPacketTaskSource.TrySetResult(null);
24462536
}
24472537
// Disable timeout timer on error
2538+
SetTimeoutStateStopped();
24482539
ChangeNetworkPacketTimeout(Timeout.Infinite, Timeout.Infinite);
24492540
}
24502541
else if (msecsRemaining == 0)
2451-
{ // Got IO Pending, but we have no time left to wait
2452-
// Immediately schedule the timeout timer to fire
2453-
ChangeNetworkPacketTimeout(0, Timeout.Infinite);
2542+
{
2543+
// Got IO Pending, but we have no time left to wait
2544+
// disable the timer and set the error state by calling OnTimeoutSync
2545+
ChangeNetworkPacketTimeout(Timeout.Infinite, Timeout.Infinite);
2546+
OnTimeoutSync();
24542547
}
24552548
// DO NOT HANDLE PENDING READ HERE - which is TdsEnums.SNI_SUCCESS_IO_PENDING state.
24562549
// That is handled by user who initiated async read, or by ReadNetworkPacket which is sync over async.
@@ -2565,13 +2658,13 @@ private void ReadSniError(TdsParserStateObject stateObj, uint error)
25652658
Debug.Assert(_syncOverAsync, "Should never reach here with async on!");
25662659
bool fail = false;
25672660

2568-
if (_internalTimeout)
2661+
if (IsTimeoutStateExpired)
25692662
{ // This is now our second timeout - time to give up.
25702663
fail = true;
25712664
}
25722665
else
25732666
{
2574-
stateObj._internalTimeout = true;
2667+
stateObj.SetTimeoutStateStopped();
25752668
Debug.Assert(_parser.Connection != null, "SqlConnectionInternalTds handler can not be null at this point.");
25762669
AddError(new SqlError(TdsEnums.TIMEOUT_EXPIRED, (byte)0x00, TdsEnums.MIN_ERROR_CLASS, _parser.Server, _parser.Connection.TimeoutErrorInternal.GetErrorMessage(), "", 0, TdsEnums.SNI_WAIT_TIMEOUT));
25772670

@@ -2794,6 +2887,25 @@ public void ReadAsyncCallback(IntPtr key, PacketHandle packet, uint error)
27942887

27952888
ChangeNetworkPacketTimeout(Timeout.Infinite, Timeout.Infinite);
27962889

2890+
// The timer thread may be unreliable under high contention scenarios. It cannot be
2891+
// assumed that the timeout has happened on the timer thread callback. Check the timeout
2892+
// synchrnously and then call OnTimeoutSync to force an atomic change of state.
2893+
if (TimeoutHasExpired)
2894+
{
2895+
OnTimeoutSync();
2896+
}
2897+
2898+
// try to change to the stopped state but only do so if currently in the running state
2899+
// and use cmpexch so that all changes out of the running state are atomic
2900+
int previousState = Interlocked.CompareExchange(ref _timeoutState, TimeoutState.Running, TimeoutState.Stopped);
2901+
2902+
// if the state is anything other than running then this query has reached an end so
2903+
// set the correlation _timeoutIdentityValue to 0 to prevent late callbacks executing
2904+
if (_timeoutState != TimeoutState.Running)
2905+
{
2906+
_timeoutIdentityValue = 0;
2907+
}
2908+
27972909
ProcessSniPacket(packet, error);
27982910
}
27992911
catch (Exception e)
@@ -3454,7 +3566,6 @@ internal void SendAttention(bool mustTakeWriteLock = false)
34543566
// Set _attentionSending to true before sending attention and reset after setting _attentionSent
34553567
// This prevents a race condition between receiving the attention ACK and setting _attentionSent
34563568
_attentionSending = true;
3457-
34583569
#if DEBUG
34593570
if (!_skipSendAttention)
34603571
{
@@ -3489,7 +3600,7 @@ internal void SendAttention(bool mustTakeWriteLock = false)
34893600
}
34903601
}
34913602
#if DEBUG
3492-
}
3603+
}
34933604
#endif
34943605

34953606
SetTimeoutSeconds(AttentionTimeoutSeconds); // Initialize new attention timeout of 5 seconds.
@@ -3862,7 +3973,7 @@ internal void AssertStateIsClean()
38623973
// Attention\Cancellation\Timeouts
38633974
Debug.Assert(!HasReceivedAttention && !_attentionSent && !_attentionSending, $"StateObj is still dealing with attention: Sent: {_attentionSent}, Received: {HasReceivedAttention}, Sending: {_attentionSending}");
38643975
Debug.Assert(!_cancelled, "StateObj still has cancellation set");
3865-
Debug.Assert(!_internalTimeout, "StateObj still has internal timeout set");
3976+
Debug.Assert(_timeoutState == TimeoutState.Stopped, "StateObj still has internal timeout set");
38663977
// Errors and Warnings
38673978
Debug.Assert(!_hasErrorOrWarning, "StateObj still has stored errors or warnings");
38683979
}

src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/SqlBulkCopy.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3039,7 +3039,7 @@ private void CleanUpStateObject(bool isCancelRequested = true)
30393039
{
30403040
_stateObj.CancelRequest();
30413041
}
3042-
_stateObj._internalTimeout = false;
3042+
_stateObj.SetTimeoutStateStopped();
30433043
_stateObj.CloseSession();
30443044
_stateObj._bulkCopyOpperationInProgress = false;
30453045
_stateObj._bulkCopyWriteTimeout = false;

src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/SqlDataReader.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1067,7 +1067,7 @@ private bool TryCloseInternal(bool closeReader)
10671067
{
10681068
_sharedState._dataReady = true; // set _sharedState._dataReady to not confuse CleanPartialRead
10691069
}
1070-
_stateObj._internalTimeout = false;
1070+
_stateObj.SetTimeoutStateStopped();
10711071
if (_sharedState._dataReady)
10721072
{
10731073
cleanDataFailed = true;

src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -2262,7 +2262,7 @@ internal bool TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataRead
22622262
// If there is data ready, but we didn't exit the loop, then something is wrong
22632263
Debug.Assert(!dataReady, "dataReady not expected - did we forget to skip the row?");
22642264

2265-
if (stateObj._internalTimeout)
2265+
if (stateObj.IsTimeoutStateExpired)
22662266
{
22672267
runBehavior = RunBehavior.Attention;
22682268
}
@@ -2891,7 +2891,7 @@ internal bool TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataRead
28912891
stateObj._attentionSent = false;
28922892
stateObj._attentionReceived = false;
28932893

2894-
if (RunBehavior.Clean != (RunBehavior.Clean & runBehavior) && !stateObj._internalTimeout)
2894+
if (RunBehavior.Clean != (RunBehavior.Clean & runBehavior) && !stateObj.IsTimeoutStateExpired)
28952895
{
28962896
// Add attention error to collection - if not RunBehavior.Clean!
28972897
stateObj.AddError(new SqlError(0, 0, TdsEnums.MIN_ERROR_CLASS, _server, SQLMessage.OperationCancelled(), "", 0));

0 commit comments

Comments
 (0)