Skip to content

Commit 2558242

Browse files
bzbarsky-applepull[bot]
authored andcommitted
Add the ability for OperationalSessionSetup to retry a few times automatically. (#24808)
The basic idea is that if CASE establishment fails we try again, after a brief backoff.
1 parent ad4fada commit 2558242

6 files changed

+193
-11
lines changed

src/app/CASESessionManager.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,12 @@ CHIP_ERROR CASESessionManager::Init(chip::System::Layer * systemLayer, const CAS
3030
}
3131

3232
void CASESessionManager::FindOrEstablishSession(const ScopedNodeId & peerId, Callback::Callback<OnDeviceConnected> * onConnection,
33-
Callback::Callback<OnDeviceConnectionFailure> * onFailure)
33+
Callback::Callback<OnDeviceConnectionFailure> * onFailure
34+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
35+
,
36+
uint8_t attemptCount
37+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
38+
)
3439
{
3540
ChipLogDetail(CASESessionManager, "FindOrEstablishSession: PeerId = [%d:" ChipLogFormatX64 "]", peerId.GetFabricIndex(),
3641
ChipLogValueX64(peerId.GetNodeId()));
@@ -53,6 +58,10 @@ void CASESessionManager::FindOrEstablishSession(const ScopedNodeId & peerId, Cal
5358
}
5459
}
5560

61+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
62+
session->UpdateAttemptCount(attemptCount);
63+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
64+
5665
session->Connect(onConnection, onFailure);
5766
}
5867

src/app/CASESessionManager.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,17 @@ class CASESessionManager : public OperationalSessionReleaseDelegate, public Sess
7777
*
7878
* The `onFailure` callback may be called before the FindOrEstablishSession
7979
* call returns, for error cases that are detected synchronously.
80+
*
81+
* attemptCount can be used to automatically retry multiple times if session
82+
* setup is not successful.
8083
*/
8184
void FindOrEstablishSession(const ScopedNodeId & peerId, Callback::Callback<OnDeviceConnected> * onConnection,
82-
Callback::Callback<OnDeviceConnectionFailure> * onFailure);
85+
Callback::Callback<OnDeviceConnectionFailure> * onFailure
86+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
87+
,
88+
uint8_t attemptCount = 1
89+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
90+
);
8391

8492
void ReleaseSessionsForFabric(FabricIndex fabricIndex);
8593

src/app/OperationalSessionSetup.cpp

+113-6
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <lib/dnssd/Resolver.h>
3737
#include <lib/support/CodeUtils.h>
3838
#include <lib/support/logging/CHIPLogging.h>
39+
#include <system/SystemClock.h>
3940
#include <system/SystemLayer.h>
4041

4142
using namespace chip::Callback;
@@ -49,7 +50,7 @@ void OperationalSessionSetup::MoveToState(State aTargetState)
4950
{
5051
if (mState != aTargetState)
5152
{
52-
ChipLogDetail(Controller, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d",
53+
ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d",
5354
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState),
5455
to_underlying(aTargetState));
5556
mState = aTargetState;
@@ -70,7 +71,7 @@ bool OperationalSessionSetup::AttachToExistingSecureSession()
7071
if (!sessionHandle.HasValue())
7172
return false;
7273

73-
ChipLogProgress(Controller, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(),
74+
ChipLogProgress(Discovery, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(),
7475
ChipLogValueX64(mPeerId.GetNodeId()));
7576

7677
mDeviceAddress = sessionHandle.Value()->AsSecureSession()->GetPeerAddress();
@@ -214,7 +215,7 @@ void OperationalSessionSetup::UpdateDeviceData(const Transport::PeerAddress & ad
214215
return;
215216
}
216217

217-
ChipLogError(Controller, "Received UpdateDeviceData in incorrect state");
218+
ChipLogError(Discovery, "Received UpdateDeviceData in incorrect state");
218219
DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
219220
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
220221
}
@@ -304,7 +305,7 @@ void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error)
304305
void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error)
305306
{
306307
VerifyOrReturn(mState != State::Uninitialized && mState != State::NeedsAddress,
307-
ChipLogError(Controller, "HandleCASEConnectionFailure was called while the device was not initialized"));
308+
ChipLogError(Discovery, "HandleCASEConnectionFailure was called while the device was not initialized"));
308309

309310
if (CHIP_ERROR_TIMEOUT == error)
310311
{
@@ -313,6 +314,17 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error)
313314
MoveToState(State::ResolvingAddress);
314315
return;
315316
}
317+
318+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
319+
if (mRemainingAttempts > 0)
320+
{
321+
CHIP_ERROR err = ScheduleSessionSetupReattempt();
322+
if (err == CHIP_NO_ERROR)
323+
{
324+
return;
325+
}
326+
}
327+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
316328
}
317329

318330
DequeueConnectionCallbacks(error);
@@ -322,7 +334,7 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error)
322334
void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
323335
{
324336
VerifyOrReturn(mState != State::Uninitialized,
325-
ChipLogError(Controller, "HandleCASEConnected was called while the device was not initialized"));
337+
ChipLogError(Discovery, "HandleCASEConnected was called while the device was not initialized"));
326338

327339
if (!mSecureSession.Grab(session))
328340
return; // Got an invalid session, do not change any state
@@ -377,6 +389,17 @@ OperationalSessionSetup::~OperationalSessionSetup()
377389

378390
CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
379391
{
392+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
393+
if (mRemainingAttempts > 0)
394+
{
395+
--mRemainingAttempts;
396+
}
397+
if (mAttemptsDone < UINT8_MAX)
398+
{
399+
++mAttemptsDone;
400+
}
401+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
402+
380403
// NOTE: This is public API that can be used to update our stored peer
381404
// address even when we are in State::Connected, so we do not make any
382405
// MoveToState calls in this method.
@@ -418,7 +441,7 @@ void OperationalSessionSetup::PerformAddressUpdate()
418441
CHIP_ERROR err = LookupPeerAddress();
419442
if (err != CHIP_NO_ERROR)
420443
{
421-
ChipLogError(Controller, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format());
444+
ChipLogError(Discovery, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format());
422445
DequeueConnectionCallbacks(err);
423446
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
424447
return;
@@ -435,9 +458,93 @@ void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerI
435458
ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT,
436459
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format());
437460

461+
// Does it make sense to ScheduleSessionSetupReattempt() here? DNS-SD
462+
// resolution has its own retry/backoff mechanisms, so if it's failed we
463+
// have already done a lot of that.
464+
438465
// No need to modify any variables in `this` since call below releases `this`.
439466
DequeueConnectionCallbacks(reason);
440467
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
441468
}
442469

470+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
471+
void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount)
472+
{
473+
if (attemptCount == 0)
474+
{
475+
// Nothing to do.
476+
return;
477+
}
478+
479+
if (mState != State::NeedsAddress)
480+
{
481+
// We're in the middle of an attempt already, so decrement attemptCount
482+
// by 1 to account for that.
483+
--attemptCount;
484+
}
485+
486+
if (attemptCount > mRemainingAttempts)
487+
{
488+
mRemainingAttempts = attemptCount;
489+
}
490+
}
491+
492+
CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt()
493+
{
494+
VerifyOrDie(mRemainingAttempts > 0);
495+
// Try again, but not if things are in shutdown such that we can't get
496+
// to a system layer, and not if we've run out of attempts.
497+
if (!mInitParams.exchangeMgr->GetSessionManager() || !mInitParams.exchangeMgr->GetSessionManager()->SystemLayer())
498+
{
499+
return CHIP_ERROR_INCORRECT_STATE;
500+
}
501+
502+
MoveToState(State::NeedsAddress);
503+
System::Clock::Seconds16 timerDelay;
504+
// Stop exponential backoff before our delays get too large.
505+
//
506+
// Note that mAttemptsDone is always > 0 here, because we have
507+
// just finished one attempt.
508+
VerifyOrDie(mAttemptsDone > 0);
509+
static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
510+
(1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
511+
"Our backoff calculation will overflow.");
512+
timerDelay = System::Clock::Seconds16(
513+
static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
514+
<< min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
515+
CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(timerDelay, TrySetupAgain, this);
516+
// The cast on count() is needed because the type count() returns might not
517+
// actually be uint16_t; on some platforms it's int.
518+
ChipLogProgress(Discovery,
519+
"OperationalSessionSetup:attempts done: %u, attempts left: %u, retry delay %us, status %" CHIP_ERROR_FORMAT,
520+
mAttemptsDone, mRemainingAttempts, static_cast<unsigned>(timerDelay.count()), err.Format());
521+
return err;
522+
}
523+
524+
void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state)
525+
{
526+
auto * self = static_cast<OperationalSessionSetup *>(state);
527+
528+
CHIP_ERROR err = CHIP_NO_ERROR;
529+
530+
if (self->mState != State::NeedsAddress)
531+
{
532+
err = CHIP_ERROR_INCORRECT_STATE;
533+
}
534+
else
535+
{
536+
self->MoveToState(State::ResolvingAddress);
537+
err = self->LookupPeerAddress();
538+
if (err == CHIP_NO_ERROR)
539+
{
540+
return;
541+
}
542+
}
543+
544+
// Give up; we're either in a bad state or could not start a lookup.
545+
self->DequeueConnectionCallbacks(err);
546+
// Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
547+
}
548+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
549+
443550
} // namespace chip

src/app/OperationalSessionSetup.h

+23-2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <messaging/ExchangeDelegate.h>
3737
#include <messaging/ExchangeMgr.h>
3838
#include <messaging/Flags.h>
39+
#include <platform/CHIPDeviceConfig.h>
3940
#include <protocols/secure_channel/CASESession.h>
4041
#include <system/SystemLayer.h>
4142
#include <transport/SessionManager.h>
@@ -158,7 +159,6 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
158159
}
159160

160161
mClientPool = clientPool;
161-
mSystemLayer = params.exchangeMgr->GetSessionManager()->SystemLayer();
162162
mPeerId = peerId;
163163
mReleaseDelegate = releaseDelegate;
164164
mState = State::NeedsAddress;
@@ -224,6 +224,11 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
224224
void OnNodeAddressResolved(const PeerId & peerId, const AddressResolve::ResolveResult & result) override;
225225
void OnNodeAddressResolutionFailed(const PeerId & peerId, CHIP_ERROR reason) override;
226226

227+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
228+
// Update our remaining attempt count to be at least the given value.
229+
void UpdateAttemptCount(uint8_t attemptCount);
230+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
231+
227232
private:
228233
enum class State : uint8_t
229234
{
@@ -237,7 +242,6 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
237242

238243
CASEClientInitParams mInitParams;
239244
CASEClientPoolDelegate * mClientPool = nullptr;
240-
System::Layer * mSystemLayer;
241245

242246
// mCASEClient is only non-null if we are in State::Connecting or just
243247
// allocated it as part of an attempt to enter State::Connecting.
@@ -261,6 +265,11 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
261265

262266
bool mPerformingAddressUpdate = false;
263267

268+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
269+
uint8_t mRemainingAttempts = 0;
270+
uint8_t mAttemptsDone = 0;
271+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
272+
264273
void MoveToState(State aTargetState);
265274

266275
CHIP_ERROR EstablishConnection(const ReliableMessageProtocolConfig & config);
@@ -301,6 +310,18 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
301310
* This function will set new IP address, port and MRP retransmission intervals of the device.
302311
*/
303312
void UpdateDeviceData(const Transport::PeerAddress & addr, const ReliableMessageProtocolConfig & config);
313+
314+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
315+
/**
316+
* Schedule a setup reattempt, if possible.
317+
*/
318+
CHIP_ERROR ScheduleSessionSetupReattempt();
319+
320+
/**
321+
* Helper for our backoff retry timer.
322+
*/
323+
static void TrySetupAgain(System::Layer * systemLayer, void * state);
324+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
304325
};
305326

306327
} // namespace chip

src/controller/CHIPDeviceController.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -2441,7 +2441,12 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio
24412441
// If there is an error, CommissioningStageComplete will be called from OnDeviceConnectionFailureFn.
24422442
auto scopedPeerId = GetPeerScopedId(proxy->GetDeviceId());
24432443
mSystemState->CASESessionMgr()->FindOrEstablishSession(scopedPeerId, &mOnDeviceConnectedCallback,
2444-
&mOnDeviceConnectionFailureCallback);
2444+
&mOnDeviceConnectionFailureCallback
2445+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
2446+
,
2447+
/* attemptCount = */ 3
2448+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
2449+
);
24452450
}
24462451
break;
24472452
case CommissioningStage::kSendComplete: {

src/include/platform/CHIPDeviceConfig.h

+32
Original file line numberDiff line numberDiff line change
@@ -1282,6 +1282,38 @@
12821282
#define CHIP_DEVICE_CONFIG_PAIRING_SECONDARY_INSTRUCTION ""
12831283
#endif
12841284

1285+
/**
1286+
* CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
1287+
*
1288+
* If 1, enable support for automatic CASE establishment retries.
1289+
*/
1290+
#ifndef CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
1291+
#define CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES 1
1292+
#endif
1293+
1294+
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
1295+
1296+
/**
1297+
* CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
1298+
*
1299+
* The initial retry delay, in seconds, for our automatic CASE retries.
1300+
*/
1301+
#ifndef CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
1302+
#define CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS 1
1303+
#endif
1304+
1305+
/**
1306+
* CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF
1307+
*
1308+
* The maximum number of times we back off, by a factor of 2 each time, from our
1309+
* initial CASE retry interval before we plateau.
1310+
*/
1311+
#ifndef CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF
1312+
#define CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF 5
1313+
#endif
1314+
1315+
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
1316+
12851317
// -------------------- App Platform Configuration --------------------
12861318

12871319
/**

0 commit comments

Comments
 (0)