Skip to content

Commit 075faeb

Browse files
nivi-applebzbarsky-apple
authored and
David Lechner
committed
Add a timer to track whether we have received BDX init after a query … (project-chip#24777)
* Add a timer to track whether we have received BDX init after a query image was successful - Currently if an ota requester has a successful query image and an image it available but if for any reason, the ota requester doesn't send BDX init, the provider will be stuck until we reboot the resident. In order to have a fail safe, we are adding a timer that starts after query image returns image available and waits for a BDX init to come. In case BDX init doesn't come, it times out and resets state - Add code to reset state if any API fails on the provider once we prepare for BDX transfer - Stop polling when BDX transfer reset is called - Return QueryImageResponse status busy instead of general failure if the sdk is busy and gets a sexond query image so accessory can handle the error correctly and retry until the sdk is done - When we send a message, handle the case where if an error happens in sending the message close the exchange context and reset state. If the message is a status report and it succeeds, null out the exchange and reset state * Update src/darwin/Framework/CHIP/MTROTAProviderDelegateBridge.mm Co-authored-by: Boris Zbarsky <bzbarsky@apple.com> * Update src/darwin/Framework/CHIP/MTROTAProviderDelegateBridge.mm Co-authored-by: Boris Zbarsky <bzbarsky@apple.com> * Reset mStopPolling to false when we call PrepareForTransfer to start polling again - This covers the use case where we have an image available but the requestor rejects the transfer for some reason. On the next query image the requestor is able to continue with the BDX transfer. The provider state was not being reset for this scenario due to the polling flag not being reset. - Fixes project-chip#23573 * Addressed review comments * Fix the handling of the use case when hasUpdate is false we need to send the response as is - Log the error if the BDX timer fails to start --------- Co-authored-by: Boris Zbarsky <bzbarsky@apple.com>
1 parent 921181a commit 075faeb

File tree

2 files changed

+157
-78
lines changed

2 files changed

+157
-78
lines changed

src/darwin/Framework/CHIP/MTROTAProviderDelegateBridge.mm

+155-78
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@
4242
// TODO Expose a method onto the delegate to make that configurable.
4343
constexpr uint32_t kMaxBdxBlockSize = 1024;
4444
constexpr uint32_t kMaxBDXURILen = 256;
45+
46+
// Since the BDX timeout is 5 minutes and we are starting this after query image is available and before the BDX init comes,
47+
// we just double the timeout to give enough time for the BDX init to come in a reasonable amount of time.
48+
constexpr System::Clock::Timeout kBdxInitReceivedTimeout = System::Clock::Seconds16(10 * 60);
49+
50+
// Time in seconds after which the requestor should retry calling query image if busy status is receieved
51+
constexpr uint32_t kDelayedActionTimeSeconds = 120;
52+
4553
constexpr System::Clock::Timeout kBdxTimeout = System::Clock::Seconds16(5 * 60); // OTA Spec mandates >= 5 minutes
4654
constexpr System::Clock::Timeout kBdxPollIntervalMs = System::Clock::Milliseconds32(50);
4755
constexpr bdx::TransferRole kBdxRole = bdx::TransferRole::kSender;
@@ -89,13 +97,12 @@ CHIP_ERROR Shutdown()
8997
VerifyOrReturnError(mExchangeMgr != nullptr, CHIP_ERROR_INCORRECT_STATE);
9098

9199
mExchangeMgr->UnregisterUnsolicitedMessageHandlerForProtocol(Protocols::BDX::Id);
100+
ResetState();
92101

93102
mExchangeMgr = nullptr;
94103
mSystemLayer = nullptr;
95104
mDelegateNotificationQueue = nil;
96105

97-
ResetState();
98-
99106
return CHIP_NO_ERROR;
100107
}
101108

@@ -120,7 +127,39 @@ void SetDelegate(id<MTROTAProviderDelegate> delegate, dispatch_queue_t delegateN
120127
}
121128
}
122129

130+
void ResetState()
131+
{
132+
assertChipStackLockedByCurrentThread();
133+
if (mSystemLayer) {
134+
mSystemLayer->CancelTimer(HandleBdxInitReceivedTimeoutExpired, this);
135+
}
136+
// TODO: Check if this can be removed. It seems like we can close the exchange context and reset transfer regardless.
137+
if (!mInitialized) {
138+
return;
139+
}
140+
Responder::ResetTransfer();
141+
++mTransferGeneration;
142+
mFabricIndex.ClearValue();
143+
mNodeId.ClearValue();
144+
145+
if (mExchangeCtx != nullptr) {
146+
mExchangeCtx->Close();
147+
mExchangeCtx = nullptr;
148+
}
149+
150+
mInitialized = false;
151+
}
152+
123153
private:
154+
/**
155+
* Timer callback called when we don't receive a BDX init within a reasonable time after a successful QueryImage response.
156+
*/
157+
static void HandleBdxInitReceivedTimeoutExpired(chip::System::Layer * systemLayer, void * state)
158+
{
159+
VerifyOrReturn(state != nullptr);
160+
static_cast<BdxOTASender *>(state)->ResetState();
161+
}
162+
124163
CHIP_ERROR OnMessageToSend(TransferSession::OutputEvent & event)
125164
{
126165
assertChipStackLockedByCurrentThread();
@@ -137,12 +176,41 @@ CHIP_ERROR OnMessageToSend(TransferSession::OutputEvent & event)
137176
}
138177

139178
auto & msgTypeData = event.msgTypeData;
140-
return mExchangeCtx->SendMessage(msgTypeData.ProtocolId, msgTypeData.MessageType, std::move(event.MsgData), sendFlags);
179+
// If there's an error sending the message, close the exchange and call ResetState.
180+
// TODO: If we can remove the !mInitialized check in ResetState(), just calling ResetState() will suffice here.
181+
CHIP_ERROR err
182+
= mExchangeCtx->SendMessage(msgTypeData.ProtocolId, msgTypeData.MessageType, std::move(event.MsgData), sendFlags);
183+
if (err != CHIP_NO_ERROR) {
184+
mExchangeCtx->Close();
185+
mExchangeCtx = nullptr;
186+
ResetState();
187+
} else if (event.msgTypeData.HasMessageType(Protocols::SecureChannel::MsgType::StatusReport)) {
188+
// If the send was successful for a status report, since we are not expecting a response the exchange context is
189+
// already closed. We need to null out the reference to avoid having a dangling pointer.
190+
mExchangeCtx = nullptr;
191+
ResetState();
192+
}
193+
return err;
194+
}
195+
196+
bdx::StatusCode GetBdxStatusCodeFromChipError(CHIP_ERROR err)
197+
{
198+
if (err == CHIP_ERROR_INCORRECT_STATE) {
199+
return bdx::StatusCode::kUnexpectedMessage;
200+
}
201+
if (err == CHIP_ERROR_INVALID_ARGUMENT) {
202+
return bdx::StatusCode::kBadMessageContents;
203+
}
204+
return bdx::StatusCode::kUnknown;
141205
}
142206

143207
CHIP_ERROR OnTransferSessionBegin(TransferSession::OutputEvent & event)
144208
{
145209
assertChipStackLockedByCurrentThread();
210+
// Once we receive the BDX init, cancel the BDX Init timeout and start the BDX session
211+
if (mSystemLayer) {
212+
mSystemLayer->CancelTimer(HandleBdxInitReceivedTimeoutExpired, this);
213+
}
146214

147215
VerifyOrReturnError(mFabricIndex.HasValue(), CHIP_ERROR_INCORRECT_STATE);
148216
VerifyOrReturnError(mNodeId.HasValue(), CHIP_ERROR_INCORRECT_STATE);
@@ -169,8 +237,9 @@ CHIP_ERROR OnTransferSessionBegin(TransferSession::OutputEvent & event)
169237
}
170238

171239
if (error != nil) {
172-
LogErrorOnFailure([MTRError errorToCHIPErrorCode:error]);
173-
LogErrorOnFailure(mTransfer.AbortTransfer(bdx::StatusCode::kUnknown));
240+
CHIP_ERROR err = [MTRError errorToCHIPErrorCode:error];
241+
LogErrorOnFailure(err);
242+
LogErrorOnFailure(mTransfer.AbortTransfer(GetBdxStatusCodeFromChipError(err)));
174243
return;
175244
}
176245

@@ -328,6 +397,9 @@ void HandleTransferSessionOutput(TransferSession::OutputEvent & event) override
328397
switch (event.EventType) {
329398
case TransferSession::OutputEventType::kInitReceived:
330399
err = OnTransferSessionBegin(event);
400+
if (err != CHIP_NO_ERROR) {
401+
LogErrorOnFailure(mTransfer.AbortTransfer(GetBdxStatusCodeFromChipError(err)));
402+
}
331403
break;
332404
case TransferSession::OutputEventType::kStatusReceived:
333405
ChipLogError(BDX, "Got StatusReport %x", static_cast<uint16_t>(event.statusData.statusCode));
@@ -370,6 +442,14 @@ CHIP_ERROR ConfigureState(chip::FabricIndex fabricIndex, chip::NodeId nodeId)
370442
ResetState();
371443
}
372444

445+
// Start a timer to track whether we receive a BDX init after a successful query image in a reasonable amount of time
446+
CHIP_ERROR err = mSystemLayer->StartTimer(kBdxInitReceivedTimeout, HandleBdxInitReceivedTimeoutExpired, this);
447+
LogErrorOnFailure(err);
448+
449+
// The caller of this method maps CHIP_ERROR to specific BDX Status Codes (see GetBdxStatusCodeFromChipError)
450+
// and those are used by the BDX session to prepare the status report.
451+
VerifyOrReturnError(err == CHIP_NO_ERROR, CHIP_ERROR_INCORRECT_STATE);
452+
373453
mFabricIndex.SetValue(fabricIndex);
374454
mNodeId.SetValue(nodeId);
375455

@@ -378,27 +458,6 @@ CHIP_ERROR ConfigureState(chip::FabricIndex fabricIndex, chip::NodeId nodeId)
378458
return CHIP_NO_ERROR;
379459
}
380460

381-
void ResetState()
382-
{
383-
assertChipStackLockedByCurrentThread();
384-
385-
if (!mInitialized) {
386-
return;
387-
}
388-
389-
Responder::ResetTransfer();
390-
++mTransferGeneration;
391-
mFabricIndex.ClearValue();
392-
mNodeId.ClearValue();
393-
394-
if (mExchangeCtx != nullptr) {
395-
mExchangeCtx->Close();
396-
mExchangeCtx = nullptr;
397-
}
398-
399-
mInitialized = false;
400-
}
401-
402461
bool mInitialized = false;
403462
Optional<FabricIndex> mFabricIndex;
404463
Optional<NodeId> mNodeId;
@@ -549,63 +608,81 @@ bool GetPeerNodeInfo(CommandHandler * commandHandler, const ConcreteCommandPath
549608
__block CommandHandler::Handle handle(commandObj);
550609
__block ConcreteCommandPath cachedCommandPath(commandPath.mEndpointId, commandPath.mClusterId, commandPath.mCommandId);
551610

552-
auto completionHandler
553-
= ^(MTROTASoftwareUpdateProviderClusterQueryImageResponseParams * _Nullable data, NSError * _Nullable error) {
554-
[controller
555-
asyncDispatchToMatterQueue:^() {
556-
assertChipStackLockedByCurrentThread();
611+
auto completionHandler = ^(
612+
MTROTASoftwareUpdateProviderClusterQueryImageResponseParams * _Nullable data, NSError * _Nullable error) {
613+
[controller
614+
asyncDispatchToMatterQueue:^() {
615+
assertChipStackLockedByCurrentThread();
557616

558-
CommandHandler * handler = EnsureValidState(handle, cachedCommandPath, "QueryImage", data, error);
559-
VerifyOrReturn(handler != nullptr);
617+
CommandHandler * handler = EnsureValidState(handle, cachedCommandPath, "QueryImage", data, error);
618+
VerifyOrReturn(handler != nullptr);
560619

561-
ChipLogDetail(Controller, "QueryImage: application responded with: %s",
562-
[[data description] cStringUsingEncoding:NSUTF8StringEncoding]);
620+
ChipLogDetail(Controller, "QueryImage: application responded with: %s",
621+
[[data description] cStringUsingEncoding:NSUTF8StringEncoding]);
563622

564-
Commands::QueryImageResponse::Type response;
565-
ConvertFromQueryImageResponseParms(data, response);
566-
567-
auto hasUpdate = [data.status isEqual:@(MTROtaSoftwareUpdateProviderOTAQueryStatusUpdateAvailable)];
568-
auto isBDXProtocolSupported = [commandParams.protocolsSupported
569-
containsObject:@(MTROtaSoftwareUpdateProviderOTADownloadProtocolBDXSynchronous)];
570-
571-
if (hasUpdate && isBDXProtocolSupported) {
572-
auto fabricIndex = handler->GetSubjectDescriptor().fabricIndex;
573-
auto nodeId = handler->GetSubjectDescriptor().subject;
574-
CHIP_ERROR err = gOtaSender.PrepareForTransfer(fabricIndex, nodeId);
575-
if (CHIP_NO_ERROR != err) {
576-
LogErrorOnFailure(err);
577-
handler->AddStatus(cachedCommandPath, Protocols::InteractionModel::Status::Failure);
578-
handle.Release();
579-
return;
580-
}
581-
582-
auto targetNodeId
583-
= handler->GetExchangeContext()->GetSessionHandle()->AsSecureSession()->GetLocalScopedNodeId();
584-
585-
char uriBuffer[kMaxBDXURILen];
586-
MutableCharSpan uri(uriBuffer);
587-
err = bdx::MakeURI(targetNodeId.GetNodeId(), AsCharSpan(data.imageURI), uri);
588-
if (CHIP_NO_ERROR != err) {
589-
LogErrorOnFailure(err);
590-
handler->AddStatus(cachedCommandPath, Protocols::InteractionModel::Status::Failure);
591-
handle.Release();
592-
return;
593-
}
594-
595-
response.imageURI.SetValue(uri);
596-
handler->AddResponse(cachedCommandPath, response);
597-
handle.Release();
598-
return;
599-
}
623+
Commands::QueryImageResponse::Type response;
624+
ConvertFromQueryImageResponseParms(data, response);
600625

601-
handler->AddResponse(cachedCommandPath, response);
602-
handle.Release();
603-
}
626+
auto hasUpdate = [data.status isEqual:@(MTROtaSoftwareUpdateProviderOTAQueryStatusUpdateAvailable)];
627+
auto isBDXProtocolSupported = [commandParams.protocolsSupported
628+
containsObject:@(MTROtaSoftwareUpdateProviderOTADownloadProtocolBDXSynchronous)];
604629

605-
errorHandler:^(NSError *) {
606-
// Not much we can do here
607-
}];
608-
};
630+
if (hasUpdate && isBDXProtocolSupported) {
631+
auto fabricIndex = handler->GetSubjectDescriptor().fabricIndex;
632+
auto nodeId = handler->GetSubjectDescriptor().subject;
633+
CHIP_ERROR err = gOtaSender.PrepareForTransfer(fabricIndex, nodeId);
634+
if (CHIP_NO_ERROR != err) {
635+
LogErrorOnFailure(err);
636+
if (err == CHIP_ERROR_BUSY) {
637+
Commands::QueryImageResponse::Type busyResponse;
638+
busyResponse.status = static_cast<OTAQueryStatus>(MTROTASoftwareUpdateProviderOTAQueryStatusBusy);
639+
busyResponse.delayedActionTime.SetValue(response.delayedActionTime.ValueOr(kDelayedActionTimeSeconds));
640+
handler->AddResponse(cachedCommandPath, busyResponse);
641+
handle.Release();
642+
return;
643+
}
644+
handler->AddStatus(cachedCommandPath, Protocols::InteractionModel::Status::Failure);
645+
handle.Release();
646+
gOtaSender.ResetState();
647+
return;
648+
}
649+
auto targetNodeId
650+
= handler->GetExchangeContext()->GetSessionHandle()->AsSecureSession()->GetLocalScopedNodeId();
651+
652+
char uriBuffer[kMaxBDXURILen];
653+
MutableCharSpan uri(uriBuffer);
654+
err = bdx::MakeURI(targetNodeId.GetNodeId(), AsCharSpan(data.imageURI), uri);
655+
if (CHIP_NO_ERROR != err) {
656+
LogErrorOnFailure(err);
657+
handler->AddStatus(cachedCommandPath, Protocols::InteractionModel::Status::Failure);
658+
handle.Release();
659+
gOtaSender.ResetState();
660+
return;
661+
}
662+
663+
response.imageURI.SetValue(uri);
664+
handler->AddResponse(cachedCommandPath, response);
665+
handle.Release();
666+
return;
667+
}
668+
if (!hasUpdate) {
669+
// Send whatever error response our delegate decided on.
670+
handler->AddResponse(cachedCommandPath, response);
671+
} else {
672+
// We must have isBDXProtocolSupported false. Send the corresponding error status.
673+
Commands::QueryImageResponse::Type protocolNotSupportedResponse;
674+
protocolNotSupportedResponse.status
675+
= static_cast<OTAQueryStatus>(MTROTASoftwareUpdateProviderOTAQueryStatusDownloadProtocolNotSupported);
676+
handler->AddResponse(cachedCommandPath, protocolNotSupportedResponse);
677+
}
678+
handle.Release();
679+
gOtaSender.ResetState();
680+
}
681+
682+
errorHandler:^(NSError *) {
683+
// Not much we can do here
684+
}];
685+
};
609686

610687
auto strongDelegate = mDelegate;
611688
dispatch_async(mDelegateNotificationQueue, ^{

src/protocols/bdx/TransferFacilitator.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ CHIP_ERROR Responder::PrepareForTransfer(System::Layer * layer, TransferRole rol
102102

103103
mPollFreq = pollFreq;
104104
mSystemLayer = layer;
105+
mStopPolling = false;
105106

106107
ReturnErrorOnFailure(mTransfer.WaitForTransfer(role, xferControlOpts, maxBlockSize, timeout));
107108

@@ -112,6 +113,7 @@ CHIP_ERROR Responder::PrepareForTransfer(System::Layer * layer, TransferRole rol
112113
void Responder::ResetTransfer()
113114
{
114115
mTransfer.Reset();
116+
mStopPolling = true;
115117
}
116118

117119
CHIP_ERROR Initiator::InitiateTransfer(System::Layer * layer, TransferRole role, const TransferSession::TransferInitData & initData,

0 commit comments

Comments
 (0)