Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix timer cancellation to be reliable in LayerImplSelect. #22375

Merged
Merged
48 changes: 33 additions & 15 deletions src/controller/tests/data_model/TestRead.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,11 @@ class TestReadInteraction : public app::ReadHandler::ApplicationCallback
// succeed.
static void MultipleReadHelper(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount);

// Helper for MultipleReadHelper that does not spin the event loop, so we
// don't end up with nested event loops.
static void MultipleReadHelperInternal(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount,
uint32_t & aNumSuccessCalls, uint32_t & aNumFailureCalls);

// Establish the given number of subscriptions, then issue the given number
// of reads in parallel and wait for them all to succeed.
static void SubscribeThenReadHelper(nlTestSuite * apSuite, TestContext & aCtx, size_t aSubscribeCount, size_t aReadCount);
Expand Down Expand Up @@ -2484,6 +2489,9 @@ void TestReadInteraction::SubscribeThenReadHelper(nlTestSuite * apSuite, TestCon
uint32_t numSuccessCalls = 0;
uint32_t numSubscriptionEstablishedCalls = 0;

uint32_t numReadSuccessCalls = 0;
uint32_t numReadFailureCalls = 0;

responseDirective = kSendDataResponse;

// Passing of stack variables by reference is only safe because of synchronous completion of the interaction. Otherwise, it's
Expand All @@ -2501,12 +2509,12 @@ void TestReadInteraction::SubscribeThenReadHelper(nlTestSuite * apSuite, TestCon
NL_TEST_ASSERT(apSuite, false);
};

auto onSubscriptionEstablishedCb = [&numSubscriptionEstablishedCalls, &apSuite, &aCtx, aSubscribeCount,
aReadCount](const app::ReadClient & readClient) {
auto onSubscriptionEstablishedCb = [&numSubscriptionEstablishedCalls, &apSuite, &aCtx, aSubscribeCount, aReadCount,
&numReadSuccessCalls, &numReadFailureCalls](const app::ReadClient & readClient) {
numSubscriptionEstablishedCalls++;
if (numSubscriptionEstablishedCalls == aSubscribeCount)
{
MultipleReadHelper(apSuite, aCtx, aReadCount);
MultipleReadHelperInternal(apSuite, aCtx, aReadCount, numReadSuccessCalls, numReadFailureCalls);
}
};

Expand All @@ -2522,40 +2530,50 @@ void TestReadInteraction::SubscribeThenReadHelper(nlTestSuite * apSuite, TestCon

NL_TEST_ASSERT(apSuite, numSuccessCalls == aSubscribeCount);
NL_TEST_ASSERT(apSuite, numSubscriptionEstablishedCalls == aSubscribeCount);
NL_TEST_ASSERT(apSuite, numReadSuccessCalls == aReadCount);
NL_TEST_ASSERT(apSuite, numReadFailureCalls == 0);
}

void TestReadInteraction::MultipleReadHelper(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount)
// The guts of MultipleReadHelper which take references to the success/failure
// counts to modify and assume the consumer will be spinning the event loop.
void TestReadInteraction::MultipleReadHelperInternal(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount,
uint32_t & aNumSuccessCalls, uint32_t & aNumFailureCalls)
{
auto sessionHandle = aCtx.GetSessionBobToAlice();
uint32_t numSuccessCalls = 0;
uint32_t numFailureCalls = 0;
NL_TEST_ASSERT(apSuite, aNumSuccessCalls == 0);
NL_TEST_ASSERT(apSuite, aNumFailureCalls == 0);

auto sessionHandle = aCtx.GetSessionBobToAlice();

responseDirective = kSendDataResponse;

uint16_t firstExpectedResponse = totalReadCount + 1;

// Passing of stack variables by reference is only safe because of synchronous completion of the interaction. Otherwise, it's
// not safe to do so.
auto onFailureCb = [&apSuite, &numFailureCalls](const app::ConcreteDataAttributePath * attributePath, CHIP_ERROR aError) {
numFailureCalls++;
auto onFailureCb = [apSuite, &aNumFailureCalls](const app::ConcreteDataAttributePath * attributePath, CHIP_ERROR aError) {
aNumFailureCalls++;

NL_TEST_ASSERT(apSuite, attributePath == nullptr);
};

for (size_t i = 0; i < aReadCount; ++i)
{
// Passing of stack variables by reference is only safe because of synchronous completion of the interaction. Otherwise,
// it's not safe to do so.
auto onSuccessCb = [&numSuccessCalls, &apSuite, firstExpectedResponse,
auto onSuccessCb = [&aNumSuccessCalls, apSuite, firstExpectedResponse,
i](const app::ConcreteDataAttributePath & attributePath, const auto & dataResponse) {
NL_TEST_ASSERT(apSuite, dataResponse == firstExpectedResponse + i);
numSuccessCalls++;
aNumSuccessCalls++;
};

NL_TEST_ASSERT(apSuite,
Controller::ReadAttribute<TestCluster::Attributes::Int16u::TypeInfo>(
&aCtx.GetExchangeManager(), sessionHandle, kTestEndpointId, onSuccessCb, onFailureCb) == CHIP_NO_ERROR);
}
}

void TestReadInteraction::MultipleReadHelper(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount)
{
uint32_t numSuccessCalls = 0;
uint32_t numFailureCalls = 0;

MultipleReadHelperInternal(apSuite, aCtx, aReadCount, numSuccessCalls, numFailureCalls);

aCtx.DrainAndServiceIO();

Expand Down
24 changes: 23 additions & 1 deletion src/system/SystemLayerImplFreeRTOS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,32 @@ CHIP_ERROR LayerImplFreeRTOS::ScheduleWork(TimerCompleteCallback onComplete, voi
{
VerifyOrReturnError(mLayerState.IsInitialized(), CHIP_ERROR_INCORRECT_STATE);

// Ideally we would not use a timer here at all, but if we try to just
// ScheduleLambda the lambda needs to capture the following:
// 1) onComplete
// 2) appState
// 3) The `this` pointer, because onComplete needs to be passed a pointer to
// the System::Layer.
//
// On a 64-bit system that's 24 bytes, but lambdas passed to ScheduleLambda
// are capped at CHIP_CONFIG_LAMBDA_EVENT_SIZE which is 16 bytes.
//
// So for now use a timer as a poor-man's closure that captures `this` and
// onComplete and appState in a single pointer, so we fit inside the size
// limit.
//
// TODO: We could do something here where we compile-time condition on the
// sizes of things and use a direct ScheduleLambda if it would fit and this
// setup otherwise.
TimerList::Node * timer = mTimerPool.Create(*this, SystemClock().GetMonotonicTimestamp(), onComplete, appState);
VerifyOrReturnError(timer != nullptr, CHIP_ERROR_NO_MEMORY);

return ScheduleLambda([this, timer] { this->mTimerPool.Invoke(timer); });
CHIP_ERROR err = ScheduleLambda([this, timer] { this->mTimerPool.Invoke(timer); });
if (err != CHIP_NO_ERROR)
{
mTimerPool.Release(timer);
}
return err;
}

/**
Expand Down
40 changes: 36 additions & 4 deletions src/system/SystemLayerImplSelect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,14 @@ void LayerImplSelect::CancelTimer(TimerCompleteCallback onComplete, void * appSt
VerifyOrReturn(mLayerState.IsInitialized());

TimerList::Node * timer = mTimerList.Remove(onComplete, appState);
if (timer == nullptr)
{
// The timer was not in our "will fire in the future" list, but it might
// be in the "we're about to fire these" chunk we already grabbed from
// that list. Check for it there too, and if found there we still want
// to cancel it.
timer = mExpiredTimers.Remove(onComplete, appState);
}
VerifyOrReturn(timer != nullptr);

#if CHIP_SYSTEM_CONFIG_USE_DISPATCH
Expand Down Expand Up @@ -199,8 +207,31 @@ CHIP_ERROR LayerImplSelect::ScheduleWork(TimerCompleteCallback onComplete, void
}
#endif // CHIP_SYSTEM_CONFIG_USE_DISPATCH

CancelTimer(onComplete, appState);

// Ideally we would not use a timer here at all, but if we try to just
// ScheduleLambda the lambda needs to capture the following:
// 1) onComplete
// 2) appState
// 3) The `this` pointer, because onComplete needs to be passed a pointer to
// the System::Layer.
//
// On a 64-bit system that's 24 bytes, but lambdas passed to ScheduleLambda
// are capped at CHIP_CONFIG_LAMBDA_EVENT_SIZE which is 16 bytes.
//
// So for now use a timer as a poor-man's closure that captures `this` and
// onComplete and appState in a single pointer, so we fit inside the size
// limit.
//
// TODO: We could do something here where we compile-time condition on the
// sizes of things and use a direct ScheduleLambda if it would fit and this
// setup otherwise.
//
// TODO: But also, unit tests seem to do SystemLayer::ScheduleWork without
// actually running a useful event loop (in the PlatformManager sense),
// which breaks if we use ScheduleLambda here, since that does rely on the
// PlatformManager event loop. So for now, keep scheduling an expires-ASAP
// timer, but just make sure we don't cancel existing timers with the same
// callback and appState, so ScheduleWork invocations don't stomp on each
// other.
TimerList::Node * timer = mTimerPool.Create(*this, SystemClock().GetMonotonicTimestamp(), onComplete, appState);
VerifyOrReturnError(timer != nullptr, CHIP_ERROR_NO_MEMORY);

Expand Down Expand Up @@ -469,9 +500,10 @@ void LayerImplSelect::HandleEvents()

// Obtain the list of currently expired timers. Any new timers added by timer callback are NOT handled on this pass,
// since that could result in infinite handling of new timers blocking any other progress.
TimerList expiredTimers = mTimerList.ExtractEarlier(Clock::Timeout(1) + SystemClock().GetMonotonicTimestamp());
VerifyOrDieWithMsg(mExpiredTimers.Empty(), DeviceLayer, "Re-entry into HandleEvents from a timer callback?");
mExpiredTimers = mTimerList.ExtractEarlier(Clock::Timeout(1) + SystemClock().GetMonotonicTimestamp());
TimerList::Node * timer = nullptr;
while ((timer = expiredTimers.PopEarliest()) != nullptr)
while ((timer = mExpiredTimers.PopEarliest()) != nullptr)
{
mTimerPool.Invoke(timer);
}
Expand Down
3 changes: 3 additions & 0 deletions src/system/SystemLayerImplSelect.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ class LayerImplSelect : public LayerSocketsLoop

TimerPool<TimerList::Node> mTimerPool;
TimerList mTimerList;
// List of expired timers being processed right now. Stored in a member so
// we can cancel them.
TimerList mExpiredTimers;
timeval mNextTimeout;

// Members for select loop
Expand Down
4 changes: 4 additions & 0 deletions src/system/tests/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ chip_test_suite("tests") {
"TestTimeSource.cpp",
]

if (chip_device_platform != "esp32" && chip_device_platform != "fake") {
test_sources += [ "TestSystemScheduleWork.cpp" ]
}

cflags = [ "-Wconversion" ]

public_deps = [
Expand Down
2 changes: 1 addition & 1 deletion src/system/tests/TestSystemClock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ static const nlTest sTests[] =
int TestSystemClock(void)
{
nlTestSuite theSuite = {
"chip-timesource", &sTests[0], nullptr /* setup */, nullptr /* teardown */
"chip-systemclock", &sTests[0], nullptr /* setup */, nullptr /* teardown */
};

// Run test suit againt one context.
Expand Down
107 changes: 107 additions & 0 deletions src/system/tests/TestSystemScheduleWork.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Copyright (c) 2021 Project CHIP Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <system/SystemConfig.h>

#include <lib/support/CodeUtils.h>
#include <lib/support/ErrorStr.h>
#include <lib/support/UnitTestRegistration.h>
#include <nlunit-test.h>
#include <platform/CHIPDeviceLayer.h>

static void IncrementIntCounter(chip::System::Layer *, void * state)
{
++(*static_cast<int *>(state));
}

static void StopEventLoop(chip::System::Layer *, void *)
{
chip::DeviceLayer::PlatformMgr().StopEventLoopTask();
}

static void CheckScheduleWorkTwice(nlTestSuite * inSuite, void * aContext)
{
int * callCount = new int(0);
NL_TEST_ASSERT(inSuite, chip::DeviceLayer::SystemLayer().ScheduleWork(IncrementIntCounter, callCount) == CHIP_NO_ERROR);
NL_TEST_ASSERT(inSuite, chip::DeviceLayer::SystemLayer().ScheduleWork(IncrementIntCounter, callCount) == CHIP_NO_ERROR);
NL_TEST_ASSERT(inSuite, chip::DeviceLayer::SystemLayer().ScheduleWork(StopEventLoop, nullptr) == CHIP_NO_ERROR);
chip::DeviceLayer::PlatformMgr().RunEventLoop();
NL_TEST_ASSERT(inSuite, *callCount == 2);
delete callCount;
}

// Test Suite

/**
* Test Suite. It lists all the test functions.
*/
// clang-format off
static const nlTest sTests[] =
{
NL_TEST_DEF("System::TestScheduleWorkTwice", CheckScheduleWorkTwice),
NL_TEST_SENTINEL()
};
// clang-format on

static int TestSetup(void * aContext);
static int TestTeardown(void * aContext);

// clang-format off
static nlTestSuite kTheSuite =
{
"chip-system-schedule-work",
&sTests[0],
TestSetup,
TestTeardown
};
// clang-format on

/**
* Set up the test suite.
*/
static int TestSetup(void * aContext)
{
if (chip::Platform::MemoryInit() != CHIP_NO_ERROR)
{
return FAILURE;
}

if (chip::DeviceLayer::PlatformMgr().InitChipStack() != CHIP_NO_ERROR)
return FAILURE;

return (SUCCESS);
}

/**
* Tear down the test suite.
* Free memory reserved at TestSetup.
*/
static int TestTeardown(void * aContext)
{
chip::DeviceLayer::PlatformMgr().Shutdown();
chip::Platform::MemoryShutdown();
return (SUCCESS);
}

int TestSystemScheduleWork(void)
{
// Run test suit againt one lContext.
nlTestRunner(&kTheSuite, nullptr);

return nlTestRunnerStats(&kTheSuite);
}

CHIP_REGISTER_TEST_SUITE(TestSystemScheduleWork)
Loading