Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stepper optimisations #25474

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
27cca14
When moving fast, do not waste CPU cycles applying an offset to impro…
tombrazier Feb 20, 2023
52771c5
For the fastest step rates, do the least work in calc_timer_interval()
tombrazier Feb 20, 2023
e42ba84
Save 256 bytes of PROGMEM and gain cycles by saving the LERP gradient…
tombrazier Feb 20, 2023
535a931
Use hal_timer_t for timers in the stepper ISR
tombrazier Feb 20, 2023
8391f31
The compiler is not smart enough to distribute the multiplication thr…
tombrazier Mar 3, 2023
bbf1d4e
Removed redundant check against step_needed
tombrazier Mar 3, 2023
a60777c
Use the AxisFlags bitfield rather than the xyze_bool_t array for step…
tombrazier Mar 4, 2023
67ee043
Add 1 byte to RAM and save a lot of load/compare of 32 bit numbers
tombrazier Mar 4, 2023
848a638
Helped the compiler to keep bresenham logic in registers
tombrazier Mar 4, 2023
3469556
As well as not needing to disable interrupts on AVR, you do not need …
tombrazier Mar 5, 2023
6ecf510
Added multistepping debugging to max7219
tombrazier Mar 6, 2023
62870e5
Not clean: added dynamic CPU control
tombrazier Mar 6, 2023
c537719
Merge branch 'bugfix-2.1.x' into stepper_optimisations
tombrazier Mar 6, 2023
e34ce79
Update createSpeedLookupTable.py
thinkyhead Mar 6, 2023
0c1057c
remove dead friends
thinkyhead Mar 6, 2023
4a8ca11
sei() automatically invoked by AVR reti
thinkyhead Mar 6, 2023
6c8431a
style tweak
thinkyhead Mar 6, 2023
ae934bf
Minor calc_timer_interval optimizations
thinkyhead Mar 6, 2023
ea948d6
style tweak
thinkyhead Mar 6, 2023
6c039bd
Corrected logic for speed_lookuptable_fast to match values from 27cca…
tombrazier Mar 7, 2023
1a51ec8
Optimisation of dynamic multi-stepping
tombrazier Mar 7, 2023
25bd2fb
Don't update max7219 from within stepper ISR
tombrazier Mar 8, 2023
f5b69e4
Use steps_per_isr directly
thinkyhead Mar 11, 2023
a648a3c
Actually, don't modify
thinkyhead Mar 11, 2023
3a4e3f6
Drop MAX_STEP_ISR_FREQUENCY_*X
thinkyhead Mar 11, 2023
010ab01
Merge branch 'bugfix-2.1.x' into pr/25474
thinkyhead Mar 11, 2023
2fc197e
cosmetic
thinkyhead Mar 11, 2023
ea1ee11
Added MULTISTEPPING_LIMIT to config to limit the effect of multistepp…
tombrazier Mar 11, 2023
cb933fb
tweaks to allow CI test
thinkyhead Mar 11, 2023
776cfaa
MULTISTEPPING_LIMIT must be simple
thinkyhead Mar 11, 2023
e694a9c
comment tweak
thinkyhead Mar 11, 2023
3b1af15
comment tweak
thinkyhead Mar 11, 2023
96ed3d0
move conditional
thinkyhead Mar 11, 2023
216ba61
reorder calc_timer_interval for comparison
thinkyhead Mar 11, 2023
3644860
"calc_multistep_timer_interval"
thinkyhead Mar 11, 2023
49d69aa
filter by multistepping limit
thinkyhead Mar 12, 2023
3150048
Merge branch 'bugfix-2.1.x' into pr/25474
thinkyhead Mar 12, 2023
6ec1f29
general cleanups from review
thinkyhead Mar 12, 2023
b8455e7
Significant optimisation of input shaping
tombrazier Mar 12, 2023
0a2df85
format tweak
thinkyhead Mar 12, 2023
3a6716c
branch-not-taken is faster…
thinkyhead Mar 12, 2023
4db7e8c
Merge branch 'bugfix-2.1.x' into pr/25474
thinkyhead Mar 12, 2023
3cb8b2d
Merge branch 'bugfix-2.1.x' into pr/25474
thinkyhead Mar 13, 2023
fa0e4d8
remove Q
thinkyhead Mar 17, 2023
8c70c3e
Old adaptive multi-stepping option
thinkyhead Mar 17, 2023
e7a2bab
tweak e motor direction
thinkyhead Mar 17, 2023
679a587
Merge branch 'bugfix-2.1.x' into pr/25474
thinkyhead Mar 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 111 additions & 55 deletions Marlin/src/module/stepper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ uint32_t Stepper::acceleration_time, Stepper::deceleration_time;
uint8_t Stepper::steps_per_isr = 1; // Count of steps to perform per Stepper ISR call
#endif

#if DISABLED(OLD_ADAPTIVE_MULTISTEPPING)
hal_timer_t Stepper::time_spent_in_isr = 0, Stepper::time_spent_out_isr = 0;
#endif

#if ENABLED(FREEZE_FEATURE)
bool Stepper::frozen; // = false
#endif
Expand Down Expand Up @@ -614,27 +618,26 @@ void Stepper::set_directions() {
TERN_(HAS_V_DIR, SET_STEP_DIR(V));
TERN_(HAS_W_DIR, SET_STEP_DIR(W));

#if ENABLED(MIXING_EXTRUDER)
#if HAS_EXTRUDERS
// Because this is valid for the whole block we don't know
// what E steppers will step. Likely all. Set all.
if (motor_direction(E_AXIS)) {
MIXER_STEPPER_LOOP(j) REV_E_DIR(j);
count_direction.e = -1;
}
else {
MIXER_STEPPER_LOOP(j) NORM_E_DIR(j);
count_direction.e = 1;
}
#elif HAS_EXTRUDERS
if (motor_direction(E_AXIS)) {
REV_E_DIR(stepper_extruder);
#if ENABLED(MIXING_EXTRUDER)
MIXER_STEPPER_LOOP(j) REV_E_DIR(j);
#else
REV_E_DIR(stepper_extruder);
#endif
count_direction.e = -1;
}
else {
NORM_E_DIR(stepper_extruder);
#if ENABLED(MIXING_EXTRUDER)
MIXER_STEPPER_LOOP(j) NORM_E_DIR(j);
#else
NORM_E_DIR(stepper_extruder);
#endif
count_direction.e = 1;
}
#endif
#endif // HAS_EXTRUDERS

DIR_WAIT_AFTER();
}
Expand Down Expand Up @@ -1587,16 +1590,44 @@ void Stepper::isr() {
*/
min_ticks = HAL_timer_get_count(MF_TIMER_STEP) + hal_timer_t(TERN(__AVR__, 8, 1) * (STEPPER_TIMER_TICKS_PER_US));

/**
* NB: If for some reason the stepper monopolizes the MPU, eventually the
* timer will wrap around (and so will 'next_isr_ticks'). So, limit the
* loop to 10 iterations. Beyond that, there's no way to ensure correct pulse
* timing, since the MCU isn't fast enough.
*/
if (!--max_loops) next_isr_ticks = min_ticks;
#if ENABLED(OLD_ADAPTIVE_MULTISTEPPING)
/**
* NB: If for some reason the stepper monopolizes the MPU, eventually the
* timer will wrap around (and so will 'next_isr_ticks'). So, limit the
* loop to 10 iterations. Beyond that, there's no way to ensure correct pulse
* timing, since the MCU isn't fast enough.
*/
if (!--max_loops) next_isr_ticks = min_ticks;
#endif

// Advance pulses if not enough time to wait for the next ISR
} while (next_isr_ticks < min_ticks);
} while (TERN(OLD_ADAPTIVE_MULTISTEPPING, true, --max_loops) && next_isr_ticks < min_ticks);

#if DISABLED(OLD_ADAPTIVE_MULTISTEPPING)

// Track the time spent in the ISR
const hal_timer_t time_spent = HAL_timer_get_count(MF_TIMER_STEP);
time_spent_in_isr += time_spent;

if (next_isr_ticks < min_ticks) {
next_isr_ticks = min_ticks;

// When forced out of the ISR, increase multi-stepping
#if MULTISTEPPING_LIMIT > 1
if (steps_per_isr < MULTISTEPPING_LIMIT) {
steps_per_isr <<= 1;
// ticks_nominal will need to be recalculated if we are in cruise phase
ticks_nominal = 0;
}
#endif
}
else {
// Track the time spent voluntarily outside the ISR
time_spent_out_isr += next_isr_ticks;
time_spent_out_isr -= time_spent;
}

#endif // !OLD_ADAPTIVE_MULTISTEPPING

// Now 'next_isr_ticks' contains the period to the next Stepper ISR, and we are
// sure that the time has not arrived yet (guaranteed by the scheduler)
Expand Down Expand Up @@ -2091,44 +2122,56 @@ hal_timer_t Stepper::calc_timer_interval(uint32_t step_rate) {

// Get the timer interval and the number of loops to perform per tick
hal_timer_t Stepper::calc_multistep_timer_interval(uint32_t step_rate) {
#if MULTISTEPPING_LIMIT == 1

// Just make sure the step rate is doable
NOMORE(step_rate, uint32_t(MAX_STEP_ISR_FREQUENCY_1X));
#if ENABLED(OLD_ADAPTIVE_MULTISTEPPING)

#else
#if MULTISTEPPING_LIMIT == 1

// The stepping frequency limits for each multistepping rate
static const uint32_t limit[] PROGMEM = {
( MAX_STEP_ISR_FREQUENCY_1X )
, ( MAX_STEP_ISR_FREQUENCY_2X >> 1)
#if MULTISTEPPING_LIMIT >= 4
, ( MAX_STEP_ISR_FREQUENCY_4X >> 2)
#endif
#if MULTISTEPPING_LIMIT >= 8
, ( MAX_STEP_ISR_FREQUENCY_8X >> 3)
#endif
#if MULTISTEPPING_LIMIT >= 16
, ( MAX_STEP_ISR_FREQUENCY_16X >> 4)
#endif
#if MULTISTEPPING_LIMIT >= 32
, ( MAX_STEP_ISR_FREQUENCY_32X >> 5)
#endif
#if MULTISTEPPING_LIMIT >= 64
, ( MAX_STEP_ISR_FREQUENCY_64X >> 6)
#endif
#if MULTISTEPPING_LIMIT >= 128
, (MAX_STEP_ISR_FREQUENCY_128X >> 7)
#endif
};
// Just make sure the step rate is doable
NOMORE(step_rate, uint32_t(MAX_STEP_ISR_FREQUENCY_1X));

// Find a doable step rate using multistepping
uint8_t multistep = 1;
for (uint8_t i = 0; i < COUNT(limit) && step_rate > uint32_t(pgm_read_dword(&limit[i])); ++i) {
step_rate >>= 1;
multistep <<= 1;
}
steps_per_isr = multistep;
#else

// The stepping frequency limits for each multistepping rate
static const uint32_t limit[] PROGMEM = {
( MAX_STEP_ISR_FREQUENCY_1X )
, (((F_CPU) / ISR_EXECUTION_CYCLES(1)) >> 1)
#if MULTISTEPPING_LIMIT >= 4
, (((F_CPU) / ISR_EXECUTION_CYCLES(2)) >> 2)
#endif
#if MULTISTEPPING_LIMIT >= 8
, (((F_CPU) / ISR_EXECUTION_CYCLES(3)) >> 3)
#endif
#if MULTISTEPPING_LIMIT >= 16
, (((F_CPU) / ISR_EXECUTION_CYCLES(4)) >> 4)
#endif
#if MULTISTEPPING_LIMIT >= 32
, (((F_CPU) / ISR_EXECUTION_CYCLES(5)) >> 5)
#endif
#if MULTISTEPPING_LIMIT >= 64
, (((F_CPU) / ISR_EXECUTION_CYCLES(6)) >> 6)
#endif
#if MULTISTEPPING_LIMIT >= 128
, (((F_CPU) / ISR_EXECUTION_CYCLES(7)) >> 7)
#endif
};

// Find a doable step rate using multistepping
uint8_t multistep = 1;
for (uint8_t i = 0; i < COUNT(limit) && step_rate > uint32_t(pgm_read_dword(&limit[i])); ++i) {
step_rate >>= 1;
multistep <<= 1;
}
steps_per_isr = multistep;

#endif

#elif MULTISTEPPING_LIMIT > 1

uint8_t loops = steps_per_isr;
if (MULTISTEPPING_LIMIT >= 16 && loops >= 16) { step_rate >>= 4; loops >>= 4; }
if (MULTISTEPPING_LIMIT >= 4 && loops >= 4) { step_rate >>= 2; loops >>= 2; }
if (MULTISTEPPING_LIMIT >= 2 && loops >= 2) { step_rate >>= 1; }

#endif

Expand All @@ -2141,6 +2184,19 @@ hal_timer_t Stepper::calc_multistep_timer_interval(uint32_t step_rate) {
* have been done, so it is less time critical.
*/
hal_timer_t Stepper::block_phase_isr() {
#if DISABLED(OLD_ADAPTIVE_MULTISTEPPING)
// If the ISR uses < 50% of MPU time, halve multi-stepping
const hal_timer_t time_spent = HAL_timer_get_count(MF_TIMER_STEP);
#if MULTISTEPPING_LIMIT > 1
if (steps_per_isr > 1 && time_spent_out_isr >= time_spent_in_isr + time_spent) {
steps_per_isr >>= 1;
// ticks_nominal will need to be recalculated if we are in cruise phase
ticks_nominal = 0;
}
#endif
time_spent_in_isr = -time_spent; // unsigned but guaranteed to be +ve when needed
time_spent_out_isr = 0;
#endif

// If no queued movements, just wait 1ms for the next block
hal_timer_t interval = (STEPPER_TIMER_RATE) / 1000UL;
Expand Down
29 changes: 13 additions & 16 deletions Marlin/src/module/stepper.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,12 +212,12 @@
#error "Expected at least one of MINIMUM_STEPPER_PULSE or MAXIMUM_STEPPER_RATE to be defined"
#endif

// The loop takes the base time plus the time for all the bresenham logic for R pulses plus the time
// between pulses for (R-1) pulses. But the user could be enforcing a minimum time so the loop time is:
// The loop takes the base time plus the time for all the bresenham logic for 1 << R pulses plus the time
// between pulses for ((1 << R) - 1) pulses. But the user could be enforcing a minimum time so the loop time is:
#define ISR_LOOP_CYCLES(R) ((ISR_LOOP_BASE_CYCLES + MIN_ISR_LOOP_CYCLES + MIN_STEPPER_PULSE_CYCLES) * ((1UL << R) - 1) + _MAX(MIN_ISR_LOOP_CYCLES, MIN_STEPPER_PULSE_CYCLES))

// Model input shaping as an extra loop call
#define ISR_SHAPING_LOOP_CYCLES(R) (TERN0(HAS_SHAPING, ((ISR_LOOP_BASE_CYCLES) + TERN0(INPUT_SHAPING_X, ISR_X_STEPPER_CYCLES) + TERN0(INPUT_SHAPING_Y, ISR_Y_STEPPER_CYCLES)) << R))
#define ISR_SHAPING_LOOP_CYCLES(R) (TERN0(HAS_SHAPING, (ISR_LOOP_BASE_CYCLES + TERN0(INPUT_SHAPING_X, ISR_X_STEPPER_CYCLES) + TERN0(INPUT_SHAPING_Y, ISR_Y_STEPPER_CYCLES)) << R))

// If linear advance is enabled, then it is handled separately
#if ENABLED(LIN_ADVANCE)
Expand All @@ -241,24 +241,17 @@
#define ISR_LA_LOOP_CYCLES 0UL
#endif

// Now estimate the total ISR execution time in cycles given a step per ISR multiplier
#define ISR_EXECUTION_CYCLES(R) (((ISR_BASE_CYCLES + ISR_S_CURVE_CYCLES + ISR_SHAPING_BASE_CYCLES + ISR_LOOP_CYCLES(R) + ISR_SHAPING_LOOP_CYCLES(R) + ISR_LA_BASE_CYCLES + ISR_LA_LOOP_CYCLES)) >> R)
// Estimate the total ISR execution time in cycles given a step-per-ISR shift multiplier
#define ISR_EXECUTION_CYCLES(R) ((ISR_BASE_CYCLES + ISR_S_CURVE_CYCLES + ISR_SHAPING_BASE_CYCLES + ISR_LOOP_CYCLES(R) + ISR_SHAPING_LOOP_CYCLES(R) + ISR_LA_BASE_CYCLES + ISR_LA_LOOP_CYCLES) >> R)

// The maximum allowable stepping frequency when doing x128-x1 stepping (in Hz)
#define MAX_STEP_ISR_FREQUENCY_128X ((F_CPU) / ISR_EXECUTION_CYCLES(7))
#define MAX_STEP_ISR_FREQUENCY_64X ((F_CPU) / ISR_EXECUTION_CYCLES(6))
#define MAX_STEP_ISR_FREQUENCY_32X ((F_CPU) / ISR_EXECUTION_CYCLES(5))
#define MAX_STEP_ISR_FREQUENCY_16X ((F_CPU) / ISR_EXECUTION_CYCLES(4))
#define MAX_STEP_ISR_FREQUENCY_8X ((F_CPU) / ISR_EXECUTION_CYCLES(3))
#define MAX_STEP_ISR_FREQUENCY_4X ((F_CPU) / ISR_EXECUTION_CYCLES(2))
#define MAX_STEP_ISR_FREQUENCY_2X ((F_CPU) / ISR_EXECUTION_CYCLES(1))
#define MAX_STEP_ISR_FREQUENCY_1X ((F_CPU) / ISR_EXECUTION_CYCLES(0))
// The maximum allowable stepping frequency when doing 1x stepping (in Hz)
#define MAX_STEP_ISR_FREQUENCY_1X ((F_CPU) / ISR_EXECUTION_CYCLES(0))

// The minimum step ISR rate used by ADAPTIVE_STEP_SMOOTHING to target 50% CPU usage
// This does not account for the possibility of multi-stepping.
// Should a MULTISTEPPING_LIMIT of 1 be required with ADAPTIVE_STEP_SMOOTHING?
#define MIN_STEP_ISR_FREQUENCY (MAX_STEP_ISR_FREQUENCY_1X / 2)
#define MIN_STEP_ISR_FREQUENCY (MAX_STEP_ISR_FREQUENCY_1X >> 1)

// Number of axes that could be enabled/disabled. Dual/multiple steppers are combined.
#define ENABLE_COUNT (NUM_AXES + E_STEPPERS)
typedef bits_t(ENABLE_COUNT) ena_mask_t;

Expand Down Expand Up @@ -547,6 +540,10 @@ class Stepper {
static uint8_t steps_per_isr;
#endif

#if DISABLED(OLD_ADAPTIVE_MULTISTEPPING)
static hal_timer_t time_spent_in_isr, time_spent_out_isr;
#endif

#if ENABLED(ADAPTIVE_STEP_SMOOTHING)
static uint8_t oversampling_factor; // Oversampling factor (log2(multiplier)) to increase temporal resolution of axis
#else
Expand Down