Skip to content

Commit f0b037c

Browse files
authored
Merge pull request #568 from jkczyz/2020-03-handle-error-deadlock
Fix deadlock in ChannelManager's handle_error!()
2 parents b8876a9 + 3968647 commit f0b037c

File tree

2 files changed

+124
-107
lines changed

2 files changed

+124
-107
lines changed

lightning/src/ln/channelmanager.rs

+54-107
Original file line numberDiff line numberDiff line change
@@ -467,21 +467,41 @@ pub struct ChannelDetails {
467467
}
468468

469469
macro_rules! handle_error {
470-
($self: ident, $internal: expr, $their_node_id: expr, $locked_channel_state: expr) => {
470+
($self: ident, $internal: expr, $their_node_id: expr) => {
471471
match $internal {
472472
Ok(msg) => Ok(msg),
473473
Err(MsgHandleErrInternal { err, shutdown_finish }) => {
474+
#[cfg(debug_assertions)]
475+
{
476+
// In testing, ensure there are no deadlocks where the lock is already held upon
477+
// entering the macro.
478+
assert!($self.channel_state.try_lock().is_ok());
479+
}
480+
481+
let mut msg_events = Vec::with_capacity(2);
482+
474483
if let Some((shutdown_res, update_option)) = shutdown_finish {
475484
$self.finish_force_close_channel(shutdown_res);
476485
if let Some(update) = update_option {
477-
$locked_channel_state.pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
486+
msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
478487
msg: update
479488
});
480489
}
481490
}
491+
482492
log_error!($self, "{}", err.err);
483493
if let msgs::ErrorAction::IgnoreError = err.action {
484-
} else { $locked_channel_state.pending_msg_events.push(events::MessageSendEvent::HandleError { node_id: $their_node_id, action: err.action.clone() }); }
494+
} else {
495+
msg_events.push(events::MessageSendEvent::HandleError {
496+
node_id: $their_node_id,
497+
action: err.action.clone()
498+
});
499+
}
500+
501+
if !msg_events.is_empty() {
502+
$self.channel_state.lock().unwrap().pending_msg_events.append(&mut msg_events);
503+
}
504+
485505
// Return error in case higher-API need one
486506
Err(err)
487507
},
@@ -1191,9 +1211,8 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
11911211

11921212
let _ = self.total_consistency_lock.read().unwrap();
11931213

1194-
let mut channel_lock = self.channel_state.lock().unwrap();
11951214
let err: Result<(), _> = loop {
1196-
1215+
let mut channel_lock = self.channel_state.lock().unwrap();
11971216
let id = match channel_lock.short_to_id.get(&route.hops.first().unwrap().short_channel_id) {
11981217
None => return Err(APIError::ChannelUnavailable{err: "No channel available with first hop!"}),
11991218
Some(id) => id.clone(),
@@ -1242,7 +1261,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
12421261
return Ok(());
12431262
};
12441263

1245-
match handle_error!(self, err, route.hops.first().unwrap().pubkey, channel_lock) {
1264+
match handle_error!(self, err, route.hops.first().unwrap().pubkey) {
12461265
Ok(_) => unreachable!(),
12471266
Err(e) => { Err(APIError::ChannelUnavailable { err: e.err }) }
12481267
}
@@ -1261,8 +1280,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
12611280
let _ = self.total_consistency_lock.read().unwrap();
12621281

12631282
let (mut chan, msg, chan_monitor) = {
1264-
let mut channel_state = self.channel_state.lock().unwrap();
1265-
let (res, chan) = match channel_state.by_id.remove(temporary_channel_id) {
1283+
let (res, chan) = match self.channel_state.lock().unwrap().by_id.remove(temporary_channel_id) {
12661284
Some(mut chan) => {
12671285
(chan.get_outbound_funding_created(funding_txo)
12681286
.map_err(|e| if let ChannelError::Close(msg) = e {
@@ -1272,7 +1290,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
12721290
},
12731291
None => return
12741292
};
1275-
match handle_error!(self, res, chan.get_their_node_id(), channel_state) {
1293+
match handle_error!(self, res, chan.get_their_node_id()) {
12761294
Ok(funding_msg) => {
12771295
(chan, funding_msg.0, funding_msg.1)
12781296
},
@@ -1284,12 +1302,9 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
12841302
if let Err(e) = self.monitor.add_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
12851303
match e {
12861304
ChannelMonitorUpdateErr::PermanentFailure => {
1287-
{
1288-
let mut channel_state = self.channel_state.lock().unwrap();
1289-
match handle_error!(self, Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", *temporary_channel_id, chan.force_shutdown(true), None)), chan.get_their_node_id(), channel_state) {
1290-
Err(_) => { return; },
1291-
Ok(()) => unreachable!(),
1292-
}
1305+
match handle_error!(self, Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", *temporary_channel_id, chan.force_shutdown(true), None)), chan.get_their_node_id()) {
1306+
Err(_) => { return; },
1307+
Ok(()) => unreachable!(),
12931308
}
12941309
},
12951310
ChannelMonitorUpdateErr::TemporaryFailure => {
@@ -1520,10 +1535,8 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
15201535
},
15211536
ChannelError::CloseDelayBroadcast { .. } => { panic!("Wait is only generated on receipt of channel_reestablish, which is handled by try_chan_entry, we don't bother to support it here"); }
15221537
};
1523-
match handle_error!(self, err, their_node_id, channel_state) {
1524-
Ok(_) => unreachable!(),
1525-
Err(_) => { continue; },
1526-
}
1538+
handle_errors.push((their_node_id, err));
1539+
continue;
15271540
}
15281541
};
15291542
if let Err(e) = self.monitor.update_monitor(chan.get().get_funding_txo().unwrap(), monitor_update) {
@@ -1579,11 +1592,8 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
15791592
};
15801593
}
15811594

1582-
if handle_errors.len() > 0 {
1583-
let mut channel_state_lock = self.channel_state.lock().unwrap();
1584-
for (their_node_id, err) in handle_errors.drain(..) {
1585-
let _ = handle_error!(self, err, their_node_id, channel_state_lock);
1586-
}
1595+
for (their_node_id, err) in handle_errors.drain(..) {
1596+
let _ = handle_error!(self, err, their_node_id);
15871597
}
15881598

15891599
if new_events.is_empty() { return }
@@ -1835,7 +1845,8 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
18351845
return;
18361846
};
18371847

1838-
let _ = handle_error!(self, err, their_node_id, channel_state_lock);
1848+
mem::drop(channel_state_lock);
1849+
let _ = handle_error!(self, err, their_node_id);
18391850
}
18401851

18411852
/// Gets the node_id held by this ChannelManager
@@ -2579,9 +2590,9 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
25792590
#[doc(hidden)]
25802591
pub fn update_fee(&self, channel_id: [u8;32], feerate_per_kw: u64) -> Result<(), APIError> {
25812592
let _ = self.total_consistency_lock.read().unwrap();
2582-
let mut channel_state_lock = self.channel_state.lock().unwrap();
25832593
let their_node_id;
25842594
let err: Result<(), _> = loop {
2595+
let mut channel_state_lock = self.channel_state.lock().unwrap();
25852596
let channel_state = &mut *channel_state_lock;
25862597

25872598
match channel_state.by_id.entry(channel_id) {
@@ -2620,7 +2631,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref> ChannelMan
26202631
return Ok(())
26212632
};
26222633

2623-
match handle_error!(self, err, their_node_id, channel_state_lock) {
2634+
match handle_error!(self, err, their_node_id) {
26242635
Ok(_) => unreachable!(),
26252636
Err(e) => { Err(APIError::APIMisuseError { err: e.err })}
26262637
}
@@ -2830,146 +2841,82 @@ impl<ChanSigner: ChannelKeys, M: Deref + Sync + Send, T: Deref + Sync + Send, K:
28302841
{
28312842
fn handle_open_channel(&self, their_node_id: &PublicKey, their_features: InitFeatures, msg: &msgs::OpenChannel) {
28322843
let _ = self.total_consistency_lock.read().unwrap();
2833-
let res = self.internal_open_channel(their_node_id, their_features, msg);
2834-
if res.is_err() {
2835-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2836-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2837-
}
2844+
let _ = handle_error!(self, self.internal_open_channel(their_node_id, their_features, msg), *their_node_id);
28382845
}
28392846

28402847
fn handle_accept_channel(&self, their_node_id: &PublicKey, their_features: InitFeatures, msg: &msgs::AcceptChannel) {
28412848
let _ = self.total_consistency_lock.read().unwrap();
2842-
let res = self.internal_accept_channel(their_node_id, their_features, msg);
2843-
if res.is_err() {
2844-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2845-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2846-
}
2849+
let _ = handle_error!(self, self.internal_accept_channel(their_node_id, their_features, msg), *their_node_id);
28472850
}
28482851

28492852
fn handle_funding_created(&self, their_node_id: &PublicKey, msg: &msgs::FundingCreated) {
28502853
let _ = self.total_consistency_lock.read().unwrap();
2851-
let res = self.internal_funding_created(their_node_id, msg);
2852-
if res.is_err() {
2853-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2854-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2855-
}
2854+
let _ = handle_error!(self, self.internal_funding_created(their_node_id, msg), *their_node_id);
28562855
}
28572856

28582857
fn handle_funding_signed(&self, their_node_id: &PublicKey, msg: &msgs::FundingSigned) {
28592858
let _ = self.total_consistency_lock.read().unwrap();
2860-
let res = self.internal_funding_signed(their_node_id, msg);
2861-
if res.is_err() {
2862-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2863-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2864-
}
2859+
let _ = handle_error!(self, self.internal_funding_signed(their_node_id, msg), *their_node_id);
28652860
}
28662861

28672862
fn handle_funding_locked(&self, their_node_id: &PublicKey, msg: &msgs::FundingLocked) {
28682863
let _ = self.total_consistency_lock.read().unwrap();
2869-
let res = self.internal_funding_locked(their_node_id, msg);
2870-
if res.is_err() {
2871-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2872-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2873-
}
2864+
let _ = handle_error!(self, self.internal_funding_locked(their_node_id, msg), *their_node_id);
28742865
}
28752866

28762867
fn handle_shutdown(&self, their_node_id: &PublicKey, msg: &msgs::Shutdown) {
28772868
let _ = self.total_consistency_lock.read().unwrap();
2878-
let res = self.internal_shutdown(their_node_id, msg);
2879-
if res.is_err() {
2880-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2881-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2882-
}
2869+
let _ = handle_error!(self, self.internal_shutdown(their_node_id, msg), *their_node_id);
28832870
}
28842871

28852872
fn handle_closing_signed(&self, their_node_id: &PublicKey, msg: &msgs::ClosingSigned) {
28862873
let _ = self.total_consistency_lock.read().unwrap();
2887-
let res = self.internal_closing_signed(their_node_id, msg);
2888-
if res.is_err() {
2889-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2890-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2891-
}
2874+
let _ = handle_error!(self, self.internal_closing_signed(their_node_id, msg), *their_node_id);
28922875
}
28932876

28942877
fn handle_update_add_htlc(&self, their_node_id: &PublicKey, msg: &msgs::UpdateAddHTLC) {
28952878
let _ = self.total_consistency_lock.read().unwrap();
2896-
let res = self.internal_update_add_htlc(their_node_id, msg);
2897-
if res.is_err() {
2898-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2899-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2900-
}
2879+
let _ = handle_error!(self, self.internal_update_add_htlc(their_node_id, msg), *their_node_id);
29012880
}
29022881

29032882
fn handle_update_fulfill_htlc(&self, their_node_id: &PublicKey, msg: &msgs::UpdateFulfillHTLC) {
29042883
let _ = self.total_consistency_lock.read().unwrap();
2905-
let res = self.internal_update_fulfill_htlc(their_node_id, msg);
2906-
if res.is_err() {
2907-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2908-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2909-
}
2884+
let _ = handle_error!(self, self.internal_update_fulfill_htlc(their_node_id, msg), *their_node_id);
29102885
}
29112886

29122887
fn handle_update_fail_htlc(&self, their_node_id: &PublicKey, msg: &msgs::UpdateFailHTLC) {
29132888
let _ = self.total_consistency_lock.read().unwrap();
2914-
let res = self.internal_update_fail_htlc(their_node_id, msg);
2915-
if res.is_err() {
2916-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2917-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2918-
}
2889+
let _ = handle_error!(self, self.internal_update_fail_htlc(their_node_id, msg), *their_node_id);
29192890
}
29202891

29212892
fn handle_update_fail_malformed_htlc(&self, their_node_id: &PublicKey, msg: &msgs::UpdateFailMalformedHTLC) {
29222893
let _ = self.total_consistency_lock.read().unwrap();
2923-
let res = self.internal_update_fail_malformed_htlc(their_node_id, msg);
2924-
if res.is_err() {
2925-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2926-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2927-
}
2894+
let _ = handle_error!(self, self.internal_update_fail_malformed_htlc(their_node_id, msg), *their_node_id);
29282895
}
29292896

29302897
fn handle_commitment_signed(&self, their_node_id: &PublicKey, msg: &msgs::CommitmentSigned) {
29312898
let _ = self.total_consistency_lock.read().unwrap();
2932-
let res = self.internal_commitment_signed(their_node_id, msg);
2933-
if res.is_err() {
2934-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2935-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2936-
}
2899+
let _ = handle_error!(self, self.internal_commitment_signed(their_node_id, msg), *their_node_id);
29372900
}
29382901

29392902
fn handle_revoke_and_ack(&self, their_node_id: &PublicKey, msg: &msgs::RevokeAndACK) {
29402903
let _ = self.total_consistency_lock.read().unwrap();
2941-
let res = self.internal_revoke_and_ack(their_node_id, msg);
2942-
if res.is_err() {
2943-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2944-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2945-
}
2904+
let _ = handle_error!(self, self.internal_revoke_and_ack(their_node_id, msg), *their_node_id);
29462905
}
29472906

29482907
fn handle_update_fee(&self, their_node_id: &PublicKey, msg: &msgs::UpdateFee) {
29492908
let _ = self.total_consistency_lock.read().unwrap();
2950-
let res = self.internal_update_fee(their_node_id, msg);
2951-
if res.is_err() {
2952-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2953-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2954-
}
2909+
let _ = handle_error!(self, self.internal_update_fee(their_node_id, msg), *their_node_id);
29552910
}
29562911

29572912
fn handle_announcement_signatures(&self, their_node_id: &PublicKey, msg: &msgs::AnnouncementSignatures) {
29582913
let _ = self.total_consistency_lock.read().unwrap();
2959-
let res = self.internal_announcement_signatures(their_node_id, msg);
2960-
if res.is_err() {
2961-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2962-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2963-
}
2914+
let _ = handle_error!(self, self.internal_announcement_signatures(their_node_id, msg), *their_node_id);
29642915
}
29652916

29662917
fn handle_channel_reestablish(&self, their_node_id: &PublicKey, msg: &msgs::ChannelReestablish) {
29672918
let _ = self.total_consistency_lock.read().unwrap();
2968-
let res = self.internal_channel_reestablish(their_node_id, msg);
2969-
if res.is_err() {
2970-
let mut channel_state_lock = self.channel_state.lock().unwrap();
2971-
let _ = handle_error!(self, res, *their_node_id, channel_state_lock);
2972-
}
2919+
let _ = handle_error!(self, self.internal_channel_reestablish(their_node_id, msg), *their_node_id);
29732920
}
29742921

29752922
fn peer_disconnected(&self, their_node_id: &PublicKey, no_connection_possible: bool) {

lightning/src/ln/functional_tests.rs

+70
Original file line numberDiff line numberDiff line change
@@ -2988,6 +2988,76 @@ fn test_commitment_revoked_fail_backward_exhaustive_b() {
29882988
do_test_commitment_revoked_fail_backward_exhaustive(true, false, true);
29892989
}
29902990

2991+
#[test]
2992+
fn fail_backward_pending_htlc_upon_channel_failure() {
2993+
let chanmon_cfgs = create_chanmon_cfgs(2);
2994+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
2995+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
2996+
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
2997+
let chan = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000, InitFeatures::supported(), InitFeatures::supported());
2998+
2999+
// Alice -> Bob: Route a payment but without Bob sending revoke_and_ack.
3000+
{
3001+
let (_, payment_hash) = get_payment_preimage_hash!(nodes[0]);
3002+
let route = nodes[0].router.get_route(&nodes[1].node.get_our_node_id(), None, &Vec::new(), 50_000, TEST_FINAL_CLTV).unwrap();
3003+
nodes[0].node.send_payment(route, payment_hash).unwrap();
3004+
check_added_monitors!(nodes[0], 1);
3005+
3006+
let payment_event = {
3007+
let mut events = nodes[0].node.get_and_clear_pending_msg_events();
3008+
assert_eq!(events.len(), 1);
3009+
SendEvent::from_event(events.remove(0))
3010+
};
3011+
assert_eq!(payment_event.node_id, nodes[1].node.get_our_node_id());
3012+
assert_eq!(payment_event.msgs.len(), 1);
3013+
}
3014+
3015+
// Alice -> Bob: Route another payment but now Alice waits for Bob's earlier revoke_and_ack.
3016+
let (_, failed_payment_hash) = get_payment_preimage_hash!(nodes[0]);
3017+
{
3018+
let route = nodes[0].router.get_route(&nodes[1].node.get_our_node_id(), None, &Vec::new(), 50_000, TEST_FINAL_CLTV).unwrap();
3019+
nodes[0].node.send_payment(route, failed_payment_hash).unwrap();
3020+
check_added_monitors!(nodes[0], 0);
3021+
3022+
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
3023+
}
3024+
3025+
// Alice <- Bob: Send a malformed update_add_htlc so Alice fails the channel.
3026+
{
3027+
let route = nodes[1].router.get_route(&nodes[0].node.get_our_node_id(), None, &Vec::new(), 50_000, TEST_FINAL_CLTV).unwrap();
3028+
let (_, payment_hash) = get_payment_preimage_hash!(nodes[1]);
3029+
3030+
let secp_ctx = Secp256k1::new();
3031+
let session_priv = {
3032+
let mut session_key = [0; 32];
3033+
let mut rng = thread_rng();
3034+
rng.fill_bytes(&mut session_key);
3035+
SecretKey::from_slice(&session_key).expect("RNG is bad!")
3036+
};
3037+
3038+
let current_height = nodes[1].node.latest_block_height.load(Ordering::Acquire) as u32 + 1;
3039+
let (onion_payloads, _amount_msat, cltv_expiry) = onion_utils::build_onion_payloads(&route, current_height).unwrap();
3040+
let onion_keys = onion_utils::construct_onion_keys(&secp_ctx, &route, &session_priv).unwrap();
3041+
let onion_routing_packet = onion_utils::construct_onion_packet(onion_payloads, onion_keys, [0; 32], &payment_hash);
3042+
3043+
// Send a 0-msat update_add_htlc to fail the channel.
3044+
let update_add_htlc = msgs::UpdateAddHTLC {
3045+
channel_id: chan.2,
3046+
htlc_id: 0,
3047+
amount_msat: 0,
3048+
payment_hash,
3049+
cltv_expiry,
3050+
onion_routing_packet,
3051+
};
3052+
nodes[0].node.handle_update_add_htlc(&nodes[1].node.get_our_node_id(), &update_add_htlc);
3053+
}
3054+
3055+
// Check that Alice fails backward the pending HTLC from the second payment.
3056+
expect_payment_failed!(nodes[0], failed_payment_hash, true);
3057+
check_closed_broadcast!(nodes[0], true);
3058+
check_added_monitors!(nodes[0], 1);
3059+
}
3060+
29913061
#[test]
29923062
fn test_htlc_ignore_latest_remote_commitment() {
29933063
// Test that HTLC transactions spending the latest remote commitment transaction are simply

0 commit comments

Comments
 (0)