Skip to content

Commit ad6931c

Browse files
Backport of fix(txn): validate verbs into release/1.19.x (#21520)
* backport of commit 5807c2c * backport of commit 8ef60b8 --------- Co-authored-by: DanStough <dan.stough@hashicorp.com>
1 parent 3c7555c commit ad6931c

File tree

6 files changed

+350
-30
lines changed

6 files changed

+350
-30
lines changed

.changelog/21519.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:bug
2+
txn: Fix a bug where mismatched Consul server versions could result in undetected data loss for when using newer Transaction verbs.
3+
```

agent/consul/kvs_endpoint.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ func kvsPreApply(logger hclog.Logger, srv *Server, authz resolver.Result, op api
4242
return false, fmt.Errorf("Must provide key")
4343
}
4444

45-
// Apply the ACL policy if any.
45+
// Apply the ACL policy if any, and validate operation.
46+
// enumcover:api.KVOp
4647
switch op {
4748
case api.KVDeleteTree:
4849
var authzContext acl.AuthorizerContext
@@ -66,13 +67,15 @@ func kvsPreApply(logger hclog.Logger, srv *Server, authz resolver.Result, op api
6667
return false, err
6768
}
6869

69-
default:
70+
case api.KVCheckNotExists, api.KVUnlock, api.KVLock, api.KVCAS, api.KVDeleteCAS, api.KVDelete, api.KVSet:
7071
var authzContext acl.AuthorizerContext
7172
dirEnt.FillAuthzContext(&authzContext)
7273

7374
if err := authz.ToAllowAuthorizer().KeyWriteAllowed(dirEnt.Key, &authzContext); err != nil {
7475
return false, err
7576
}
77+
default:
78+
return false, fmt.Errorf("unknown KV operation: %s", op)
7679
}
7780

7881
// If this is a lock, we must check for a lock-delay. Since lock-delay

agent/consul/state/txn.go

+43-13
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,31 @@
44
package state
55

66
import (
7+
"errors"
78
"fmt"
89

910
"github.com/hashicorp/consul/agent/structs"
1011
"github.com/hashicorp/consul/api"
1112
)
1213

14+
type UnsupportedFSMApplyPanicError struct {
15+
Wrapped error
16+
}
17+
18+
func (e *UnsupportedFSMApplyPanicError) Unwrap() error {
19+
return e.Wrapped
20+
}
21+
22+
func (e *UnsupportedFSMApplyPanicError) Error() string {
23+
return e.Wrapped.Error()
24+
}
25+
1326
// txnKVS handles all KV-related operations.
1427
func (s *Store) txnKVS(tx WriteTxn, idx uint64, op *structs.TxnKVOp) (structs.TxnResults, error) {
1528
var entry *structs.DirEntry
1629
var err error
1730

31+
// enumcover: api.KVOp
1832
switch op.Verb {
1933
case api.KVSet:
2034
entry = &op.DirEnt
@@ -95,7 +109,7 @@ func (s *Store) txnKVS(tx WriteTxn, idx uint64, op *structs.TxnKVOp) (structs.Tx
95109
}
96110

97111
default:
98-
err = fmt.Errorf("unknown KV verb %q", op.Verb)
112+
err = &UnsupportedFSMApplyPanicError{fmt.Errorf("unknown KV verb %q", op.Verb)}
99113
}
100114
if err != nil {
101115
return nil, err
@@ -123,11 +137,12 @@ func (s *Store) txnKVS(tx WriteTxn, idx uint64, op *structs.TxnKVOp) (structs.Tx
123137
func txnSession(tx WriteTxn, idx uint64, op *structs.TxnSessionOp) error {
124138
var err error
125139

140+
// enumcover: api.SessionOp
126141
switch op.Verb {
127142
case api.SessionDelete:
128143
err = sessionDeleteWithSession(tx, &op.Session, idx)
129144
default:
130-
err = fmt.Errorf("unknown Session verb %q", op.Verb)
145+
return &UnsupportedFSMApplyPanicError{fmt.Errorf("unknown session verb %q", op.Verb)}
131146
}
132147
if err != nil {
133148
return fmt.Errorf("failed to delete session: %v", err)
@@ -146,11 +161,17 @@ func txnLegacyIntention(tx WriteTxn, idx uint64, op *structs.TxnIntentionOp) err
146161
case structs.IntentionOpDelete:
147162
return legacyIntentionDeleteTxn(tx, idx, op.Intention.ID)
148163
case structs.IntentionOpDeleteAll:
149-
fallthrough // deliberately not available via this api
164+
// deliberately not available via this api
165+
return fmt.Errorf("Intention op not supported %q", op.Op)
150166
case structs.IntentionOpUpsert:
151-
fallthrough // deliberately not available via this api
167+
// deliberately not available via this api
168+
return fmt.Errorf("Intention op not supported %q", op.Op)
152169
default:
153-
return fmt.Errorf("unknown Intention op %q", op.Op)
170+
// If we've gotten to this point, the unknown verb has slipped by
171+
// endpoint validation. This means it could be a mismatch in Server versions
172+
// that are sending known verbs as part of Raft logs. We panic rather than silently
173+
// swallowing the error during Raft Apply.
174+
panic(fmt.Sprintf("unknown Intention op %q", op.Op))
154175
}
155176
}
156177

@@ -202,7 +223,7 @@ func (s *Store) txnNode(tx WriteTxn, idx uint64, op *structs.TxnNodeOp) (structs
202223
}
203224

204225
default:
205-
err = fmt.Errorf("unknown Node verb %q", op.Verb)
226+
err = &UnsupportedFSMApplyPanicError{fmt.Errorf("unknown Node verb %q", op.Verb)}
206227
}
207228
if err != nil {
208229
return nil, err
@@ -271,7 +292,7 @@ func (s *Store) txnService(tx WriteTxn, idx uint64, op *structs.TxnServiceOp) (s
271292
return nil, err
272293

273294
default:
274-
return nil, fmt.Errorf("unknown Service verb %q", op.Verb)
295+
return nil, &UnsupportedFSMApplyPanicError{fmt.Errorf("unknown Service verb %q", op.Verb)}
275296
}
276297
}
277298

@@ -326,7 +347,7 @@ func (s *Store) txnCheck(tx WriteTxn, idx uint64, op *structs.TxnCheckOp) (struc
326347
}
327348

328349
default:
329-
err = fmt.Errorf("unknown Check verb %q", op.Verb)
350+
err = &UnsupportedFSMApplyPanicError{fmt.Errorf("unknown check verb %q", op.Verb)}
330351
}
331352
if err != nil {
332353
return nil, err
@@ -352,7 +373,7 @@ func (s *Store) txnCheck(tx WriteTxn, idx uint64, op *structs.TxnCheckOp) (struc
352373
// txnDispatch runs the given operations inside the state store transaction.
353374
func (s *Store) txnDispatch(tx WriteTxn, idx uint64, ops structs.TxnOps) (structs.TxnResults, structs.TxnErrors) {
354375
results := make(structs.TxnResults, 0, len(ops))
355-
errors := make(structs.TxnErrors, 0, len(ops))
376+
errs := make(structs.TxnErrors, 0, len(ops))
356377
for i, op := range ops {
357378
var ret structs.TxnResults
358379
var err error
@@ -374,24 +395,33 @@ func (s *Store) txnDispatch(tx WriteTxn, idx uint64, ops structs.TxnOps) (struct
374395
// compatibility with pre-1.9.0 raft logs and during upgrades.
375396
err = txnLegacyIntention(tx, idx, op.Intention)
376397
default:
377-
err = fmt.Errorf("no operation specified")
398+
panic("no operation specified")
378399
}
379400

380401
// Accumulate the results.
381402
results = append(results, ret...)
382403

404+
var panicErr *UnsupportedFSMApplyPanicError
405+
if errors.As(err, &panicErr) {
406+
// If we've gotten to this point, the unknown verb has slipped by
407+
// endpoint validation. This means it could be a mismatch in Server versions
408+
// that are sending known verbs as part of Raft logs. We panic rather than silently
409+
// swallowing the error during Raft Apply. See NET-9016 for historical context.
410+
panic(panicErr.Wrapped)
411+
}
412+
383413
// Capture any error along with the index of the operation that
384414
// failed.
385415
if err != nil {
386-
errors = append(errors, &structs.TxnError{
416+
errs = append(errs, &structs.TxnError{
387417
OpIndex: i,
388418
What: err.Error(),
389419
})
390420
}
391421
}
392422

393-
if len(errors) > 0 {
394-
return nil, errors
423+
if len(errs) > 0 {
424+
return nil, errs
395425
}
396426

397427
return results, nil

agent/consul/state/txn_test.go

+61-9
Original file line numberDiff line numberDiff line change
@@ -1058,14 +1058,6 @@ func TestStateStore_Txn_KVS_Rollback(t *testing.T) {
10581058
},
10591059
},
10601060
},
1061-
&structs.TxnOp{
1062-
KV: &structs.TxnKVOp{
1063-
Verb: "nope",
1064-
DirEnt: structs.DirEntry{
1065-
Key: "foo/delete",
1066-
},
1067-
},
1068-
},
10691061
}
10701062
results, errors := s.TxnRW(7, ops)
10711063
if len(errors) != len(ops) {
@@ -1086,7 +1078,6 @@ func TestStateStore_Txn_KVS_Rollback(t *testing.T) {
10861078
`key "nope" doesn't exist`,
10871079
"current modify index",
10881080
`key "nope" doesn't exist`,
1089-
"unknown KV verb",
10901081
}
10911082
if len(errors) != len(expected) {
10921083
t.Fatalf("bad len: %d != %d", len(errors), len(expected))
@@ -1415,3 +1406,64 @@ func TestStateStore_Txn_KVS_ModifyIndexes(t *testing.T) {
14151406
}
14161407
}
14171408
}
1409+
1410+
// TestStateStore_UnknownTxnOperationsPanic validates that unknown txn operations panic.
1411+
// If we error in this case this is from an FSM Apply, the state store of this agent could potentially be out of
1412+
// sync with other agents that applied the operation. In the case of responding to a local endpoint, we require
1413+
// that the operation type be validated prior to being sent to the state store.
1414+
// See NET-9016 for historical context.
1415+
func TestStateStore_UnknownTxnOperationsPanic(t *testing.T) {
1416+
s := testStateStore(t)
1417+
1418+
testCases := []structs.TxnOps{
1419+
{
1420+
&structs.TxnOp{
1421+
KV: &structs.TxnKVOp{
1422+
Verb: "sand-the-floor",
1423+
DirEnt: structs.DirEntry{
1424+
Key: "foo/a",
1425+
},
1426+
},
1427+
},
1428+
},
1429+
{
1430+
&structs.TxnOp{
1431+
Node: &structs.TxnNodeOp{
1432+
Verb: "wax-the-car",
1433+
},
1434+
},
1435+
},
1436+
{
1437+
&structs.TxnOp{
1438+
Service: &structs.TxnServiceOp{
1439+
Verb: "paint-the-house",
1440+
},
1441+
},
1442+
},
1443+
{
1444+
&structs.TxnOp{
1445+
Check: &structs.TxnCheckOp{
1446+
Verb: "paint-the-fence",
1447+
},
1448+
},
1449+
},
1450+
{
1451+
&structs.TxnOp{
1452+
Session: &structs.TxnSessionOp{
1453+
Verb: "sweep-the-knee",
1454+
},
1455+
},
1456+
},
1457+
{
1458+
&structs.TxnOp{
1459+
Intention: &structs.TxnIntentionOp{ // nolint:staticcheck // SA1019 intentional use of deprecated field
1460+
Op: "flying-crane-kick",
1461+
},
1462+
},
1463+
},
1464+
}
1465+
1466+
for _, tc := range testCases {
1467+
require.Panics(t, func() { s.TxnRW(3, tc) })
1468+
}
1469+
}

0 commit comments

Comments
 (0)