Skip to content

Commit 72aa9bd

Browse files
Add an integration test for acquireShard (#3678)
1 parent bece27d commit 72aa9bd

11 files changed

+579
-27
lines changed

common/config/config.go

+72
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,70 @@ type (
251251
}
252252

253253
FaultInjection struct {
254+
// Rate is the probability that we will return an error from any call to any datastore.
255+
// The value should be between 0.0 and 1.0.
256+
// The fault injector will inject different errors depending on the data store and method. See the
257+
// implementation for details.
258+
// This field is ignored if Targets is non-empty.
254259
Rate float64 `yaml:"rate"`
260+
261+
// Targets is a mapping of data store name to a targeted fault injection config for that data store.
262+
// If Targets is non-empty, then Rate is ignored.
263+
// Here is an example config for targeted fault injection. This config will inject errors into the
264+
// UpdateShard method of the ShardStore at a rate of 100%. No other methods will be affected.
265+
/*
266+
targets:
267+
dataStores:
268+
ShardStore:
269+
methods:
270+
UpdateShard:
271+
seed: 42
272+
errors:
273+
ShardOwnershipLostError: 1.0 # all UpdateShard calls will fail with ShardOwnershipLostError
274+
*/
275+
// This will cause the UpdateShard method of the ShardStore to always return ShardOwnershipLostError.
276+
Targets FaultInjectionTargets `yaml:"targets"`
277+
}
278+
279+
// FaultInjectionTargets is the set of targets for fault injection. A target is a method of a data store.
280+
FaultInjectionTargets struct {
281+
// DataStores is a map of datastore name to fault injection config.
282+
// Use this to configure fault injection for specific datastores. The key is the name of the datastore,
283+
// e.g. "ShardStore". See DataStoreName for the list of valid datastore names.
284+
DataStores map[DataStoreName]FaultInjectionDataStoreConfig `yaml:"dataStores"`
285+
}
286+
287+
// DataStoreName is the name of a datastore, e.g. "ShardStore". The full list is defined later in this file.
288+
DataStoreName string
289+
290+
// FaultInjectionDataStoreConfig is the fault injection config for a single datastore, e.g., the ShardStore.
291+
FaultInjectionDataStoreConfig struct {
292+
// Methods is a map of data store method name to a fault injection config for that method.
293+
// We create an error generator that infers the method name from the call stack using reflection.
294+
// For example, if a test with targeted fault injection enabled calls ShardStore.UpdateShard, then
295+
// we fetch the error generator from this map using the key "UpdateShard".
296+
// The key is the name of the method to inject faults for.
297+
// The value is the config for that method.
298+
Methods map[string]FaultInjectionMethodConfig `yaml:"methods"`
299+
}
300+
301+
// FaultInjectionMethodConfig is the fault injection config for a single method of a data store.
302+
FaultInjectionMethodConfig struct {
303+
// Errors is a map of error type to probability of returning that error.
304+
// For example: `ShardOwnershipLostError: 0.1` will cause the method to return a ShardOwnershipLostError 10% of
305+
// the time.
306+
// The other 90% of the time, the method will call the underlying datastore.
307+
// If there are multiple errors for a method, the probability of each error is independent of the others.
308+
// For example, if there are two errors with probabilities 0.1 and 0.2, then the first error will be returned
309+
// 10% of the time, the second error will be returned 20% of the time,
310+
// and the underlying method will be called 70% of the time.
311+
Errors map[string]float64 `yaml:"errors"`
312+
313+
// Seed is the seed for the random number generator used to sample faults from the Errors map. You can use this
314+
// to make the fault injection deterministic.
315+
// If the test config does not set this to a non-zero number, the fault injector will set it to the current time
316+
// in nanoseconds.
317+
Seed int64 `yaml:"seed"`
255318
}
256319

257320
// Cassandra contains configuration to connect to Cassandra cluster
@@ -479,6 +542,15 @@ type (
479542
// @@@SNIPEND
480543
)
481544

545+
const (
546+
ShardStoreName DataStoreName = "ShardStore"
547+
TaskStoreName DataStoreName = "TaskStore"
548+
MetadataStoreName DataStoreName = "MetadataStore"
549+
ExecutionStoreName DataStoreName = "ExecutionStore"
550+
QueueName DataStoreName = "Queue"
551+
ClusterMDStoreName DataStoreName = "ClusterMDStore"
552+
)
553+
482554
// Validate validates this config
483555
func (c *Config) Validate() error {
484556
if err := c.Persistence.Validate(); err != nil {

common/persistence/client/fault_injection.go

+60-20
Original file line numberDiff line numberDiff line change
@@ -141,16 +141,22 @@ func (d *FaultInjectionDataStoreFactory) UpdateRate(rate float64) {
141141
d.Queue.UpdateRate(rate)
142142
d.ClusterMDStore.UpdateRate(rate)
143143
}
144-
145144
func (d *FaultInjectionDataStoreFactory) NewTaskStore() (persistence.TaskStore, error) {
146145
if d.TaskStore == nil {
147146
baseFactory, err := d.baseFactory.NewTaskStore()
148147
if err != nil {
149148
return nil, err
150149
}
151-
d.TaskStore, err = NewFaultInjectionTaskStore(d.ErrorGenerator.Rate(), baseFactory)
152-
if err != nil {
153-
return nil, err
150+
if storeConfig, ok := d.config.Targets.DataStores[config.TaskStoreName]; ok {
151+
d.TaskStore = &FaultInjectionTaskStore{
152+
baseTaskStore: baseFactory,
153+
ErrorGenerator: NewTargetedDataStoreErrorGenerator(&storeConfig),
154+
}
155+
} else {
156+
d.TaskStore, err = NewFaultInjectionTaskStore(d.ErrorGenerator.Rate(), baseFactory)
157+
if err != nil {
158+
return nil, err
159+
}
154160
}
155161
}
156162
return d.TaskStore, nil
@@ -162,9 +168,16 @@ func (d *FaultInjectionDataStoreFactory) NewShardStore() (persistence.ShardStore
162168
if err != nil {
163169
return nil, err
164170
}
165-
d.ShardStore, err = NewFaultInjectionShardStore(d.ErrorGenerator.Rate(), baseFactory)
166-
if err != nil {
167-
return nil, err
171+
if storeConfig, ok := d.config.Targets.DataStores[config.ShardStoreName]; ok {
172+
d.ShardStore = &FaultInjectionShardStore{
173+
baseShardStore: baseFactory,
174+
ErrorGenerator: NewTargetedDataStoreErrorGenerator(&storeConfig),
175+
}
176+
} else {
177+
d.ShardStore, err = NewFaultInjectionShardStore(d.ErrorGenerator.Rate(), baseFactory)
178+
if err != nil {
179+
return nil, err
180+
}
168181
}
169182
}
170183
return d.ShardStore, nil
@@ -176,9 +189,16 @@ func (d *FaultInjectionDataStoreFactory) NewMetadataStore() (persistence.Metadat
176189
if err != nil {
177190
return nil, err
178191
}
179-
d.MetadataStore, err = NewFaultInjectionMetadataStore(d.ErrorGenerator.Rate(), baseStore)
180-
if err != nil {
181-
return nil, err
192+
if storeConfig, ok := d.config.Targets.DataStores[config.MetadataStoreName]; ok {
193+
d.MetadataStore = &FaultInjectionMetadataStore{
194+
baseMetadataStore: baseStore,
195+
ErrorGenerator: NewTargetedDataStoreErrorGenerator(&storeConfig),
196+
}
197+
} else {
198+
d.MetadataStore, err = NewFaultInjectionMetadataStore(d.ErrorGenerator.Rate(), baseStore)
199+
if err != nil {
200+
return nil, err
201+
}
182202
}
183203
}
184204
return d.MetadataStore, nil
@@ -190,9 +210,16 @@ func (d *FaultInjectionDataStoreFactory) NewExecutionStore() (persistence.Execut
190210
if err != nil {
191211
return nil, err
192212
}
193-
d.ExecutionStore, err = NewFaultInjectionExecutionStore(d.ErrorGenerator.Rate(), baseStore)
194-
if err != nil {
195-
return nil, err
213+
if storeConfig, ok := d.config.Targets.DataStores[config.ExecutionStoreName]; ok {
214+
d.ExecutionStore = &FaultInjectionExecutionStore{
215+
baseExecutionStore: baseStore,
216+
ErrorGenerator: NewTargetedDataStoreErrorGenerator(&storeConfig),
217+
}
218+
} else {
219+
d.ExecutionStore, err = NewFaultInjectionExecutionStore(d.ErrorGenerator.Rate(), baseStore)
220+
if err != nil {
221+
return nil, err
222+
}
196223
}
197224

198225
}
@@ -205,11 +232,17 @@ func (d *FaultInjectionDataStoreFactory) NewQueue(queueType persistence.QueueTyp
205232
if err != nil {
206233
return baseQueue, err
207234
}
208-
d.Queue, err = NewFaultInjectionQueue(d.ErrorGenerator.Rate(), baseQueue)
209-
if err != nil {
210-
return nil, err
235+
if storeConfig, ok := d.config.Targets.DataStores[config.QueueName]; ok {
236+
d.Queue = &FaultInjectionQueue{
237+
baseQueue: baseQueue,
238+
ErrorGenerator: NewTargetedDataStoreErrorGenerator(&storeConfig),
239+
}
240+
} else {
241+
d.Queue, err = NewFaultInjectionQueue(d.ErrorGenerator.Rate(), baseQueue)
242+
if err != nil {
243+
return nil, err
244+
}
211245
}
212-
213246
}
214247
return d.Queue, nil
215248
}
@@ -220,9 +253,16 @@ func (d *FaultInjectionDataStoreFactory) NewClusterMetadataStore() (persistence.
220253
if err != nil {
221254
return nil, err
222255
}
223-
d.ClusterMDStore, err = NewFaultInjectionClusterMetadataStore(d.ErrorGenerator.Rate(), baseStore)
224-
if err != nil {
225-
return nil, err
256+
if storeConfig, ok := d.config.Targets.DataStores[config.ClusterMDStoreName]; ok {
257+
d.ClusterMDStore = &FaultInjectionClusterMetadataStore{
258+
baseCMStore: baseStore,
259+
ErrorGenerator: NewTargetedDataStoreErrorGenerator(&storeConfig),
260+
}
261+
} else {
262+
d.ClusterMDStore, err = NewFaultInjectionClusterMetadataStore(d.ErrorGenerator.Rate(), baseStore)
263+
if err != nil {
264+
return nil, err
265+
}
226266
}
227267

228268
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// The MIT License
2+
//
3+
// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved.
4+
//
5+
// Copyright (c) 2020 Uber Technologies, Inc.
6+
//
7+
// Permission is hereby granted, free of charge, to any person obtaining a copy
8+
// of this software and associated documentation files (the "Software"), to deal
9+
// in the Software without restriction, including without limitation the rights
10+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
// copies of the Software, and to permit persons to whom the Software is
12+
// furnished to do so, subject to the following conditions:
13+
//
14+
// The above copyright notice and this permission notice shall be included in
15+
// all copies or substantial portions of the Software.
16+
//
17+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23+
// THE SOFTWARE.
24+
25+
package client
26+
27+
import (
28+
"context"
29+
"fmt"
30+
"math/rand"
31+
"runtime"
32+
"strings"
33+
"time"
34+
35+
"go.temporal.io/server/common/config"
36+
"go.temporal.io/server/common/persistence"
37+
)
38+
39+
// NewTargetedDataStoreErrorGenerator returns a new instance of a data store error generator that will inject errors
40+
// into the persistence layer based on the provided configuration.
41+
func NewTargetedDataStoreErrorGenerator(cfg *config.FaultInjectionDataStoreConfig) ErrorGenerator {
42+
methods := make(map[string]ErrorGenerator, len(cfg.Methods))
43+
for methodName, methodConfig := range cfg.Methods {
44+
var faultWeights []FaultWeight
45+
methodErrorRate := 0.0
46+
for errorName, errorRate := range methodConfig.Errors {
47+
err := getErrorFromName(errorName)
48+
faultWeights = append(faultWeights, FaultWeight{
49+
errFactory: func(data string) error {
50+
return err
51+
},
52+
weight: errorRate,
53+
})
54+
methodErrorRate += errorRate
55+
}
56+
errorGenerator := NewDefaultErrorGenerator(methodErrorRate, faultWeights)
57+
seed := methodConfig.Seed
58+
if seed == 0 {
59+
seed = time.Now().UnixNano()
60+
}
61+
errorGenerator.r = rand.New(rand.NewSource(seed))
62+
methods[methodName] = errorGenerator
63+
}
64+
return &dataStoreErrorGenerator{MethodErrorGenerators: methods}
65+
}
66+
67+
// dataStoreErrorGenerator is an implementation of ErrorGenerator that will inject errors into the persistence layer
68+
// using a per-method configuration.
69+
type dataStoreErrorGenerator struct {
70+
MethodErrorGenerators map[string]ErrorGenerator
71+
}
72+
73+
// Generate returns an error from the configured error types and rates for this method.
74+
// This method infers the fault injection target's method name from the function name of the caller.
75+
// As a result, this method should only be called from the persistence layer.
76+
// This method will panic if the method name cannot be inferred.
77+
// If no errors are configured for the method, or if there are some errors configured for this method,
78+
// but no error is sampled, then this method returns nil.
79+
// When this method returns nil, this causes the persistence layer to use the real implementation.
80+
func (d *dataStoreErrorGenerator) Generate() error {
81+
pc, _, _, ok := runtime.Caller(1)
82+
if !ok {
83+
panic("failed to get caller info")
84+
}
85+
runtimeFunc := runtime.FuncForPC(pc)
86+
if runtimeFunc == nil {
87+
panic("failed to get runtime function")
88+
}
89+
parts := strings.Split(runtimeFunc.Name(), ".")
90+
methodName := parts[len(parts)-1]
91+
methodErrorGenerator, ok := d.MethodErrorGenerators[methodName]
92+
if !ok {
93+
return nil
94+
}
95+
err := methodErrorGenerator.Generate()
96+
return err
97+
}
98+
99+
// getErrorFromName returns an error based on the provided name. If the name is not recognized, then this method will
100+
// panic.
101+
func getErrorFromName(name string) error {
102+
switch name {
103+
case "ShardOwnershipLostError":
104+
return &persistence.ShardOwnershipLostError{}
105+
case "DeadlineExceededError":
106+
return context.DeadlineExceeded
107+
default:
108+
panic(fmt.Sprintf("unknown error type: %v", name))
109+
}
110+
}
111+
112+
// UpdateRate should not be called for the data store error generator since the rate is defined on a per-method basis.
113+
func (d *dataStoreErrorGenerator) UpdateRate(rate float64) {
114+
panic("UpdateRate not supported for data store error generators")
115+
}
116+
117+
// UpdateWeights should not be called for the data store error generator since the weights are defined on a per-method
118+
// basis.
119+
func (d *dataStoreErrorGenerator) UpdateWeights(weights []FaultWeight) {
120+
panic("UpdateWeights not supported for data store error generators")
121+
}
122+
123+
// Rate should not be called for the data store error generator since there is no global rate for the data store, only
124+
// per-method rates.
125+
func (d *dataStoreErrorGenerator) Rate() float64 {
126+
panic("Rate not supported for data store error generators")
127+
}

common/persistence/sql/sqlPersistenceTest.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ func (s *TestCluster) Config() config.Persistence {
113113
DefaultStore: "test",
114114
VisibilityStore: "test",
115115
DataStores: map[string]config.DataStore{
116-
"test": {SQL: &cfg},
116+
"test": {SQL: &cfg, FaultInjection: s.faultInjection},
117117
},
118118
TransactionSizeLimit: dynamicconfig.GetIntPropertyFn(common.DefaultTransactionSizeLimit),
119119
}

0 commit comments

Comments
 (0)