From 2e498974031ca2d45ee0a4887da4ba83e0b26119 Mon Sep 17 00:00:00 2001 From: Tomoya Oda Date: Tue, 8 Aug 2023 23:51:50 +0000 Subject: [PATCH 1/6] fix(stepfunctions-tasks): FastFile mode for SageMaker Training Job --- .../cdk.out | 2 +- .../integ-stepfunctions-sagemaker.assets.json | 6 +- ...nteg-stepfunctions-sagemaker.template.json | 14 ++--- .../integ.json | 2 +- .../manifest.json | 4 +- .../tree.json | 60 +++++++++---------- .../sagemaker/integ.create-training-job.ts | 5 +- .../lib/sagemaker/base-types.ts | 7 ++- .../sagemaker/create-training-job.test.ts | 4 +- 9 files changed, 55 insertions(+), 49 deletions(-) diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/cdk.out b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/cdk.out index ae4b03c54e770..560dae10d018f 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/cdk.out +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/cdk.out @@ -1 +1 @@ -{"version":"30.0.0"} \ No newline at end of file +{"version":"33.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.assets.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.assets.json index 4eece88e7c92c..348af0fc6edfa 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.assets.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.assets.json @@ -1,7 +1,7 @@ { - "version": "30.0.0", + "version": "33.0.0", "files": { - "81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26": { + "11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a": { "source": { "path": "integ-stepfunctions-sagemaker.template.json", "packaging": "file" @@ -9,7 +9,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json", + "objectKey": "11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.template.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.template.json index 61c634750abc4..c3525778207e9 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.template.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ-stepfunctions-sagemaker.template.json @@ -276,12 +276,6 @@ "StateMachine2E01A3A5": { "Type": "AWS::StepFunctions::StateMachine", "Properties": { - "RoleArn": { - "Fn::GetAtt": [ - "StateMachineRoleB840431D", - "Arn" - ] - }, "DefinitionString": { "Fn::Join": [ "", @@ -297,7 +291,7 @@ "Arn" ] }, - "\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.", + "\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.", { "Ref": "AWS::Region" }, @@ -324,6 +318,12 @@ "/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}" ] ] + }, + "RoleArn": { + "Fn::GetAtt": [ + "StateMachineRoleB840431D", + "Arn" + ] } }, "DependsOn": [ diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ.json index 81307a7853194..77a3a44fc5844 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/integ.json @@ -1,5 +1,5 @@ { - "version": "30.0.0", + "version": "33.0.0", "testCases": { "integ.create-training-job": { "stacks": [ diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/manifest.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/manifest.json index 4f8b732c01a3f..43d36434b4bfc 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/manifest.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/manifest.json @@ -1,5 +1,5 @@ { - "version": "30.0.0", + "version": "33.0.0", "artifacts": { "integ-stepfunctions-sagemaker.assets": { "type": "cdk:asset-manifest", @@ -17,7 +17,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/tree.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/tree.json index 99805c2c1e4b0..3f52cb2cf799f 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/tree.json +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.js.snapshot/tree.json @@ -49,13 +49,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-kms.CfnKey", + "fqn": "aws-cdk-lib.aws_kms.CfnKey", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-kms.Key", + "fqn": "aws-cdk-lib.aws_kms.Key", "version": "0.0.0" } }, @@ -87,13 +87,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.CfnBucket", + "fqn": "aws-cdk-lib.aws_s3.CfnBucket", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.Bucket", + "fqn": "aws-cdk-lib.aws_s3.Bucket", "version": "0.0.0" } }, @@ -109,7 +109,7 @@ "id": "ImportSagemakerRole", "path": "integ-stepfunctions-sagemaker/TrainTask/SagemakerRole/ImportSagemakerRole", "constructInfo": { - "fqn": "@aws-cdk/core.Resource", + "fqn": "aws-cdk-lib.Resource", "version": "0.0.0" } }, @@ -156,7 +156,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnRole", + "fqn": "aws-cdk-lib.aws_iam.CfnRole", "version": "0.0.0" } }, @@ -264,25 +264,25 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnPolicy", + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Policy", + "fqn": "aws-cdk-lib.aws_iam.Policy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Role", + "fqn": "aws-cdk-lib.aws_iam.Role", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-stepfunctions-tasks.SageMakerCreateTrainingJob", + "fqn": "aws-cdk-lib.aws_stepfunctions_tasks.SageMakerCreateTrainingJob", "version": "0.0.0" } }, @@ -298,7 +298,7 @@ "id": "ImportRole", "path": "integ-stepfunctions-sagemaker/StateMachine/Role/ImportRole", "constructInfo": { - "fqn": "@aws-cdk/core.Resource", + "fqn": "aws-cdk-lib.Resource", "version": "0.0.0" } }, @@ -323,7 +323,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnRole", + "fqn": "aws-cdk-lib.aws_iam.CfnRole", "version": "0.0.0" } }, @@ -399,19 +399,19 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnPolicy", + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Policy", + "fqn": "aws-cdk-lib.aws_iam.Policy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Role", + "fqn": "aws-cdk-lib.aws_iam.Role", "version": "0.0.0" } }, @@ -421,12 +421,6 @@ "attributes": { "aws:cdk:cloudformation:type": "AWS::StepFunctions::StateMachine", "aws:cdk:cloudformation:props": { - "roleArn": { - "Fn::GetAtt": [ - "StateMachineRoleB840431D", - "Arn" - ] - }, "definitionString": { "Fn::Join": [ "", @@ -442,7 +436,7 @@ "Arn" ] }, - "\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.", + "\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.", { "Ref": "AWS::Region" }, @@ -469,17 +463,23 @@ "/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}" ] ] + }, + "roleArn": { + "Fn::GetAtt": [ + "StateMachineRoleB840431D", + "Arn" + ] } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-stepfunctions.CfnStateMachine", + "fqn": "aws-cdk-lib.aws_stepfunctions.CfnStateMachine", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-stepfunctions.StateMachine", + "fqn": "aws-cdk-lib.aws_stepfunctions.StateMachine", "version": "0.0.0" } }, @@ -487,7 +487,7 @@ "id": "stateMachineArn", "path": "integ-stepfunctions-sagemaker/stateMachineArn", "constructInfo": { - "fqn": "@aws-cdk/core.CfnOutput", + "fqn": "aws-cdk-lib.CfnOutput", "version": "0.0.0" } }, @@ -495,7 +495,7 @@ "id": "BootstrapVersion", "path": "integ-stepfunctions-sagemaker/BootstrapVersion", "constructInfo": { - "fqn": "@aws-cdk/core.CfnParameter", + "fqn": "aws-cdk-lib.CfnParameter", "version": "0.0.0" } }, @@ -503,13 +503,13 @@ "id": "CheckBootstrapVersion", "path": "integ-stepfunctions-sagemaker/CheckBootstrapVersion", "constructInfo": { - "fqn": "@aws-cdk/core.CfnRule", + "fqn": "aws-cdk-lib.CfnRule", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/core.Stack", + "fqn": "aws-cdk-lib.Stack", "version": "0.0.0" } }, @@ -518,12 +518,12 @@ "path": "Tree", "constructInfo": { "fqn": "constructs.Construct", - "version": "10.1.237" + "version": "10.2.69" } } }, "constructInfo": { - "fqn": "@aws-cdk/core.App", + "fqn": "aws-cdk-lib.App", "version": "0.0.0" } } diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts index aafd6f282b410..b72f48c5ef3a9 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts @@ -2,7 +2,7 @@ import { Key } from 'aws-cdk-lib/aws-kms'; import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3'; import { StateMachine } from 'aws-cdk-lib/aws-stepfunctions'; import { App, CfnOutput, RemovalPolicy, Stack } from 'aws-cdk-lib'; -import { S3Location, SageMakerCreateTrainingJob } from 'aws-cdk-lib/aws-stepfunctions-tasks'; +import { S3Location, SageMakerCreateTrainingJob, InputMode } from 'aws-cdk-lib/aws-stepfunctions-tasks'; /* * Creates a state machine with a task state to create a training job in AWS SageMaker @@ -35,7 +35,8 @@ const trainingData = new Bucket(stack, 'TrainingData', { const sm = new StateMachine(stack, 'StateMachine', { definition: new SageMakerCreateTrainingJob(stack, 'TrainTask', { algorithmSpecification: { - algorithmName: 'arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f', + algorithmName: 'BlazingText', + trainingInputMode: InputMode.FASTFILE, }, inputDataConfig: [{ channelName: 'InputData', diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts index 7abaea9b6b0e6..d073166abf82b 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts @@ -464,7 +464,12 @@ export enum InputMode { /** * File mode. */ - FILE = 'File' + FILE = 'File', + + /** + * FastFile mode. + */ + FASTFILE = 'FastFile' } /** diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts index 31bf02c7c8a18..71613a7578156 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts @@ -134,7 +134,7 @@ test('create complex training job', () => { role, algorithmSpecification: { algorithmName: 'BlazingText', - trainingInputMode: tasks.InputMode.FILE, + trainingInputMode: tasks.InputMode.FASTFILE, metricDefinitions: [ { name: 'mymetric', regex: 'regex_pattern', @@ -218,7 +218,7 @@ test('create complex training job', () => { TrainingJobName: 'MyTrainJob', RoleArn: { 'Fn::GetAtt': ['Role1ABCC5F0', 'Arn'] }, AlgorithmSpecification: { - TrainingInputMode: 'File', + TrainingInputMode: 'FastFile', AlgorithmName: 'BlazingText', MetricDefinitions: [ { Name: 'mymetric', Regex: 'regex_pattern' }, From 03c77ec541668e446178f1f12f6bcb98eea586a4 Mon Sep 17 00:00:00 2001 From: Tomoya Oda Date: Sat, 12 Aug 2023 01:02:10 +0000 Subject: [PATCH 2/6] add: validation for algorithmName --- .../lib/sagemaker/create-training-job.ts | 8 ++ .../sagemaker/create-training-job.test.ts | 94 +++++++++++++++++++ 2 files changed, 102 insertions(+) diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts index f04842249e24b..14becf426da6f 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts @@ -163,6 +163,14 @@ export class SageMakerCreateTrainingJob extends sfn.TaskStateBase implements iam throw new Error('Must define either an algorithm name or training image URI in the algorithm specification'); } + // validate the algorithmName if the algorithmName is defined + if (props.algorithmSpecification.algorithmName) { + const regex = /^(arn:aws[a-z\-]*:sagemaker:[a-z0-9\-]*:[0-9]{12}:[a-z\-]*\/)?([a-zA-Z0-9]([a-zA-Z0-9-]){0,62})(? { + + const task = new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { + trainingJobName: 'myTrainJob', + algorithmSpecification: { + trainingImage: tasks.DockerImage.fromJsonExpression(sfn.JsonPath.stringAt('$.Training.imageName')), + }, + inputDataConfig: [ + { + channelName: 'train', + dataSource: { + s3DataSource: { + s3DataType: tasks.S3DataType.S3_PREFIX, + s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), + }, + }, + }, + ], + outputDataConfig: { + s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), + }, + }); + + // THEN + expect(stack.resolve(task.toStateJson())).toMatchObject({ + Parameters: { + AlgorithmSpecification: { + 'TrainingImage.$': '$.Training.imageName', + 'TrainingInputMode': 'File', + }, + }, + }); +}); + +test('create a SageMaker train task with image URI algorithmName', () => { + + const task = new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { + trainingJobName: 'myTrainJob', + algorithmSpecification: { + algorithmName: 'arn:aws:sagemaker:us-east-1:123456789012:algorithm/scikit-decision-trees', + trainingInputMode: tasks.InputMode.FILE, + }, + inputDataConfig: [ + { + channelName: 'train', + dataSource: { + s3DataSource: { + s3DataType: tasks.S3DataType.S3_PREFIX, + s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), + }, + }, + }, + ], + outputDataConfig: { + s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), + }, + }); + + // THEN + expect(stack.resolve(task.toStateJson())).toMatchObject({ + Parameters: { + AlgorithmSpecification: { + AlgorithmName: 'arn:aws:sagemaker:us-east-1:123456789012:algorithm/scikit-decision-trees', + }, + }, + }); +}); + +test('Cannot create a SageMaker train task with incorrect algorithmName', () => { + + expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { + trainingJobName: 'myTrainJob', + algorithmSpecification: { + algorithmName: 'Blazing_Text', // underscores are not allowed + trainingInputMode: tasks.InputMode.FILE, + }, + inputDataConfig: [ + { + channelName: 'train', + dataSource: { + s3DataSource: { + s3DataType: tasks.S3DataType.S3_PREFIX, + s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), + }, + }, + }, + ], + outputDataConfig: { + s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), + }, + })) + .toThrowError(/'Blazing_Text' at 'algorithmName' must satisfy regular expression pattern/); +}); From 65b304fe6e07eef505c2bd7bfa6b13d0be8a4c47 Mon Sep 17 00:00:00 2001 From: Tomoya Oda Date: Sat, 12 Aug 2023 21:49:48 +0000 Subject: [PATCH 3/6] add validation --- .../lib/sagemaker/create-training-job.ts | 27 +++++++-- .../sagemaker/create-training-job.test.ts | 55 ++++++++++++++++++- 2 files changed, 73 insertions(+), 9 deletions(-) diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts index 14becf426da6f..dc99275f34634 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts @@ -163,14 +163,14 @@ export class SageMakerCreateTrainingJob extends sfn.TaskStateBase implements iam throw new Error('Must define either an algorithm name or training image URI in the algorithm specification'); } - // validate the algorithmName if the algorithmName is defined - if (props.algorithmSpecification.algorithmName) { - const regex = /^(arn:aws[a-z\-]*:sagemaker:[a-z0-9\-]*:[0-9]{12}:[a-z\-]*\/)?([a-zA-Z0-9]([a-zA-Z0-9-]){0,62})(? { + + expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { + trainingJobName: 'myTrainJob', + algorithmSpecification: { + algorithmName: 'BlazingText', + trainingImage: tasks.DockerImage.fromJsonExpression(sfn.JsonPath.stringAt('$.Training.imageName')), + }, + inputDataConfig: [ + { + channelName: 'train', + dataSource: { + s3DataSource: { + s3DataType: tasks.S3DataType.S3_PREFIX, + s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), + }, + }, + }, + ], + outputDataConfig: { + s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), + }, + })) + .toThrowError(/Cannot define both an algorithm name and training image URI in the algorithm specification/); +}); + test('create a SageMaker train task with trainingImage', () => { const task = new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { @@ -449,7 +475,6 @@ test('create a SageMaker train task with image URI algorithmName', () => { trainingJobName: 'myTrainJob', algorithmSpecification: { algorithmName: 'arn:aws:sagemaker:us-east-1:123456789012:algorithm/scikit-decision-trees', - trainingInputMode: tasks.InputMode.FILE, }, inputDataConfig: [ { @@ -477,13 +502,37 @@ test('create a SageMaker train task with image URI algorithmName', () => { }); }); +test('Cannot create a SageMaker train task when algorithmName length is 171 or more', () => { + + expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { + trainingJobName: 'myTrainJob', + algorithmSpecification: { + algorithmName: 'a'.repeat(171), // maximum length is 170 + }, + inputDataConfig: [ + { + channelName: 'train', + dataSource: { + s3DataSource: { + s3DataType: tasks.S3DataType.S3_PREFIX, + s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), + }, + }, + }, + ], + outputDataConfig: { + s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), + }, + })) + .toThrowError(/Algorithm name length must be between 1 and 170, but got 171/); +}); + test('Cannot create a SageMaker train task with incorrect algorithmName', () => { expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { trainingJobName: 'myTrainJob', algorithmSpecification: { algorithmName: 'Blazing_Text', // underscores are not allowed - trainingInputMode: tasks.InputMode.FILE, }, inputDataConfig: [ { @@ -500,5 +549,5 @@ test('Cannot create a SageMaker train task with incorrect algorithmName', () => s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), }, })) - .toThrowError(/'Blazing_Text' at 'algorithmName' must satisfy regular expression pattern/); + .toThrowError(/Expected algorithm name to match pattern/); }); From 9f2bb760415edad4ad2b621d6319f602ff9b0a35 Mon Sep 17 00:00:00 2001 From: Tomoya Oda Date: Mon, 14 Aug 2023 17:32:42 +0000 Subject: [PATCH 4/6] fix: renamed FASTFILE to FAST_FILE --- .../test/sagemaker/integ.create-training-job.ts | 2 +- .../aws-stepfunctions-tasks/lib/sagemaker/base-types.ts | 2 +- .../test/sagemaker/create-training-job.test.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts index b72f48c5ef3a9..3d3e138e080cd 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/sagemaker/integ.create-training-job.ts @@ -36,7 +36,7 @@ const sm = new StateMachine(stack, 'StateMachine', { definition: new SageMakerCreateTrainingJob(stack, 'TrainTask', { algorithmSpecification: { algorithmName: 'BlazingText', - trainingInputMode: InputMode.FASTFILE, + trainingInputMode: InputMode.FAST_FILE, }, inputDataConfig: [{ channelName: 'InputData', diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts index d073166abf82b..6fe886c8c7846 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/base-types.ts @@ -469,7 +469,7 @@ export enum InputMode { /** * FastFile mode. */ - FASTFILE = 'FastFile' + FAST_FILE = 'FastFile' } /** diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts index 872897f605b9c..7ed4df320de10 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts @@ -134,7 +134,7 @@ test('create complex training job', () => { role, algorithmSpecification: { algorithmName: 'BlazingText', - trainingInputMode: tasks.InputMode.FASTFILE, + trainingInputMode: tasks.InputMode.FAST_FILE, metricDefinitions: [ { name: 'mymetric', regex: 'regex_pattern', From 014009c9ffd80deae746f6001b62aaf5f016752b Mon Sep 17 00:00:00 2001 From: Kaizen Conroy <36202692+kaizencc@users.noreply.github.com> Date: Wed, 16 Aug 2023 16:00:28 -0400 Subject: [PATCH 5/6] Apply suggestions from code review --- .../test/sagemaker/create-training-job.test.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts index 7ed4df320de10..753fe3b56d52b 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/sagemaker/create-training-job.test.ts @@ -410,7 +410,6 @@ test('Cannot create a SageMaker train task with both algorithm name and image na }); test('Cannot create a SageMaker train task with both algorithm name and image name defined', () => { - expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { trainingJobName: 'myTrainJob', algorithmSpecification: { @@ -436,7 +435,6 @@ test('Cannot create a SageMaker train task with both algorithm name and image na }); test('create a SageMaker train task with trainingImage', () => { - const task = new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { trainingJobName: 'myTrainJob', algorithmSpecification: { @@ -470,7 +468,6 @@ test('create a SageMaker train task with trainingImage', () => { }); test('create a SageMaker train task with image URI algorithmName', () => { - const task = new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { trainingJobName: 'myTrainJob', algorithmSpecification: { @@ -503,7 +500,6 @@ test('create a SageMaker train task with image URI algorithmName', () => { }); test('Cannot create a SageMaker train task when algorithmName length is 171 or more', () => { - expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { trainingJobName: 'myTrainJob', algorithmSpecification: { @@ -528,7 +524,6 @@ test('Cannot create a SageMaker train task when algorithmName length is 171 or m }); test('Cannot create a SageMaker train task with incorrect algorithmName', () => { - expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { trainingJobName: 'myTrainJob', algorithmSpecification: { From 1a7fa59003356d9de4aa9b38489300f7d84b414e Mon Sep 17 00:00:00 2001 From: Tomoya Oda Date: Sat, 19 Aug 2023 18:22:14 +0000 Subject: [PATCH 6/6] removed algorithm name validation --- .../aws-stepfunctions-tasks/README.md | 6 + .../lib/sagemaker/create-training-job.ts | 23 --- .../sagemaker/create-training-job.test.ts | 143 ------------------ 3 files changed, 6 insertions(+), 166 deletions(-) diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md b/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md index 7cff3c209fb2f..d291ebf894b4c 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md @@ -1060,6 +1060,12 @@ new tasks.SageMakerCreateTrainingJob(this, 'TrainSagemaker', { }); ``` +You can specify [TrainingInputMode](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#API_AlgorithmSpecification_Contents) via the trainingInputMode property. + +- To download the data from Amazon Simple Storage Service (Amazon S3) to the provisioned ML storage volume, and mount the directory to a Docker volume, choose `InputMode.FILE` if an algorithm supports it. +- To stream data directly from Amazon S3 to the container, choose `InputMode.PIPE` if an algorithm supports it. +- To stream data directly from Amazon S3 to the container with no code changes and to provide file system access to the data, choose `InputMode.FAST_FILE` if an algorithm supports it. + ### Create Transform Job You can call the [`CreateTransformJob`](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTransformJob.html) API from a `Task` state. diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts index dc99275f34634..f04842249e24b 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/sagemaker/create-training-job.ts @@ -163,14 +163,6 @@ export class SageMakerCreateTrainingJob extends sfn.TaskStateBase implements iam throw new Error('Must define either an algorithm name or training image URI in the algorithm specification'); } - // check that both algorithm name and image are not defined - if (props.algorithmSpecification.algorithmName && props.algorithmSpecification.trainingImage) { - throw new Error('Cannot define both an algorithm name and training image URI in the algorithm specification'); - } - - // validate algorithm name - this.validateAlgorithmName(props.algorithmSpecification.algorithmName); - // set the input mode to 'File' if not defined this.algorithmSpecification = props.algorithmSpecification.trainingInputMode ? props.algorithmSpecification @@ -332,21 +324,6 @@ export class SageMakerCreateTrainingJob extends sfn.TaskStateBase implements iam : {}; } - private validateAlgorithmName(algorithmName?: string): void { - if (algorithmName === undefined) { - return; - } - - if (algorithmName.length < 1 || 170 < algorithmName.length) { - throw new Error(`Algorithm name length must be between 1 and 170, but got ${algorithmName.length}`); - } - - const regex = /^(arn:aws[a-z\-]*:sagemaker:[a-z0-9\-]*:[0-9]{12}:[a-z\-]*\/)?([a-zA-Z0-9]([a-zA-Z0-9-]){0,62})(? { - - expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { - trainingJobName: 'myTrainJob', - algorithmSpecification: { - algorithmName: 'BlazingText', - trainingImage: tasks.DockerImage.fromJsonExpression(sfn.JsonPath.stringAt('$.Training.imageName')), - }, - inputDataConfig: [ - { - channelName: 'train', - dataSource: { - s3DataSource: { - s3DataType: tasks.S3DataType.S3_PREFIX, - s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), - }, - }, - }, - ], - outputDataConfig: { - s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), - }, - })) - .toThrowError(/Cannot define both an algorithm name and training image URI in the algorithm specification/); -}); - -test('create a SageMaker train task with trainingImage', () => { - - const task = new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { - trainingJobName: 'myTrainJob', - algorithmSpecification: { - trainingImage: tasks.DockerImage.fromJsonExpression(sfn.JsonPath.stringAt('$.Training.imageName')), - }, - inputDataConfig: [ - { - channelName: 'train', - dataSource: { - s3DataSource: { - s3DataType: tasks.S3DataType.S3_PREFIX, - s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), - }, - }, - }, - ], - outputDataConfig: { - s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), - }, - }); - - // THEN - expect(stack.resolve(task.toStateJson())).toMatchObject({ - Parameters: { - AlgorithmSpecification: { - 'TrainingImage.$': '$.Training.imageName', - 'TrainingInputMode': 'File', - }, - }, - }); -}); - -test('create a SageMaker train task with image URI algorithmName', () => { - - const task = new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { - trainingJobName: 'myTrainJob', - algorithmSpecification: { - algorithmName: 'arn:aws:sagemaker:us-east-1:123456789012:algorithm/scikit-decision-trees', - }, - inputDataConfig: [ - { - channelName: 'train', - dataSource: { - s3DataSource: { - s3DataType: tasks.S3DataType.S3_PREFIX, - s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), - }, - }, - }, - ], - outputDataConfig: { - s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), - }, - }); - - // THEN - expect(stack.resolve(task.toStateJson())).toMatchObject({ - Parameters: { - AlgorithmSpecification: { - AlgorithmName: 'arn:aws:sagemaker:us-east-1:123456789012:algorithm/scikit-decision-trees', - }, - }, - }); -}); - -test('Cannot create a SageMaker train task when algorithmName length is 171 or more', () => { - - expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { - trainingJobName: 'myTrainJob', - algorithmSpecification: { - algorithmName: 'a'.repeat(171), // maximum length is 170 - }, - inputDataConfig: [ - { - channelName: 'train', - dataSource: { - s3DataSource: { - s3DataType: tasks.S3DataType.S3_PREFIX, - s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), - }, - }, - }, - ], - outputDataConfig: { - s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), - }, - })) - .toThrowError(/Algorithm name length must be between 1 and 170, but got 171/); -}); - -test('Cannot create a SageMaker train task with incorrect algorithmName', () => { - - expect(() => new SageMakerCreateTrainingJob(stack, 'SageMakerTrainingTask', { - trainingJobName: 'myTrainJob', - algorithmSpecification: { - algorithmName: 'Blazing_Text', // underscores are not allowed - }, - inputDataConfig: [ - { - channelName: 'train', - dataSource: { - s3DataSource: { - s3DataType: tasks.S3DataType.S3_PREFIX, - s3Location: tasks.S3Location.fromJsonExpression('$.S3Bucket'), - }, - }, - }, - ], - outputDataConfig: { - s3OutputLocation: tasks.S3Location.fromBucket(s3.Bucket.fromBucketName(stack, 'Bucket', 'mybucket'), 'myoutputpath/'), - }, - })) - .toThrowError(/Expected algorithm name to match pattern/); -});