Skip to content

Commit 6798c16

Browse files
committed
Check PREMIS validity before preservation
[skip-codecov]
1 parent 92feb35 commit 6798c16

File tree

15 files changed

+1584 -22 lines changed

15 files changed

+1584 -22 lines changed

.dockerignore

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@
55
!go.mod
66
!go.sum
77
!main.go
8+
!hack/xsd/premis.xsd

Dockerfile

+15
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
ARG TARGET=enduro
44
ARG GO_VERSION
55

6+
FROM alpine:3.20 AS build-libxml
7+
RUN apk add --no-cache libxml2-utils
8+
69
FROM golang:${GO_VERSION}-alpine AS build-go
710
WORKDIR /src
811
ENV CGO_ENABLED=0
@@ -61,10 +64,22 @@ FROM base AS enduro-a3m-worker
6164
COPY --from=build-enduro-a3m-worker --link /out/enduro-a3m-worker /home/enduro/bin/enduro-a3m-worker
6265
COPY --from=build-enduro-a3m-worker --link /src/enduro.toml /home/enduro/.config/enduro.toml
6366
CMD ["/home/enduro/bin/enduro-a3m-worker", "--config", "/home/enduro/.config/enduro.toml"]
67+
COPY hack/xsd/premis.xsd /home/enduro/premis.xsd
68+
COPY --from=build-libxml /usr/bin/xmllint /usr/bin/xmllint
69+
COPY --from=build-libxml /usr/lib/libxml2.so.2 /usr/lib/libxml2.so.2
70+
COPY --from=build-libxml /lib/ld-musl-x86_64.so.1 /lib/ld-musl-x86_64.so.1
71+
COPY --from=build-libxml /lib/libz.so.1 /lib/libz.so.1
72+
COPY --from=build-libxml /usr/lib/liblzma.so.5 /usr/lib/liblzma.so.5
6473

6574
FROM base AS enduro-am-worker
6675
COPY --from=build-enduro-am-worker --link /out/enduro-am-worker /home/enduro/bin/enduro-am-worker
6776
COPY --from=build-enduro-am-worker --link /src/enduro.toml /home/enduro/.config/enduro.toml
6877
CMD ["/home/enduro/bin/enduro-am-worker", "--config", "/home/enduro/.config/enduro.toml"]
78+
COPY hack/xsd/premis.xsd /home/enduro/premis.xsd
79+
COPY --from=build-libxml /usr/bin/xmllint /usr/bin/xmllint
80+
COPY --from=build-libxml /usr/lib/libxml2.so.2 /usr/lib/libxml2.so.2
81+
COPY --from=build-libxml /lib/ld-musl-x86_64.so.1 /lib/ld-musl-x86_64.so.1
82+
COPY --from=build-libxml /lib/libz.so.1 /lib/libz.so.1
83+
COPY --from=build-libxml /usr/lib/liblzma.so.5 /usr/lib/liblzma.so.5
6984

7085
FROM ${TARGET}

cmd/enduro-a3m-worker/main.go

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/artefactual-sdps/temporal-activities/bagvalidate"
2020
"github.com/artefactual-sdps/temporal-activities/bucketupload"
2121
"github.com/artefactual-sdps/temporal-activities/removepaths"
22+
"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
2223
"github.com/hashicorp/go-cleanhttp"
2324
"github.com/oklog/run"
2425
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -252,6 +253,10 @@ func main() {
252253
archiveextract.New(cfg.ExtractActivity).Execute,
253254
temporalsdk_activity.RegisterOptions{Name: archiveextract.Name},
254255
)
256+
w.RegisterActivityWithOptions(
257+
xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
258+
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
259+
)
255260
w.RegisterActivityWithOptions(
256261
activities.NewClassifyPackageActivity().Execute,
257262
temporalsdk_activity.RegisterOptions{Name: activities.ClassifyPackageActivityName},

cmd/enduro-am-worker/main.go

+5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"github.com/artefactual-sdps/temporal-activities/bagvalidate"
2121
"github.com/artefactual-sdps/temporal-activities/bucketupload"
2222
"github.com/artefactual-sdps/temporal-activities/removepaths"
23+
"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
2324
"github.com/hashicorp/go-cleanhttp"
2425
"github.com/jonboulle/clockwork"
2526
"github.com/oklog/run"
@@ -329,6 +330,10 @@ func main() {
329330
bucketupload.New(failedPIPs).Execute,
330331
temporalsdk_activity.RegisterOptions{Name: activities.SendToFailedPIPsName},
331332
)
333+
w.RegisterActivityWithOptions(
334+
xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
335+
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
336+
)
332337

333338
g.Add(
334339
func() error {

enduro.toml

+4
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ namespace = "default"
232232
taskQueue = "preprocessing"
233233
workflowName = "preprocessing"
234234

235+
[validatePremis]
236+
enabled = true
237+
xsdPath = "/home/enduro/premis.xsd"
238+
235239
[failedSips]
236240
endpoint = "http://minio.enduro-sdps:9000"
237241
pathStyle = true

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ require (
1212
github.com/XSAM/otelsql v0.29.0
1313
github.com/alicebob/miniredis/v2 v2.32.1
1414
github.com/artefactual-labs/bagit-gython v0.2.0
15-
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b
15+
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4
1616
github.com/coreos/go-oidc/v3 v3.10.0
1717
github.com/cyphar/filepath-securejoin v0.2.4
1818
github.com/dolmen-go/contextio v1.0.0

go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -442,8 +442,8 @@ github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew
442442
github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4=
443443
github.com/artefactual-labs/bagit-gython v0.2.0 h1:Zje4Lb1goZVUPoxpc/k65sWtYpNgK9Rvphvaok5cYzE=
444444
github.com/artefactual-labs/bagit-gython v0.2.0/go.mod h1:C+hFZQMDnji1hjGt3nrlMK3BahaBhvo/hU2uqd+Q9Z4=
445-
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b h1:kTOc2pbkdII6/Z84Bus1q52z5KAOaT8vLpfRoOs1l1I=
446-
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b/go.mod h1:FVh79rCGNlUU1QnioAU+lrSjLqrA1PJFYKIhWPsmyug=
445+
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4 h1:WF95IOkZRVSCST/26SAqPYsUrtUuJpavBht6lvdeKl0=
446+
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4/go.mod h1:FVh79rCGNlUU1QnioAU+lrSjLqrA1PJFYKIhWPsmyug=
447447
github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU=
448448
github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
449449
github.com/aws/aws-sdk-go-v2 v1.30.3 h1:jUeBtG0Ih+ZIFH0F4UkmL9w3cSpaMv9tYYDbzILP8dY=

hack/kube/base/enduro.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ spec:
7070
value: "grafana-alloy.enduro-sdps:4317"
7171
- name: ENDURO_TELEMETRY_TRACES_SAMPLING_RATIO
7272
value: "1.0"
73+
- name: ENDURO_VALIDATEPREMIS_ENABLED
74+
value: "false"
7375
ports:
7476
- containerPort: 9000
7577
- containerPort: 9002

0 commit comments

Comments (0)