From eb35133a2d8847e8430dd64426f48f469a0d5285 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 22 Apr 2020 20:20:47 +0200 Subject: [PATCH 1/4] add OCR-D/ocrd_fileformat, fix #80 --- .gitmodules | 3 +++ Makefile | 11 +++++++++-- ocrd_fileformat | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) create mode 160000 ocrd_fileformat diff --git a/.gitmodules b/.gitmodules index 4fc682b0..90c455c8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -70,6 +70,9 @@ [submodule "ocrd_repair_inconsistencies"] path = ocrd_repair_inconsistencies url = https://github.com/qurator-spk/ocrd_repair_inconsistencies.git +[submodule "ocrd_fileformat"] + path = ocrd_fileformat + url = https://github.com/OCR-D/ocrd_fileformat [submodule "ocrd_pagetopdf"] path = ocrd_pagetopdf url = https://github.com/JKamlah/ocrd_pagetopdf.git diff --git a/Makefile b/Makefile index 28303445..ba6d5a5b 100644 --- a/Makefile +++ b/Makefile @@ -210,6 +210,13 @@ $(BIN)/ocrd-im6convert: ocrd_im6convert . $(ACTIVATE_VENV) && $(MAKE) -C $< install endif +ifneq ($(findstring ocrd_fileformat, $(OCRD_MODULES)),) +ocrd_fileformat: GIT_RECURSIVE = --recursive +OCRD_EXECUTABLES += $(BIN)/ocrd-fileformat +$(BIN)/ocrd-fileformat: ocrd_fileformat + . 
$(ACTIVATE_VENV) && $(MAKE) -C $< install-fileformat install +endif + ifneq ($(findstring ocrd_olena, $(OCRD_MODULES)),) ocrd_olena: GIT_RECURSIVE = --recursive deps-ubuntu: ocrd_olena @@ -481,9 +488,9 @@ endif docker-minimum-git docker-medium-git docker-maximum-git: PIP_OPTIONS = -e # Minimum-size selection: use Ocropy binarization, use Tesseract from PPA -docker-minimum docker-minimum-git: DOCKER_MODULES = core ocrd_im6convert ocrd_cis ocrd_pagetopdf ocrd_tesserocr tesserocr workflow-configuration ocrd_repair_inconsistencies +docker-minimum docker-minimum-git: DOCKER_MODULES = core ocrd_im6convert ocrd_cis ocrd_pagetopdf ocrd_tesserocr tesserocr workflow-configuration ocrd_repair_inconsistencies ocrd_fileformat # Medium-size selection: add Olena binarization and Calamari, use Tesseract from git, add evaluation -docker-medium docker-medium-git: DOCKER_MODULES = core ocrd_im6convert format-converters ocrd_cis ocrd_pagetopdf ocrd_tesserocr tesserocr tesseract ocrd_olena ocrd_segment ocrd_keraslm ocrd_calamari dinglehopper cor-asv-ann workflow-configuration ocrd_repair_inconsistencies +docker-medium docker-medium-git: DOCKER_MODULES = core ocrd_im6convert format-converters ocrd_cis ocrd_pagetopdf ocrd_tesserocr tesserocr tesseract ocrd_olena ocrd_segment ocrd_keraslm ocrd_calamari dinglehopper cor-asv-ann workflow-configuration ocrd_repair_inconsistencies ocrd_fileformat # Maximum-size selection: use all modules docker-maximum docker-maximum-git: DOCKER_MODULES = $(OCRD_MODULES) diff --git a/ocrd_fileformat b/ocrd_fileformat new file mode 160000 index 00000000..de5b5328 --- /dev/null +++ b/ocrd_fileformat @@ -0,0 +1 @@ +Subproject commit de5b53282243c0b9696bf1ca9a22c327793ef37d From b7aa8f511de99c9cf6a66cd09994856a7a65a8a8 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 22 Apr 2020 20:34:33 +0200 Subject: [PATCH 2/4] Makefile: sort DOCKER_MODULES alphabetically (but core first) --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/Makefile b/Makefile index ba6d5a5b..93667977 100644 --- a/Makefile +++ b/Makefile @@ -488,9 +488,9 @@ endif docker-minimum-git docker-medium-git docker-maximum-git: PIP_OPTIONS = -e # Minimum-size selection: use Ocropy binarization, use Tesseract from PPA -docker-minimum docker-minimum-git: DOCKER_MODULES = core ocrd_im6convert ocrd_cis ocrd_pagetopdf ocrd_tesserocr tesserocr workflow-configuration ocrd_repair_inconsistencies ocrd_fileformat +docker-minimum docker-minimum-git: DOCKER_MODULES = core ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr tesserocr workflow-configuration # Medium-size selection: add Olena binarization and Calamari, use Tesseract from git, add evaluation -docker-medium docker-medium-git: DOCKER_MODULES = core ocrd_im6convert format-converters ocrd_cis ocrd_pagetopdf ocrd_tesserocr tesserocr tesseract ocrd_olena ocrd_segment ocrd_keraslm ocrd_calamari dinglehopper cor-asv-ann workflow-configuration ocrd_repair_inconsistencies ocrd_fileformat +docker-medium docker-medium-git: DOCKER_MODULES = core cor-asv-ann dinglehopper format-converters ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr tesseract tesserocr workflow-configuration # Maximum-size selection: use all modules docker-maximum docker-maximum-git: DOCKER_MODULES = $(OCRD_MODULES) From 5e6da3b9a234bcc208dcb4f5692dce7606d0bab0 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 22 Apr 2020 20:53:42 +0200 Subject: [PATCH 3/4] Update .gitmodules Co-Authored-By: Stefan Weil --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 90c455c8..ceb6dde3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -72,7 +72,7 @@ url = https://github.com/qurator-spk/ocrd_repair_inconsistencies.git [submodule "ocrd_fileformat"] path = ocrd_fileformat - url = 
https://github.com/OCR-D/ocrd_fileformat + url = https://github.com/OCR-D/ocrd_fileformat.git [submodule "ocrd_pagetopdf"] path = ocrd_pagetopdf url = https://github.com/JKamlah/ocrd_pagetopdf.git From 27a507f433cc8ca44f62a6cf355b0ff616ebfd34 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 23 Apr 2020 07:50:08 +0200 Subject: [PATCH 4/4] Add unzip as a requirement (needed for ocrd_fileformat / ocr-fileformat) Signed-off-by: Stefan Weil --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 93667977..3b838f6c 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ PKG_CONFIG_PATH := $(VIRTUAL_ENV)/lib/pkgconfig export PKG_CONFIG_PATH OCRD_EXECUTABLES = $(BIN)/ocrd # add more CLIs below -CUSTOM_DEPS = wget python3-venv # add more packages for deps-ubuntu below (or modules as preqrequisites) +CUSTOM_DEPS = unzip wget python3-venv # add more packages for deps-ubuntu below (or modules as prerequisites) DISABLED_MODULES ?= cor-asv-fst opencv-python ocrd_kraken clstm ocrd_ocropy # Default to all submodules, but allow overriding by user