Skip to content

Commit 712d1d3

Browse files
committed
Merge branch 'port-to-v3' of https://github.com/OCR-D/ocrd_kraken into port-to-v3
# Conflicts:
#	ocrd_kraken/binarize.py
2 parents: c0c1eb7 + a497287 — commit 712d1d3

7 files changed

+38
-41
lines changed

ocrd_kraken/binarize.py

-2
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,6 @@ def executable(self):
2121

2222
def setup(self):
2323
self.logger = getLogger('processor.KrakenBinarize')
24-
assert_file_grp_cardinality(self.input_file_grp, 1)
25-
assert_file_grp_cardinality(self.output_file_grp, 1)
2624

2725
def process_page_pcgts(self, *input_pcgts: OcrdPage, output_file_id: Optional[str] = None, page_id: Optional[str] = None) -> OcrdProcessResult:
2826
"""Binarize the pages/regions/lines with Kraken.

ocrd_kraken/ocrd-tool.json

+6-6
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@
44
"tools": {
55
"ocrd-kraken-binarize": {
66
"executable": "ocrd-kraken-binarize",
7-
"input_file_grp": ["OCR-D-IMG", "OCR-D-PRE-CROP", "OCR-D-SEG-REGION", "OCR-D-SEG-LINE"],
8-
"output_file_grp": ["OCR-D-PRE-BIN"],
7+
"input_file_grp_cardinality": 1,
8+
"output_file_grp_cardinality": 1,
99
"categories": [
1010
"Image preprocessing"
1111
],
@@ -24,8 +24,8 @@
2424
},
2525
"ocrd-kraken-segment": {
2626
"executable": "ocrd-kraken-segment",
27-
"input_file_grp": ["OCR-D-IMG", "OCR-D-PRE-CROP", "OCR-D-PRE-BIN"],
28-
"output_file_grp": ["OCR-D-SEG-REGION", "OCR-D-SEG-LINE"],
27+
"input_file_grp_cardinality": 1,
28+
"output_file_grp_cardinality": 1,
2929
"categories": [
3030
"Layout analysis"
3131
],
@@ -128,8 +128,8 @@
128128
},
129129
"ocrd-kraken-recognize": {
130130
"executable": "ocrd-kraken-recognize",
131-
"input_file_grp": ["OCR-D-SEG-LINE"],
132-
"output_file_grp": ["OCR-D-OCR-KRAK"],
131+
"input_file_grp_cardinality": 1,
132+
"output_file_grp_cardinality": 1,
133133
"categories": ["Text recognition and optimization"],
134134
"steps": ["recognition/text-recognition"],
135135
"description": "Text recognition with Kraken",

ocrd_kraken/recognize.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -55,10 +55,8 @@ def executable(self):
5555

5656
def setup(self):
5757
"""
58-
Assert filegrp cardinality, load model, set predict function
58+
Load model, set predict function
5959
"""
60-
assert_file_grp_cardinality(self.input_file_grp, 1)
61-
assert_file_grp_cardinality(self.output_file_grp, 1)
6260

6361
self.logger = getLogger('processor.KrakenRecognize')
6462
import torch

ocrd_kraken/segment.py

-2
Original file line number | Diff line number | Diff line change
@@ -46,8 +46,6 @@ def setup(self):
4646
Load models
4747
"""
4848
self.logger = getLogger('processor.KrakenSegment')
49-
assert_file_grp_cardinality(self.input_file_grp, 1)
50-
assert_file_grp_cardinality(self.output_file_grp, 1)
5149
kwargs = {}
5250
kwargs['text_direction'] = self.parameter['text_direction']
5351
self.use_legacy = self.parameter['use_legacy']

tests/test_binarize.py

+13-13
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
from tests.base import assets, main
88

9-
from ocrd import Resolver
9+
from ocrd import Resolver, run_processor
1010
from ocrd_kraken.binarize import KrakenBinarize
1111
from ocrd_utils.logging import setOverrideLogLevel
1212

@@ -37,24 +37,24 @@ def workspace(tmpdir):
3737
# )
3838

3939
def test_binarize_regions(workspace):
40-
proc = KrakenBinarize(
41-
workspace,
42-
input_file_grp="OCR-D-GT-PAGE",
43-
output_file_grp="OCR-D-IMG-BIN-KRAKEN",
44-
parameter={'level-of-operation': 'region'}
40+
run_processor(KrakenBinarize,
41+
workspace=workspace,
42+
input_file_grp="OCR-D-GT-PAGE",
43+
output_file_grp="OCR-D-IMG-BIN-KRAKEN",
44+
parameter={'level-of-operation': 'region'}
4545
)
46-
proc.process()
4746
workspace.save_mets()
47+
# FIXME: add result assertions (find_files, parsing PAGE etc)
4848

4949
def test_binarize_lines(workspace):
50-
proc = KrakenBinarize(
51-
workspace,
52-
input_file_grp="OCR-D-GT-PAGE",
53-
output_file_grp="OCR-D-IMG-BIN-KRAKEN",
54-
parameter={'level-of-operation': 'line'}
50+
run_processor(KrakenBinarize,
51+
workspace=workspace,
52+
input_file_grp="OCR-D-GT-PAGE",
53+
output_file_grp="OCR-D-IMG-BIN-KRAKEN",
54+
parameter={'level-of-operation': 'line'}
5555
)
56-
proc.process()
5756
workspace.save_mets()
57+
# FIXME: add result assertions (find_files, parsing PAGE etc)
5858

5959
if __name__ == "__main__":
6060
main(__file__)

tests/test_recognize.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,13 @@ def test_recognize(self):
2020
with pushd_popd(tempdir=True) as tempdir:
2121
workspace = resolver.workspace_from_url(assets.path_to('communist_manifesto/data/mets.xml'), dst_dir=tempdir, download=True)
2222
workspace.overwrite_mode = True
23-
proc = KrakenRecognize(
24-
workspace,
25-
input_file_grp="OCR-D-SEG-KRAKEN",
26-
output_file_grp="OCR-D-OCR-KRAKEN",
23+
run_processor(KrakenRecognize,
24+
workspace=workspace,
25+
input_file_grp="OCR-D-SEG-KRAKEN",
26+
output_file_grp="OCR-D-OCR-KRAKEN",
2727
)
28-
proc.process()
2928
workspace.save_mets()
29+
# FIXME: add result assertions (find_files, parsing PAGE etc)
3030

3131
if __name__ == "__main__":
3232
main(__file__)

tests/test_segment.py

+13-10
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
from tests.base import TestCase, assets, main
77

8-
from ocrd import Resolver
8+
from ocrd import Resolver, run_processor
99
from ocrd_utils import initLogging, pushd_popd
1010
from ocrd_kraken.segment import KrakenSegment
1111

@@ -18,42 +18,45 @@ def test_run_blla(self):
1818
resolver = Resolver()
1919
with pushd_popd(tempdir=True) as tempdir:
2020
workspace = resolver.workspace_from_url(assets.path_to('communist_manifesto/data/mets.xml'), dst_dir=tempdir, download=True)
21-
proc = KrakenSegment(
22-
workspace,
21+
run_processor(
22+
KrakenSegment,
23+
workspace=workspace,
2324
input_file_grp="OCR-D-IMG-BIN",
2425
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
2526
parameter={'maxcolseps': 0, 'use_legacy': False}
2627
)
27-
proc.process()
2828
workspace.save_mets()
29+
# FIXME: add result assertions (find_files, parsing PAGE etc)
2930

3031
def test_run_blla_regionlevel(self):
3132
resolver = Resolver()
3233
with pushd_popd(tempdir=True) as tempdir:
3334
workspace = resolver.workspace_from_url(assets.path_to('kant_aufklaerung_1784-page-region/data/mets.xml'), dst_dir=tempdir, download=True)
34-
proc = KrakenSegment(
35-
workspace,
35+
run_processor(
36+
KrakenSegment,
37+
workspace=workspace,
3638
input_file_grp="OCR-D-GT-SEG-REGION",
3739
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
3840
page_id="phys_0005",
3941
parameter={'maxcolseps': 0, 'use_legacy': False}
4042
)
41-
proc.process()
4243
workspace.save_mets()
44+
# FIXME: add result assertions (find_files, parsing PAGE etc)
4345

4446
def test_run_legacy(self):
4547
resolver = Resolver()
4648
# with pushd_popd('/tmp/kraken-test') as tempdir:
4749
with pushd_popd(tempdir=True) as tempdir:
4850
workspace = resolver.workspace_from_url(assets.path_to('communist_manifesto/data/mets.xml'), dst_dir=tempdir, download=True)
49-
proc = KrakenSegment(
50-
workspace,
51+
run_processor(
52+
KrakenSegment,
53+
workspace=workspace,
5154
input_file_grp="OCR-D-IMG-BIN",
5255
output_file_grp="OCR-D-SEG-LINE-KRAKEN",
5356
parameter={'maxcolseps': 0, 'use_legacy': True}
5457
)
55-
proc.process()
5658
workspace.save_mets()
59+
# FIXME: add result assertions (find_files, parsing PAGE etc)
5760

5861
if __name__ == "__main__":
5962
main(__file__)

0 commit comments

Comments
 (0)