Skip to content

Commit fd15e2a

Browse files
committed
tests: add actual assertions
1 parent ae6445b commit fd15e2a

File tree

3 files changed

+53
-6
lines changed

3 files changed

+53
-6
lines changed

tests/test_binarize.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,31 @@
11
# pylint: disable=import-error
22

33
import json
4+
import os
45

56
from ocrd import run_processor
7+
from ocrd_utils import MIMETYPE_PAGE
8+
from ocrd_models.constants import NAMESPACES
9+
from ocrd_modelfactory import page_from_file
10+
611
from ocrd_kraken.binarize import KrakenBinarize
712

813
from .assets import assets
914

1015

1116
PARAM_JSON = assets.url_of('param-binarize.json')
1217

18+
def analyse_result(ws, level):
    """Assert that the binarization run left usable output in the workspace.

    Checks, in this order:

    1. the ``OCR-D-BIN-KRAKEN`` directory exists below ``ws.directory``,
    2. that file group lists at least one PAGE file,
    3. that file group lists at least one file with an image mimetype,
    4. the first PAGE file can be parsed,
    5. the parsed PAGE has at least one ``AlternativeImage`` whose
       ``comments`` attribute contains "binarized" directly below an
       element named ``level``.

    Args:
        ws: workspace object; only its ``directory`` attribute and its
            ``find_files`` method are used here.
        level: PAGE element name interpolated into the XPath query
            (callers in this file pass 'Page', 'TextRegion' or 'TextLine').

    Raises:
        AssertionError: if any of the checks above fails.
    """
    file_grp = "OCR-D-BIN-KRAKEN"

    # The output file group must exist as a directory on disk.
    out_dir = os.path.join(ws.directory, file_grp)
    assert os.path.isdir(out_dir)

    # The file group must reference at least one PAGE file ...
    page_files = list(ws.find_files(fileGrp=file_grp, mimetype=MIMETYPE_PAGE))
    assert page_files, "found no output PAGE file"

    # ... and at least one image file (mimetype passed as a '//' pattern).
    image_files = list(ws.find_files(fileGrp=file_grp, mimetype="//^image/.*"))
    assert image_files, "found no output image file"

    # Only the first PAGE file is inspected in detail.
    pcgts = page_from_file(page_files[0])
    assert pcgts is not None

    query = '//page:%s/page:AlternativeImage[contains(@comments,"binarized")]' % level
    binarized = pcgts.etree.xpath(query, namespaces=NAMESPACES)
    assert len(binarized) > 0, "found no binarized AlternativeImages in output PAGE file"
28+
1329
def test_param_json(workspace_sbb):
1430
run_processor(KrakenBinarize,
1531
input_file_grp="OCR-D-IMG",
@@ -19,6 +35,7 @@ def test_param_json(workspace_sbb):
1935
)
2036
ws = workspace_sbb['workspace']
2137
ws.save_mets()
38+
analyse_result(ws, 'Page')
2239

2340
def test_binarize_regions(workspace_aufklaerung):
2441
run_processor(KrakenBinarize,
@@ -29,7 +46,7 @@ def test_binarize_regions(workspace_aufklaerung):
2946
)
3047
ws = workspace_aufklaerung['workspace']
3148
ws.save_mets()
32-
# FIXME: add result assertions (find_files, parsing PAGE etc)
49+
analyse_result(ws, 'TextRegion')
3350

3451
def test_binarize_lines(workspace_aufklaerung):
3552
run_processor(KrakenBinarize,
@@ -40,4 +57,5 @@ def test_binarize_lines(workspace_aufklaerung):
4057
)
4158
ws = workspace_aufklaerung['workspace']
4259
ws.save_mets()
43-
# FIXME: add result assertions (find_files, parsing PAGE etc)
60+
analyse_result(ws, 'TextLine')
61+

tests/test_recognize.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# pylint: disable=import-error
22

3+
import os
4+
35
from ocrd import run_processor
6+
from ocrd_utils import MIMETYPE_PAGE
7+
from ocrd_models.constants import NAMESPACES
8+
from ocrd_modelfactory import page_from_file
9+
410
from ocrd_kraken.recognize import KrakenRecognize
511
from ocrd_kraken.binarize import KrakenBinarize
612

@@ -21,4 +27,10 @@ def test_recognize(workspace_aufklaerung):
2127
)
2228
ws = workspace_aufklaerung['workspace']
2329
ws.save_mets()
24-
# FIXME: add result assertions (find_files, parsing PAGE etc)
30+
assert os.path.isdir(os.path.join(ws.directory, 'OCR-D-OCR-KRAKEN'))
31+
results = ws.find_files(file_grp='OCR-D-OCR-KRAKEN', mimetype=MIMETYPE_PAGE)
32+
result0 = next(results, False)
33+
assert result0, "found no output PAGE file"
34+
result0 = page_from_file(result0)
35+
text0 = result0.etree.xpath('//page:Glyph/page:TextEquiv/page:Unicode', namespaces=NAMESPACES)
36+
assert len(text0) > 0, "found no glyph text in output PAGE file"

tests/test_segment.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,27 @@
11
# pylint: disable=import-error
22

3+
import os
4+
35
from ocrd import run_processor
6+
from ocrd_utils import MIMETYPE_PAGE
7+
from ocrd_models.constants import NAMESPACES
8+
from ocrd_modelfactory import page_from_file
9+
410
from ocrd_kraken.segment import KrakenSegment
511
from ocrd_kraken.binarize import KrakenBinarize
612

713

14+
def analyse_result(ws):
    """Assert that the segmentation run left usable output in the workspace.

    Checks, in this order:

    1. the ``OCR-D-SEG-LINE-KRAKEN`` directory exists below ``ws.directory``,
    2. that file group lists at least one PAGE file,
    3. the first PAGE file can be parsed,
    4. the parsed PAGE has at least one ``TextRegion`` with ``Coords``,
    5. the parsed PAGE reports at least one text line.

    Args:
        ws: workspace object; only its ``directory`` attribute and its
            ``find_files`` method are used here.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    file_grp = "OCR-D-SEG-LINE-KRAKEN"

    # The output file group must exist as a directory on disk.
    out_dir = os.path.join(ws.directory, file_grp)
    assert os.path.isdir(out_dir)

    # The file group must reference at least one PAGE file.
    page_files = list(ws.find_files(fileGrp=file_grp, mimetype=MIMETYPE_PAGE))
    assert page_files, "found no output PAGE file"

    # Only the first PAGE file is inspected in detail.
    pcgts = page_from_file(page_files[0])
    assert pcgts is not None

    # Regions are looked up in the XML tree ...
    region_coords = pcgts.etree.xpath(
        '//page:TextRegion/page:Coords', namespaces=NAMESPACES)
    assert len(region_coords) > 0, "found no text regions in output PAGE file"

    # ... while lines are collected through the PAGE object API.
    text_lines = pcgts.get_Page().get_AllTextLines()
    assert len(text_lines) > 0, "found no text lines in output PAGE file"
24+
825
def test_run_blla(workspace_aufklaerung):
926
run_processor(KrakenSegment,
1027
input_file_grp="OCR-D-IMG",
@@ -14,7 +31,7 @@ def test_run_blla(workspace_aufklaerung):
1431
)
1532
ws = workspace_aufklaerung['workspace']
1633
ws.save_mets()
17-
# FIXME: add result assertions (find_files, parsing PAGE etc)
34+
analyse_result(ws)
1835

1936
def test_run_blla_regionlevel(workspace_aufklaerung_region):
2037
run_processor(KrakenSegment,
@@ -27,7 +44,7 @@ def test_run_blla_regionlevel(workspace_aufklaerung_region):
2744
)
2845
ws = workspace_aufklaerung_region['workspace']
2946
ws.save_mets()
30-
# FIXME: add result assertions (find_files, parsing PAGE etc)
47+
analyse_result(ws)
3148

3249
def test_run_legacy(workspace_aufklaerung):
3350
# legacy segmentation requires binarized images
@@ -45,4 +62,4 @@ def test_run_legacy(workspace_aufklaerung):
4562
)
4663
ws = workspace_aufklaerung['workspace']
4764
ws.save_mets()
48-
# FIXME: add result assertions (find_files, parsing PAGE etc)
65+
analyse_result(ws)

0 commit comments

Comments
 (0)