1
1
from __future__ import absolute_import
2
- import os
3
2
from os .path import join
4
3
from typing import Optional
5
4
6
- from ocrd_models .ocrd_process_result import OcrdProcessResult
5
+ from ocrd .processor .base import OcrdPageResult
6
+ from ocrd .processor .ocrd_page_result import OcrdPageResultImage
7
+
7
8
import kraken .binarization
8
9
from ocrd import Processor
9
10
from ocrd_utils import assert_file_grp_cardinality , getLogger , make_file_id , MIMETYPE_PAGE
@@ -22,7 +23,7 @@ def executable(self):
22
23
def setup(self):
    """Prepare the processor's logger.

    Obtains the logger named ``processor.KrakenBinarize`` through
    ``getLogger`` and stores it on the instance as ``self.logger`` so the
    page-processing code can report progress through it.
    """
    kraken_binarize_logger = getLogger('processor.KrakenBinarize')
    self.logger = kraken_binarize_logger
24
25
25
- def process_page_pcgts (self , * input_pcgts : OcrdPage , output_file_id : Optional [str ] = None , page_id : Optional [str ] = None ) -> OcrdProcessResult :
26
+ def process_page_pcgts (self , * input_pcgts : OcrdPage , output_file_id : Optional [str ] = None , page_id : Optional [str ] = None ) -> OcrdPageResult :
26
27
"""Binarize the pages/regions/lines with Kraken.
27
28
28
29
Iterate over the input PAGE element hierarchy down to the requested
@@ -50,14 +51,14 @@ def process_page_pcgts(self, *input_pcgts: OcrdPage, output_file_id: Optional[st
50
51
assert page
51
52
page_image , page_xywh , _ = self .workspace .image_from_page (
52
53
page , page_id , feature_filter = 'binarized' )
53
- images = []
54
+ result = OcrdPageResult ( pcgts )
54
55
if self .parameter ['level-of-operation' ] == 'page' :
55
56
self .logger .info ("Binarizing page '%s'" , page_id )
56
57
bin_image = kraken .binarization .nlbin (page_image )
57
58
bin_image_id = f'{ output_file_id } .IMG-BIN'
58
59
bin_image_path = join (self .output_file_grp , f'{ bin_image_id } .png' )
59
60
page .add_AlternativeImage (AlternativeImageType (filename = bin_image_path , comments = f'{ page_xywh ["features" ]} ,binarized' ))
60
- images .append ((bin_image , bin_image_id , bin_image_path ))
61
+ result . images .append (OcrdPageResultImage (bin_image , bin_image_id , bin_image_path ))
61
62
else :
62
63
for region in page .get_AllRegions (classes = ['Text' ]):
63
64
region_image , region_xywh = self .workspace .image_from_segment (
@@ -68,7 +69,7 @@ def process_page_pcgts(self, *input_pcgts: OcrdPage, output_file_id: Optional[st
68
69
bin_image_id = f'{ output_file_id } _{ region .id } .IMG-BIN'
69
70
bin_image_path = join (self .output_file_grp , f'{ bin_image_id } .png' )
70
71
region .add_AlternativeImage (AlternativeImageType (filename = bin_image_path , comments = f'{ region_xywh ["features" ]} ,binarized' ))
71
- images .append ((bin_image , bin_image_id , bin_image_path ))
72
+ result . images .append (OcrdPageResultImage (bin_image , bin_image_id , bin_image_path ))
72
73
else :
73
74
for line in region .get_TextLine ():
74
75
line_image , line_xywh = self .workspace .image_from_segment (
@@ -78,5 +79,5 @@ def process_page_pcgts(self, *input_pcgts: OcrdPage, output_file_id: Optional[st
78
79
bin_image_id = f'{ output_file_id } _{ region .id } _{ line .id } .IMG-BIN'
79
80
bin_image_path = join (self .output_file_grp , f'{ bin_image_id } .png' )
80
81
line .add_AlternativeImage (AlternativeImageType (filename = bin_image_path , comments = f'{ page_xywh ["features" ]} ,binarized' ))
81
- images .append ((bin_image , bin_image_id , bin_image_path ))
82
- return OcrdProcessResult ( pcgts , images )
82
+ result . images .append (OcrdPageResultImage (bin_image , bin_image_id , bin_image_path ))
83
+ return result
0 commit comments