@@ -23,7 +23,7 @@ def executable(self):
23
23
def setup (self ):
24
24
self .logger = getLogger ('processor.KrakenBinarize' )
25
25
26
- def process_page_pcgts (self , * input_pcgts : OcrdPage , output_file_id : Optional [str ] = None , page_id : Optional [str ] = None ) -> OcrdPageResult :
26
+ def process_page_pcgts (self , * input_pcgts : Optional [OcrdPage ] , page_id : Optional [str ] = None ) -> OcrdPageResult :
27
27
"""Binarize the pages/regions/lines with Kraken.
28
28
29
29
Iterate over the input PAGE element hierarchy down to the requested
@@ -47,37 +47,32 @@ def process_page_pcgts(self, *input_pcgts: OcrdPage, output_file_id: Optional[st
47
47
self .logger .debug ('Level of operation: "%s"' , self .parameter ['level-of-operation' ])
48
48
49
49
pcgts = input_pcgts [0 ]
50
+ assert pcgts
50
51
page = pcgts .get_Page ()
51
52
assert page
52
53
page_image , page_xywh , _ = self .workspace .image_from_page (
53
54
page , page_id , feature_filter = 'binarized' )
54
55
result = OcrdPageResult (pcgts )
55
56
if self .parameter ['level-of-operation' ] == 'page' :
56
57
self .logger .info ("Binarizing page '%s'" , page_id )
57
- bin_image = kraken .binarization .nlbin (page_image )
58
- bin_image_id = f'{ output_file_id } .IMG-BIN'
59
- bin_image_path = join (self .output_file_grp , f'{ bin_image_id } .png' )
60
- page .add_AlternativeImage (AlternativeImageType (filename = bin_image_path , comments = f'{ page_xywh ["features" ]} ,binarized' ))
61
- result .images .append (OcrdPageResultImage (bin_image , bin_image_id , bin_image_path ))
58
+ alternative_image = AlternativeImageType (comments = f'{ page_xywh ["features" ]} ,binarized' )
59
+ page .add_AlternativeImage (alternative_image )
60
+ result .images .append (OcrdPageResultImage (kraken .binarization .nlbin (page_image ), '.IMG-BIN' , alternative_image ))
62
61
else :
63
62
for region in page .get_AllRegions (classes = ['Text' ]):
64
63
region_image , region_xywh = self .workspace .image_from_segment (
65
64
region , page_image , page_xywh , feature_filter = 'binarized' )
66
65
if self .parameter ['level-of-operation' ] == 'region' :
67
66
self .logger .info ("Binarizing region '%s'" , region .id )
68
- bin_image = kraken .binarization .nlbin (region_image )
69
- bin_image_id = f'{ output_file_id } _{ region .id } .IMG-BIN'
70
- bin_image_path = join (self .output_file_grp , f'{ bin_image_id } .png' )
71
- region .add_AlternativeImage (AlternativeImageType (filename = bin_image_path , comments = f'{ region_xywh ["features" ]} ,binarized' ))
72
- result .images .append (OcrdPageResultImage (bin_image , bin_image_id , bin_image_path ))
67
+ alternative_image = AlternativeImageType (comments = f'{ region_xywh ["features" ]} ,binarized' )
68
+ region .add_AlternativeImage (alternative_image )
69
+ result .images .append (OcrdPageResultImage (kraken .binarization .nlbin (region_image ), f'{ region .id } .IMG-BIN' , alternative_image ))
73
70
else :
74
71
for line in region .get_TextLine ():
75
72
line_image , line_xywh = self .workspace .image_from_segment (
76
73
line , region_image , region_xywh , feature_filter = 'binarized' )
77
74
self .logger .info ("Binarizing line '%s'" , line .id )
78
- bin_image = kraken .binarization .nlbin (line_image )
79
- bin_image_id = f'{ output_file_id } _{ region .id } _{ line .id } .IMG-BIN'
80
- bin_image_path = join (self .output_file_grp , f'{ bin_image_id } .png' )
81
- line .add_AlternativeImage (AlternativeImageType (filename = bin_image_path , comments = f'{ page_xywh ["features" ]} ,binarized' ))
82
- result .images .append (OcrdPageResultImage (bin_image , bin_image_id , bin_image_path ))
75
+ alternative_image = AlternativeImageType (comments = f'{ line_xywh ["features" ]} ,binarized' )
76
+ line .add_AlternativeImage (alternative_image )
77
+ result .images .append (OcrdPageResultImage (kraken .binarization .nlbin (line_image ), f'{ region .id } _{ line .id } .IMG-BIN' , alternative_image ))
83
78
return result
0 commit comments