5
5
import numpy as np
6
6
from scipy .sparse .csgraph import minimum_spanning_tree
7
7
from shapely .geometry import Polygon , LineString
8
+ from shapely .geometry .polygon import orient
9
+ from shapely import set_precision
8
10
from shapely .ops import unary_union , nearest_points
9
11
10
12
from ocrd import Processor
@@ -120,22 +122,22 @@ def process(self):
120
122
content = to_xml (pcgts ))
121
123
122
124
def _process_segment (self , segment , constituents , page_id ):
123
- """Shrink segment outline to become the minimal convex hull of its constituent segments."""
125
+ """Overwrite segment outline to become the minimal convex hull of its constituent segments."""
124
126
LOG = getLogger ('processor.ProjectHull' )
125
127
polygons = [make_valid (Polygon (polygon_from_points (constituent .get_Coords ().points )))
126
128
for constituent in constituents ]
127
129
polygon = join_polygons (polygons ).buffer (self .parameter ['padding' ]).exterior .coords [:- 1 ]
130
+ # make sure the segment still fits into its parent's parent
128
131
if isinstance (segment , PageType ):
129
- oldborder = segment .Border
130
- segment .Border = None # ensure interim parent is the page frame itself
131
- # make sure the segment still fits into its own parent
132
- polygon2 = polygon_for_parent (polygon , segment )
133
- if polygon2 is None :
132
+ # ensure interim parent is the page frame itself
133
+ parent = PageType (** segment .__dict__ )
134
+ parent .Border = None
135
+ else :
136
+ parent = segment .parent_object_
137
+ polygon = polygon_for_parent (polygon , parent )
138
+ if polygon is None :
134
139
LOG .info ('Ignoring extant segment: %s' , segment .id )
135
- if isinstance (segment , PageType ):
136
- segment .Border = oldborder
137
140
else :
138
- polygon = polygon2
139
141
points = points_from_polygon (polygon )
140
142
coords = CoordsType (points = points )
141
143
LOG .debug ('Using new coordinates from %d constituents for segment "%s"' ,
@@ -152,11 +154,13 @@ def pairwise(iterable):
152
154
153
155
def join_polygons (polygons , scale = 20 ):
154
156
"""construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
155
- # ensure input polygons are simply typed
156
- polygons = list (itertools .chain .from_iterable ([
157
- poly .geoms if poly .type in ['MultiPolygon' , 'GeometryCollection' ]
158
- else [poly ]
159
- for poly in polygons ]))
157
+ # ensure input polygons are simply typed and all oriented equally
158
+ polygons = [orient (poly )
159
+ for poly in itertools .chain .from_iterable (
160
+ [poly .geoms
161
+ if poly .geom_type in ['MultiPolygon' , 'GeometryCollection' ]
162
+ else [poly ]
163
+ for poly in polygons ])]
160
164
npoly = len (polygons )
161
165
if npoly == 1 :
162
166
return polygons [0 ]
@@ -175,16 +179,18 @@ def join_polygons(polygons, scale=20):
175
179
prevp = polygons [prevp ]
176
180
nextp = polygons [nextp ]
177
181
nearest = nearest_points (prevp , nextp )
178
- bridgep = LineString (nearest ).buffer (max (1 , scale / 5 ), resolution = 1 )
182
+ bridgep = orient ( LineString (nearest ).buffer (max (1 , scale / 5 ), resolution = 1 ), - 1 )
179
183
polygons .append (bridgep )
180
184
jointp = unary_union (polygons )
181
- assert jointp .type == 'Polygon' , jointp .wkt
182
- if jointp .minimum_clearance < 1.0 :
183
- # follow-up calculations will necessarily be integer;
184
- # so anticipate rounding here and then ensure validity
185
- jointp = Polygon (np .round (jointp .exterior .coords ))
186
- jointp = make_valid (jointp )
187
- return jointp
185
+ assert jointp .geom_type == 'Polygon' , jointp .wkt
186
+ # follow-up calculations will necessarily be integer;
187
+ # so anticipate rounding here and then ensure validity
188
+ jointp2 = set_precision (jointp , 1.0 )
189
+ if jointp2 .geom_type != 'Polygon' or not jointp2 .is_valid :
190
+ jointp2 = Polygon (np .round (jointp .exterior .coords ))
191
+ jointp2 = make_valid (jointp2 )
192
+ assert jointp2 .geom_type == 'Polygon' , jointp2 .wkt
193
+ return jointp2
188
194
189
195
def polygon_for_parent (polygon , parent ):
190
196
"""Clip polygon to parent polygon range.
@@ -227,30 +233,38 @@ def make_intersection(poly1, poly2):
227
233
# post-process
228
234
if interp .is_empty or interp .area == 0.0 :
229
235
return None
230
- if interp .type == 'GeometryCollection' :
236
+ if interp .geom_type == 'GeometryCollection' :
231
237
# heterogeneous result: filter zero-area shapes (LineString, Point)
232
238
interp = unary_union ([geom for geom in interp .geoms if geom .area > 0 ])
233
- if interp .type == 'MultiPolygon' :
239
+ if interp .geom_type == 'MultiPolygon' :
234
240
# homogeneous result: construct convex hull to connect
235
241
interp = join_polygons (interp .geoms )
236
- if interp .minimum_clearance < 1.0 :
237
- # follow-up calculations will necessarily be integer;
238
- # so anticipate rounding here and then ensure validity
239
- interp = Polygon (np .round (interp .exterior .coords ))
240
- interp = make_valid (interp )
242
+ # follow-up calculations will necessarily be integer;
243
+ # so anticipate rounding here and then ensure validity
244
+ interp = set_precision (interp , 1.0 )
241
245
return interp
242
246
243
247
def make_valid(polygon):
    """Ensure a shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.

    Three strategies are tried in turn; each one stops as soon as the
    polygon reports ``is_valid``:

    1. rebuild the polygon from its exterior ring, starting at a different point,
    2. simplify it with a growing tolerance,
    3. buffer (enlarge) it with a growing distance.

    Args:
        polygon: the polygon to repair. Only its exterior ring is used
            when the points get re-arranged.

    Returns:
        The input object itself if it already was valid, otherwise the
        repaired geometry.

    Raises:
        AssertionError: if the geometry is still invalid after all three
            strategies have been exhausted.
    """
    # The exterior ring is closed, i.e. its last point repeats the first one.
    # Drop that closing point before rotating: otherwise every rotation below
    # would carry a duplicate vertex into the middle of the new ring (and the
    # last rotation would merely reproduce the original ordering).
    points = list(polygon.exterior.coords[:-1])
    # try by re-arranging points
    for split in range(1, len(points)):
        if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
            break
        # simplification may not be possible (at all) due to ordering
        # in that case, try another starting point
        polygon = Polygon(points[-split:] + points[:-split])
    # try by simplification
    # (the upper bound is computed once from the current area; the offset
    # guarantees at least one attempt even for a near-zero area)
    for tolerance in range(int(polygon.area + 1.5)):
        if polygon.is_valid:
            break
        # simplification may require a larger tolerance
        polygon = polygon.simplify(tolerance + 1)
    # try by enlarging
    for tolerance in range(1, int(polygon.area + 2.5)):
        if polygon.is_valid:
            break
        # enlargement may require a larger tolerance
        polygon = polygon.buffer(tolerance)
    assert polygon.is_valid, polygon.wkt
    return polygon
0 commit comments