Skip to content

Commit

Permalink
Implement geometry traits on geos objects and simplify geos conversio…
Browse files Browse the repository at this point in the history
…ns (#318)

Closes #317
  • Loading branch information
kylebarron authored Dec 15, 2023
1 parent ce42fd1 commit 153f086
Show file tree
Hide file tree
Showing 13 changed files with 245 additions and 459 deletions.
4 changes: 2 additions & 2 deletions src/algorithm/geos/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ impl Buffer for PointArray {
fn buffer(&self, width: f64, quadsegs: i32) -> Result<Self::Output> {
// NOTE: the bumpalo allocator didn't appear to make any perf difference with geos :shrug:
// Presumably GEOS is allocating on its own before we can put the geometry in the Bump?
let bump = bumpalo::Bump::new();
// let bump = bumpalo::Bump::new();

let mut geos_geoms = bumpalo::collections::Vec::with_capacity_in(self.len(), &bump);
let mut geos_geoms = Vec::with_capacity(self.len());

for maybe_g in self.iter_geos() {
if let Some(g) = maybe_g {
Expand Down
73 changes: 1 addition & 72 deletions src/io/geos/array/linestring.rs
Original file line number Diff line number Diff line change
@@ -1,90 +1,19 @@
use arrow_array::OffsetSizeTrait;

use crate::array::linestring::LineStringCapacity;
use crate::array::{LineStringArray, LineStringBuilder};
use crate::error::{GeoArrowError, Result};
use crate::geo_traits::LineStringTrait;
use crate::io::geos::scalar::GEOSLineString;

// NOTE: this, `first_pass`, and `second_pass` are copied from their main implementations, because
// implementing geometry access traits on GEOS geometries that yield ConstGeometry objects with two
// lifetimes seemed really, really hard. Ideally one day we can unify the two branches!

impl<O: OffsetSizeTrait> LineStringBuilder<O> {
    /// Add a new GEOS LineString to the end of this array.
    ///
    /// A `None` value is appended as a null entry via `push_null`.
    ///
    /// # Errors
    ///
    /// This function errors iff the new last item is larger than what O supports.
    ///
    /// # Panics
    ///
    /// Panics if the line string fails to yield a coordinate for an index below its own
    /// `num_coords()`, which would indicate a bug in the GEOS wrapper.
    #[allow(dead_code)]
    fn push_geos_line_string(&mut self, value: Option<&GEOSLineString>) -> Result<()> {
        if let Some(line_string) = value {
            let num_coords = line_string.num_coords();

            // Record how many coords this line string contributes. An offset overflow is
            // propagated to the caller rather than panicking, as the `# Errors` section promises.
            self.geom_offsets.try_push_usize(num_coords)?;

            for coord_idx in 0..num_coords {
                // `coord_idx` is always below `num_coords`, so a missing coord is a bug.
                let coord = line_string
                    .coord(coord_idx)
                    .expect("coord index is within 0..num_coords");
                self.coords.push_coord(&coord);
            }

            // Mark this slot as non-null.
            self.validity.append(true);
        } else {
            self.push_null();
        }
        Ok(())
    }
}

/// Computes the capacities needed to store `geoms` in a line string builder.
///
/// Returns `(coord_capacity, geom_capacity)`, where `coord_capacity` is the sum of
/// `num_coords()` over every non-null line string and `geom_capacity` is `geoms_length` passed
/// through unchanged.
pub(crate) fn first_pass(geoms: &[Option<GEOSLineString>], geoms_length: usize) -> (usize, usize) {
    // Null entries contribute no coordinates, so they are skipped by `flatten`.
    let total_coords: usize = geoms
        .iter()
        .flatten()
        .map(|geom| geom.num_coords())
        .sum();

    (total_coords, geoms_length)
}

/// Builds a `LineStringBuilder` pre-sized with the given capacities and pushes every item of
/// `geoms` into it in order (`None` items are forwarded as `None`).
///
/// # Panics
///
/// Panics on the first item for which `push_line_string` returns an error.
pub(crate) fn second_pass<'a, O: OffsetSizeTrait>(
    geoms: impl Iterator<Item = Option<GEOSLineString<'a>>>,
    coord_capacity: usize,
    geom_capacity: usize,
) -> LineStringBuilder<O> {
    let mut builder =
        LineStringBuilder::with_capacity(LineStringCapacity::new(coord_capacity, geom_capacity));

    for maybe_line_string in geoms {
        builder
            .push_line_string(maybe_line_string.as_ref())
            .unwrap();
    }

    builder
}

/// Builds a `LineStringBuilder` from a vector of optional GEOS geometries.
///
/// Every `Some` geometry is wrapped with `GEOSLineString::new_unchecked`; `None` entries stay
/// `None`.
impl<O: OffsetSizeTrait> TryFrom<Vec<Option<geos::Geometry<'_>>>> for LineStringBuilder<O> {
type Error = GeoArrowError;

fn try_from(value: Vec<Option<geos::Geometry<'_>>>) -> Result<Self> {
let length = value.len();
// TODO: don't use new_unchecked
// NOTE(review): presumably `new_unchecked` does not verify that each geometry really is a
// line string - confirm before feeding this geometries of unknown type.
let geos_objects: Vec<Option<GEOSLineString>> = value
.into_iter()
.map(|geom| geom.map(GEOSLineString::new_unchecked))
.collect();

let (coord_capacity, geom_capacity) = first_pass(&geos_objects, length);
Ok(second_pass(
geos_objects.into_iter(),
coord_capacity,
geom_capacity,
))
Ok(geos_objects.into())
}
}

Expand Down
64 changes: 2 additions & 62 deletions src/io/geos/array/multipoint.rs
Original file line number Diff line number Diff line change
@@ -1,79 +1,19 @@
use arrow_array::OffsetSizeTrait;
use geos::Geom;

use crate::array::multipoint::MultiPointCapacity;
use crate::array::{MultiPointArray, MultiPointBuilder};
use crate::error::{GeoArrowError, Result};
use crate::error::GeoArrowError;
use crate::io::geos::scalar::GEOSMultiPoint;

// NOTE: this, `first_pass`, and `second_pass` are copied from their main implementations, because
// implementing geometry access traits on GEOS geometries that yield ConstGeometry objects with two
// lifetimes seemed really, really hard. Ideally one day we can unify the two branches!

impl<O: OffsetSizeTrait> MultiPointBuilder<O> {
    /// Push a GEOS multi point
    ///
    /// A `None` value is appended as a null entry via `push_null`. Otherwise the x/y of every
    /// member point is pushed, followed by the number of points as the geometry length.
    ///
    /// # Errors
    ///
    /// Returns an error if GEOS fails to yield a member point or its x/y ordinate, or if
    /// `push_xy` / `try_push_length` fail.
    fn push_geos_multi_point(&mut self, value: Option<&GEOSMultiPoint>) -> Result<()> {
        if let Some(multi_point) = value {
            let num_points = multi_point.num_points();
            for point_idx in 0..num_points {
                // Propagate GEOS failures like the `get_x`/`get_y` calls below instead of
                // panicking.
                let point = multi_point.0.get_geometry_n(point_idx)?;
                let x = point.get_x()?;
                let y = point.get_y()?;
                // NOTE(review): `push_xy` is called in an `unsafe` block; its safety contract is
                // not visible here. Presumably the caller must follow up with the matching
                // length push, which happens right after the loop - confirm.
                unsafe {
                    self.push_xy(x, y)?;
                }
            }
            self.try_push_length(num_points)?;
        } else {
            self.push_null();
        }
        Ok(())
    }
}

/// Computes the capacities needed to store `geoms` in a multi point builder.
///
/// Returns `(coord_capacity, geom_capacity)`, where `coord_capacity` is the sum of
/// `num_points()` over every non-null multi point and `geom_capacity` is `geoms_length` passed
/// through unchanged.
fn first_pass(geoms: &[Option<GEOSMultiPoint>], geoms_length: usize) -> (usize, usize) {
    // Null entries contribute no points, so they are skipped by `flatten`.
    let total_points: usize = geoms
        .iter()
        .flatten()
        .map(|geom| geom.num_points())
        .sum();

    (total_points, geoms_length)
}

/// Builds a `MultiPointBuilder` pre-sized with the given capacities and pushes every item of
/// `geoms` into it in order (`None` items are forwarded as `None`).
///
/// # Panics
///
/// Panics on the first item for which `push_geos_multi_point` returns an error.
fn second_pass<'a, O: OffsetSizeTrait>(
    geoms: impl Iterator<Item = Option<GEOSMultiPoint<'a>>>,
    coord_capacity: usize,
    geom_capacity: usize,
) -> MultiPointBuilder<O> {
    let mut builder =
        MultiPointBuilder::with_capacity(MultiPointCapacity::new(coord_capacity, geom_capacity));

    for maybe_geom in geoms {
        builder.push_geos_multi_point(maybe_geom.as_ref()).unwrap();
    }

    builder
}

/// Builds a `MultiPointBuilder` from a vector of optional GEOS geometries.
///
/// Every `Some` geometry is wrapped with `GEOSMultiPoint::new_unchecked`; `None` entries stay
/// `None`.
impl<'a, O: OffsetSizeTrait> TryFrom<Vec<Option<geos::Geometry<'a>>>> for MultiPointBuilder<O> {
type Error = GeoArrowError;

fn try_from(value: Vec<Option<geos::Geometry<'a>>>) -> std::result::Result<Self, Self::Error> {
let length = value.len();
// TODO: don't use new_unchecked
// NOTE(review): presumably `new_unchecked` does not verify that each geometry really is a
// multi point - confirm before feeding this geometries of unknown type.
let geos_objects: Vec<Option<GEOSMultiPoint>> = value
.into_iter()
.map(|geom| geom.map(GEOSMultiPoint::new_unchecked))
.collect();
let (coord_capacity, geom_capacity) = first_pass(&geos_objects, length);
Ok(second_pass(
geos_objects.into_iter(),
coord_capacity,
geom_capacity,
))
Ok(geos_objects.into())
}
}

Expand Down
195 changes: 2 additions & 193 deletions src/io/geos/array/multipolygon.rs
Original file line number Diff line number Diff line change
@@ -1,210 +1,19 @@
use arrow_array::OffsetSizeTrait;

use crate::array::multipolygon::MultiPolygonCapacity;
use crate::array::{MultiPolygonArray, MultiPolygonBuilder};
use crate::error::{GeoArrowError, Result};
use crate::io::geos::scalar::{GEOSConstPolygon, GEOSMultiPolygon, GEOSPolygon};
use geos::Geom;

// NOTE: this, `first_pass`, and `second_pass` are copied from their main implementations, because
// implementing geometry access traits on GEOS geometries that yield ConstGeometry objects with two
// lifetimes seemed really, really hard. Ideally one day we can unify the two branches!

impl<O: OffsetSizeTrait> MultiPolygonBuilder<O> {
    /// Add a new GEOS Polygon to the end of this array.
    ///
    /// The polygon is stored as a MultiPolygon containing exactly one polygon. A `None` value is
    /// appended as a null entry and a polygon without an exterior ring as an empty entry.
    ///
    /// # Errors
    ///
    /// This function errors iff the new last item is larger than what O supports, or if GEOS
    /// fails to hand out a ring's coordinate sequence or one of its ordinates.
    ///
    /// # Panics
    ///
    /// Panics if the polygon fails to yield an interior ring for an index below its own
    /// `num_interiors()`, which would indicate a bug in the GEOS wrapper.
    #[allow(dead_code)]
    fn push_geos_polygon(&mut self, value: Option<&GEOSPolygon>) -> Result<()> {
        if let Some(polygon) = value {
            let ext_ring = match polygon.exterior() {
                Some(ring) => ring,
                None => {
                    self.push_empty();
                    return Ok(());
                }
            };

            // Total number of polygons in this MultiPolygon
            let num_polygons = 1;
            self.geom_offsets.try_push_usize(num_polygons)?;
            // Mark this slot as non-null so the validity buffer stays aligned with
            // `geom_offsets` (the `else` branch records a null for the same slot).
            self.validity.append(true);

            let ext_num_coords = ext_ring.num_coords();
            let coord_seq = ext_ring.0.get_coord_seq()?;
            for coord_idx in 0..ext_num_coords {
                self.coords
                    .push_xy(coord_seq.get_x(coord_idx)?, coord_seq.get_y(coord_idx)?);
            }

            // Total number of rings in this polygon (exterior + interiors)
            let num_interiors = polygon.num_interiors();
            self.polygon_offsets.try_push_usize(num_interiors + 1)?;

            // Number of coords for each ring
            self.ring_offsets.try_push_usize(ext_num_coords)?;

            for int_ring_idx in 0..num_interiors {
                // `int_ring_idx` is always below `num_interiors`, so a missing ring is a bug.
                let int_ring = polygon
                    .interior(int_ring_idx)
                    .expect("interior ring index is within 0..num_interiors");
                let int_num_coords = int_ring.num_coords();
                self.ring_offsets.try_push_usize(int_num_coords)?;
                let coord_seq = int_ring.0.get_coord_seq()?;

                for coord_idx in 0..int_num_coords {
                    self.coords
                        .push_xy(coord_seq.get_x(coord_idx)?, coord_seq.get_y(coord_idx)?);
                }
            }
        } else {
            self.push_null();
        };
        Ok(())
    }

    /// Add a new GEOS MultiPolygon to the end of this array.
    ///
    /// A `None` value is appended as a null entry.
    ///
    /// # Errors
    ///
    /// This function errors iff the new last item is larger than what O supports, or if GEOS
    /// fails to hand out a ring's coordinate sequence or one of its ordinates.
    ///
    /// # Panics
    ///
    /// Panics if a member polygon has no exterior ring, or if the geometry fails to yield a
    /// polygon / interior ring for an in-range index.
    fn push_geos_multi_polygon(&mut self, value: Option<&GEOSMultiPolygon>) -> Result<()> {
        if let Some(multi_polygon) = value {
            // Total number of polygons in this MultiPolygon
            let num_polygons = multi_polygon.num_polygons();
            self.geom_offsets.try_push_usize(num_polygons)?;
            // Mark this slot as non-null so the validity buffer stays aligned with
            // `geom_offsets` (the `else` branch records a null for the same slot).
            self.validity.append(true);

            // Iterate over polygons
            for polygon_idx in 0..num_polygons {
                // `polygon_idx` is always below `num_polygons`, so a missing polygon is a bug.
                let polygon = multi_polygon
                    .polygon(polygon_idx)
                    .expect("polygon index is within 0..num_polygons");

                // Here we unwrap the exterior ring because a polygon inside a multi polygon should
                // never be empty.
                let ext_ring = polygon
                    .exterior()
                    .expect("polygon inside a multi polygon has an exterior ring");
                let ext_num_coords = ext_ring.num_coords();
                let coord_seq = ext_ring.0.get_coord_seq()?;
                for coord_idx in 0..ext_num_coords {
                    self.coords
                        .push_xy(coord_seq.get_x(coord_idx)?, coord_seq.get_y(coord_idx)?);
                }

                // Total number of rings in this polygon (exterior + interiors)
                let num_interiors = polygon.num_interiors();
                self.polygon_offsets.try_push_usize(num_interiors + 1)?;

                // Number of coords for each ring
                self.ring_offsets.try_push_usize(ext_num_coords)?;

                for int_ring_idx in 0..num_interiors {
                    // `int_ring_idx` is always below `num_interiors`, so a missing ring is a bug.
                    let int_ring = polygon
                        .interior(int_ring_idx)
                        .expect("interior ring index is within 0..num_interiors");
                    let int_num_coords = int_ring.num_coords();
                    self.ring_offsets.try_push_usize(int_num_coords)?;
                    let coord_seq = int_ring.0.get_coord_seq()?;

                    for coord_idx in 0..int_num_coords {
                        self.coords
                            .push_xy(coord_seq.get_x(coord_idx)?, coord_seq.get_y(coord_idx)?);
                    }
                }
            }
        } else {
            self.push_null();
        };
        Ok(())
    }
}

/// Computes the capacities needed to store `geoms` in a multi polygon builder.
///
/// Returns `(coord_capacity, ring_capacity, polygon_capacity, geom_capacity)`:
/// the total coordinate count over all rings, the total ring count (one exterior slot plus the
/// interiors of every polygon), the total polygon count, and `geoms_length` passed through
/// unchanged. Null entries contribute nothing to the first three values.
///
/// # Panics
///
/// Panics if GEOS fails to yield a member geometry or an interior ring for an in-range index.
fn first_pass(
    geoms: &[Option<GEOSMultiPolygon>],
    geoms_length: usize,
) -> (usize, usize, usize, usize) {
    let (mut total_coords, mut total_rings, mut total_polygons) = (0, 0, 0);

    for geom in geoms.iter().flatten() {
        let polygon_count = geom.num_polygons();
        total_polygons += polygon_count;

        for idx in 0..polygon_count {
            let part = GEOSConstPolygon::new_unchecked(geom.0.get_geometry_n(idx).unwrap());

            // One slot for the exterior ring plus one per interior ring.
            let interior_count = part.num_interiors();
            total_rings += interior_count + 1;

            // A polygon without an exterior ring contributes no exterior coords.
            if let Some(outer) = part.exterior() {
                total_coords += outer.num_coords();
            }

            let interior_coords: usize = (0..interior_count)
                .map(|ring_idx| part.interior(ring_idx).unwrap().num_coords())
                .sum();
            total_coords += interior_coords;
        }
    }

    (total_coords, total_rings, total_polygons, geoms_length)
}

/// Builds a `MultiPolygonBuilder` pre-sized with the given capacities and pushes every item of
/// `geoms` into it in order (`None` items are forwarded as `None`).
///
/// # Panics
///
/// Panics on the first item for which `push_geos_multi_polygon` returns an error.
fn second_pass<'a, O: OffsetSizeTrait>(
    geoms: impl Iterator<Item = Option<GEOSMultiPolygon<'a>>>,
    coord_capacity: usize,
    ring_capacity: usize,
    polygon_capacity: usize,
    geom_capacity: usize,
) -> MultiPolygonBuilder<O> {
    let mut builder = MultiPolygonBuilder::with_capacity(MultiPolygonCapacity::new(
        coord_capacity,
        ring_capacity,
        polygon_capacity,
        geom_capacity,
    ));

    for maybe_geom in geoms {
        builder
            .push_geos_multi_polygon(maybe_geom.as_ref())
            .unwrap();
    }

    builder
}
use crate::io::geos::scalar::GEOSMultiPolygon;

/// Builds a `MultiPolygonBuilder` from a vector of optional GEOS geometries.
///
/// Every `Some` geometry is wrapped with `GEOSMultiPolygon::new_unchecked`; `None` entries stay
/// `None`.
impl<O: OffsetSizeTrait> TryFrom<Vec<Option<geos::Geometry<'_>>>> for MultiPolygonBuilder<O> {
type Error = GeoArrowError;

fn try_from(value: Vec<Option<geos::Geometry<'_>>>) -> Result<Self> {
let length = value.len();
// TODO: don't use new_unchecked
// NOTE(review): presumably `new_unchecked` does not verify that each geometry really is a
// multi polygon - confirm before feeding this geometries of unknown type.
let geos_objects: Vec<Option<GEOSMultiPolygon>> = value
.into_iter()
.map(|geom| geom.map(GEOSMultiPolygon::new_unchecked))
.collect();

let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) =
first_pass(&geos_objects, length);
Ok(second_pass(
geos_objects.into_iter(),
coord_capacity,
ring_capacity,
polygon_capacity,
geom_capacity,
))
Ok(geos_objects.into())
}
}

Expand Down
Loading

0 comments on commit 153f086

Please sign in to comment.