Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor file io #609

Merged
merged 12 commits into from
Feb 4, 2025
178 changes: 104 additions & 74 deletions Cargo.lock

Large diffs are not rendered by default.

19 changes: 4 additions & 15 deletions nemo-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,14 @@ use nemo::{
meta::timing::{TimedCode, TimedDisplay},
rule_model::{
self,
components::{
fact::Fact,
import_export::{file_formats::FileFormat, ExportDirective},
tag::Tag,
term::map::Map,
ProgramComponent,
},
components::{fact::Fact, import_export::ExportDirective, tag::Tag, ProgramComponent},
error::ValidationErrorBuilder,
program::Program,
},
};

fn default_export(predicate: Tag) -> ExportDirective {
ExportDirective::new(
predicate,
FileFormat::CSV,
Map::empty_unnamed(),
Default::default(),
)
ExportDirective::new_csv(predicate)
}

/// Set exports according to command-line parameter.
Expand Down Expand Up @@ -291,7 +280,7 @@ fn run(mut cli: CliApp) -> Result<(), CliError> {
ExecutionEngine::initialize(program.clone(), import_manager)?;

for (predicate, handler) in engine.exports() {
export_manager.validate(&predicate, &*handler)?;
export_manager.validate(&predicate, &handler)?;
}

TimedCode::instance().sub("Reading & Preprocessing").stop();
Expand All @@ -313,7 +302,7 @@ fn run(mut cli: CliApp) -> Result<(), CliError> {
for (predicate, handler) in engine.exports() {
stdout_used |= export_manager.export_table(
&predicate,
&*handler,
&handler,
engine.predicate_rows(&predicate)?,
)?;
}
Expand Down
3 changes: 1 addition & 2 deletions nemo-physical/src/columnar/column/rle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,6 @@ mod test {
use crate::columnar::columnscan::ColumnScan;
use crate::datatypes::{Double, Float, RunLengthEncodable};
use quickcheck_macros::quickcheck;
use std::iter::repeat;
use std::num::NonZeroUsize;
#[cfg(not(miri))]
use test_log::test;
Expand Down Expand Up @@ -458,7 +457,7 @@ mod test {
}

fn get_control_data_with_inc_zero() -> Vec<u8> {
repeat(1).take(1000000).collect()
std::iter::repeat_n(1, 1000000).collect()
}

fn get_test_column_with_inc_zero() -> ColumnRle<u8> {
Expand Down
6 changes: 2 additions & 4 deletions nemo-physical/src/datasources/table_providers.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
//! Module for defining a trait that can be implemented by code that can provide tabular data,
//! such as file readers.

use std::error::Error;

use crate::management::bytesized::ByteSized;
use crate::{error::ReadingError, management::bytesized::ByteSized};

use super::tuple_writer::TupleWriter;

Expand All @@ -14,7 +12,7 @@ pub trait TableProvider: std::fmt::Debug + ByteSized {
fn provide_table_data(
self: Box<Self>,
tuple_writer: &mut TupleWriter,
) -> Result<(), Box<dyn Error>>;
) -> Result<(), ReadingError>;

/// Return the number of columns of this table.
fn arity(&self) -> usize;
Expand Down
4 changes: 2 additions & 2 deletions nemo-physical/src/datatypes/double.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use num::{
};

use crate::{
error::{Error, ReadingError},
error::{Error, ReadingError, ReadingErrorKind},
function::definitions::numeric::traits::{CheckedPow, CheckedSquareRoot},
};

Expand All @@ -33,7 +33,7 @@ impl Double {
/// Returns an error if `value` is [f32::NAN] or infinite.
pub fn new(value: f64) -> Result<Self, ReadingError> {
if !value.is_finite() {
return Err(ReadingError::InvalidFloat);
return Err(ReadingError::new(ReadingErrorKind::InvalidFloat));
}

Ok(Self(value))
Expand Down
11 changes: 6 additions & 5 deletions nemo-physical/src/datatypes/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ use num::{
One, Zero,
};

use super::{run_length_encodable::FloatingStep, FloorToUsize, RunLengthEncodable};
#[cfg(test)]
use quickcheck::{Arbitrary, Gen};

use crate::{
error::{Error, ReadingError},
error::{Error, ReadingError, ReadingErrorKind},
function::definitions::numeric::traits::{CheckedPow, CheckedSquareRoot},
};

#[cfg(test)]
use quickcheck::{Arbitrary, Gen};
use super::{run_length_encodable::FloatingStep, FloorToUsize, RunLengthEncodable};

/// Wrapper for [f32] that excludes [f32::NAN] and infinite values
#[derive(Copy, Clone, Debug, PartialEq, Default)]
Expand All @@ -32,7 +33,7 @@ impl Float {
/// Returns an error if `value` is [f32::NAN] or infinite.
pub fn new(value: f32) -> Result<Self, ReadingError> {
if !value.is_finite() {
return Err(ReadingError::InvalidFloat);
return Err(ReadingError::new(ReadingErrorKind::InvalidFloat));
}

Ok(Float(value))
Expand Down
4 changes: 2 additions & 2 deletions nemo-physical/src/datavalues/datavalue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub(crate) fn quote_string(s: &str) -> String {
/// Encloses a string in pointy brackets. No other escaping is done, since we assume that the IRI has already
/// been processed to be in a suitable form without inner `<` or `>`.
pub(crate) fn quote_iri(s: &str) -> String {
"<".to_owned() + s + ">"
format!("<{}>", s)
}

/// Enum of different value domains that are distinguished in this code.
Expand Down Expand Up @@ -60,7 +60,7 @@ pub enum ValueDomain {
/// This is a superset of [ValueDomain::NonNegativeLong] and its respective subtypes.
UnsignedLong,
/// Domain of all signed 64bit integer numbers that are not negative: 0…+9223372036854775807, or 0 … +2^63-1.
/// This is a superset of [ValueDomain::Int].
/// This is a superset of [ValueDomain::UnsignedInt].
NonNegativeLong,
/// Domain of all unsigned 32bit integer numbers: 0…+4294967295, or 0 … +2^32-1.
/// This is a superset of [ValueDomain::NonNegativeInt].
Expand Down
2 changes: 1 addition & 1 deletion nemo-physical/src/datavalues/iri_datavalue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub struct IriDataValue(String);
impl IriDataValue {
/// Constructor. It is currently not checked whether the IRI is valid according to applicable specifications
/// and standards -- we just treat it like a string.
pub(crate) fn new(iri: String) -> Self {
pub fn new(iri: String) -> Self {
IriDataValue(iri)
}

Expand Down
2 changes: 1 addition & 1 deletion nemo-physical/src/datavalues/map_datavalue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub struct MapDataValue {
impl MapDataValue {
/// Constructor.
#[allow(dead_code)]
pub(crate) fn new<T: IntoIterator<Item = (AnyDataValue, AnyDataValue)>>(
pub fn new<T: IntoIterator<Item = (AnyDataValue, AnyDataValue)>>(
label: Option<IriDataValue>,
pairs_iter: T,
) -> Self {
Expand Down
2 changes: 1 addition & 1 deletion nemo-physical/src/datavalues/tuple_datavalue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub struct TupleDataValue {
impl TupleDataValue {
/// Constructor.
#[allow(dead_code)]
pub(crate) fn new<T: IntoIterator<Item = AnyDataValue>>(
pub fn new<T: IntoIterator<Item = AnyDataValue>>(
label: Option<IriDataValue>,
values: T,
) -> Self {
Expand Down
94 changes: 74 additions & 20 deletions nemo-physical/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ use std::{convert::Infallible, fmt::Display};

use thiserror::Error;

use crate::resource::Resource;
use crate::{datavalues::DataValueCreationError, resource::Resource};

/// Trait that can be used by external libraries extending Nemo to communicate a error during reading
pub trait ExternalReadingError: Display + std::fmt::Debug {}

/// Collection or errors related to reading input tables.
/// The collection of possible causes for a [`ReadingError`]
#[allow(variant_size_differences)]
#[derive(Error, Debug)]
pub enum ReadingError {
pub enum ReadingErrorKind {
/// An external error during reading.
#[error(transparent)]
ExternalError(#[from] Box<dyn std::error::Error>),
Expand All @@ -26,30 +26,20 @@ pub enum ReadingError {
#[error(transparent)]
ParseFloat(#[from] std::num::ParseFloatError),
/// Errors on reading a file
#[error("failed to read \"{filename}\": {error}")]
IoReading {
/// Contains the wrapped error
error: std::io::Error,
/// Filename which caused the error
filename: String,
},
#[error("failed to read file: {0}")]
IoReading(#[from] std::io::Error),
/// Decompression error
#[error("failed to decompress \"{resource}\" with {decompression_format}")]
#[error("failed to decompress with {decompression_format}")]
Decompression {
/// Resource (filename etc.) where the error originated
resource: String,
/// name of decompression method that was used
decompression_format: String,
},
/// Error when a resource could not be provided by any resource provider
#[error("resource at \"{resource}\" was not provided by any resource provider")]
ResourceNotProvided {
/// Resource which was not provided
resource: Resource,
},
#[error("resource was not provided by any resource provider")]
ResourceNotProvided,
/// A provided resource is not a valid local file:// URI
#[error(r#"resource "{0}" is not a valid local file:// URI"#)]
InvalidFileUri(Resource),
#[error(r#"resource is not a valid local file:// URI"#)]
InvalidFileUri,
/// Error in Reqwest's HTTP handler
#[error(transparent)]
HttpTransfer(#[from] reqwest::Error),
Expand All @@ -62,6 +52,70 @@ pub enum ReadingError {
/// Reading error caused by a library which extends nemo
#[error("reading error caused by a external library extending Nemo: {0}")]
ExternalReadingError(Box<dyn ExternalReadingError>),
/// Error during creation of data-values
#[error(transparent)]
DataValueCreation(#[from] DataValueCreationError),
}

/// An error that occurred while reading input tables
#[derive(Debug, Error)]
pub struct ReadingError {
kind: ReadingErrorKind,
resource: Option<Resource>,
predicate: Option<String>,
}

impl ReadingError {
/// Creates a new [`ReadingError`]
pub fn new(kind: ReadingErrorKind) -> Self {
ReadingError {
kind,
resource: None,
predicate: None,
}
}

/// Create a new [`ReadingError`] of kind [`ReadingErrorKind::ExternalError`]
pub fn new_external(error: Box<dyn std::error::Error>) -> Self {
Self::new(error.into())
}

/// Set the resource which was being read while the error occurred
pub fn with_resource(mut self, resource: Resource) -> Self {
self.resource = Some(resource);
self
}

/// Set the predicate which was being processed while the error occurred
pub fn with_predicate(mut self, predicate: String) -> Self {
self.predicate = Some(predicate);
self
}
}

impl<T> From<T> for ReadingError
where
ReadingErrorKind: From<T>,
{
fn from(value: T) -> Self {
ReadingError::new(ReadingErrorKind::from(value))
}
}

impl Display for ReadingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Display::fmt(&self.kind, f)?;

if let Some(resource) = &self.resource {
f.write_fmt(format_args!("\nwhile reading at `{resource}`"))?
}

if let Some(predicate) = &self.predicate {
f.write_fmt(format_args!("\nwhile processing table for `{predicate}`"))?
}

Ok(())
}
}

/// Error-Collection for all the possible Errors occurring in this crate
Expand Down
2 changes: 1 addition & 1 deletion nemo-physical/src/function/definitions/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ impl BinaryFunction for StringSubstring {
}
}

const REGEX_CACHE_SIZE: NonZero<usize> = unsafe { NonZero::new_unchecked(32) };
const REGEX_CACHE_SIZE: NonZero<usize> = NonZero::new(32).unwrap();
static REGEX_CACHE: OnceCell<Mutex<lru::LruCache<String, regex::Regex>>> = OnceCell::new();

/// Regex string matching
Expand Down
5 changes: 3 additions & 2 deletions nemo-physical/src/management/database/sources.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
//! This module defines possible sources of tables to be stored as [Trie][crate::tabular::trie::Trie]s.

use std::{error::Error, mem::size_of};
use std::mem::size_of;

use crate::{
datasources::{table_providers::TableProvider, tuple_writer::TupleWriter},
datavalues::AnyDataValue,
error::ReadingError,
management::bytesized::ByteSized,
};

Expand Down Expand Up @@ -43,7 +44,7 @@ impl TableProvider for SimpleTable {
fn provide_table_data(
self: Box<Self>,
tuple_writer: &mut TupleWriter,
) -> Result<(), Box<dyn Error>> {
) -> Result<(), ReadingError> {
self.write_tuples(tuple_writer);
Ok(())
}
Expand Down
8 changes: 2 additions & 6 deletions nemo-physical/src/management/database/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
use std::cell::RefCell;

use crate::{
datasources::tuple_writer::TupleWriter,
error::{Error, ReadingError},
management::bytesized::ByteSized,
datasources::tuple_writer::TupleWriter, error::Error, management::bytesized::ByteSized,
tabular::trie::Trie,
};

Expand Down Expand Up @@ -44,9 +42,7 @@ impl TableStorage {
log::info!("Loading source {source:?}");
debug_assert!(source.arity() == arity);

source
.provide_table_data(&mut tuple_writer)
.map_err(ReadingError::ExternalError)?;
source.provide_table_data(&mut tuple_writer)?;
}

Ok(Trie::from_tuple_writer(tuple_writer))
Expand Down
3 changes: 1 addition & 2 deletions nemo-physical/src/util/mapping/permutation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{
collections::HashMap,
fmt::Display,
hash::{Hash, Hasher},
iter::repeat,
};

use super::traits::NatMapping;
Expand All @@ -17,7 +16,7 @@ pub struct Permutation {
}

fn invert(input: &[usize]) -> Box<[usize]> {
let mut result: Box<[usize]> = repeat(0).take(input.len()).collect();
let mut result: Box<[usize]> = std::iter::repeat_n(0, input.len()).collect();

for (k, &v) in input.iter().enumerate() {
result[v] = k;
Expand Down
2 changes: 1 addition & 1 deletion nemo-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ impl NemoEngine {
.0
.export_table(
&tag,
&*export_handler,
&export_handler,
self.engine.predicate_rows(&tag).py_res()?,
)
.py_res()?;
Expand Down
Loading