Skip to content

Commit 3284811

Browse files
authored
NQuads, TriG: Support writing to the default graph (#615)
2 parents ff6df68 + b3d7eab commit 3284811

File tree

7 files changed

+75
-152
lines changed

7 files changed

+75
-152
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ flycheck_*.el
2626
/nemo/test-files
2727
/nemo-benches/test-files
2828

29+
# default output directory
30+
/results
31+
2932
# nix build
3033
/result
3134

CONTRIBUTING.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,12 @@ Add `quickcheck` tests whenever it is applicable.
7272

7373
### Integration testing
7474

75-
Integration testing is done in the related `tests` directory on the top-level of this crate.
75+
Integration testing is done in the related `tests` directory on the top-level of the `nemo-cli` crate.
7676

7777
## Coding conventions
7878

7979
Start reading our code and you'll get the hang of it. Code format and essential coding guidelines are already ensured
80-
by our use of `rstufmt` and `clippy` (as mentioned above). Some further conventions are listed below.
80+
by our use of `rustfmt` and `clippy` (as mentioned above). Some further conventions are listed below.
8181

8282
* We try to reduce redundancies in enumeration-variant names.
8383
* We try to use the `where` clause over embedded clauses for better readability

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[workspace]
22
resolver = "2"
33
default-members = [
4-
"nemo",
4+
"nemo",
55
"nemo-cli",
66
"nemo-physical",
77
"nemo-python",

nemo-language-server/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ homepage.workspace = true
88
license.workspace = true
99
readme = "README.md"
1010
repository.workspace = true
11-
default-run = "nemo-language-server"
1211

1312
[[bin]]
1413
name = "nemo-language-server"

nemo/src/io/formats/rdf.rs

+7
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,13 @@ use crate::{
4444
use super::FileFormatMeta;
4545
use super::{ExportHandler, FormatBuilder, ImportHandler, TableWriter};
4646

47+
/// IRI to be used for the default graph used by Nemo when loading RDF data with
48+
/// named graphs (quads).
49+
///
50+
/// SPARQL 1.1 has failed to provide any standard identifier for this purpose.
51+
/// If future SPARQL or RDF versions add one, we could align accordingly.
52+
const DEFAULT_GRAPH_IRI: &str = "tag:nemo:defaultgraph";
53+
4754
/// The different supported variants of the RDF format.
4855
#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq, VariantArray)]
4956
#[func(pub fn media_type(&self) -> &'static str)]

nemo/src/io/formats/rdf/reader.rs

+8-120
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,9 @@ use crate::io::formats::PROGRESS_NOTIFY_INCREMENT;
2727
use super::{
2828
error::RdfFormatError,
2929
value_format::{RdfValueFormat, RdfValueFormats},
30-
RdfVariant,
30+
RdfVariant, DEFAULT_GRAPH_IRI,
3131
};
3232

33-
/// IRI to be used for the default graph used by Nemo when loading RDF data with
34-
/// named graphs (quads).
35-
///
36-
/// SPARQL 1.1 has failed to provide any standard identifier for this purpose.
37-
/// If future SPARQL or RDF versions are adding this, we could align accordingly.
38-
const DEFAULT_GRAPH: &str = "tag:nemo:defaultgraph";
39-
4033
/// A [TableProvider] for RDF 1.1 files containing triples.
4134
pub(super) struct RdfReader {
4235
/// Buffer from which content is read
@@ -157,7 +150,7 @@ impl RdfReader {
157150
value: Option<GraphName<'_>>,
158151
) -> Result<AnyDataValue, RdfFormatError> {
159152
match value {
160-
None => Ok(AnyDataValue::new_iri(DEFAULT_GRAPH.to_string())),
153+
None => Ok(AnyDataValue::new_iri(DEFAULT_GRAPH_IRI.to_string())),
161154
Some(GraphName::NamedNode(nn)) => Ok(Self::datavalue_from_named_node(nn)),
162155
Some(GraphName::BlankNode(bn)) => {
163156
Ok(Self::datavalue_from_blank_node(bnode_map, tuple_writer, bn))
@@ -368,7 +361,7 @@ impl ByteSized for RdfReader {
368361

369362
#[cfg(test)]
370363
mod test {
371-
use super::{RdfReader, DEFAULT_GRAPH};
364+
use super::{RdfReader, DEFAULT_GRAPH_IRI};
372365
use std::cell::RefCell;
373366

374367
use nemo_physical::{
@@ -454,7 +447,7 @@ mod test {
454447
let dict = RefCell::new(Dict::default());
455448
let mut tuple_writer = TupleWriter::new(&dict, 3);
456449
let mut null_map = NullMap::default();
457-
let graph_dv = AnyDataValue::new_iri(DEFAULT_GRAPH.to_string());
450+
let graph_dv = AnyDataValue::new_iri(DEFAULT_GRAPH_IRI.to_string());
458451

459452
// check that we use our own default graph IRI
460453
assert_eq!(
@@ -463,115 +456,10 @@ mod test {
463456
);
464457
// check that our default graph is a valid IRI in the first place
465458
assert_eq!(
466-
Iri::parse(DEFAULT_GRAPH.to_string()).unwrap().to_string(),
467-
DEFAULT_GRAPH.to_string()
459+
Iri::parse(DEFAULT_GRAPH_IRI.to_string())
460+
.unwrap()
461+
.to_string(),
462+
DEFAULT_GRAPH_IRI.to_string()
468463
);
469464
}
470-
471-
// #[test]
472-
// fn example_1() {
473-
// macro_rules! parse_example_with_rdf_parser {
474-
// ($data:tt, $make_parser:expr) => {
475-
// let $data = r#"<http://one.example/subject1> <http://one.example/predicate1> <http://one.example/object1> . # comments here
476-
// # or on a line by themselves
477-
// _:subject1 <http://an.example/predicate1> "object1" .
478-
// _:subject2 <http://an.example/predicate2> "object2" .
479-
// "#.as_bytes();
480-
481-
// let dict = RefCell::new(Dict::default());
482-
// let mut builders = vec![
483-
// PhysicalBuilderProxyEnum::String(PhysicalStringColumnBuilderProxy::new(&dict)),
484-
// PhysicalBuilderProxyEnum::String(PhysicalStringColumnBuilderProxy::new(&dict)),
485-
// PhysicalBuilderProxyEnum::String(PhysicalStringColumnBuilderProxy::new(&dict)),
486-
// ];
487-
// let reader = RDFReader::new(ResourceProviders::empty(), String::new(), None, vec![PrimitiveType::Any, PrimitiveType::Any, PrimitiveType::Any]);
488-
489-
// let result = reader.read_triples_with_parser(&mut builders, $make_parser);
490-
// assert!(result.is_ok());
491-
492-
// let columns = builders
493-
// .into_iter()
494-
// .map(|builder| match builder {
495-
// PhysicalBuilderProxyEnum::String(b) => b.finalize(),
496-
// _ => unreachable!("only string columns here"),
497-
// })
498-
// .collect::<Vec<_>>();
499-
500-
// log::debug!("columns: {columns:?}");
501-
// let triples = (0..=2)
502-
// .map(|idx| {
503-
// columns
504-
// .iter()
505-
// .map(|column| {
506-
// column
507-
// .get(idx)
508-
// .and_then(|value| value.try_into().ok())
509-
// .and_then(|u64: u64| usize::try_from(u64).ok())
510-
// .and_then(|usize| dict.borrow_mut().get(usize))
511-
// .unwrap()
512-
// })
513-
// .map(PhysicalString::from)
514-
// .collect::<Vec<_>>()
515-
// })
516-
// .collect::<Vec<_>>();
517-
// log::debug!("triple: {triples:?}");
518-
// for (value, expected) in PrimitiveType::Any.serialize_output(DataValueIteratorT::String(Box::new(triples[0].iter().cloned()))).zip(vec!["http://one.example/subject1", "http://one.example/predicate1", "http://one.example/object1"]) {
519-
// assert_eq!(value, expected);
520-
// }
521-
// for (value, expected) in PrimitiveType::Any.serialize_output(DataValueIteratorT::String(Box::new(triples[1].iter().cloned()))).zip(vec!["_:subject1", "http://an.example/predicate1", r#""object1""#]) {
522-
// assert_eq!(value, expected);
523-
// }
524-
// for (value, expected) in PrimitiveType::Any.serialize_output(DataValueIteratorT::String(Box::new(triples[2].iter().cloned()))).zip(vec!["_:subject2", "http://an.example/predicate2", r#""object2""#]) {
525-
// assert_eq!(value, expected);
526-
// }
527-
// };
528-
// }
529-
530-
// parse_example_with_rdf_parser!(reader, || NTriplesParser::new(reader));
531-
// parse_example_with_rdf_parser!(reader, || TurtleParser::new(reader, None));
532-
// }
533-
534-
// #[test]
535-
// fn rollback() {
536-
// let data = r#"<http://example.org/> <http://example.org/> <http://example.org/> .
537-
// malformed <http://example.org/> <http://example.org/>
538-
// <http://example.org/> malformed <http://example.org/> .
539-
// <http://example.org/> <http://example.org/> malformed .
540-
// <http://example.org/> <http://example.org/> "123"^^<http://www.w3.org/2001/XMLSchema#integer> .
541-
// <http://example.org/> <http://example.org/> "123.45"^^<http://www.w3.org/2001/XMLSchema#integer> .
542-
// <http://example.org/> <http://example.org/> "123.45"^^<http://www.w3.org/2001/XMLSchema#decimal> .
543-
// <http://example.org/> <http://example.org/> "123.45a"^^<http://www.w3.org/2001/XMLSchema#decimal> .
544-
// <https://example.org/> <https://example.org/> <https://example.org/> .
545-
// "#
546-
// .as_bytes();
547-
548-
// let dict = RefCell::new(Dict::default());
549-
// let mut builders = vec![
550-
// PhysicalBuilderProxyEnum::String(PhysicalStringColumnBuilderProxy::new(&dict)),
551-
// PhysicalBuilderProxyEnum::String(PhysicalStringColumnBuilderProxy::new(&dict)),
552-
// PhysicalBuilderProxyEnum::String(PhysicalStringColumnBuilderProxy::new(&dict)),
553-
// ];
554-
// let reader = RDFReader::new(
555-
// ResourceProviders::empty(),
556-
// String::new(),
557-
// None,
558-
// vec![PrimitiveType::Any, PrimitiveType::Any, PrimitiveType::Any],
559-
// );
560-
561-
// let result = reader.read_triples_with_parser(&mut builders, || NTriplesParser::new(data));
562-
// assert!(result.is_ok());
563-
564-
// let columns = builders
565-
// .into_iter()
566-
// .map(|builder| match builder {
567-
// PhysicalBuilderProxyEnum::String(b) => b.finalize(),
568-
// _ => unreachable!("only string columns here"),
569-
// })
570-
// .collect::<Vec<_>>();
571-
572-
// assert_eq!(columns.len(), 3);
573-
// assert_eq!(columns[0].len(), 4);
574-
// assert_eq!(columns[1].len(), 4);
575-
// assert_eq!(columns[2].len(), 4);
576-
// }
577465
}

nemo/src/io/formats/rdf/writer.rs

+54-28
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::{
1616

1717
use super::{
1818
value_format::{RdfValueFormat, RdfValueFormats},
19-
RdfVariant,
19+
RdfVariant, DEFAULT_GRAPH_IRI,
2020
};
2121

2222
/// Private struct to record the type of an RDF term that
@@ -31,13 +31,43 @@ enum RdfTermType {
3131
SimpleStringLiteral,
3232
}
3333

34+
#[derive(Debug, Default)]
35+
enum QuadGraphName {
36+
#[default]
37+
DefaultGraph,
38+
NamedNode(String),
39+
BlankNode(String),
40+
}
41+
42+
#[derive(Debug)]
43+
struct InvalidGraphNameError;
44+
45+
impl TryFrom<&AnyDataValue> for QuadGraphName {
46+
type Error = InvalidGraphNameError;
47+
48+
fn try_from(value: &AnyDataValue) -> Result<Self, Self::Error> {
49+
match value.value_domain() {
50+
ValueDomain::Iri => {
51+
let iri = value.to_iri_unchecked();
52+
53+
if iri == DEFAULT_GRAPH_IRI {
54+
Ok(Self::DefaultGraph)
55+
} else {
56+
Ok(Self::NamedNode(iri))
57+
}
58+
}
59+
ValueDomain::Null => Ok(Self::BlankNode(value.lexical_value())),
60+
_ => Err(InvalidGraphNameError),
61+
}
62+
}
63+
}
64+
3465
/// Struct to store information of one quad (or triple) for export.
3566
/// This is necessary since all RIO RDF term implementations use `&str`
3667
/// pointers internally, which must be owned elsewhere.
3768
#[derive(Debug, Default)]
3869
struct QuadBuffer {
39-
graph_name_is_blank: bool,
40-
graph_name: String,
70+
graph_name: QuadGraphName,
4171
subject_is_blank: bool,
4272
subject: String,
4373
predicate: String,
@@ -88,15 +118,15 @@ impl<'a> QuadBuffer {
88118
}
89119
}
90120

91-
fn graph_name(&'a self) -> GraphName<'a> {
92-
if self.graph_name_is_blank {
93-
GraphName::BlankNode(BlankNode {
94-
id: self.graph_name.as_str(),
95-
})
96-
} else {
97-
GraphName::NamedNode(NamedNode {
98-
iri: self.graph_name.as_str(),
99-
})
121+
fn graph_name(&'a self) -> Option<GraphName<'a>> {
122+
match &self.graph_name {
123+
QuadGraphName::DefaultGraph => None,
124+
QuadGraphName::NamedNode(iri) => {
125+
Some(GraphName::NamedNode(NamedNode { iri: iri.as_str() }))
126+
}
127+
QuadGraphName::BlankNode(id) => {
128+
Some(GraphName::BlankNode(BlankNode { id: id.as_str() }))
129+
}
100130
}
101131
}
102132

@@ -170,20 +200,13 @@ impl<'a> QuadBuffer {
170200
true
171201
}
172202

173-
fn set_graph_name_from_datavalue(&mut self, datavalue: &AnyDataValue) -> bool {
174-
match datavalue.value_domain() {
175-
ValueDomain::Iri => {
176-
self.graph_name = datavalue.to_iri_unchecked();
177-
self.graph_name_is_blank = false;
178-
true
179-
}
180-
ValueDomain::Null => {
181-
self.graph_name = datavalue.lexical_value();
182-
self.graph_name_is_blank = true;
183-
true
184-
}
185-
_ => false,
186-
}
203+
fn set_graph_name_from_datavalue(
204+
&mut self,
205+
datavalue: &AnyDataValue,
206+
) -> Result<(), InvalidGraphNameError> {
207+
self.graph_name = QuadGraphName::try_from(datavalue)?;
208+
209+
Ok(())
187210
}
188211
}
189212

@@ -318,14 +341,17 @@ impl RdfWriter {
318341
if !buffer.set_object_from_datavalue(&record[o_pos]) {
319342
continue;
320343
}
321-
if !buffer.set_graph_name_from_datavalue(&record[g_pos]) {
344+
if buffer
345+
.set_graph_name_from_datavalue(&record[g_pos])
346+
.is_err()
347+
{
322348
continue;
323349
}
324350
if let Err(e) = formatter.format(&Quad {
325351
subject: buffer.subject(),
326352
predicate: buffer.predicate(),
327353
object: buffer.object(),
328-
graph_name: Some(buffer.graph_name()),
354+
graph_name: buffer.graph_name(),
329355
}) {
330356
log::debug!("failed to write quad: {e}");
331357
drop_count += 1;

0 commit comments

Comments
 (0)