diff --git a/.github/CODEOWNER b/.github/CODEOWNER index 1e6b4ac..7959812 100644 --- a/.github/CODEOWNER +++ b/.github/CODEOWNER @@ -6,8 +6,6 @@ # CG chairs own the whole repository -- and in particular of the CODEOWNERS file itself. * @pchampin @Tpt -# Owners of the /dummy work-item -/dummy @pchampin @Tpt # ... - -# Owners of another work item -# ... +# Owners of individual work items +/statement @pchampin # welcoming co-owners +/statement_validation @pchampin # welcoming co-owners diff --git a/Cargo.toml b/Cargo.toml index 86fff64..5e571a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ [workspace] members = [ - "dummy", + "statement", + "statement_validation", ] resolver = "3" @@ -15,7 +16,8 @@ license-file = "./LICENSE.md" keywords = ["rdf", "linked-data", "semantic-web", "w3c"] # no more than 5 [workspace.dependencies] -dummy = { version = "0.1.0", path = "dummy" } +r2c2_statement = { version = "0.1.0", path = "statement" } +r2c2_statement_validation = { version = "0.1.0", path = "statement_validation" } [workspace.lints.clippy] enum_glob_use = "allow" diff --git a/dummy/Cargo.toml b/dummy/Cargo.toml deleted file mode 100644 index e7e3c84..0000000 --- a/dummy/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "r2c2_dummy" -description = "This is a dummy package that will disappear as soon as the first real work-item is added." -version.workspace = true -authors.workspace = true -edition.workspace = true -repository.workspace = true -readme.workspace = true -license-file.workspace = true -keywords.workspace = true - -[dependencies] - -[lints] -workspace = true diff --git a/dummy/src/lib.rs b/dummy/src/lib.rs deleted file mode 100644 index a4b02ca..0000000 --- a/dummy/src/lib.rs +++ /dev/null @@ -1,18 +0,0 @@ -//! Dummy crate, just used to make the whole repo a valid Cargo workspace. -//! -//! It will disappear as soon as the first real work-item is added. 
- -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} diff --git a/statement/Cargo.toml b/statement/Cargo.toml new file mode 100644 index 0000000..074dbc7 --- /dev/null +++ b/statement/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "r2c2_statement" +version.workspace = true +authors.workspace = true +edition.workspace = true +repository.workspace = true +readme.workspace = true +license-file.workspace = true +keywords.workspace = true + +[dependencies] +langtag = { version = "0.4.0", optional = true } +oxrdf = { version = "0.2.4", optional = true, features = ["rdf-star"] } +rdf-types = { version = "0.22.5", optional = true } + +[lints] +workspace = true + +[features] +poc_impl = ["dep:langtag", "dep:oxrdf", "dep:rdf-types"] diff --git a/statement/src/_graph_name.rs b/statement/src/_graph_name.rs new file mode 100644 index 0000000..c748d69 --- /dev/null +++ b/statement/src/_graph_name.rs @@ -0,0 +1,96 @@ +use std::borrow::Cow; + +use crate::Iri; + +/// A trait for [RDF terms] allowed as a [graph name] in an [RDF dataset]. +/// +/// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term +/// [graph name]: https://www.w3.org/TR/rdf12-concepts/#dfn-graph-name +/// [RDF dataset]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-dataset +pub trait GraphName { + /// Return a [`GraphNameProxy`] representing this graph name. + /// + /// [RDF term]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term + fn as_graph_name_proxy(&self) -> GraphNameProxy<'_>; + + /// Return the [kind](GraphNameKind) of this graph name. + /// + /// # Implementers + /// A default implementation is provided for this method, based on [`GraphName::as_graph_name_proxy`]. + /// It may be useful to override it, especially for types where the inner values of [`GraphNameProxy`] + /// are allocated as owned [`Cow`](std::borrow::Cow) rather than borrowed. 
+ fn graph_name_kind(&self) -> GraphNameKind { + match self.as_graph_name_proxy() { + GraphNameProxy::Iri(_) => GraphNameKind::Iri, + GraphNameProxy::BlankNode(_) => GraphNameKind::BlankNode, + } + } + + /// Whether this graph name is [ground](https://www.w3.org/TR/rdf12-concepts/#dfn-ground). + fn ground(&self) -> bool { + match self.graph_name_kind() { + GraphNameKind::Iri => true, + GraphNameKind::BlankNode => false, + } + } +} + +/// An enum conveying the inner information of a value implementing [`GraphName`]. +/// The return type of [`GraphName::as_graph_name_proxy`]. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum GraphNameProxy<'a> { + /// An [IRI](https://www.w3.org/TR/rdf12-concepts/#section-IRIs) + Iri(Iri<'a>), + /// A [blank node](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node) + /// + /// The inner value is an internal [blank node identifier](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node-identifier). + /// This identifier is not part of RDF's abstract syntax, and only *locally* identifies the blank node. + /// + /// Note that this API does not impose any constraint on blank node identifiers, + /// but concrete syntaxes usually do, so serializers may alter these identifiers. + BlankNode(Cow<'a, str>), +} + +/// An enum representing the different kinds of [RDF terms] that can be used as a [graph name]. +/// The return type of [`GraphName::graph_name_kind`]. +/// +/// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term +/// [graph name]: https://www.w3.org/TR/rdf12-concepts/#dfn-graph-name +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum GraphNameKind { + /// An [IRI](https://www.w3.org/TR/rdf12-concepts/#section-IRIs) + Iri, + /// A [blank node](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node) + BlankNode, +} + +/// Any reference to a [`GraphName`] also trivially implements [`GraphName`] +/// (as all methods of [`GraphName`] apply to `&self` anyway). 
+impl GraphName for &'_ T { + fn as_graph_name_proxy(&self) -> GraphNameProxy<'_> { + (*self).as_graph_name_proxy() + } + + fn graph_name_kind(&self) -> GraphNameKind { + (*self).graph_name_kind() + } + + fn ground(&self) -> bool { + (*self).ground() + } +} + +/// [`GraphNameProxy`] implements the trait [`GraphName`]. +/// This has not particular interest for [`GraphNameProxy`]s obtained from another [`GraphName`]-implementing type, +/// via the [`GraphName::as_graph_name_proxy`] method. +/// +/// It can be useful, on the other hand, to provide a straightforward implementation of [`GraphName`] +/// (e.g. for testing or prototyping). +impl GraphName for GraphNameProxy<'_> { + fn as_graph_name_proxy(&self) -> GraphNameProxy<'_> { + match self { + GraphNameProxy::Iri(iri) => GraphNameProxy::Iri(iri.borrowed()), + GraphNameProxy::BlankNode(cow) => GraphNameProxy::BlankNode(Cow::from(cow.as_ref())), + } + } +} diff --git a/statement/src/_iri.rs b/statement/src/_iri.rs new file mode 100644 index 0000000..9808af0 --- /dev/null +++ b/statement/src/_iri.rs @@ -0,0 +1,109 @@ +use std::borrow::Cow; + +/// Wrapper around a [`Cow`] guaranteeing that the underlying text satisfies [RFC3987]. +/// +/// [RFC3987]: https://datatracker.ietf.org/doc/rfc3987/ +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Iri<'a>(Cow<'a, str>); + +impl<'a> Iri<'a> { + /// Return a new [`Iri`], assuming the argument is a valid IRI. + pub fn new_unchecked(txt: impl Into>) -> Self { + Iri(txt.into()) + } + + /// Return the inner [`Cow`](Cow). + pub fn unwrap(self) -> Cow<'a, str> { + self.0 + } + + /// Apply a function to the inner txt, assuming the result of the function is still a valid IRI. + pub fn unchecked_map(self, mut f: impl FnMut(Cow<'a, str>) -> Cow<'a, str>) -> Self { + Self(f(self.0)) + } + + /// Borrow this [`Iri`] as another [`Iri`]. 
+ pub fn borrowed(&self) -> Iri<'_> { + Iri::new_unchecked(self.as_ref()) + } +} + +impl std::borrow::Borrow for Iri<'_> { + fn borrow(&self) -> &str { + self.0.as_ref() + } +} + +impl std::convert::AsRef for Iri<'_> { + fn as_ref(&self) -> &str { + self.0.as_ref() + } +} + +impl std::ops::Deref for Iri<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.0.as_ref() + } +} + +impl std::cmp::PartialEq<&str> for Iri<'_> { + fn eq(&self, other: &&str) -> bool { + self.0.as_ref() == *other + } +} + +impl std::cmp::PartialEq> for &str { + fn eq(&self, other: &Iri) -> bool { + *self == other.0.as_ref() + } +} + +impl std::cmp::PartialOrd<&str> for Iri<'_> { + fn partial_cmp(&self, other: &&str) -> Option { + Some(self.0.as_ref().cmp(other)) + } +} + +impl std::cmp::PartialOrd> for &str { + fn partial_cmp(&self, other: &Iri<'_>) -> Option { + Some(self.cmp(&other.0.as_ref())) + } +} + +impl std::fmt::Display for Iri<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<{}>", self.0.as_ref()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn as_str() { + let ex = "http://example.org/foo/bar"; + let iri1 = Iri::new_unchecked(ex.to_string()); + assert!(iri1.starts_with("http:")); + assert_eq!(iri1, ex); + assert_eq!(ex, iri1); + assert!("http:" < iri1 && iri1 < "i"); + } + + #[test] + fn borrowed() { + let ex = "http://example.org/foo/bar"; + let iri1 = Iri::new_unchecked(ex.to_string()); + let iri2 = iri1.borrowed(); + assert_eq!(iri1, iri2); + } + + #[test] + fn display() { + let ex = "http://example.org/foo/bar"; + let iri1 = Iri::new_unchecked(ex.to_string()); + assert_eq!(iri1.to_string(), format!("<{ex}>")); + } +} diff --git a/statement/src/_literal.rs b/statement/src/_literal.rs new file mode 100644 index 0000000..efe512a --- /dev/null +++ b/statement/src/_literal.rs @@ -0,0 +1,86 @@ +mod _language_tag; +use std::borrow::Cow; + +pub use _language_tag::*; + +use crate::Iri; + +/// The different 
possible value for literals' [base direction]. +/// +/// [base direction]: https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub enum BaseDir { + #[default] + /// The [base direction] `ltr` (left to right) + /// + /// [base direction]: https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction + Ltr, + /// The [base direction] `rtl` (right to left) + /// + /// [base direction]: https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction + Rtl, +} + +/// A utility type representing an RDF [literal]. +/// +/// [literal]: https://www.w3.org/TR/rdf12-concepts/#dfn-literal +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum Literal<'a> { + /// A literal with a specified datatype. + Typed(Cow<'a, str>, Iri<'a>), + /// A [language tagged string](https://www.w3.org/TR/rdf12-concepts/#dfn-language-tagged-string), + /// or a [directional language tagged string](https://www.w3.org/TR/rdf12-concepts/#dfn-language-tagged-string), + /// depending on the presence of a [`BaseDir`] in the third component. + LanguageString(Cow<'a, str>, LangTag<'a>, Option), +} + +impl Literal<'_> { + /// Borrow this [`Literal`] as another [`Literal`]. + pub fn borrowed(&self) -> Literal { + match self { + Literal::Typed(lex, iri) => Literal::Typed(Cow::from(lex.as_ref()), iri.borrowed()), + Literal::LanguageString(lex, lang_tag, base_dir) => { + Literal::LanguageString(Cow::from(lex.as_ref()), lang_tag.borrowed(), *base_dir) + } + } + } + + /// [lexical form](https://www.w3.org/TR/rdf12-concepts/#dfn-lexical-form) of this literal + pub fn lexical_form(&self) -> Cow { + let ref_cow = match self { + Literal::Typed(lex, ..) => lex, + Literal::LanguageString(lex, ..) 
=> lex, + }; + Cow::from(ref_cow.as_ref()) + } + + /// [datatype IRI](https://www.w3.org/TR/rdf12-concepts/#dfn-datatype-iri) of this literal + pub fn datatype_iri(&self) -> Iri<'_> { + match self { + Literal::Typed(_, iri) => iri.borrowed(), + Literal::LanguageString(_, _, None) => Iri::new_unchecked(RDF_LANG_STRING), + Literal::LanguageString(_, _, Some(_)) => Iri::new_unchecked(RDF_DIR_LANG_STRING), + } + } + + /// [language tag](https://www.w3.org/TR/rdf12-concepts/#dfn-language-tag) of this literal, if any + pub fn language_tag(&self) -> Option> { + if let Literal::LanguageString(_, tag, _) = self { + Some(tag.borrowed()) + } else { + None + } + } + + /// [base direction](https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction) of this literal, if any + pub fn base_direction(&self) -> Option { + if let Literal::LanguageString(_, _, Some(dir)) = self { + Some(*dir) + } else { + None + } + } +} + +static RDF_LANG_STRING: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"; +static RDF_DIR_LANG_STRING: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#dirLangString"; diff --git a/statement/src/_literal/_language_tag.rs b/statement/src/_literal/_language_tag.rs new file mode 100644 index 0000000..94189d1 --- /dev/null +++ b/statement/src/_literal/_language_tag.rs @@ -0,0 +1,145 @@ +use std::borrow::Cow; + +/// Wrapper around a [`Cow`] guaranteeing that the underlying text satisfies [BCP47]. +/// +/// NB: This type checks that the structure of the tag complies with the grammar, +/// but does *not* check that each component is a valid code +/// (i.e. ISO 639 for 2-3 characters language tag, or ISO 15924 for the script) +/// +/// [BCP47]: https://datatracker.ietf.org/doc/bcp47/ +#[derive(Clone, Debug, Eq, Ord)] +pub struct LangTag<'a>(Cow<'a, str>); + +impl<'a> LangTag<'a> { + /// Return a new [`LangTag`], assuming the argument is a valid language tag. 
+ pub fn new_unchecked(txt: impl Into>) -> Self { + LangTag(txt.into()) + } + + /// Return the inner [`Cow`](Cow). + pub fn unwrap(self) -> Cow<'a, str> { + self.0 + } + + /// Apply a function to the inner txt, assuming the result of the function is still a valid language tag. + pub fn unchecked_map(self, mut f: impl FnMut(Cow<'a, str>) -> Cow<'a, str>) -> Self { + Self(f(self.0)) + } + + /// Borrow this [`LangTag`] as another [`LangTag`]. + pub fn borrowed(&self) -> LangTag<'_> { + LangTag::new_unchecked(self.0.as_ref()) + } +} + +impl std::borrow::Borrow for LangTag<'_> { + fn borrow(&self) -> &str { + self.0.as_ref() + } +} + +impl std::convert::AsRef for LangTag<'_> { + fn as_ref(&self) -> &str { + self.0.as_ref() + } +} + +impl std::ops::Deref for LangTag<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.0.as_ref() + } +} + +impl std::hash::Hash for LangTag<'_> { + fn hash(&self, state: &mut H) { + self.0.as_ref().to_ascii_lowercase().hash(state) + } +} + +impl std::cmp::PartialEq for LangTag<'_> { + fn eq(&self, other: &Self) -> bool { + self.0.as_ref().eq_ignore_ascii_case(other.0.as_ref()) + } +} + +impl std::cmp::PartialEq<&str> for LangTag<'_> { + fn eq(&self, other: &&str) -> bool { + self.0.as_ref().eq_ignore_ascii_case(other) + } +} + +impl std::cmp::PartialEq> for &str { + fn eq(&self, other: &LangTag) -> bool { + self.eq_ignore_ascii_case(other.0.as_ref()) + } +} + +impl std::cmp::PartialOrd for LangTag<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some( + self.0 + .to_ascii_lowercase() + .cmp(&other.0.to_ascii_lowercase()), + ) + } +} + +impl std::cmp::PartialOrd<&str> for LangTag<'_> { + fn partial_cmp(&self, other: &&'_ str) -> Option { + Some(self.0.to_ascii_lowercase().cmp(&other.to_ascii_lowercase())) + } +} + +impl std::cmp::PartialOrd> for &str { + fn partial_cmp(&self, other: &LangTag<'_>) -> Option { + Some(self.to_ascii_lowercase().cmp(&other.0.to_ascii_lowercase())) + } +} + +impl std::fmt::Display for 
LangTag<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.as_ref().fmt(f) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn as_str() { + let ex = "en-GB"; + let tag1 = LangTag::new_unchecked(ex.to_string()); + assert!(tag1.starts_with("en")); + assert_eq!(tag1, ex); + assert_eq!(ex, tag1); + assert!(("en"..="en-GB").contains(&tag1)); + } + + #[test] + fn borrowed() { + let ex = "en-GB"; + let tag1 = LangTag::new_unchecked(ex.to_string()); + let tag2 = tag1.borrowed(); + assert_eq!(tag1, tag2); + } + + #[test] + fn display() { + let ex = "en-GB"; + let tag1 = LangTag::new_unchecked(ex.to_string()); + assert_eq!(tag1.to_string(), ex); + } + + #[test] + fn case_insensitive() { + let tag1 = LangTag::new_unchecked("en-GB"); + let tag2 = LangTag::new_unchecked("en-gb"); + assert_eq!(tag1, tag2); + assert_eq!(tag1, "en-gb"); + assert!(tag1 <= tag2 && tag2 <= tag1); + assert!("EN" < tag1 && tag1 < "EN-ZZ"); + } +} diff --git a/statement/src/_object.rs b/statement/src/_object.rs new file mode 100644 index 0000000..a49c963 --- /dev/null +++ b/statement/src/_object.rs @@ -0,0 +1,129 @@ +use std::borrow::Cow; + +use crate::{Iri, Literal, Triple}; + +/// A trait for [RDF terms] allowed in the [object] position of an [RDF triple]. +/// +/// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term +/// [object]: https://www.w3.org/TR/rdf12-concepts/#dfn-object +/// [RDF triple]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-triple +pub trait Object { + /// The type representing [triple terms] for this implementation of [`Object`] + /// + /// [triple terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-triple-term + type Triple<'x>: Triple + where + Self: 'x; + + /// Return an [`ObjectProxy`] representing this object. + fn as_object_proxy(&self) -> ObjectProxy<'_, Self::Triple<'_>>; + + /// Return the [kind](ObjectKind) of this object. 
+ /// + /// # Implementers + /// A default implementation is provided for this method, based on [`Object::as_object_proxy`]. + /// It may be useful to override it, especially for types where the inner values of [`ObjectProxy`] + /// are allocated as owned [`Cow`](std::borrow::Cow) rather than borrowed. + fn object_kind(&self) -> ObjectKind { + match self.as_object_proxy() { + ObjectProxy::Iri(_) => ObjectKind::Iri, + ObjectProxy::BlankNode(_) => ObjectKind::BlankNode, + ObjectProxy::Literal(_) => ObjectKind::Literal, + ObjectProxy::Triple(_) => ObjectKind::Triple, + } + } + + /// Whether this object is [ground](https://www.w3.org/TR/rdf12-concepts/#dfn-ground). + fn ground(&self) -> bool { + match self.object_kind() { + ObjectKind::Iri | ObjectKind::Literal => true, + ObjectKind::BlankNode => false, + ObjectKind::Triple => { + let ObjectProxy::Triple(triple) = self.as_object_proxy() else { + unreachable!() + }; + triple.ground() + } + } + } +} + +/// An enum conveying the inner information of a value implementing [`Object`]. +/// The return type of [`Object::as_object_proxy`]. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum ObjectProxy<'a, T: Triple + 'a> { + /// An [IRI](https://www.w3.org/TR/rdf12-concepts/#section-IRIs) + Iri(Iri<'a>), + /// A [blank node](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node) + /// + /// The inner value is an internal [blank node identifier](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node-identifier). + /// This identifier is not part of RDF's abstract syntax, and only *locally* identifies the blank node. + /// + /// Note that this API does not impose any constraint on blank node identifiers, + /// but concrete syntaxes usually do, so serializers may alter these identifiers. 
+ BlankNode(Cow<'a, str>), + /// A [literal](https://www.w3.org/TR/rdf12-concepts/#dfn-literal) + Literal(Literal<'a>), + /// A [triple term](https://www.w3.org/TR/rdf12-concepts/#dfn-triple-term) + Triple(T), +} + +/// An enum representing the different kinds of [RDF terms] that can be used as an [object]. +/// The return type of [`Object::object_kind`]. +/// +/// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term +/// [object]: https://www.w3.org/TR/rdf12-concepts/#dfn-object +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum ObjectKind { + /// An [IRI](https://www.w3.org/TR/rdf12-concepts/#section-IRIs) + Iri, + /// A [blank node](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node) + BlankNode, + /// A [literal](https://www.w3.org/TR/rdf12-concepts/#dfn-literal) + Literal, + /// A [triple term](https://www.w3.org/TR/rdf12-concepts/#dfn-triple-term) + Triple, +} + +/// Any reference to an [`Object`] also trivially implements [`Object`] +/// (as all methods of [`Object`] apply to `&self` anyway). +impl Object for &'_ T { + type Triple<'x> + = T::Triple<'x> + where + Self: 'x; + + fn as_object_proxy(&self) -> ObjectProxy<'_, Self::Triple<'_>> { + (*self).as_object_proxy() + } + + fn object_kind(&self) -> ObjectKind { + (*self).object_kind() + } + + fn ground(&self) -> bool { + (*self).ground() + } +} + +/// [`ObjectProxy`] implements the trait [`Object`]. +/// This is of no particular interest for [`ObjectProxy`]s obtained from another [`Object`]-implementing type, +/// via the [`Object::as_object_proxy`] method. +/// +/// It can be useful, on the other hand, to provide a straightforward implementation of [`Object`] +/// (e.g. for testing or prototyping). 
+impl Object for ObjectProxy<'_, T> { + type Triple<'x> + = &'x T + where + Self: 'x; + + fn as_object_proxy(&self) -> ObjectProxy<'_, &T> { + match self { + ObjectProxy::Iri(iri) => ObjectProxy::Iri(iri.borrowed()), + ObjectProxy::BlankNode(cow) => ObjectProxy::BlankNode(Cow::from(cow.as_ref())), + ObjectProxy::Literal(literal) => ObjectProxy::Literal(literal.borrowed()), + ObjectProxy::Triple(triple) => ObjectProxy::Triple(triple), + } + } +} diff --git a/statement/src/_predicate.rs b/statement/src/_predicate.rs new file mode 100644 index 0000000..8371df4 --- /dev/null +++ b/statement/src/_predicate.rs @@ -0,0 +1,31 @@ +use crate::Iri; + +/// A trait for [RDF terms] allowed in the [predicate] position of an [RDF triple]. +/// +/// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term +/// [predicate]: https://www.w3.org/TR/rdf12-concepts/#dfn-predicate +/// [RDF triple]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-triple +pub trait Predicate { + /// Return the [`Iri`] of this predicate. + fn as_iri(&self) -> Iri<'_>; +} + +/// Any reference to a [`Predicate`] also trivially implements [`Predicate`] +/// (as all methods of [`Predicate`] apply to `&self` anyway). +impl Predicate for &'_ T { + fn as_iri(&self) -> Iri<'_> { + (*self).as_iri() + } +} + +/// [`Iri`] implements the trait [`Predicate`]. +/// This has not particular interest for [`Iri`]s obtained from another [`Predicate`]-implementing type, +/// via the [`Predicate::as_iri`] method. +/// +/// It can be useful, on the other hand, to provide a straightforward implementation of [`Predicate`] +/// (e.g. for testing or prototyping). +impl Predicate for Iri<'_> { + fn as_iri(&self) -> Iri<'_> { + self.borrowed() + } +} diff --git a/statement/src/_quad.rs b/statement/src/_quad.rs new file mode 100644 index 0000000..19c184b --- /dev/null +++ b/statement/src/_quad.rs @@ -0,0 +1,111 @@ +use crate::{GraphName, Object, Predicate, Subject}; + +/// A trait for RDF [quads]. 
+/// +/// [quads]: https://www.w3.org/TR/rdf12-concepts/#dfn-quad +pub trait Quad { + /// The type of [RDF terms] appearing in the [subjects] position, + /// as returned by [`Quad::subject`]. + /// + /// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term + /// [subjects]: https://www.w3.org/TR/rdf12-concepts/#dfn-subject + type Subject<'x>: Subject + where + Self: 'x; + /// The type of [RDF terms] appearing in the [predicates] position, + /// as returned by [`Quad::predicate`]. + /// + /// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term + /// [predicates]: https://www.w3.org/TR/rdf12-concepts/#dfn-predicate + type Predicate<'x>: Predicate + where + Self: 'x; + /// The type of [RDF terms] appearing in the [objects] position, + /// as returned by [`Quad::object`]. + /// + /// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term + /// [objects]: https://www.w3.org/TR/rdf12-concepts/#dfn-object + type Object<'x>: Object + where + Self: 'x; + /// The type of [RDF terms] used as [graph name], + /// as returned by [`Quad::graph_name`]. + /// + /// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term + /// [graph name]: https://www.w3.org/TR/rdf12-concepts/#dfn-graph-name + type GraphName<'x>: GraphName + where + Self: 'x; + + /// The [subject] of this quad + /// + /// [subject]: https://www.w3.org/TR/rdf12-concepts/#dfn-subject + fn subject(&self) -> Self::Subject<'_>; + /// The [predicate] of this quad + /// + /// [predicate]: https://www.w3.org/TR/rdf12-concepts/#dfn-predicate + fn predicate(&self) -> Self::Predicate<'_>; + /// The [object] of this quad + /// + /// [object]: https://www.w3.org/TR/rdf12-concepts/#dfn-object + fn object(&self) -> Self::Object<'_>; + /// The [graph name] of this quad, if any. + /// [RDF triples] belonging to the [default graph] of an [RDF dataset] + /// have no [graph name]. 
+ /// + /// [graph name]: https://www.w3.org/TR/rdf12-concepts/#dfn-graph-name + /// [RDF triples]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-triple + /// [default graph]: https://www.w3.org/TR/rdf12-concepts/#dfn-default-graph + /// [RDF dataset]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-dataset + fn graph_name(&self) -> Option>; + + /// Whether this quad is [ground](https://www.w3.org/TR/rdf12-concepts/#dfn-ground). + /// + /// NB: RDF Concepts does not actually define the notion of "ground quad", + /// but this is a natural extension: all terms, including the graph name if present, must be ground. + fn ground(&self) -> bool { + self.subject().ground() + && self.object().ground() + && self.graph_name().map(|n| n.ground()).unwrap_or(true) + } +} + +/// Any reference to a [`Quad`] also trivially implements [`Quad`] +/// (as all methods of [`Quad`] apply to `&self` anyway). +impl Quad for &'_ T { + type Subject<'x> + = T::Subject<'x> + where + Self: 'x; + + type Predicate<'x> + = T::Predicate<'x> + where + Self: 'x; + + type Object<'x> + = T::Object<'x> + where + Self: 'x; + + type GraphName<'x> + = T::GraphName<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + (*self).subject() + } + + fn predicate(&self) -> Self::Predicate<'_> { + (*self).predicate() + } + + fn object(&self) -> Self::Object<'_> { + (*self).object() + } + + fn graph_name(&self) -> Option> { + (*self).graph_name() + } +} diff --git a/statement/src/_subject.rs b/statement/src/_subject.rs new file mode 100644 index 0000000..df9716b --- /dev/null +++ b/statement/src/_subject.rs @@ -0,0 +1,94 @@ +use std::borrow::Cow; + +use crate::Iri; + +/// A trait for [RDF terms] allowed in the [subject] position of an [RDF triple]. 
+/// +/// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term +/// [subject]: https://www.w3.org/TR/rdf12-concepts/#dfn-subject +/// [RDF triple]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-triple +pub trait Subject { + /// Return a [`SubjectProxy`] representing this subject. + fn as_subject_proxy(&self) -> SubjectProxy<'_>; + + /// Return the [kind](SubjectKind) of this subject. + /// + /// # Implementers + /// A default implementation is provided for this method, based on [`Subject::as_subject_proxy`]. + /// It may be useful to override it, especially for types where the inner values of [`SubjectProxy`] + /// are allocated as owned [`Cow`](std::borrow::Cow) rather than borrowed. + fn subject_kind(&self) -> SubjectKind { + match self.as_subject_proxy() { + SubjectProxy::Iri(_) => SubjectKind::Iri, + SubjectProxy::BlankNode(_) => SubjectKind::BlankNode, + } + } + + /// Whether this subject is [ground](https://www.w3.org/TR/rdf12-concepts/#dfn-ground). + fn ground(&self) -> bool { + match self.subject_kind() { + SubjectKind::Iri => true, + SubjectKind::BlankNode => false, + } + } +} + +/// An enum conveying the inner information of a value implementing [`Subject`]. +/// The return type of [`Subject::as_subject_proxy`]. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum SubjectProxy<'a> { + /// An [IRI](https://www.w3.org/TR/rdf12-concepts/#section-IRIs) + Iri(Iri<'a>), + /// A [blank node](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node) + /// + /// The inner value is an internal [blank node identifier](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node-identifier). + /// This identifier is not part of RDF's abstract syntax, and only *locally* identifies the blank node. + /// + /// Note that this API does not impose any constraint on blank node identifiers, + /// but concrete syntaxes usually do, so serializers may alter these identifiers. 
+ BlankNode(Cow<'a, str>), +} + +/// An enum representing the different kinds of [RDF terms] that can be [subject]. +/// The return type of [`Subject::subject_kind`]. +/// +/// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term +/// [subject]: https://www.w3.org/TR/rdf12-concepts/#dfn-subject +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum SubjectKind { + /// An [IRI](https://www.w3.org/TR/rdf12-concepts/#section-IRIs) + Iri, + /// A [blank node](https://www.w3.org/TR/rdf12-concepts/#dfn-blank-node) + BlankNode, +} + +/// Any reference to a [`Subject`] also trivially implements [`Subject`] +/// (as all methods of [`Subject`] apply to `&self` anyway). +impl Subject for &'_ T { + fn as_subject_proxy(&self) -> SubjectProxy<'_> { + (*self).as_subject_proxy() + } + + fn subject_kind(&self) -> SubjectKind { + (*self).subject_kind() + } + + fn ground(&self) -> bool { + (*self).ground() + } +} + +/// [`SubjectProxy`] implements the trait [`Subject`]. +/// This has not particular interest for [`SubjectProxy`]s obtained from another [`Subject`]-implementing type, +/// via the [`Subject::as_subject_proxy`] method. +/// +/// It can be useful, on the other hand, to provide a straightforward implementation of [`Subject`] +/// (e.g. for testing or prototyping). +impl Subject for SubjectProxy<'_> { + fn as_subject_proxy(&self) -> SubjectProxy<'_> { + match self { + SubjectProxy::Iri(iri) => SubjectProxy::Iri(iri.borrowed()), + SubjectProxy::BlankNode(cow) => SubjectProxy::BlankNode(Cow::from(cow.as_ref())), + } + } +} diff --git a/statement/src/_triple.rs b/statement/src/_triple.rs new file mode 100644 index 0000000..e135cc3 --- /dev/null +++ b/statement/src/_triple.rs @@ -0,0 +1,143 @@ +use crate::{Object, Predicate, Subject}; + +/// A trait for [RDF triples]. 
+/// +/// [RDF triples]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-triple +pub trait Triple { + /// The type of [RDF terms] appearing in the [subjects] position, + /// as returned by [`Triple::subject`]. + /// + /// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term + /// [subjects]: https://www.w3.org/TR/rdf12-concepts/#dfn-subject + type Subject<'x>: Subject + where + Self: 'x; + /// The type of [RDF terms] appearing in the [predicates] position, + /// as returned by [`Triple::predicate`]. + /// + /// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term + /// [predicates]: https://www.w3.org/TR/rdf12-concepts/#dfn-predicate + type Predicate<'x>: Predicate + where + Self: 'x; + /// The type of [RDF terms] appearing in the [objects] position, + /// as returned by [`Triple::object`]. + /// + /// [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term + /// [objects]: https://www.w3.org/TR/rdf12-concepts/#dfn-object + type Object<'x>: Object + where + Self: 'x; + + /// The [subject] of this triple + /// + /// [subject]: https://www.w3.org/TR/rdf12-concepts/#dfn-subject + fn subject(&self) -> Self::Subject<'_>; + /// The [predicate] of this triple + /// + /// [predicate]: https://www.w3.org/TR/rdf12-concepts/#dfn-predicate + fn predicate(&self) -> Self::Predicate<'_>; + /// The [object] of this triple + /// + /// [object]: https://www.w3.org/TR/rdf12-concepts/#dfn-object + fn object(&self) -> Self::Object<'_>; + + /// Whether this triple is [ground](https://www.w3.org/TR/rdf12-concepts/#dfn-ground). + fn ground(&self) -> bool { + self.subject().ground() && self.object().ground() + } +} + +/// Any reference to a [`Triple`] also trivially implements [`Triple`] +/// (as all methods of [`Triple`] apply to `&self` anyway). 
+impl Triple for &'_ T { + type Subject<'x> + = T::Subject<'x> + where + Self: 'x; + + type Predicate<'x> + = T::Predicate<'x> + where + Self: 'x; + + type Object<'x> + = T::Object<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + (*self).subject() + } + + fn predicate(&self) -> Self::Predicate<'_> { + (*self).predicate() + } + + fn object(&self) -> Self::Object<'_> { + (*self).object() + } +} + +/// Any boxed [`Triple`] also trivially implements [`Triple`] +/// (as all methods of [`Triple`] apply to `&self` anyway). +impl Triple for Box { + type Subject<'x> + = T::Subject<'x> + where + Self: 'x; + + type Predicate<'x> + = T::Predicate<'x> + where + Self: 'x; + + type Object<'x> + = T::Object<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + self.as_ref().subject() + } + + fn predicate(&self) -> Self::Predicate<'_> { + self.as_ref().predicate() + } + + fn object(&self) -> Self::Object<'_> { + self.as_ref().object() + } +} + +/// A utility empty type for indicating that a given implementation does not support triple terms. +pub enum NeverTriple {} + +impl Triple for NeverTriple { + type Subject<'x> + = crate::SubjectProxy<'x> + where + Self: 'x; + + type Predicate<'x> + = crate::Iri<'x> + where + Self: 'x; + + type Object<'x> + = crate::ObjectProxy<'x, NeverTriple> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + unreachable!() + } + + fn predicate(&self) -> Self::Predicate<'_> { + unreachable!() + } + + fn object(&self) -> Self::Object<'_> { + unreachable!() + } +} diff --git a/statement/src/impl_oxrdf.rs b/statement/src/impl_oxrdf.rs new file mode 100644 index 0000000..c2cab47 --- /dev/null +++ b/statement/src/impl_oxrdf.rs @@ -0,0 +1,583 @@ +//! Proof-of-concept implementation of this crate's traits for [`oxrdf`]. +//! +//! Only present with the `poc_impl` feature. +//! +//! This module is developed as if [`oxrdf`] implemented RDF 1.2 completely and strictly, +//! which is not entirely true: +//! 
- [`oxrdf`] does not support base direction in literals, so it is not complete; +//! - [`oxrdf`] with the [`rdf-star`] feature allows triple terms in the subject position, so it is not strict. +//! +//! This is handled by panic'ing when those situations are encountered. +//! +//! A more future proof way of dealing with this would be: +//! - for incomplete implementations, the conversions *from* R2C2 should use +//! [`TryFrom`] rather than [`From`] (see for example [`crate::impl_rdf_types`]) +//! - for generalized implementations, R2C2 would need to be augmented, +//! possibly with a GeneralizedTerm trait that would allow fallible conversions to strict term categories. +use crate::*; +use oxrdf as ox; + +// oxrdf::Triple as Triple + +impl Triple for ox::Triple { + type Subject<'x> + = &'x ox::Subject + where + Self: 'x; + + type Predicate<'x> + = &'x ox::NamedNode + where + Self: 'x; + + type Object<'x> + = &'x ox::Term + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + &self.subject + } + + fn predicate(&self) -> Self::Predicate<'_> { + &self.predicate + } + + fn object(&self) -> Self::Object<'_> { + &self.object + } +} + +/// This function would typically be implemented as a method of oxrdf::Triple in the crate itself. 
+pub fn from_r2c2_triple(triple: T) -> ox::Triple { + ox::Triple::new( + triple.subject().as_subject_proxy(), + triple.predicate().as_iri(), + triple.object().as_object_proxy(), + ) +} + +// oxrdf::TripleRef as Triple + +impl Triple for ox::TripleRef<'_> { + type Subject<'x> + = ox::SubjectRef<'x> + where + Self: 'x; + + type Predicate<'x> + = ox::NamedNodeRef<'x> + where + Self: 'x; + + type Object<'x> + = ox::TermRef<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + self.subject + } + + fn predicate(&self) -> Self::Predicate<'_> { + self.predicate + } + + fn object(&self) -> Self::Object<'_> { + self.object + } +} + +// oxrdf::Quad as Quad + +impl Quad for ox::Quad { + type Subject<'x> + = &'x ox::Subject + where + Self: 'x; + + type Predicate<'x> + = &'x ox::NamedNode + where + Self: 'x; + + type Object<'x> + = &'x ox::Term + where + Self: 'x; + + type GraphName<'x> + = ox::NamedOrBlankNodeRef<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + &self.subject + } + + fn predicate(&self) -> Self::Predicate<'_> { + &self.predicate + } + + fn object(&self) -> Self::Object<'_> { + &self.object + } + + fn graph_name(&self) -> Option> { + match self.graph_name.as_ref() { + ox::GraphNameRef::NamedNode(named_node) => { + Some(ox::NamedOrBlankNodeRef::NamedNode(named_node)) + } + ox::GraphNameRef::BlankNode(blank_node) => { + Some(ox::NamedOrBlankNodeRef::BlankNode(blank_node)) + } + ox::GraphNameRef::DefaultGraph => None, + } + } +} + +/// This function would typically be implemented as a method of oxrdf::Quad in the crate itself. 
+pub fn from_r2c2_quad(quad: T) -> ox::Quad { + ox::Quad::new( + quad.subject().as_subject_proxy(), + quad.predicate().as_iri(), + quad.object().as_object_proxy(), + match quad.graph_name() { + None => ox::GraphName::DefaultGraph, + Some(gn) => gn.as_graph_name_proxy().into(), + }, + ) +} + +// oxrdf::QuadRef as Quad + +impl Quad for ox::QuadRef<'_> { + type Subject<'x> + = ox::SubjectRef<'x> + where + Self: 'x; + + type Predicate<'x> + = ox::NamedNodeRef<'x> + where + Self: 'x; + + type Object<'x> + = ox::TermRef<'x> + where + Self: 'x; + + type GraphName<'x> + = ox::NamedOrBlankNodeRef<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + self.subject + } + + fn predicate(&self) -> Self::Predicate<'_> { + self.predicate + } + + fn object(&self) -> Self::Object<'_> { + self.object + } + + fn graph_name(&self) -> Option> { + match self.graph_name { + ox::GraphNameRef::NamedNode(named_node) => { + Some(ox::NamedOrBlankNodeRef::NamedNode(named_node)) + } + ox::GraphNameRef::BlankNode(blank_node) => { + Some(ox::NamedOrBlankNodeRef::BlankNode(blank_node)) + } + ox::GraphNameRef::DefaultGraph => None, + } + } +} + +// oxrdf::Subject as Subject + +impl Subject for ox::Subject { + fn as_subject_proxy(&self) -> SubjectProxy<'_> { + match self { + ox::Subject::NamedNode(named_node) => SubjectProxy::Iri(named_node.as_iri()), + ox::Subject::BlankNode(blank_node) => { + SubjectProxy::BlankNode(blank_node.as_str().into()) + } + ox::Subject::Triple(_) => { + panic!() + // This only exists because we enabled the `rdf-star` feature, in order to emulate RDF 1.2's triple terms. + // It is assumed that OxRdf will eventually implement (strict) RDF 1.2, and that this panic!() will disappear. + // + // In the future we may have traits for types that *extend* RDF, + // with methods of the form `try_as_subject_proxy`, etc... 
+ } + } + } +} + +impl<'a> From> for ox::Subject { + fn from(value: SubjectProxy<'a>) -> Self { + match value { + SubjectProxy::Iri(iri) => ox::NamedNode::from(iri).into(), + SubjectProxy::BlankNode(bnid) => safe_bnode(bnid).into(), + } + } +} + +// oxrdf::SubjectRef as Subject + +impl Subject for ox::SubjectRef<'_> { + fn as_subject_proxy(&self) -> SubjectProxy<'_> { + match self { + ox::SubjectRef::NamedNode(named_node) => SubjectProxy::Iri(named_node.as_iri()), + ox::SubjectRef::BlankNode(blank_node) => { + SubjectProxy::BlankNode(blank_node.as_str().into()) + } + ox::SubjectRef::Triple(_) => { + panic!() + // This only exists because we enabled the `rdf-star` feature, in order to emulate RDF 1.2's triple terms. + // It is assumed that OxRdf will eventually implement (strict) RDF 1.2, and that this panic!() will disappear. + // + // In the future we may have traits for types that *extend* RDF, + // with methods of the form `try_as_subject_proxy`, etc... + } + } + } +} + +// oxrdf::NamedNode as Predicate + +impl Predicate for ox::NamedNode { + fn as_iri(&self) -> Iri<'_> { + Iri::new_unchecked(self.as_str()) + } +} + +impl<'a> From> for ox::NamedNode { + fn from(value: Iri<'a>) -> Self { + ox::NamedNode::new_unchecked(value.unwrap().into_owned()) + } +} + +// oxrdf::NamedNodeRef as Predicate + +impl Predicate for ox::NamedNodeRef<'_> { + fn as_iri(&self) -> Iri<'_> { + Iri::new_unchecked(self.as_str()) + } +} + +// oxrdf::Term as Object + +impl Object for ox::Term { + type Triple<'x> + = &'x ox::Triple + where + Self: 'x; + + fn as_object_proxy(&'_ self) -> ObjectProxy<'_, &'_ ox::Triple> { + match self { + ox::Term::NamedNode(named_node) => ObjectProxy::Iri(named_node.as_iri()), + ox::Term::BlankNode(blank_node) => ObjectProxy::BlankNode(blank_node.as_str().into()), + ox::Term::Literal(literal) => ObjectProxy::Literal(match literal.as_ref().destruct() { + (lex, None, None) => Literal::Typed(lex.into(), Iri::new_unchecked(XSD_STRING)), + (lex, _, Some(tag)) => { 
+ Literal::LanguageString(lex.into(), LangTag::new_unchecked(tag), None) + } + (lex, Some(dt), _) => Literal::Typed(lex.into(), Iri::new_unchecked(dt.as_str())), + }), + ox::Term::Triple(triple) => ObjectProxy::Triple(triple), + } + } +} + +impl<'a, T: Triple> From> for ox::Term { + fn from(value: ObjectProxy<'a, T>) -> Self { + match value { + ObjectProxy::Iri(iri) => ox::NamedNode::from(iri).into(), + ObjectProxy::BlankNode(bnid) => safe_bnode(bnid).into(), + ObjectProxy::Literal(literal) => match literal { + Literal::Typed(lex, iri) => { + ox::Literal::new_typed_literal(lex.into_owned(), iri).into() + } + Literal::LanguageString(lex, lang_tag, base_dir) => { + if base_dir.is_some() { + panic!() + // Assuming here that oxrdf will eventually support base direction, + // this panic!() will go away. + // + // For a type that is *not* expected to implement all of RDF 1.2, + // they should implement TryFrom instead. + } + ox::Literal::new_language_tagged_literal_unchecked( + lex.into_owned(), + lang_tag.unwrap().into_owned(), + ) + .into() + } + }, + ObjectProxy::Triple(triple) => ox::Term::Triple(Box::new(from_r2c2_triple(triple))), + } + } +} + +// oxrdf::TermRef as Object + +impl Object for ox::TermRef<'_> { + type Triple<'x> + = &'x ox::Triple + where + Self: 'x; + + fn as_object_proxy(&'_ self) -> ObjectProxy<'_, &'_ ox::Triple> { + match self { + ox::TermRef::NamedNode(named_node) => ObjectProxy::Iri(named_node.as_iri()), + ox::TermRef::BlankNode(blank_node) => { + ObjectProxy::BlankNode(blank_node.as_str().into()) + } + ox::TermRef::Literal(literal) => ObjectProxy::Literal(match literal.destruct() { + (lex, None, None) => Literal::Typed(lex.into(), Iri::new_unchecked(XSD_STRING)), + (lex, _, Some(tag)) => { + Literal::LanguageString(lex.into(), LangTag::new_unchecked(tag), None) + } + (lex, Some(dt), _) => Literal::Typed(lex.into(), Iri::new_unchecked(dt.as_str())), + }), + ox::TermRef::Triple(triple) => ObjectProxy::Triple(triple), + } + } +} + +// 
oxrdf::Subject as GraphName +// +// NB: ox::GraphName can not implement GraphName, +// because it has a variant 'DefaultGraph' which does not correspond to any term kind recognized by r2c2 +// +// Note however that, conversely, GraphNameProxy can be converted to an ox::GraphName + +impl GraphName for ox::Subject { + fn as_graph_name_proxy(&self) -> GraphNameProxy<'_> { + match self { + ox::Subject::NamedNode(named_node) => GraphNameProxy::Iri(named_node.as_iri()), + ox::Subject::BlankNode(blank_node) => { + GraphNameProxy::BlankNode(blank_node.as_str().into()) + } + ox::Subject::Triple(_) => { + panic!() + // This only exists because we enabled the `rdf-star` feature, in order to emulate RDF 1.2's triple terms. + // It is assumed that OxRdf will eventually implement (strict) RDF 1.2, and that this panic!() will disappear. + // + // In the future we may have traits for types that *extend* RDF, + // with methods of the form `try_as_subject_proxy`, etc... + } + } + } +} + +impl<'a> From> for ox::GraphName { + fn from(value: GraphNameProxy<'a>) -> Self { + match value { + GraphNameProxy::Iri(iri) => ox::NamedNode::from(iri).into(), + GraphNameProxy::BlankNode(bnid) => safe_bnode(bnid).into(), + } + } +} + +// oxrdf::NamedOrBlankNodeRef as GraphName +// +// NB: ox::GraphNameRef can not implement GraphName, +// because it has a variant 'DefaultGraph' which does not correspond to any term kind recognized by r2c2 + +impl GraphName for ox::NamedOrBlankNodeRef<'_> { + fn as_graph_name_proxy(&self) -> GraphNameProxy<'_> { + match self { + ox::NamedOrBlankNodeRef::NamedNode(named_node) => { + GraphNameProxy::Iri(named_node.as_iri()) + } + ox::NamedOrBlankNodeRef::BlankNode(blank_node) => { + GraphNameProxy::BlankNode(blank_node.as_str().into()) + } + } + } +} + +// utility functions and constants + +/// This function converts an R2C2 bnode label into an OxRDF Blank Node, +/// ensuring that bnode labels that are not valid SPARQL bnodeIds are correctly handled +fn 
safe_bnode(bnid: std::borrow::Cow) -> ox::BlankNode { + use std::hash::{DefaultHasher, Hash, Hasher}; + let mut s = DefaultHasher::new(); + bnid.hash(&mut s); + let h = s.finish(); + + ox::BlankNode::new(bnid.into_owned()) + .unwrap_or_else(|_| ox::BlankNode::new_from_unique_id(h as u128)) +} + +static XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string"; + +#[cfg(test)] +mod test_round_trip { + use super::*; + + #[test] + fn subject_iri() -> TestResult { + let s1: ox::Subject = ox::NamedNode::new("https://example.org/ns/alice")?.into(); + let s2: ox::Subject = s1.as_subject_proxy().into(); + assert_eq!(s1, s2); + let s2: ox::Subject = s1.as_ref().as_subject_proxy().into(); + assert_eq!(s1, s2); + Ok(()) + } + + #[test] + fn subject_bnode() -> TestResult { + let s1: ox::Subject = ox::BlankNode::default().into(); + let s2: ox::Subject = s1.as_subject_proxy().into(); + assert_eq!(s1, s2); + let s2: ox::Subject = s1.as_ref().as_subject_proxy().into(); + assert_eq!(s1, s2); + Ok(()) + } + + #[test] + fn predicate() -> TestResult { + let p1 = ox::NamedNode::new("https://example.org/ns/alice")?; + let p2: ox::NamedNode = p1.as_iri().into(); + assert_eq!(p1, p2); + let p2: ox::NamedNode = p1.as_ref().as_iri().into(); + assert_eq!(p1, p2); + Ok(()) + } + + #[test] + fn object_iri() -> TestResult { + let o1: ox::Term = ox::NamedNode::new("https://example.org/ns/alice")?.into(); + let o2: ox::Term = o1.as_object_proxy().into(); + assert_eq!(o1, o2); + let o2: ox::Term = o1.as_ref().as_object_proxy().into(); + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn object_bnode() -> TestResult { + let o1: ox::Term = ox::BlankNode::default().into(); + let o2: ox::Term = o1.as_object_proxy().into(); + assert_eq!(o1, o2); + let o2: ox::Term = o1.as_ref().as_object_proxy().into(); + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn object_simple_literal() -> TestResult { + let o1: ox::Term = ox::Literal::new_simple_literal("⛄").into(); + let o2: ox::Term = 
o1.as_object_proxy().into(); + assert_eq!(o1, o2); + let o2: ox::Term = o1.as_ref().as_object_proxy().into(); + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn object_typed_literal() -> TestResult { + let o1: ox::Term = + ox::Literal::new_typed_literal("42", ox::NamedNode::new(XSD_INTEGER)?).into(); + let o2: ox::Term = o1.as_object_proxy().into(); + assert_eq!(o1, o2); + let o2: ox::Term = o1.as_ref().as_object_proxy().into(); + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn object_language_tagged_literal() -> TestResult { + let o1: ox::Term = ox::Literal::new_language_tagged_literal("chat", "en-Latn-UK")?.into(); + let o2: ox::Term = o1.as_object_proxy().into(); + assert_eq!(o1, o2); + let o2: ox::Term = o1.as_ref().as_object_proxy().into(); + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn object_triple_term() -> TestResult { + let subject = ox::BlankNode::default().into(); + let predicate = ox::NamedNode::new("https://example.org/ns/p")?; + let object = ox::Literal::new_simple_literal("⛄").into(); + let o1: ox::Term = ox::Triple { + subject, + predicate, + object, + } + .into(); + let o2: ox::Term = o1.as_object_proxy().into(); + assert_eq!(o1, o2); + let o2: ox::Term = o1.as_ref().as_object_proxy().into(); + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn triple() -> TestResult { + let subject = ox::BlankNode::default().into(); + let predicate = ox::NamedNode::new("https://example.org/ns/p")?; + let object = ox::Literal::new_simple_literal("⛄").into(); + let t1 = ox::Triple { + subject, + predicate, + object, + }; + let t2 = from_r2c2_triple(&t1); + assert_eq!(t1, t2); + let t2 = from_r2c2_triple(t1.as_ref()); + assert_eq!(t1, t2); + Ok(()) + } + + #[test] + fn quad_default_graph() -> TestResult { + let subject = ox::BlankNode::default().into(); + let predicate = ox::NamedNode::new("https://example.org/ns/p")?; + let object = ox::Literal::new_simple_literal("⛄").into(); + let graph_name = ox::GraphName::DefaultGraph; + let q1 = ox::Quad { + subject, + 
predicate, + object, + graph_name, + }; + let q2 = from_r2c2_quad(&q1); + assert_eq!(q1, q2); + let q2 = from_r2c2_quad(q1.as_ref()); + assert_eq!(q1, q2); + Ok(()) + } + + #[test] + fn quad_named_graph() -> TestResult { + let subject = ox::BlankNode::default().into(); + let predicate = ox::NamedNode::new("https://example.org/ns/p")?; + let object = ox::Literal::new_simple_literal("⛄").into(); + let graph_name = ox::NamedNode::new("https://example.org/")?.into(); + let q1 = ox::Quad { + subject, + predicate, + object, + graph_name, + }; + let q2 = from_r2c2_quad(&q1); + assert_eq!(q1, q2); + let q2 = from_r2c2_quad(q1.as_ref()); + assert_eq!(q1, q2); + Ok(()) + } + + type TestResult = Result<(), Box>; + + static XSD_INTEGER: &str = "http://www.w3.org/2001/XMLSchema#integer"; +} diff --git a/statement/src/impl_rdf_types.rs b/statement/src/impl_rdf_types.rs new file mode 100644 index 0000000..f51499d --- /dev/null +++ b/statement/src/impl_rdf_types.rs @@ -0,0 +1,628 @@ +//! Proof-of-concept implementation of this crate's traits for [`rdf_types`]. +//! +//! Only present with the `poc_impl` feature. +//! +//! [`rdf_types`] is an implementation of RDF 1.1, +//! which makes it a strict subset of RDF 1.2. +//! Therefore, while [`rdf_types`] types can implement R2C2 traits, +//! conversion from R2C2 is performed using TryFrom. 
+use crate::*; +use rdf_types as rt; + +// rdf_types::LexicalTriple as Triple + +impl Triple for rt::LexicalTriple { + type Subject<'x> + = rt::LexicalSubjectRef<'x> + where + Self: 'x; + + type Predicate<'x> + = &'x rt::Iri + where + Self: 'x; + + type Object<'x> + = rt::LexicalObjectRef<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + self.subject().as_lexical_subject_ref() + } + + fn predicate(&self) -> Self::Predicate<'_> { + self.predicate() + } + + fn object(&self) -> Self::Object<'_> { + self.object().as_lexical_object_ref() + } +} + +/// This function would typically be implemented as a method of rdf_types::Triple in the crate itself. +pub fn try_from_r2c2_triple(triple: T) -> Result { + Ok(rt::Triple( + triple.subject().as_subject_proxy().into(), + triple.predicate().as_iri().into(), + triple.object().as_object_proxy().try_into()?, + )) +} + +// rdf_types::LexicalTripleRef as Triple + +impl Triple for rt::LexicalTripleRef<'_> { + type Subject<'x> + = rt::LexicalSubjectRef<'x> + where + Self: 'x; + + type Predicate<'x> + = &'x rt::Iri + where + Self: 'x; + + type Object<'x> + = rt::LexicalObjectRef<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + *self.subject() + } + + fn predicate(&self) -> Self::Predicate<'_> { + *self.predicate() + } + + fn object(&self) -> Self::Object<'_> { + *self.object() + } +} + +// rdf_types::LexicalQuad as Quad + +impl Quad for rt::LexicalQuad { + type Subject<'x> + = rt::LexicalSubjectRef<'x> + where + Self: 'x; + + type Predicate<'x> + = &'x rt::Iri + where + Self: 'x; + + type Object<'x> + = rt::LexicalObjectRef<'x> + where + Self: 'x; + + type GraphName<'x> + = rt::LexicalGraphLabelRef<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + self.subject().as_lexical_subject_ref() + } + + fn predicate(&self) -> Self::Predicate<'_> { + self.predicate() + } + + fn object(&self) -> Self::Object<'_> { + self.object().as_lexical_object_ref() + } + + fn graph_name(&self) -> 
Option> { + self.graph().map(|gn| gn.as_graph_label_ref()) + } +} + +/// This function would typically be implemented as a method of rdf_types::Quad in the crate itself. +pub fn try_from_r2c2_quad(quad: T) -> Result { + Ok(rt::Quad( + quad.subject().as_subject_proxy().into(), + quad.predicate().as_iri().into(), + quad.object().as_object_proxy().try_into()?, + quad.graph_name().map(|gn| gn.as_graph_name_proxy().into()), + )) +} + +// rdf_types::LexicalQuadRef as Quad + +impl Quad for rt::LexicalQuadRef<'_> { + type Subject<'x> + = rt::LexicalSubjectRef<'x> + where + Self: 'x; + + type Predicate<'x> + = &'x rt::Iri + where + Self: 'x; + + type Object<'x> + = rt::LexicalObjectRef<'x> + where + Self: 'x; + + type GraphName<'x> + = rt::LexicalGraphLabelRef<'x> + where + Self: 'x; + + fn subject(&self) -> Self::Subject<'_> { + *self.subject() + } + + fn predicate(&self) -> Self::Predicate<'_> { + *self.predicate() + } + + fn object(&self) -> Self::Object<'_> { + *self.object() + } + + fn graph_name(&self) -> Option> { + self.graph().copied() + } +} + +// rdf_types::Subject as Subject + +impl Subject for rt::Subject { + fn as_subject_proxy(&self) -> SubjectProxy<'_> { + match self { + rt::Id::Blank(bid) => SubjectProxy::BlankNode(bid.as_str()[2..].into()), + rt::Id::Iri(iri) => SubjectProxy::Iri(Iri::new_unchecked(iri.as_str())), + } + } +} + +impl<'a> From> for rt::Subject { + fn from(value: SubjectProxy<'a>) -> Self { + match value { + SubjectProxy::Iri(iri) => rt::Subject::Iri(rt::IriBuf::from(iri)), + SubjectProxy::BlankNode(bnid) => rt::Subject::Blank(safe_bnode(bnid)), + } + } +} + +// rdf_types::LexicalSubjectRef as Subject + +impl Subject for rt::LexicalSubjectRef<'_> { + fn as_subject_proxy(&self) -> SubjectProxy<'_> { + match self { + rt::Id::Blank(bid) => SubjectProxy::BlankNode(bid.as_str()[2..].into()), + rt::Id::Iri(iri) => SubjectProxy::Iri(Iri::new_unchecked(iri.as_str())), + } + } +} + +// rdf_types::IriBuf as Predicate + +impl Predicate for rt::IriBuf { 
+ fn as_iri(&self) -> Iri<'_> { + Iri::new_unchecked(self.as_str()) + } +} + +impl<'a> From> for rt::IriBuf { + fn from(value: Iri<'a>) -> Self { + unsafe { + // SAFETY: we know that value is a valid IRI + rt::IriBuf::new_unchecked(value.unwrap().into_owned()) + } + } +} + +// rdf_types::Iri as Predicate + +impl Predicate for &rt::Iri { + fn as_iri(&self) -> Iri<'_> { + Iri::new_unchecked(self.as_str()) + } +} + +// rdf_types::Object as Object + +impl Object for rt::Object { + type Triple<'x> + = NeverTriple + where + Self: 'x; + + fn as_object_proxy(&self) -> ObjectProxy<'_, Self::Triple<'_>> { + match self { + rt::Term::Id(rt::Id::Blank(bid)) => ObjectProxy::BlankNode(bid.as_str()[2..].into()), + rt::Term::Id(rt::Id::Iri(iri)) => ObjectProxy::Iri(Iri::new_unchecked(iri.as_str())), + rt::Term::Literal(lit) => ObjectProxy::Literal(match &lit.type_ { + rt::LiteralType::Any(iri) => { + Literal::Typed(lit.as_str().into(), Iri::new_unchecked(iri.as_str())) + } + rt::LiteralType::LangString(lang_tag_buf) => Literal::LanguageString( + lit.as_str().into(), + LangTag::new_unchecked(lang_tag_buf.as_str()), + None, + ), + }), + } + } +} + +impl<'a, T: Triple> TryFrom> for rt::Object { + type Error = &'static str; + + fn try_from(value: ObjectProxy<'a, T>) -> Result { + Ok(match value { + ObjectProxy::Iri(iri) => rt::Object::Id(rt::Id::Iri(rt::IriBuf::from(iri))), + ObjectProxy::BlankNode(bnid) => rt::Object::Id(rt::Id::Blank(safe_bnode(bnid))), + ObjectProxy::Literal(literal) => rt::Object::Literal(match literal { + Literal::Typed(lex, iri) => rt::Literal::new( + lex.into_owned(), + rt::LiteralType::Any(unsafe { + // SAFETY: iri is known to be a valid IRI + rt::IriBuf::new_unchecked(iri.unwrap().into_owned()) + }), + ), + Literal::LanguageString(lex, lang_tag, None) => rt::Literal::new( + lex.into_owned(), + rt::LiteralType::LangString(unsafe { + // SAFETY: lang_tag is known to be a valid language tag + langtag::LangTagBuf::new_unchecked(lang_tag.unwrap().into_owned()) + }), 
+ ), + Literal::LanguageString(_, _, Some(_)) => { + Err("directional language strings are not supported by the crate rdf_types")? + } + }), + ObjectProxy::Triple(_) => Err("triple-terms are not supported by the crate rdf_types")?, + }) + } +} + +// rdf_types::LexicalObjectRef as Object + +impl Object for rt::LexicalObjectRef<'_> { + type Triple<'x> + = NeverTriple + where + Self: 'x; + + fn as_object_proxy(&self) -> ObjectProxy<'_, Self::Triple<'_>> { + match self { + rt::Term::Id(rt::Id::Blank(bid)) => ObjectProxy::BlankNode(bid.as_str()[2..].into()), + rt::Term::Id(rt::Id::Iri(iri)) => ObjectProxy::Iri(Iri::new_unchecked(iri.as_str())), + rt::Term::Literal(lit) => ObjectProxy::Literal(match &lit.type_ { + rt::LiteralType::Any(iri) => { + Literal::Typed(lit.as_str().into(), Iri::new_unchecked(iri.as_str())) + } + rt::LiteralType::LangString(lang_tag_buf) => Literal::LanguageString( + lit.as_str().into(), + LangTag::new_unchecked(lang_tag_buf.as_str()), + None, + ), + }), + } + } +} + +// rdf_types::GraphLabel as GraphName + +impl GraphName for rt::GraphLabel { + fn as_graph_name_proxy(&self) -> GraphNameProxy<'_> { + match self { + rt::Id::Blank(bid) => GraphNameProxy::BlankNode(bid.as_str()[2..].into()), + rt::Id::Iri(iri) => GraphNameProxy::Iri(Iri::new_unchecked(iri.as_str())), + } + } +} + +impl<'a> From> for rt::GraphLabel { + fn from(value: GraphNameProxy<'a>) -> Self { + match value { + GraphNameProxy::Iri(iri) => rt::GraphLabel::Iri(rt::IriBuf::from(iri)), + GraphNameProxy::BlankNode(bnid) => rt::GraphLabel::Blank(safe_bnode(bnid)), + } + } +} + +// rdf_types::LexicalGraphLabelRef as GraphName + +impl GraphName for rt::LexicalGraphLabelRef<'_> { + fn as_graph_name_proxy(&self) -> GraphNameProxy<'_> { + match self { + rt::Id::Blank(bid) => GraphNameProxy::BlankNode(bid.as_str()[2..].into()), + rt::Id::Iri(iri) => GraphNameProxy::Iri(Iri::new_unchecked(iri.as_str())), + } + } +} + +// utility functions + +/// This function converts an R2C2 bnode label into 
an rdf_types Blank Node, +/// ensuring that bnode labels that are not valid SPARQL bnodeIds are correctly handled +fn safe_bnode(bnid: std::borrow::Cow) -> rt::BlankIdBuf { + rt::BlankIdBuf::new(format!("_:{bnid}")).unwrap_or_else(|err| { + use std::hash::{DefaultHasher, Hash, Hasher}; + let mut s = DefaultHasher::new(); + err.0.hash(&mut s); + rt::BlankIdBuf::from_u64(s.finish()) + }) +} + +#[cfg(test)] +mod test_round_trip { + use rdf_types::{FromBlankId, FromIri}; + + use super::*; + + #[test] + fn subject_iri() -> TestResult { + let s1 = rt::Subject::Iri(rt::IriBuf::new("https://example.org/ns/alice".into())?); + let s2: rt::Subject = s1.as_subject_proxy().into(); + assert_eq!(s1, s2); + let s2: rt::Subject = s1.as_lexical_subject_ref().as_subject_proxy().into(); + assert_eq!(s1, s2); + Ok(()) + } + + #[test] + fn subject_bnode() -> TestResult { + let s1 = rt::Subject::Blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let s2: rt::Subject = s1.as_subject_proxy().into(); + assert_eq!(s1, s2); + let s2: rt::Subject = s1.as_lexical_subject_ref().as_subject_proxy().into(); + assert_eq!(s1, s2); + Ok(()) + } + + #[test] + fn predicate() -> TestResult { + let p1 = rt::IriBuf::new("https://example.org/ns/alice".into())?; + let p2: rt::IriBuf = Predicate::as_iri(&p1).into(); + assert_eq!(p1, p2); + let p2: rt::IriBuf = Predicate::as_iri(&p1.as_iri()).into(); + assert_eq!(p1, p2); + Ok(()) + } + + #[test] + fn object_iri() -> TestResult { + let o1 = rt::Object::from_iri(rt::IriBuf::new("https://example.org/ns/alice".into())?); + let o2: rt::Object = o1.as_object_proxy().try_into()?; + assert_eq!(o1, o2); + let o2: rt::Object = o1.as_lexical_object_ref().as_object_proxy().try_into()?; + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn object_bnode() -> TestResult { + let o1 = rt::Object::from_blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let o2: rt::Object = o1.as_object_proxy().try_into()?; + assert_eq!(o1, o2); + let o2: rt::Object = 
o1.as_lexical_object_ref().as_object_proxy().try_into()?; + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn object_typed_literal() -> TestResult { + let o1 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::Any(rt::IriBuf::new(XSD_STRING.into())?), + }); + let o2: rt::Object = o1.as_object_proxy().try_into()?; + assert_eq!(o1, o2); + let o2: rt::Object = o1.as_lexical_object_ref().as_object_proxy().try_into()?; + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn object_language_tagged_literal() -> TestResult { + let o1 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::LangString(langtag::LangTagBuf::new("en-Latn-UK".into())?), + }); + let o2: rt::Object = o1.as_object_proxy().try_into()?; + assert_eq!(o1, o2); + let o2: rt::Object = o1.as_lexical_object_ref().as_object_proxy().try_into()?; + assert_eq!(o1, o2); + Ok(()) + } + + #[test] + fn triple() -> TestResult { + let s0 = rt::Subject::Blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let p0 = rt::IriBuf::new("https://example.org/ns/alice".into())?; + let o0 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::Any(rt::IriBuf::new(XSD_STRING.into())?), + }); + let t1 = rt::Triple(s0, p0, o0); + let t2 = try_from_r2c2_triple(&t1)?; + assert_eq!(t1, t2); + let t2 = try_from_r2c2_triple(t1.as_lexical_triple_ref())?; + assert_eq!(t1, t2); + Ok(()) + } + + #[test] + fn quad_default_graph() -> TestResult { + let s0 = rt::Subject::Blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let p0 = rt::IriBuf::new("https://example.org/ns/alice".into())?; + let o0 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::Any(rt::IriBuf::new(XSD_STRING.into())?), + }); + let q1 = rt::Quad(s0, p0, o0, None); + let q2 = try_from_r2c2_quad(&q1)?; + assert_eq!(q1, q2); + let q2 = try_from_r2c2_quad(q1.as_lexical_quad_ref())?; + assert_eq!(q1, q2); + Ok(()) + } + + #[test] + fn quad_named_graph() -> 
TestResult { + let s0 = rt::Subject::Blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let p0 = rt::IriBuf::new("https://example.org/ns/alice".into())?; + let o0 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::Any(rt::IriBuf::new(XSD_STRING.into())?), + }); + let g0 = rt::GraphLabel::Iri(rt::IriBuf::new("https://example.org/".into())?); + let q1 = rt::Quad(s0, p0, o0, Some(g0)); + let q2 = try_from_r2c2_quad(&q1)?; + assert_eq!(q1, q2); + let q2 = try_from_r2c2_quad(q1.as_lexical_quad_ref())?; + assert_eq!(q1, q2); + Ok(()) + } + + // testing round trip rdf_types → oxrdf → rdf_types -> oxrdf + + #[test] + fn subject_iri_via_oxrdf() -> TestResult { + let s1 = rt::Subject::Iri(rt::IriBuf::new("https://example.org/ns/alice".into())?); + let s2: oxrdf::Subject = s1.as_subject_proxy().into(); + let s3: rt::Subject = s2.as_subject_proxy().into(); + assert_eq!(s1, s3); + let s4: oxrdf::Subject = s3.as_subject_proxy().into(); + assert_eq!(s2, s4); + Ok(()) + } + + #[test] + fn subject_bnode_via_oxrdf() -> TestResult { + let s1 = rt::Subject::Blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let s2: oxrdf::Subject = s1.as_subject_proxy().into(); + let s3: rt::Subject = s2.as_subject_proxy().into(); + assert_eq!(s1, s3); + let s4: oxrdf::Subject = s3.as_subject_proxy().into(); + assert_eq!(s2, s4); + Ok(()) + } + + #[test] + fn predicate_via_oxrdf() -> TestResult { + let p1 = rt::IriBuf::new("https://example.org/ns/alice".into())?; + let p2: oxrdf::NamedNode = Predicate::as_iri(&p1).into(); + let p3: rt::IriBuf = p2.as_iri().into(); + assert_eq!(p1, p3); + let p4: oxrdf::NamedNode = Predicate::as_iri(&p3).into(); + assert_eq!(p2, p4); + Ok(()) + } + + #[test] + fn object_iri_via_oxrdf() -> TestResult { + let o1: rt::Object = + rt::Object::from_iri(rt::IriBuf::new("https://example.org/ns/alice".into())?); + let o2: oxrdf::Term = o1.as_object_proxy().into(); + let o3: rt::Object = o2.as_object_proxy().try_into()?; + assert_eq!(o1, 
o3); + let o4: oxrdf::Term = o3.as_object_proxy().into(); + assert_eq!(o2, o4); + Ok(()) + } + + #[test] + fn object_bnode_via_oxrdf() -> TestResult { + let o1: rt::Object = rt::Object::from_blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let o2: oxrdf::Term = o1.as_object_proxy().into(); + let o3: rt::Object = o2.as_object_proxy().try_into()?; + assert_eq!(o1, o3); + let o4: oxrdf::Term = o3.as_object_proxy().into(); + assert_eq!(o2, o4); + Ok(()) + } + + #[test] + fn object_typed_literal_via_oxrdf() -> TestResult { + let o1 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::Any(rt::IriBuf::new(XSD_STRING.into())?), + }); + let o2: oxrdf::Term = o1.as_object_proxy().into(); + let o3: rt::Object = o2.as_object_proxy().try_into()?; + assert_eq!(o1, o3); + let o4: oxrdf::Term = o3.as_object_proxy().into(); + assert_eq!(o2, o4); + Ok(()) + } + + #[test] + fn object_language_tagged_literal_via_oxrdf() -> TestResult { + let o1 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::LangString(langtag::LangTagBuf::new("en-Latn-UK".into())?), + }); + let o2: oxrdf::Term = o1.as_object_proxy().into(); + let o3: rt::Object = o2.as_object_proxy().try_into()?; + assert_eq!(o1, o3); + let o4: oxrdf::Term = o3.as_object_proxy().into(); + assert_eq!(o2, o4); + Ok(()) + } + + #[test] + fn triple_via_oxrdf() -> TestResult { + let s0 = rt::Subject::Blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let p0 = rt::IriBuf::new("https://example.org/ns/alice".into())?; + let o0 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::Any(rt::IriBuf::new(XSD_STRING.into())?), + }); + let t1 = rt::Triple(s0, p0, o0); + let t2 = crate::impl_oxrdf::from_r2c2_triple(&t1); + let t3 = try_from_r2c2_triple(&t2)?; + assert_eq!(t1, t3); + let t4 = crate::impl_oxrdf::from_r2c2_triple(&t3); + assert_eq!(t2, t4); + Ok(()) + } + + #[test] + fn quad_default_graph_via_oxrdf() -> TestResult { + let s0 = 
rt::Subject::Blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let p0 = rt::IriBuf::new("https://example.org/ns/alice".into())?; + let o0 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::Any(rt::IriBuf::new(XSD_STRING.into())?), + }); + let q1 = rt::Quad(s0, p0, o0, None); + let q2 = crate::impl_oxrdf::from_r2c2_quad(&q1); + let q3 = try_from_r2c2_quad(&q2)?; + assert_eq!(q1, q3); + let q4 = crate::impl_oxrdf::from_r2c2_quad(&q3); + assert_eq!(q2, q4); + Ok(()) + } + + #[test] + fn quad_named_graph_via_oxrdf() -> TestResult { + let s0 = rt::Subject::Blank(rt::BlankIdBuf::new("_:b1".into()).unwrap()); + let p0 = rt::IriBuf::new("https://example.org/ns/alice".into())?; + let o0 = rt::Object::Literal(rt::Literal { + value: "⛄".into(), + type_: rt::LiteralType::Any(rt::IriBuf::new(XSD_STRING.into())?), + }); + let g0 = rt::GraphLabel::Iri(rt::IriBuf::new("https://example.org/".into())?); + let q1 = rt::Quad(s0, p0, o0, Some(g0)); + let q2 = crate::impl_oxrdf::from_r2c2_quad(&q1); + let q3 = try_from_r2c2_quad(&q2)?; + assert_eq!(q1, q3); + let q4 = crate::impl_oxrdf::from_r2c2_quad(&q3); + assert_eq!(q2, q4); + Ok(()) + } + + type TestResult = Result<(), Box>; + + static XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string"; +} diff --git a/statement/src/lib.rs b/statement/src/lib.rs new file mode 100644 index 0000000..8f5a69a --- /dev/null +++ b/statement/src/lib.rs @@ -0,0 +1,39 @@ +//! I define traits and utility types for describing RDF statements +//! ([triples] and [quads]) as well as their constituent [RDF terms]. +//! +//! [triples]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-triple +//! [quads]: https://www.w3.org/TR/rdf12-concepts/#dfn-quad +//! [RDF terms]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-term +//! [RDF datasets]: https://www.w3.org/TR/rdf12-concepts/#dfn-rdf-dataset +//! +//! # Features +//! * `poc_impl`: include implementation of the traits defined in this crate +//! 
for existing RDF implementations. +//! +//! As the name implies, this is only a proof of concept implementation. +//! It is expected that such RDF implementations will eventually implement the traits themselves. +#![deny(missing_docs)] + +mod _iri; +pub use _iri::*; +mod _literal; +pub use _literal::*; + +mod _subject; +pub use _subject::*; +mod _predicate; +pub use _predicate::*; +mod _graph_name; +pub use _graph_name::*; +mod _object; +pub use _object::*; + +mod _triple; +pub use _triple::*; +mod _quad; +pub use _quad::*; + +#[cfg(feature = "poc_impl")] +pub mod impl_oxrdf; +#[cfg(feature = "poc_impl")] +pub mod impl_rdf_types; diff --git a/statement_validation/Cargo.toml b/statement_validation/Cargo.toml new file mode 100644 index 0000000..ec55c74 --- /dev/null +++ b/statement_validation/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "r2c2_statement_validation" +version.workspace = true +authors.workspace = true +edition.workspace = true +repository.workspace = true +readme.workspace = true +license-file.workspace = true +keywords.workspace = true + +[dependencies] +r2c2_statement.workspace = true +regex = "1.11.1" + +langtag = { version = "0.4.0", optional = true } +oxrdf = { version = "0.2.4", optional = true, features = ["rdf-star"] } +rdf-types = { version = "0.22.5", optional = true } + +[lints] +workspace = true + +[features] +poc_impl = ["dep:langtag", "dep:oxrdf", "dep:rdf-types"] diff --git a/statement_validation/src/_iri.rs b/statement_validation/src/_iri.rs new file mode 100644 index 0000000..167c76c --- /dev/null +++ b/statement_validation/src/_iri.rs @@ -0,0 +1,231 @@ +use std::{borrow::Cow, sync::LazyLock}; + +use r2c2_statement::Iri; +use regex::Regex; + +/// Extension trait for [`Iri`] providing validation methods. +pub trait IriValid<'a> { + /// Return a new [`Iri`] if the argument is a valid IRI, otherwise None. + #[allow(clippy::new_ret_no_self)] + fn new(txt: impl Into>) -> Option>; + + /// In debug mode, panic if this [`Iri`] is not valid. 
+ /// In release mode, does nothing. + /// + /// Can be useful after a [`new_unchecked`](Iri::new_unchecked) + fn debug_assert_is_valid(&self); +} + +impl<'a> IriValid<'a> for Iri<'a> { + fn new(txt: impl Into>) -> Option { + let inner = txt.into(); + IRI_REGEX + .is_match(&inner) + .then_some(Iri::new_unchecked(inner)) + } + + #[inline] + fn debug_assert_is_valid(&self) { + debug_assert!(IRI_REGEX.is_match(self.as_ref())) + } +} + +pub(crate) static IRI_REGEX: LazyLock = LazyLock::new(|| Regex::new(IRI_REGEX_SRC).unwrap()); + +/// Match an absolute IRI reference. +pub static IRI_REGEX_SRC: &str = r"(?x)^ + #scheme + ( # CAPTURE scheme + [A-Za-z] [-A-Za-z0-9+.]* + ) + : + #ihier_part + (?: #iauthority + ipath_abempty + // + ( # CAPTURE iauthority + (?: # iuserinfo + (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:] + | + %[0-9a-fA-F]{2} + )* + @ + )? + # ihost + (?: # ip_literal + \[ + (?: # ipv6address + (?: + (?:[0-9a-fA-F]{1,4}:){6} + (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3}) + | + :: + (?:[0-9a-fA-F]{1,4}:){5} + (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3}) + | + (?:[0-9a-fA-F]{1,4})? + :: + (?:[0-9a-fA-F]{1,4}:){4} + (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3}) + | + (?:(?:[0-9a-fA-F]{1,4}:){0,1}:[0-9a-fA-F]{1,4})? 
+ :: + (?:[0-9a-fA-F]{1,4}:){3} + (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3}) + | + (?:(?:[0-9a-fA-F]{1,4}:){0,2}:[0-9a-fA-F]{1,4})? + :: + (?:[0-9a-fA-F]{1,4}:){2} + (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3}) + | + (?:(?:[0-9a-fA-F]{1,4}:){0,3}:[0-9a-fA-F]{1,4})? + :: + [0-9a-fA-F]{1,4}: + (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3}) + | + (?:(?:[0-9a-fA-F]{1,4}:){0,4}:[0-9a-fA-F]{1,4})? + :: + (?:[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}|(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))(?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3}) + | + (?:(?:[0-9a-fA-F]{1,4}:){0,5}:[0-9a-fA-F]{1,4})? + :: + [0-9a-fA-F]{1,4} + | + (?:(?:[0-9a-fA-F]{1,4}:){0,6}:[0-9a-fA-F]{1,4})? + :: + ) + | # ipvfuture + v[0-9a-fA-F]+ \. [-A-Za-z0-9._~!$&'()*+,;=:]+ + ) + \] + | # ipv4address + (?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5])) (?:\.(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))){3} + | # ireg_name + (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=] + | %[0-9a-fA-F]{2} + )* + ) + (?: + : + [0-9]* # port + )? 
+ ) + #ipath_abempty + ( # CAPTURE ipath_abempty + (?: + / + (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@] + | %[0-9a-fA-F]{2} + )* + )* + ) + | #ipath_absolute + ( # CAPTURE ipath_absolute + / + (?: + (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@] + | %[0-9a-fA-F]{2} + )* + (?: + / + (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@] + | %[0-9a-fA-F]{2} + )* + )* + )? 
+ ) + | #ipath_rootless + ( # CAPTURE ipath_rootless + (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@] + | %[0-9a-fA-F]{2} + )+ + (?: + / + (?: [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@] + | %[0-9a-fA-F]{2} + )* + )* + ) + )? # optional because of ipath_empty + (?: # ?iquery + \? + ( # CAPTURE iquery + (?: + [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@'\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}/?] + | %[0-9a-fA-F]{2} + )* + ) + )? + (?: # #ifragment + \# + ( # CAPTURE ifragment + (?: + [-A-Za-z0-9._~\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}!$&'()*+,;=:@/?] + | %[0-9a-fA-F]{2} + )* + ) + )? 
+$"; + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn regex() { + for txt in POSITIVE_IRIS { + assert!(IRI_REGEX.is_match(txt)); + } + for txt in NEGATIVE_IRIS { + assert!(!IRI_REGEX.is_match(txt)); + } + } + + /// An array of valid IRIs + pub const POSITIVE_IRIS: &[&str] = &[ + "http:", + "http://example.org", + "http://127.0.0.1", + "http://[::]", + "http://%0D", + "http://example.org/", + "http://éxample.org/", + "http://user:pw@example.org:1234/", + "http://example.org/foo/bar/baz", + "http://example.org/foo/bar/", + "http://example.org/foo/bar/bàz", + "http://example.org/foo/.././/bar", + "http://example.org/!$&'()*+,=:@/foo%0D", + "http://example.org/?abc", + "http://example.org/?!$&'()*+,=:@/?\u{E000}", + "http://example.org/#def", + "http://example.org/?abc#def", + "tag:abc/def", + "tag:", + "http://example.org/#Andr%C3%A9", + "http://example.org/?Andr%C3%A9", + ]; + + /// An array of invalid IRIs. + pub const NEGATIVE_IRIS: &[&str] = &[ + // valid IRI references that are not IRIs (relative) + "foo", + "..", + "//example.org", + "?", + "#", + "?#", + "?Andr%C3%A9#Andr%C3%A9", + // invalid IRI references + "http://[/", + "http://a/[", + "http://a/]", + "http://a/|", + "http://a/ ", + "http://a/\u{E000}", + "[", + "]", + "|", + " ", + "\u{E000}", + ]; +} diff --git a/statement_validation/src/_language_tag.rs b/statement_validation/src/_language_tag.rs new file mode 100644 index 0000000..7260777 --- /dev/null +++ b/statement_validation/src/_language_tag.rs @@ -0,0 +1,312 @@ +use std::{borrow::Cow, sync::LazyLock}; + +use r2c2_statement::LangTag; +use regex::Regex; + +/// Extension trait for [`LangTag`] providing validation methods. +pub trait LangTagValid<'a> { + /// Return a new [`LangTag`] if the argument is a valid language tag, otherwise None. + #[allow(clippy::new_ret_no_self)] + fn new(txt: impl Into>) -> Option>; + + /// In debug mode, panic if this [`LangTag`] is not valid. + /// In release mode, does nothing. 
+ /// + /// Can be useful after a [`new_unchecked`](LangTag::new_unchecked) + fn debug_assert_is_valid(&self); +} + +impl<'a> LangTagValid<'a> for LangTag<'a> { + fn new(txt: impl Into>) -> Option { + let inner = txt.into(); + TAG_REGEX + .is_match(&inner) + .then_some(LangTag::new_unchecked(inner)) + } + + #[inline] + fn debug_assert_is_valid(&self) { + debug_assert!(TAG_REGEX.is_match(self.as_ref())) + } +} + +pub(crate) static TAG_REGEX: LazyLock = LazyLock::new(|| Regex::new(TAG_REGEX_SRC).unwrap()); + +/// Match a valid BCP47 language tag +pub static TAG_REGEX_SRC: &str = r"(?xi-u)^ +( + (?: + (?: #language + (?: + [A-Z]{2,3} + (?: #extlang + (?: + -[A-Z]{3} + ){0,3} + ) + ) + | + [A-Z]{4,8} + ) + (?: #script + -[A-Z]{4} + )? + (?: #region + - + (?: + [A-Z]{2} + | + [0-9]{3} + ) + )? + (?: #variant + - + (?: + [A-Z0-9]{5,8} + | + [0-9][A-Z0-9]{3} + ) + )* + (?: #extension + -[0-9A-WY-Z] + (?: + -[A-Z0-9]{2,8} + )+ + )* + (?: #privateUse + -X + (?: + -[A-Z0-9]{1,8} + )+ + )? + ) +| + (?: #privateUse + X + (?: + -[A-Z0-9]{1,8} + )+ + ) +| + (?: #grandfathered + en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE + # NB regular grandfathered tags are not included, + # as they will be matched by the normal case + ) +)$"; + +#[cfg(test)] +mod test { + use std::iter::once; + + use super::*; + + #[test] + fn regex_valid() { + for mut tag in valid_tags() { + assert!(TAG_REGEX.is_match(&tag), "{tag}"); + tag.make_ascii_uppercase(); + assert!(TAG_REGEX.is_match(&tag), "{tag}"); + } + for mut txt in private_uses(3) { + let tag = &txt[1..]; + assert!(TAG_REGEX.is_match(tag), "{tag}"); + txt.make_ascii_uppercase(); + let tag = &txt[1..]; + assert!(TAG_REGEX.is_match(tag), "{tag}"); + } + for tag in GRANDFATHERED_TAGS { + assert!(TAG_REGEX.is_match(tag), "{tag}"); + assert!(TAG_REGEX.is_match(&tag.to_ascii_uppercase()), "{tag}"); + assert!(TAG_REGEX.is_match(&tag.to_ascii_lowercase()), 
"{tag}"); + } + } + + #[test] + fn regex_invalid() { + for tag in valid_tags() { + for invalid_suffix in ["a@", "abcdefghi"] { + let txt = format!("{tag}-{invalid_suffix}"); + assert!(!TAG_REGEX.is_match(&txt), "{txt}"); + } + } + for txt in INVALID_TAGS { + assert!(!TAG_REGEX.is_match(txt), "{txt}"); + } + } + + // below are utility functions used to generate valid (and invalid) tags for testing + + fn valid_tags() -> impl Iterator { + let (tx, rx) = std::sync::mpsc::channel(); + std::thread::spawn(move || { + for language in languages() { + for script in once("").chain(scripts()) { + for region in once("").chain(regions()) { + for variant in once("".to_string()).chain(variants(1)) { + for extension in once("".to_string()).chain(extensions(1)) { + for private_use in once("".to_string()).chain(private_uses(1)) { + let tag = format!( + "{language}{script}{region}{variant}{extension}{private_use}" + ); + tx.send(tag).unwrap(); + } + } + } + } + } + } + for variant in variants(2) { + let tag = format!("en{variant}"); + tx.send(tag).unwrap(); + } + for extension in extensions(2) { + let tag = format!("en{extension}"); + tx.send(tag).unwrap(); + } + for private_use in private_uses(2) { + let tag = format!("en{private_use}"); + tx.send(tag).unwrap(); + } + }); + rx.into_iter() + } + + fn languages() -> impl Iterator { + ["en", "eng"] + .into_iter() + .flat_map(|language| langexts().map(move |exts| format!("{language}{exts}"))) + .chain(["dial", "diale", "dialec", "dialect", "dialects"].map(Into::into)) + } + + fn langexts() -> impl Iterator { + ["", "-ext", "-ext-ext", "-ext-ext-ext"].into_iter() + } + + fn scripts() -> impl Iterator { + ["-latn"].into_iter() + } + + fn regions() -> impl Iterator { + ["-uk", "-826"].into_iter() + } + fn variants(max: u8) -> impl Iterator { + debug_assert!(max >= 1); + (1..=max).flat_map(variant_parts) + } + + fn variant_parts(n: u8) -> Box> { + match n { + 0 => Box::new(once("".to_string())), + n => Box::new(variant_parts(n - 
1).flat_map(|prefix| { + ["varia", "variaa", "variant", "variants", "0var"] + .map(move |suffix| format!("{prefix}-{suffix}")) + })), + } + } + + fn extensions(max: u8) -> impl Iterator { + debug_assert!(max >= 1); + (1..=max).flat_map(move |i| extension_parts(i, max)) + } + + fn extension_parts(n: u8, max: u8) -> Box> { + match n { + 0 => Box::new(once("".to_string())), + n => Box::new(extension_parts(n - 1, max).flat_map(move |prefix| { + (1..=max) + .flat_map(extension_part_parts) + .map(move |suffix| format!("{prefix}-{suffix}")) + })), + } + } + + fn extension_part_parts(n: u8) -> Box> { + match n { + 0 => Box::new(["a", "1"].into_iter().map(ToString::to_string)), + n => Box::new(extension_part_parts(n - 1).flat_map(|prefix| { + [ + "ab", "abc", "abcd", "abcde", "abcdefg", "abcdefgh", "12", "123", "1234", + "12345", "1234567", "12345678", "1b", "1b3", "1b3d", "1b3d5", "1b3d5f7", + "1b3d5f7h", + ] + .map(|suffix| format!("{prefix}-{suffix}")) + })), + } + } + + fn private_uses(max: u8) -> impl Iterator { + debug_assert!(max >= 1); + (1..=max).flat_map(private_use_parts) + } + + fn private_use_parts(n: u8) -> Box> { + match n { + 0 => Box::new(once("-x".to_string())), + n => Box::new(private_use_parts(n - 1).flat_map(|prefix| { + [ + "a", "ab", "abc", "abcd", "abcde", "abcdefg", "abcdefgh", "1", "12", "123", + "1234", "12345", "1234567", "12345678", "1b", "1b3", "1b3d", "1b3d5", + "1b3d5f7", "1b3d5f7h", + ] + .map(|suffix| format!("{prefix}-{suffix}")) + })), + } + } + + /// An array of valid TAGs + pub const GRANDFATHERED_TAGS: &[&str] = &[ + // irregular grandfathered + "en-GB-oed", + "i-ami", + "i-bnn", + "i-default", + "i-enochian", + "i-hak", + "i-klingon", + "i-lux", + "i-mingo", + "i-navajo", + "i-pwn", + "i-tao", + "i-tay", + "i-tsu", + "sgn-BE-FR", + "sgn-BE-NL", + "sgn-CH-DE", + // regular grandfathered + "art-lojban", + "cel-gaulish", + "no-bok", + "no-nyn", + "zh-guoyu", + "zh-hakka", + "zh-min", + "zh-min-nan", + "zh-xiang", + ]; + + /// An array of 
invalid TAGs + pub const INVALID_TAGS: &[&str] = &[ + "12", // invalid characters + "a@", // invalid characters + "a", // too short + "abcdefghi", // too long + // wrong ordering + "ab-ab-abc", + "ab-ab-abcd", + "ab-123-abc", + "ab-123-abcd", + "ab-abcd-abc", + "ab-1bcd-ab", + "ab-1bcd-abc", + "ab-1bcd-123", + "ab-1bcd-abcd", + "ab-abcde-ab", + "ab-abcde-abc", + "ab-abcde-123", + "ab-abcde-abcd", + "ab-a-b", + "abcd-abc", + ]; +} diff --git a/statement_validation/src/lib.rs b/statement_validation/src/lib.rs new file mode 100644 index 0000000..6c74857 --- /dev/null +++ b/statement_validation/src/lib.rs @@ -0,0 +1,8 @@ +//! I extend the utility types of [`r2c2_statement`] +//! with validating constructors, as a convenience for implementers. +#![deny(missing_docs)] + +mod _iri; +pub use _iri::*; +mod _language_tag; +pub use _language_tag::*;