Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ edition = "2021"
rdf = "0.1.4"
rio_turtle = "0.7.1"
rio_api = "0.7.1"
oxrdf = "0.2.4"
oxrdf = "0.3.3"
farmhash = "1.1.5"
datafrog = "2.0.1"
roaring = "0.5.2"
Expand Down
190 changes: 190 additions & 0 deletions lib/src/reasoner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,92 @@ impl Reasoner {
self.errors.push(error);
}

/// Adds the RDFS axiomatic triples for every container membership property
/// (`rdf:_n`) that is used as a predicate in `self.input`.
///
/// For each such predicate `p` three triples are generated:
/// `p rdf:type rdf:Property`, `p rdf:type rdfs:ContainerMembershipProperty`
/// and `p rdfs:subPropertyOf rdfs:member`. Generated triples that are already
/// present in `self.input` are dropped before the rest is handed to
/// `add_base_triples`.
///
/// Side effect: `self.input` is sorted in place, because the duplicate filter
/// (`get_unique`) is a linear merge over two sorted sequences.
fn inject_rdfs_container_membership_axioms(&mut self) {
    // Intern the vocabulary terms the generated triples refer to.
    let type_id = self.index.put(rdf!("type"));
    let property_id = self.index.put(rdf!("Property"));
    let membership_class_id = self.index.put(rdfs!("ContainerMembershipProperty"));
    let sub_property_of_id = self.index.put(rdfs!("subPropertyOf"));
    let member_id = self.index.put(rdfs!("member"));

    let mut visited = HashSet::new();
    let mut pending = Vec::new();
    for (_, (pred, _)) in self.input.iter() {
        let pred = *pred;
        // Id 0 is skipped (presumably a reserved/sentinel id — confirm), and
        // every distinct predicate is examined only once.
        if pred == 0 || !visited.insert(pred) {
            continue;
        }
        let is_membership_property = match self.index.get(pred) {
            Some(Term::NamedNode(node)) => is_rdf_container_membership_property_iri(node.as_str()),
            _ => false,
        };
        if is_membership_property {
            pending.push((pred, (type_id, property_id)));
            pending.push((pred, (type_id, membership_class_id)));
            pending.push((pred, (sub_property_of_id, member_id)));
        }
    }

    if pending.is_empty() {
        return;
    }

    // Both sequences must be sorted ascending for the merge-based filter.
    pending.sort_unstable();
    self.input.sort_unstable();
    get_unique(&self.input, &mut pending);
    self.add_base_triples(pending);
}

/// Scans `triples` for RDFS datatype problems and records each one through
/// `add_error`.
///
/// Two kinds of finding are produced:
/// * `rdfs-datatype` — a literal object whose lexical form is judged
///   ill-formed for its own datatype;
/// * `rdfs-datatype-range` — an object that is judged to lie outside a
///   datatype declared as `rdfs:range` of the triple's predicate. Range
///   declarations are collected from `triples` itself.
///
/// A check whose helper answers `None` (datatype not understood) is skipped.
fn detect_rdfs_datatype_inconsistencies(&mut self, triples: &[KeyedTriple]) {
    let range_predicate = self.index.put(rdfs!("range"));

    // property id -> every range declared for it in `triples`
    let mut declared_ranges: HashMap<URI, Vec<URI>> = HashMap::new();
    for &(property, (predicate, range)) in triples {
        if predicate == range_predicate {
            declared_ranges.entry(property).or_default().push(range);
        }
    }

    // Findings are buffered and reported after the scan, while no term
    // borrowed from the index is alive any more.
    let mut findings: Vec<(String, String)> = Vec::new();

    for &(subject, (predicate, object)) in triples {
        let object_term = self.index.get(object);

        // Check 1: literal objects must be well-formed for their own datatype.
        if let Some(Term::Literal(literal)) = object_term {
            let verdict = literal_well_formed_for_datatype(literal, literal.datatype().as_str());
            if verdict == Some(false) {
                findings.push((
                    "rdfs-datatype".to_string(),
                    format!(
                        "ill-formed literal {} used in triple {} {} {}",
                        literal,
                        self.to_u(subject),
                        self.to_u(predicate),
                        self.to_u(object)
                    ),
                ));
            }
        }

        // Check 2: the object must fit every datatype range of the predicate.
        let Some(targets) = declared_ranges.get(&predicate) else {
            continue;
        };
        let Some(object_term) = object_term else {
            continue;
        };
        for &target in targets {
            let Some(Term::NamedNode(range_node)) = self.index.get(target) else {
                continue;
            };
            if term_in_datatype_space(object_term, range_node.as_str()) == Some(false) {
                findings.push((
                    "rdfs-datatype-range".to_string(),
                    format!(
                        "range clash: object {} is not in datatype {} for predicate {}",
                        self.to_u(object),
                        range_node,
                        self.to_u(predicate)
                    ),
                ));
            }
        }
    }

    for (rule, message) in findings {
        self.add_error(rule, message);
    }
}

/// Returns a read-only view of errors detected during reasoning (e.g., disjointness violations).
pub fn errors(&self) -> &[ReasoningError] {
&self.errors
Expand Down Expand Up @@ -559,6 +645,13 @@ impl Reasoner {
let owlcomplementof_node = self.index.put(owl!("complementOf"));
let owl_pdw = self.index.put(owl!("propertyDisjointWith"));

// RDFS container membership properties (rdf:_n) carry axiomatic consequences:
// rdf:_n rdf:type rdf:Property ;
// rdf:type rdfs:ContainerMembershipProperty ;
// rdfs:subPropertyOf rdfs:member .
// We inject those axioms once before reasoning so they can participate in closure.
self.inject_rdfs_container_membership_axioms();

//TODO: need to keep the variables persistent in the reasoner so they last between changes
//to the input

Expand Down Expand Up @@ -1580,6 +1673,7 @@ impl Reasoner {
})
.cloned()
.collect();
self.detect_rdfs_datatype_inconsistencies(&output);
// Build output with capacity hints
let mut out_triples: Vec<Triple> = Vec::with_capacity(output.len());
for inst in output.iter() {
Expand Down Expand Up @@ -1659,6 +1753,102 @@ impl Reasoner {
}
}

/// Returns `true` when `iri` names an RDF container membership property,
/// i.e. `rdf:_n` where `n` is a decimal integer greater than zero written
/// without leading zeros (`rdf:_1`, `rdf:_2`, ...).
///
/// The check is purely lexical, so arbitrarily large indices are accepted,
/// while `rdf:_`, `rdf:_0` and zero-padded forms such as `rdf:_01` are
/// rejected.
fn is_rdf_container_membership_property_iri(iri: &str) -> bool {
    const RDF_CONTAINER_PREFIX: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#_";
    let Some(suffix) = iri.strip_prefix(RDF_CONTAINER_PREFIX) else {
        return false;
    };
    let mut digits = suffix.bytes();
    // Requiring a leading 1-9 rules out the empty suffix, zero and leading
    // zeros in one step; everything after it only has to be a digit.
    matches!(digits.next(), Some(b'1'..=b'9')) && digits.all(|b| b.is_ascii_digit())
}

/// Returns `true` when `value` has the shape of an integer lexical form:
/// an optional single leading `+` or `-` followed by one or more ASCII digits.
///
/// No range check is performed and surrounding whitespace is not tolerated.
fn integer_lexical_is_valid(value: &str) -> bool {
    // Drop at most one sign character; what remains must be pure digits.
    let magnitude = value.strip_prefix(['+', '-']).unwrap_or(value);
    !magnitude.is_empty() && magnitude.bytes().all(|b| b.is_ascii_digit())
}

/// Returns `true` when `value` is a valid integer lexical form whose value
/// also fits into an `i32`.
fn int_lexical_is_valid(value: &str) -> bool {
    if !integer_lexical_is_valid(value) {
        return false;
    }
    // The shape is fine; parsing as `i32` supplies the range check.
    value.parse::<i32>().is_ok()
}

/// Heuristically decides whether `value` looks like a well-formed XML literal.
///
/// This is a shape check, not an XML parser. After trimming surrounding
/// whitespace:
/// * text without any `<` (including the empty string) is accepted;
/// * text containing `<` must start with `<` and end with `>`, and must
///   either end in `/>` or contain a closing tag marker `</`.
///
/// NOTE(review): mixed content such as `text <b>bold</b>` is reported as
/// ill-formed and mismatched tags such as `<a></b>` are accepted; a real
/// parser would be needed to decide those cases properly.
fn xml_literal_lexical_is_well_formed(value: &str) -> bool {
    let content = value.trim();
    // No markup at all: nothing that could be unbalanced.
    if !content.contains('<') {
        return true;
    }
    let wrapped_in_tags = content.starts_with('<') && content.ends_with('>');
    wrapped_in_tags && (content.ends_with("/>") || content.contains("</"))
}

/// Judges whether `literal` is well-formed when interpreted under
/// `datatype_iri`.
///
/// Returns `Some(true)` / `Some(false)` for the datatypes this function
/// understands (`xsd:string`, `xsd:integer`, `xsd:int`, `rdf:langString`,
/// `rdf:XMLLiteral`) and `None` for every other datatype IRI, meaning that no
/// judgement is made.
fn literal_well_formed_for_datatype(literal: &oxrdf::Literal, datatype_iri: &str) -> Option<bool> {
    const XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string";
    const XSD_INTEGER: &str = "http://www.w3.org/2001/XMLSchema#integer";
    const XSD_INT: &str = "http://www.w3.org/2001/XMLSchema#int";
    const RDF_LANG_STRING: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString";
    const RDF_XML_LITERAL: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";

    let lexical_form = literal.value();
    let verdict = if datatype_iri == XSD_STRING {
        // Every lexical form is accepted as a string.
        true
    } else if datatype_iri == XSD_INTEGER {
        integer_lexical_is_valid(lexical_form)
    } else if datatype_iri == XSD_INT {
        int_lexical_is_valid(lexical_form)
    } else if datatype_iri == RDF_LANG_STRING {
        // A language-tagged string is only acceptable with its tag present.
        literal.language().is_some()
    } else if datatype_iri == RDF_XML_LITERAL {
        xml_literal_lexical_is_well_formed(lexical_form)
    } else {
        return None;
    };
    Some(verdict)
}

/// Judges whether `term` belongs to the datatype identified by `datatype_iri`.
///
/// Returns `None` when `datatype_iri` is not one of the datatypes handled
/// here (`xsd:string`, `xsd:integer`, `xsd:int`, `rdf:langString`,
/// `rdf:XMLLiteral`); otherwise `Some(true)` or `Some(false)`.
///
/// Membership is decided from the literal's own datatype plus a
/// well-formedness check; the only cross-datatype relation taken into account
/// is that `xsd:int` literals count as `xsd:integer`.
///
/// NOTE(review): any non-literal term yields `Some(false)`, so an IRI or blank
/// node used as the object of a datatype-ranged property is treated as a
/// clash — confirm this is the intended reading. Likewise literals of other
/// integer-derived datatypes are reported as outside `xsd:integer`.
fn term_in_datatype_space(term: &Term, datatype_iri: &str) -> Option<bool> {
    const XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string";
    const XSD_INTEGER: &str = "http://www.w3.org/2001/XMLSchema#integer";
    const XSD_INT: &str = "http://www.w3.org/2001/XMLSchema#int";
    const RDF_LANG_STRING: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString";
    const RDF_XML_LITERAL: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";

    let literal = match term {
        Term::Literal(literal) => literal,
        _ => return Some(false),
    };
    let own_datatype = literal.datatype().as_str();
    // An undecidable well-formedness check counts as "not well-formed" here.
    let well_formed_as =
        |iri: &str| literal_well_formed_for_datatype(literal, iri).unwrap_or(false);

    let in_space = if datatype_iri == XSD_STRING {
        literal.language().is_none() && own_datatype == XSD_STRING
    } else if datatype_iri == XSD_INTEGER {
        // Integer and int literals qualify, each validated under its own datatype.
        (own_datatype == XSD_INTEGER || own_datatype == XSD_INT) && well_formed_as(own_datatype)
    } else if datatype_iri == XSD_INT {
        own_datatype == XSD_INT && well_formed_as(XSD_INT)
    } else if datatype_iri == RDF_LANG_STRING {
        literal.language().is_some()
    } else if datatype_iri == RDF_XML_LITERAL {
        own_datatype == RDF_XML_LITERAL && well_formed_as(RDF_XML_LITERAL)
    } else {
        return None;
    };
    Some(in_space)
}

/**
Removes from rv the triples that are in src using a linear merge.
Both src and rv must be sorted ascending.
Expand Down