diff --git a/Gemfile b/Gemfile
index 11561bdb7..8f553e04f 100644
--- a/Gemfile
+++ b/Gemfile
@@ -36,8 +36,8 @@ group :development do
   gem 'rubocop', require: false
 end
 # NCBO gems (can be from a local dev path or from rubygems/git)
-gem 'goo', github: 'ncbo/goo', branch: 'master'
-gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'master'
+gem 'goo', github: 'ncbo/goo', branch: 'develop'
+gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'develop'
 gem 'public_suffix', '~> 5.1.1'
 gem 'net-imap', '~> 0.4.18'
diff --git a/Gemfile.lock b/Gemfile.lock
index fd4e234dd..64956b6f4 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,7 +1,7 @@
 GIT
   remote: https://github.com/ncbo/goo.git
-  revision: b9019ad9e1eb78c74105fc6c6a879085066da17d
-  branch: master
+  revision: c3f9a7f789bf2f52ed31f0272d2725117d3ac04e
+  branch: develop
   specs:
     goo (0.0.2)
       addressable (~> 2.8)
@@ -16,8 +16,8 @@ GIT
 
 GIT
   remote: https://github.com/ncbo/sparql-client.git
-  revision: e89c26aa96f184dbe9b52d51e04fb3d9ba998dbc
-  branch: master
+  revision: 1657f0dd69fd4b522d3549a6848670175f5e98cc
+  branch: develop
   specs:
     sparql-client (1.0.1)
       json_pure (>= 1.4)
diff --git a/bin/owlapi-wrapper-1.4.2.jar b/bin/owlapi-wrapper-1.5.0.jar
old mode 100755
new mode 100644
similarity index 92%
rename from bin/owlapi-wrapper-1.4.2.jar
rename to bin/owlapi-wrapper-1.5.0.jar
index 7dd3cc489..aae64a374
Binary files a/bin/owlapi-wrapper-1.4.2.jar and b/bin/owlapi-wrapper-1.5.0.jar differ
diff --git a/config/schemes/ontology_submission.yml b/config/schemes/ontology_submission.yml
index 750202048..6bc606291 100644
--- a/config/schemes/ontology_submission.yml
+++ b/config/schemes/ontology_submission.yml
@@ -56,7 +56,7 @@ version:
     "PAV: The version number of a resource.",
     "DOAP: A project release",
     "SCHEMA: The version of the CreativeWork embodied by a specified resource."]
-  extractedMetadata: true
+  extractedMetadata: false
   metadataMappings: [ "omv:version", "mod:version", "owl:versionInfo", "pav:version", "doap:release", "schema:version", "oboInOwl:data-version", "oboInOwl:version" ]
 #Status
diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb
index 229b46301..ad446a1d9 100644
--- a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb
+++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb
@@ -3,21 +3,43 @@ module Concerns
   module OntologySubmission
     module MetadataExtractor
 
-      def extract_metadata
+      def extract_metadata(logger = nil, heavy_extraction = true, user_params = nil)
+        logger ||= Logger.new(STDOUT)
+        logger.info('Extracting metadata from the ontology submission.')
+
+        @submission = self
         version_info = extract_version
         ontology_iri = extract_ontology_iri
+        @submission.version = version_info if version_info
+        @submission.uri = ontology_iri if ontology_iri
+        @submission.save
 
-        self.version = version_info if version_info
-        self.uri = RDF::URI.new(ontology_iri) if ontology_iri
+        if heavy_extraction
+          begin
+            # Extract metadata directly from the ontology
+            extract_ontology_metadata(logger, user_params, skip_attrs: [:version, :uri])
+            logger.info('Additional metadata extracted.')
+          rescue StandardError => e
+            logger.error("Error while extracting additional metadata: #{e}\n#{e.backtrace&.join("\n")}")
+          end
+        end
+        if @submission.valid?
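+          # Re-check validity: heavy extraction may have assigned values that fail
+          # the model's enforce rules; if so, fall back to the stored copy below.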
+          @submission.save
+        else
+          logger.error("Error while extracting additional metadata: #{@submission.errors}")
+          @submission = LinkedData::Models::OntologySubmission.find(@submission.id).first.bring_remaining
+        end
       end
 
       def extract_version
+
        query = Goo.sparql_query_client.select(:versionInfo).distinct
-                  .from(self.id)
-                  .where([RDF::URI.new('http://bioportal.bioontology.org/ontologies/versionSubject'),
-                          RDF::URI.new('http://www.w3.org/2002/07/owl#versionInfo'),
-                          :versionInfo])
+                  .from(@submission.id)
+                  .where([RDF::URI.new('http://bioportal.bioontology.org/ontologies/versionSubject'),
+                          RDF::URI.new('http://www.w3.org/2002/07/owl#versionInfo'),
+                          :versionInfo])
 
         sol = query.each_solution.first || {}
         sol[:versionInfo]&.to_s
@@ -25,12 +47,239 @@ def extract_version
 
       def extract_ontology_iri
         query = Goo.sparql_query_client.select(:uri).distinct
-                   .from(self.id)
+                   .from(@submission.id)
                    .where([:uri,
                            RDF::URI.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
                            RDF::URI.new('http://www.w3.org/2002/07/owl#Ontology')])
         sol = query.each_solution.first || {}
-        sol[:uri]&.to_s
+        RDF::URI.new(sol[:uri]) if sol[:uri]
+      end
+
+      # Extract additional metadata about the ontology
+      # First it extracts the main metadata, then the mapped metadata
+      def extract_ontology_metadata(logger, user_params, skip_attrs: [])
+        user_params ||= {}
+        ontology_uri = @submission.uri
+        logger.info("Extracting metadata from ontology #{ontology_uri}")
+
+        # go through all OntologySubmission attributes. Returns symbols
+        LinkedData::Models::OntologySubmission.attributes(:all).each do |attr|
+          next if skip_attrs.include? attr
+          # consider only attributes with the :extractedMetadata setting on that have not been set by the user
+          attr_settings = LinkedData::Models::OntologySubmission.attribute_settings(attr)
+
+          attr_not_excluded = !(user_params.key?(attr) && !user_params[attr].nil? && !user_params[attr].empty?)
+
+          next unless attr_settings[:extractedMetadata] && attr_not_excluded
+
+          # a boolean to check if a value that should be single has already been extracted
+          single_extracted = false
+          type = enforce?(attr, :list) ? :list : :string
+          old_value = value(attr, type)
+
+          unless attr_settings[:namespace].nil?
+            property_to_extract = "#{attr_settings[:namespace]}:#{attr}"
+            hash_results = extract_each_metadata(ontology_uri, attr, property_to_extract, logger)
+            single_extracted = send_value(attr, hash_results, logger) unless hash_results.empty?
+          end
+
+          # extracts attribute value from metadata mappings
+          attr_settings[:metadataMappings] ||= []
+
+          attr_settings[:metadataMappings].each do |mapping|
+            break if single_extracted
+
+            hash_mapping_results = extract_each_metadata(ontology_uri, attr, mapping.to_s, logger)
+            single_extracted = send_value(attr, hash_mapping_results, logger) unless hash_mapping_results.empty?
+          end
+
+          new_value = value(attr, type)
+
+          send_value(attr, old_value, logger) if empty_value?(new_value) && !empty_value?(old_value)
+        end
+      end
+
+      def empty_value?(value)
+        value.nil? || (value.is_a?(Array) && value.empty?) || value.to_s.strip.empty?
+      end
+
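+      # e.g. value(:description, :string) returns '' when the attribute is unset,
+      # and value(:naturalLanguage, :list) returns [] when unset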
+      def value(attr, type)
+        val = @submission.send(attr.to_s)
+        type.eql?(:list) ? Array(val) : val || ''
+      end
+
+      def send_value(attr, new_value, logger)
+        old_val = nil
+        single_extracted = false
+
+        if enforce?(attr, :list)
+          old_val = value(attr, :list)
+          old_values = old_val.dup
+          new_values = new_value.values
+          new_values = new_values.map { |v| find_or_create_agent(attr, v, logger) }.compact if enforce?(attr, :Agent)
+
+          old_values.push(*new_values)
+
+          @submission.send("#{attr}=", old_values.uniq)
+        elsif enforce?(attr, :concatenate)
+          # if there are multiple values for this attribute, we concatenate them
+          # Add the concat at the very end, to easily join the content of the array
+          old_val = value(attr, :string)
+          metadata_values = old_val.split(', ')
+          new_values = new_value.values.map { |x| x.to_s.split(', ') }.flatten
+
+          @submission.send("#{attr}=", (metadata_values + new_values).uniq.join(', '))
+        else
+          new_value = new_value.values.first
+
+          new_value = find_or_create_agent(attr, new_value, logger) if enforce?(attr, :Agent)
+
+          @submission.send("#{attr}=", new_value)
+          single_extracted = true
+        end
+
+        unless @submission.valid?
+          logger.error("Error while extracting metadata for the attribute #{attr}: #{@submission.errors[attr] || @submission.errors}")
+          new_value&.delete if enforce?(attr, :Agent) && new_value.respond_to?(:delete)
+          @submission.send("#{attr}=", old_val)
+        end
+
+        single_extracted
+      end
+
+      # Return a hash with the best literal value for a URI
+      # it selects the literal according to its language: no language > english > french > other languages
+      def select_metadata_literal(metadata_uri, metadata_literal, hash)
+        return unless metadata_literal.is_a?(RDF::Literal)
+
+        if hash.key?(metadata_uri)
+          if metadata_literal.has_language?
+            if !hash[metadata_uri].has_language?
+              return hash
+            else
+              case metadata_literal.language
+              when :en, :eng
+                # Take the value with english language over other languages
+                hash[metadata_uri] = metadata_literal
+                return hash
+              when :fr, :fre
+                # If no english, take french
+                if hash[metadata_uri].language == :en || hash[metadata_uri].language == :eng
+                  return hash
+                else
+                  hash[metadata_uri] = metadata_literal
+                  return hash
+                end
+              else
+                return hash
+              end
+            end
+          else
+            # Take the value with no language in priority (considered as a default)
+            hash[metadata_uri] = metadata_literal
+            return hash
+          end
+        else
+          hash[metadata_uri] = metadata_literal
+          hash
+        end
+      end
+
+      # A function to extract additional metadata
+      # Take the literal data if the property points to a literal
+      # If it points to a URI: first take the "omv:name" of the object pointed to by the property; if nil, take the "rdfs:label";
+      # if neither is found, check for "omv:firstName + omv:lastName" (for "omv:Person") of this object; finally, fall back to the URI itself
+      # The hash_results contains the metadataUri (object pointed to by the metadata property) with the value we are using from it
+      def extract_each_metadata(ontology_uri, attr, prop_to_extract, logger)
+
+        query_metadata = <<eos
+SELECT DISTINCT ?extractedObject ?omvname ?omvfirstname ?omvlastname ?rdfslabel
+FROM #{@submission.id.to_ntriples}
+WHERE {
+  <#{ontology_uri}> #{prop_to_extract} ?extractedObject .
+  OPTIONAL { ?extractedObject omv:name ?omvname } .
+  OPTIONAL { ?extractedObject omv:firstName ?omvfirstname } .
+  OPTIONAL { ?extractedObject omv:lastName ?omvlastname } .
+  OPTIONAL { ?extractedObject rdfs:label ?rdfslabel } .
+}
+eos
+        Goo.namespaces.each do |prefix, uri|
+          query_metadata = "PREFIX #{prefix}: <#{uri}>\n" + query_metadata
+        end
+
+        # logger.info(query_metadata)
+        # This hash will contain the "literal" metadata for each object (uri or literal) pointed to by the metadata predicate
+        hash_results = {}
+        Goo.sparql_query_client.query(query_metadata).each_solution do |sol|
+          value = sol[:extractedObject]
+          if enforce?(attr, :uri)
+            # If the attr is enforced as URI then it directly takes the URI
+            uri_value = value ? RDF::URI.new(value.to_s.strip) : nil
+            hash_results[value] = uri_value if uri_value&.valid?
+          elsif enforce?(attr, :date_time)
+            begin
+              hash_results[value] = DateTime.iso8601(value.to_s)
+            rescue StandardError => e
+              logger.error("Impossible to extract DateTime metadata for #{attr}: #{value}. It should follow iso8601 standards. Error message: #{e}")
+            end
+          elsif enforce?(attr, :integer)
+            begin
+              hash_results[value] = Integer(value.to_s)
+            rescue StandardError => e
+              logger.error("Impossible to extract integer metadata for #{attr}: #{value}. Error message: #{e}")
+            end
+          elsif enforce?(attr, :boolean)
+            case value.to_s.downcase
+            when 'true'
+              hash_results[value] = true
+            when 'false'
+              hash_results[value] = false
+            else
+              logger.error("Impossible to extract boolean metadata for #{attr}: #{value}. It should be 'true' or 'false'.")
+            end
+          elsif value.is_a?(RDF::URI)
+            hash_results = find_object_label(hash_results, sol, value)
+          else
+            # If this is directly a literal
+            hash_results = select_metadata_literal(value, value, hash_results)
+          end
+        end
+        hash_results
+      end
+
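+      # Label lookup priority below: omv:name, then rdfs:label, then omv:firstName
+      # (combined with omv:lastName when present), then the raw URI as a string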
+      def find_object_label(hash_results, sol, value)
+        if !sol[:omvname].nil?
+          hash_results = select_metadata_literal(value, sol[:omvname], hash_results)
+        elsif !sol[:rdfslabel].nil?
+          hash_results = select_metadata_literal(value, sol[:rdfslabel], hash_results)
+        elsif !sol[:omvfirstname].nil?
+          hash_results = select_metadata_literal(value, sol[:omvfirstname], hash_results)
+          # if first and last name are defined (for omv:Person)
+          hash_results[value] = "#{hash_results[value]} #{sol[:omvlastname]}" unless sol[:omvlastname].nil?
+        elsif !sol[:omvlastname].nil?
+          # if only the last name is defined
+          hash_results = select_metadata_literal(value, sol[:omvlastname], hash_results)
+        else
+          # if the object is a URI but we are requesting a String
+          hash_results[value] = value.to_s
+        end
+        hash_results
+      end
+
+      def enforce?(attr, type)
+        LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(type)
+      end
+
+      def find_or_create_agent(attr, old_val, logger)
+        agent = LinkedData::Models::Agent.where(agentType: 'person', name: old_val).first
+        begin
+          agent ||= LinkedData::Models::Agent.new(name: old_val, agentType: 'person', creator: @submission.ontology.administeredBy.first).save
+        rescue StandardError => e
+          logger.error("Error while extracting metadata for the attribute #{attr}: Can't create Agent: #{e}")
+          agent = nil
+        end
+        agent
+      end
     end
   end
 end
diff --git a/lib/ontologies_linked_data/mappings/mappings.rb b/lib/ontologies_linked_data/mappings/mappings.rb
index 10abc75e1..be43e9454 100644
--- a/lib/ontologies_linked_data/mappings/mappings.rb
+++ b/lib/ontologies_linked_data/mappings/mappings.rb
@@ -5,21 +5,32 @@ module LinkedData
 module Mappings
 
   OUTSTANDING_LIMIT = 30
 
-  def self.mapping_predicates()
-    predicates = {}
-    predicates["CUI"] = ["http://bioportal.bioontology.org/ontologies/umls/cui"]
-    predicates["SAME_URI"] =
-      ["http://data.bioontology.org/metadata/def/mappingSameURI"]
-    predicates["LOOM"] =
-      ["http://data.bioontology.org/metadata/def/mappingLoom"]
-    predicates["REST"] =
-      ["http://data.bioontology.org/metadata/def/mappingRest"]
-    return predicates
-  end
+  def self.mapping_predicates
+    predicates = {}
+    predicates["CUI"] = ["http://bioportal.bioontology.org/ontologies/umls/cui"]
+    predicates["SAME_URI"] =
+      ["http://data.bioontology.org/metadata/def/mappingSameURI"]
+    predicates["LOOM"] =
+      ["http://data.bioontology.org/metadata/def/mappingLoom"]
+    predicates["REST"] =
+      ["http://data.bioontology.org/metadata/def/mappingRest"]
+    return predicates
+  end
 
-  def self.handle_triple_store_downtime(logger=nil)
-    epr = Goo.sparql_query_client(:main)
-    status = epr.status
+  def self.internal_mapping_predicates
+    predicates = {}
+    predicates["SKOS:EXACT_MATCH"] = ["http://www.w3.org/2004/02/skos/core#exactMatch"]
+    predicates["SKOS:CLOSE_MATCH"] = ["http://www.w3.org/2004/02/skos/core#closeMatch"]
+    predicates["SKOS:BROAD_MATCH"] = ["http://www.w3.org/2004/02/skos/core#broadMatch"]
+    predicates["SKOS:NARROW_MATCH"] = ["http://www.w3.org/2004/02/skos/core#narrowMatch"]
+    predicates["SKOS:RELATED_MATCH"] = ["http://www.w3.org/2004/02/skos/core#relatedMatch"]
+
+    return predicates
+  end
+
+  def self.handle_triple_store_downtime(logger = nil)
+    epr = Goo.sparql_query_client(:main)
+    status = epr.status
 
     if status[:exception]
       logger.info(status[:exception]) if logger
@@ -145,142 +156,59 @@ def self.empty_page(page,size)
     return p
   end
 
-  def self.mappings_ontologies(sub1,sub2,page,size,classId=nil,reload_cache=false)
-    union_template = <<-eos
-{
-  GRAPH <#{sub1.id.to_s}> {
-    classId <predicate> ?o .
-  }
-  GRAPH graph {
-    ?s2 <predicate> ?o .
-  }
-  bind
-}
-eos
-    blocks = []
-    mappings = []
-    persistent_count = 0
-    acr1 = sub1.id.to_s.split("/")[-3]
-
-    if classId.nil?
-      acr2 = nil
-      acr2 = sub2.id.to_s.split("/")[-3] unless sub2.nil?
-      pcount = LinkedData::Models::MappingCount.where(ontologies: acr1)
-      pcount = pcount.and(ontologies: acr2) unless acr2.nil?
-      f = Goo::Filter.new(:pair_count) == (not acr2.nil?)
-      pcount = pcount.filter(f)
-      pcount = pcount.include(:count)
-      pcount_arr = pcount.all
-      persistent_count = pcount_arr.length == 0 ? 
0 : pcount_arr.first.count - - return LinkedData::Mappings.empty_page(page,size) if persistent_count == 0 - end + def self.mappings_ontologies(sub1, sub2, page, size, classId = nil, reload_cache = false) + sub1, acr1 = extract_acronym(sub1) + sub2, acr2 = extract_acronym(sub2) - if classId.nil? - union_template = union_template.gsub("classId", "?s1") - else - union_template = union_template.gsub("classId", "<#{classId.to_s}>") - end - # latest_sub_ids = self.retrieve_latest_submission_ids - - mapping_predicates().each do |_source,mapping_predicate| - union_block = union_template.gsub("predicate", mapping_predicate[0]) - union_block = union_block.gsub("bind","BIND ('#{_source}' AS ?source)") + mappings = [] + persistent_count = 0 - if sub2.nil? - union_block = union_block.gsub("graph","?g") - else - union_block = union_block.gsub("graph","<#{sub2.id.to_s}>") + if classId.nil? + persistent_count = count_mappings(acr1, acr2) + return LinkedData::Mappings.empty_page(page, size) if persistent_count == 0 end - blocks << union_block - end - unions = blocks.join("\nUNION\n") - mappings_in_ontology = <<-eos -SELECT DISTINCT query_variables -WHERE { -unions -filter -} page_group -eos - query = mappings_in_ontology.gsub("unions", unions) - variables = "?s2 graph ?source ?o" - variables = "?s1 " + variables if classId.nil? - query = query.gsub("query_variables", variables) - filter = classId.nil? ? "FILTER ((?s1 != ?s2) || (?source = 'SAME_URI'))" : '' + query = mappings_ont_build_query(classId, page, size, sub1, sub2) + epr = Goo.sparql_query_client(:main) + graphs = [sub1] + unless sub2.nil? + graphs << sub2 + end + solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) + s1 = nil + s1 = RDF::URI.new(classId.to_s) unless classId.nil? + + solutions.each do |sol| + graph2 = sub2.nil? ? sol[:g] : sub2 + s1 = sol[:s1] if classId.nil? + backup_mapping = nil + + if sol[:source].to_s == "REST" + backup_mapping = LinkedData::Models::RestBackupMapping + .find(sol[:o]).include(:process, :class_urns).first + backup_mapping.process.bring_remaining + end - if sub2.nil? - query = query.gsub("graph","?g") - ont_id = sub1.id.to_s.split("/")[0..-3].join("/") + classes = get_mapping_classes_instance(s1, sub1, sol[:s2], graph2) - # latest_sub_filter_arr = latest_sub_ids.map { |_, id| "?g = <#{id}>" } - # filter += "\nFILTER (#{latest_sub_filter_arr.join(' || ')}) " + mapping = if backup_mapping.nil? + LinkedData::Models::Mapping.new(classes, sol[:source].to_s) + else + LinkedData::Models::Mapping.new( + classes, sol[:source].to_s, + backup_mapping.process, backup_mapping.id) + end - #STRSTARTS is used to not count older graphs - #no need since now we delete older graphs - filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}'))" - else - query = query.gsub("graph", "") - end - query = query.gsub("filter", filter) - - if size > 0 - pagination = "OFFSET offset LIMIT limit" - query = query.gsub("page_group",pagination) - limit = size - offset = (page-1) * size - query = query.gsub("limit", "#{limit}").gsub("offset", "#{offset}") - else - query = query.gsub("page_group","") - end - epr = Goo.sparql_query_client(:main) - graphs = [sub1.id] - unless sub2.nil? - graphs << sub2.id - end - solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) - s1 = nil - unless classId.nil? - s1 = RDF::URI.new(classId.to_s) - end - solutions.each do |sol| - graph2 = nil - if sub2.nil? - graph2 = sol[:g] - else - graph2 = sub2.id + mappings << mapping end - if classId.nil? 
-        s1 = sol[:s1]
-      end
-      classes = [ read_only_class(s1.to_s,sub1.id.to_s),
-                  read_only_class(sol[:s2].to_s,graph2.to_s) ]
-      backup_mapping = nil
-      mapping = nil
-      if sol[:source].to_s == "REST"
-        backup_mapping = LinkedData::Models::RestBackupMapping
-                          .find(sol[:o]).include(:process).first
-        backup_mapping.process.bring_remaining
+      if size == 0
+        return mappings
       end
-      if backup_mapping.nil?
-        mapping = LinkedData::Models::Mapping.new(
-          classes,sol[:source].to_s)
-      else
-        mapping = LinkedData::Models::Mapping.new(
-          classes,sol[:source].to_s,
-          backup_mapping.process,backup_mapping.id)
-      end
-      mappings << mapping
-    end
-    if size == 0
-      return mappings
+      page = Goo::Base::Page.new(page, size, persistent_count, mappings)
+      return page
     end
-    page = Goo::Base::Page.new(page,size,nil,mappings)
-    page.aggregate = persistent_count
-    return page
-  end
 
   def self.mappings_ontology(sub,page,size,classId=nil,reload_cache=false)
     return self.mappings_ontologies(sub,nil,page,size,classId=classId,
@@ -383,18 +311,18 @@ def self.get_rest_mapping(mapping_id)
 FILTER(?uuid = <#{LinkedData::Models::Base.replace_url_prefix_to_id(mapping_id)}>)
 FILTER(?s1 != ?s2)
 } LIMIT 1
-eos
-    epr = Goo.sparql_query_client(:main)
-    graphs = [LinkedData::Models::MappingProcess.type_uri]
-    mapping = nil
-    epr.query(qmappings,
-              graphs: graphs).each do |sol|
-      classes = [ read_only_class(sol[:c1].to_s,sol[:s1].to_s),
-                  read_only_class(sol[:c2].to_s,sol[:s2].to_s) ]
-      process = LinkedData::Models::MappingProcess.find(sol[:o]).first
-      mapping = LinkedData::Models::Mapping.new(classes,"REST",
-                                                process,
-                                                sol[:uuid])
+    eos
+    epr = Goo.sparql_query_client(:main)
+    graphs = [LinkedData::Models::MappingProcess.type_uri]
+    mapping = nil
+    epr.query(qmappings,
+              graphs: graphs).each do |sol|
+      classes = [read_only_class(sol[:c1].to_s, sol[:s1].to_s),
+                 read_only_class(sol[:c2].to_s, sol[:s2].to_s)]
+      process = LinkedData::Models::MappingProcess.find(sol[:o]).first
+      mapping = LinkedData::Models::Mapping.new(classes, 'REST',
+                                                process,
+                                                sol[:uuid])
     end
     return mapping
   end
@@ -437,7 +365,7 @@ def self.create_rest_mapping(classes,process)
       graph_insert << [c.id, RDF::URI.new(rest_predicate), backup_mapping.id]
       Goo.sparql_update_client.insert_data(graph_insert, graph: sub.id)
     end
-    mapping = LinkedData::Models::Mapping.new(classes,"REST",process, backup_mapping.id)
+    mapping = LinkedData::Models::Mapping.new(classes,"REST", process, backup_mapping.id)
     return mapping
   end
@@ -773,5 +701,115 @@ def self.create_mapping_count_pairs_for_ontologies(logger, arr_acronyms)
     # fsave.close
   end
 
+  private
+
+  def self.get_mapping_classes_instance(s1, graph1, s2, graph2)
+    [read_only_class(s1.to_s, graph1.to_s),
+     read_only_class(s2.to_s, graph2.to_s)]
+  end
+
+  def self.mappings_ont_build_query(class_id, page, size, sub1, sub2)
+    blocks = []
+    mapping_predicates.each do |_source, mapping_predicate|
+      blocks << mappings_union_template(class_id, sub1, sub2,
+                                        mapping_predicate[0],
+                                        "BIND ('#{_source}' AS ?source)")
+    end
+
+    filter = class_id.nil? ? "FILTER ((?s1 != ?s2) || (?source = 'SAME_URI'))" : ''
+    if sub2.nil?
+      class_id_subject = class_id.nil? ? '?s1' : "<#{class_id.to_s}>"
+      source_graph = sub1.nil? ? '?g' : "<#{sub1.to_s}>"
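+      # SKOS cross-ontology links live in the submission graph itself, so each
+      # internal predicate gets its own UNION block; ?g and ?o are bound
+      # synthetically so these rows line up with the regular mapping blocks.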
+      internal_mapping_predicates.each do |_source, predicate|
+        blocks << <<-eos
+  {
+    GRAPH #{source_graph} {
+      #{class_id_subject} <#{predicate[0]}> ?s2 .
+    }
+    BIND(<#{LinkedData::Models::ExternalClass.graph_uri}> AS ?g)
+    BIND(?s2 AS ?o)
+    BIND ('#{_source}' AS ?source)
+  }
+        eos
+      end
+
+      ont_id = sub1.to_s.split("/")[0..-3].join("/")
+      # STRSTARTS is used to not count older graphs
+      # no need since now we delete older graphs
+      filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}')"
+      filter += " || " + internal_mapping_predicates.keys.map { |x| "(?source = '#{x}')" }.join(' || ')
+      filter += ")"
+    end
+
+    variables = "?s2 #{sub2.nil? ? '?g' : ''} ?source ?o"
+    variables = "?s1 " + variables if class_id.nil?
+
+    pagination = ''
+    if size > 0
+      limit = size
+      offset = (page - 1) * size
+      pagination = "OFFSET #{offset} LIMIT #{limit}"
+    end
+
+    query = <<-eos
+SELECT DISTINCT #{variables}
+WHERE {
+#{blocks.join("\nUNION\n")}
+#{filter}
+} #{pagination}
+    eos
+
+    query
+  end
+
+  def self.mappings_union_template(class_id, sub1, sub2, predicate, bind)
+    class_id_subject = class_id.nil? ? '?s1' : "<#{class_id.to_s}>"
+    target_graph = sub2.nil? ? '?g' : "<#{sub2.to_s}>"
+    union_template = <<-eos
+{
+  GRAPH <#{sub1.to_s}> {
+    #{class_id_subject} <#{predicate}> ?o .
+  }
+  GRAPH #{target_graph} {
+    ?s2 <#{predicate}> ?o .
+  }
+  #{bind}
+}
+    eos
+  end
+
+  def self.count_mappings(acr1, acr2)
+    count = LinkedData::Models::MappingCount.where(ontologies: acr1)
+    count = count.and(ontologies: acr2) unless acr2.nil?
+    f = Goo::Filter.new(:pair_count) == (not acr2.nil?)
+    count = count.filter(f)
+    count = count.include(:count)
+    pcount_arr = count.all
+    pcount_arr.length == 0 ? 0 : pcount_arr.first.count
+  end
+
+  def self.extract_acronym(submission)
+    sub = submission
+    if submission.nil?
+      acr = nil
+    elsif submission.respond_to?(:id)
+      # Case where sub2 is a Submission
+      sub = submission.id
+      acr = sub.to_s.split("/")[-3]
+    else
+      acr = sub.to_s
+    end
+
+    return sub, acr
+  end
+
+  end
 end
-end
+
diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb
index b19a18534..e4bd7df6a 100644
--- a/lib/ontologies_linked_data/models/ontology_submission.rb
+++ b/lib/ontologies_linked_data/models/ontology_submission.rb
@@ -245,6 +245,7 @@ def synchronize(&block)
     def URI=(value)
       self.uri = value
     end
+
     def URI
       self.uri
     end
diff --git a/lib/ontologies_linked_data/parser/owlapi.rb b/lib/ontologies_linked_data/parser/owlapi.rb
index 1a83239d7..ab3016674 100644
--- a/lib/ontologies_linked_data/parser/owlapi.rb
+++ b/lib/ontologies_linked_data/parser/owlapi.rb
@@ -13,7 +13,7 @@ class RDFFileNotGeneratedException < Parser::ParserException
 
   class OWLAPICommand
     def initialize(input_file, output_repo, opts = {})
-      @owlapi_wrapper_jar_path = LinkedData.bindir + "/owlapi-wrapper-1.4.2.jar"
+      @owlapi_wrapper_jar_path = LinkedData.bindir + "/owlapi-wrapper-1.5.0.jar"
       @input_file = input_file
       @output_repo = output_repo
       @master_file = opts[:master_file]
diff --git a/test/models/test_mappings.rb b/test/models/test_mappings.rb
index e53a6b80d..501baa18c 100644
--- a/test/models/test_mappings.rb
+++ b/test/models/test_mappings.rb
@@ -35,6 +35,11 @@ def self.ontologies_parse
                      process_rdf: true, extract_metadata: false)
   end
 
+  def delete_all_rest_mappings
+    LinkedData::Models::RestBackupMapping.all.each do |m|
+      LinkedData::Mappings.delete_rest_mapping(m.id)
+    end
+  end
 
   def test_mapping_count_models
     LinkedData::Models::MappingCount.where.all(&:delete)
diff --git a/test/models/test_ontology.rb b/test/models/test_ontology.rb
index f0b4f3c64..6e11a8ce1 100644
--- a/test/models/test_ontology.rb
+++ b/test/models/test_ontology.rb
@@ -153,7 +153,7 @@ def test_ontology_properties
     ont.bring(:submissions)
     sub = ont.submissions[0]
     props = ont.properties()
-    assert_equal 85, props.length
+    assert_equal 86, props.length
 
     # verify sorting
     assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#AlgorithmPurpose", props[0].id.to_s
@@ -192,7 +192,7 @@ def test_ontology_properties
 
     # test property roots
     pr = ont.property_roots(sub, extra_include=[:hasChildren, :children])
-    assert_equal 64, pr.length
+    assert_equal 65, pr.length
 
     # verify sorting
     assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#AlgorithmPurpose", pr[0].id.to_s
@@ -206,7 +206,7 @@ def test_ontology_properties
     assert_equal 33, dpr.length
     # count annotation properties
     apr = pr.select { |p| p.class == LinkedData::Models::AnnotationProperty }
-    assert_equal 13, apr.length
+    assert_equal 14, apr.length
     # check for non-root properties
     assert_empty pr.select { |p| ["http://www.w3.org/2004/02/skos/core#broaderTransitive",
                                   "http://www.w3.org/2004/02/skos/core#topConceptOf",
diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb
index 79ae72fe4..2f01db990 100644
--- a/test/models/test_ontology_submission.rb
+++ b/test/models/test_ontology_submission.rb
@@ -289,13 +289,13 @@ def test_submission_parse
   unless ENV["BP_SKIP_HEAVY_TESTS"] == "1"
     submission_parse("MCCLTEST", "MCCLS TEST",
                      "./test/data/ontology_files/CellLine_OWL_BioPortal_v1.0.owl", 11,
-                     process_rdf: true, extract_metadata: false)
+                     process_rdf: true, extract_metadata: true)
 
     sub = LinkedData::Models::OntologySubmission.where(ontology: [acronym: "MCCLTEST"],
                                                        submissionId: 11)
                                                 .include(:version)
                                                 .first
-    assert sub.version == "3.0"
+    assert_equal "3.0", sub.version
   end
 
   #This one has resources with accents.
@@ -448,7 +448,7 @@ def test_index_properties
                      "./test/data/ontology_files/BRO_v3.5.owl", 1,
                      process_rdf: true, extract_metadata: false, index_properties: true)
     res = LinkedData::Models::Class.search("*:*", {:fq => "submissionAcronym:\"BRO\"", :start => 0, :rows => 80}, :property)
-    assert_equal 83 , res["response"]["numFound"]
+    assert_equal 84, res["response"]["numFound"]
 
     found = 0
     res["response"]["docs"].each do |doc|
@@ -1257,4 +1257,41 @@ def test_copy_file_repository_from_tempfile
       tmp.unlink
     end
   end
+
+  # To test extraction of metadata when parsing a submission (we extract the submission attributes that have
+  # extractedMetadata set to true)
+  def test_submission_extract_metadata
+    2.times do |i|
+      submission_parse("AGROOE", "AGROOE Test extract metadata ontology",
+                       "./test/data/ontology_files/agrooeMappings-05-05-2016.owl", i + 1,
+                       process_rdf: true, extract_metadata: true, generate_missing_labels: false)
+      ont = LinkedData::Models::Ontology.find("AGROOE").first
+      sub = ont.latest_submission
+      refute_nil sub
+
+      sub.bring_remaining
+      assert_equal false, sub.deprecated
+      assert_equal '2015-09-28', sub.creationDate.to_date.to_s
+      assert_equal '2015-10-01', sub.modificationDate.to_date.to_s
+      assert_equal "description example, AGROOE is an ontology used to test the metadata extraction, AGROOE is an ontology to illustrate how to describe their ontologies", sub.description
+      assert_equal [RDF::URI.new('http://agroportal.lirmm.fr')], sub.identifier
+      assert_equal ["http://lexvo.org/id/iso639-3/fra", "http://lexvo.org/id/iso639-3/eng"].sort, sub.naturalLanguage.sort
+      assert_equal [RDF::URI.new("http://lirmm.fr/2015/ontology/door-relation.owl"),
+                    RDF::URI.new("http://lirmm.fr/2015/ontology/dc-relation.owl"),
+                    RDF::URI.new("http://lirmm.fr/2015/ontology/dcterms-relation.owl"),
+                    RDF::URI.new("http://lirmm.fr/2015/ontology/voaf-relation.owl"),
+                    RDF::URI.new("http://lirmm.fr/2015/ontology/void-import.owl")
+                   ].sort, sub.ontologyRelatedTo.sort
+
+      # assert_equal ["Agence 007", "Éditions \"La Science en Marche\"", " LIRMM (default name) "].sort, sub.publisher.map { |x| x.bring_remaining.name }.sort
+      # assert_equal ["Alfred DC", "Clement Jonquet", "Gaston Dcterms", "Huguette Doap", "Mirabelle Prov", "Paul Foaf", "Vincent Emonet"].sort, sub.hasCreator.map { |x| x.bring_remaining.name }.sort
+      # assert_equal ["Léontine Dessaiterm", "Anne Toulet", "Benjamine Dessay", "Augustine Doap", "Vincent Emonet"].sort, sub.hasContributor.map { |x| x.bring_remaining.name }.sort
+      # assert_equal 1, LinkedData::Models::Agent.where(name: "Vincent Emonet").count
+
+      sub.description = "test changed value"
+      sub.save
+    end
+  end
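+
+  # Illustrative sketch (hypothetical usage, not asserted by this changeset):
+  # the new extractor can also be driven directly, assuming `sub` is a parsed
+  # submission and user-set attributes are passed so they are preserved:
+  #
+  #   logger = Logger.new(STDOUT)
+  #   sub.extract_metadata(logger, true, { description: 'set by the user' })
+  #
 end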