Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions app/services/datacore/doi_metadata_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# frozen_string_literal: true

module Datacore
  # Builds the metadata Hashes sent to DataCite for a single work.
  #
  # uses datacite metadata schema 4.5
  # https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/
  class DoiMetadataService
    PUBLISHER = "Indiana University".freeze
    RESOURCE_TYPE = "Dataset".freeze
    # Keys returned by #basic_metadata, in order.
    BASIC_KEYS = %i[creators titles publisher publicationYear types url].freeze
    # Keys that #expanded_metadata may return, in order.
    ADDITIONAL_KEYS = %i[descriptions subjects contributors dates geoLocations fundingReferences language
                         alternateIdentifiers rightsList].freeze
    # Keys assigned, in order, to the four numbers parsed out of work.geo_location_box.
    GEO_BOX_KEYS = %i[southBoundLatitude westBoundLongitude northBoundLatitude eastBoundLongitude].freeze
    # Placeholder entry in work.fundedby that is dropped from the funding references.
    OTHER_FUNDING_PLACEHOLDER = 'Other Funding Agency'.freeze
    # strftime pattern used for the Submitted / Updated dates.
    DATE_FORMAT = '%Y-%m-%d'.freeze

    attr_reader :work

    # @param work DataSet
    def initialize(work:)
      @work = work
    end

    # @return Hash all available work metadata (memoized until #reload)
    def metadata
      @metadata ||= full_metadata
    end

    # @return nil
    # reloads work and clears the memoized metadata
    def reload
      work.reload
      @metadata = nil
    end

    # @return [Hash] work minimal required metadata for remote record creation or update
    def basic_metadata
      {
        creators: Array(work.creator).map { |name| { name: name } },
        titles: Array(work.title).map { |title| { title: title } },
        publisher: PUBLISHER,
        publicationYear: Date.today.year.to_s,
        types: { resourceTypeGeneral: RESOURCE_TYPE },
        url: Rails.application.routes.url_helpers.hyrax_data_set_url(id: work.id)
      }
    end

    # @param include_empty [Boolean] when true, keys with blank values are kept
    # @return [Hash] expanded work metadata
    def expanded_metadata(include_empty: false)
      expanded = {
        descriptions: descriptions,
        subjects: Array(work.subject).map { |subject| { subject: subject } },
        contributors: Array(work.contributor).map { |name| { contributorType: 'Researcher', name: name } },
        dates: dates,
        geoLocations: geo_location,
        fundingReferences: funding,
        language: language,
        # NOTE(review): emitted as a single Hash; confirm against the DataCite schema
        # whether a list of identifiers is expected here.
        alternateIdentifiers: { alternateIdentifierType: 'DataCORE internal ID', alternateIdentifier: work.id },
        rightsList: rights_license
        # below held in abeyance pending determination of relationType [IULRDC-174]
        # relatedIdentifiers: work.related_url.map { |url| { relatedIdentifierType: 'URL', relationType: 'IsDescribedBy', relatedIdentifier: url } }
      }
      return expanded if include_empty

      expanded.select { |_key, value| value.present? }
    end

    # @return [Hash] all work metadata: basic metadata merged with the populated expanded metadata
    def full_metadata
      basic_metadata.merge(expanded_metadata)
    end

    # @return Array one entry per date field that is present on the work
    def dates
      entries = []
      entries << { dateType: 'Submitted', date: work.date_uploaded.strftime(DATE_FORMAT) } if work.date_uploaded.present?
      entries << { dateType: 'Updated', date: work.date_modified.strftime(DATE_FORMAT) } if work.date_modified.present?
      entries << { dateType: 'Collected', date: work.date_coverage } if work.date_coverage.present?
      entries
    end

    # @return Array description, abstract and methodology entries, each tagged with #language
    def descriptions
      lang = language
      entries = Array(work.description).map { |text| { lang: lang, description: text, descriptionType: 'Other' } }
      entries += Array(work.description_abstract).map { |text| { lang: lang, description: text, descriptionType: 'Abstract' } }
      entries << { lang: lang, description: work.methodology, descriptionType: 'Methods' } if work.methodology.present?
      entries
    end

    # @return Array funder entries from fundedby and fundedby_other, minus the placeholder value
    def funding
      funders = Array(work.fundedby) + Array(work.fundedby_other)
      funders.reject { |funder| funder == OTHER_FUNDING_PLACEHOLDER }.map { |funder| { funderName: funder } }
    end

    # @return Array place and/or bounding box entries; empty when neither is available
    def geo_location
      box = geo_location_box # parsed once and reused below
      locations = []
      locations << { geoLocationPlace: work.geo_location_place } if work.geo_location_place.present?
      locations << { geoLocationBox: box } if box.present?
      locations
    end

    # @return [Hash, nil] the four numeric tokens of work.geo_location_box keyed by GEO_BOX_KEYS,
    #   or nil unless exactly four numeric tokens are found
    def geo_location_box
      raw = work.geo_location_box
      return nil unless raw.present?

      # every character other than digits, '.', '-' and space acts as a separator
      points = raw.gsub(/[^- .0-9]/, ' ').split.select { |point| Float(point, exception: false) }
      return nil unless points.size == 4

      GEO_BOX_KEYS.zip(points).to_h
    end

    # @return [String] first language of the work, defaulting to 'en'
    def language
      work.language&.first || 'en'
    end

    # @return Array rights entries; a work with neither rights_license nor rights_license_other
    #   yields an empty Array, so no entry with nil values is emitted
    def rights_license
      rights = []
      if work.rights_license.present?
        rights << { rightsUri: work.rights_license, rights: rights_license_label(work.rights_license) }
      end
      rights << { rights: work.rights_license_other } if work.rights_license_other.present?
      rights
    end

    private

    # Looks up the display label of a license value among the active license options.
    # @return [String, nil] nil when the value is not among the active options
    def rights_license_label(uri)
      Hyrax::RightsLicenseService.new.select_active_options.map(&:reverse).to_h[uri]
    end
  end
end
19 changes: 3 additions & 16 deletions app/services/datacore/doi_minting_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@ module Datacore

class DoiMintingService

PUBLISHER = "Indiana University".freeze
RESOURCE_TYPE = "Dataset".freeze

attr_reader :current_user, :work, :metadata, :prefix, :doi
attr_reader :current_user, :work, :prefix, :doi, :metadata_service
delegate :basic_metadata, :expanded_metadata, :full_metadata, :metadata, to: :metadata_service

# @return Boolean
def self.enabled?
Expand All @@ -25,7 +23,7 @@ def self.mint_doi_for(work:, current_user:)
def initialize(work:, current_user:)
@work = work
@current_user = current_user
@metadata = local_metadata
@metadata_service = Datacore::DoiMetadataService.new(work: work)
@prefix = Settings.datacite.prefix&.to_s # cast inadvertent Float to String
@doi = work.doi
end
Expand Down Expand Up @@ -76,17 +74,6 @@ def update_work_with_doi!(doi, update_provenance: false)
@doi
end

# @return [Hash] work minimal metadata for remote record creation or update
def local_metadata
{ creators: work.creator.map { |c| { name: c } },
titles: work.title.map { |t| { title: t } },
publisher: PUBLISHER,
publicationYear: Date.today.year.to_s,
types: { resourceTypeGeneral: RESOURCE_TYPE },
url: Rails.application.routes.url_helpers.hyrax_data_set_url(id: work.id)
}
end

# @return [DataCite::Client] client instance for Datacite interactions
def client(host: Settings.datacite.host, username: Settings.datacite.username, password: Settings.datacite.password)
@client ||= Datacite::Client.new(host: host, username: username, password: password)
Expand Down
245 changes: 245 additions & 0 deletions spec/services/datacore/doi_metadata_service_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
# frozen_string_literal: true

require 'rails_helper'

describe Datacore::DoiMetadataService do
# Attribute values handed to the factory; examples compare generated metadata against them.
let(:subjects) { ['subject1', 'subject2'] }
let(:contributors) { ['contributor1', 'contributor2'] }
# Persisted DataSet under test, built with the values above.
let(:work) { FactoryBot.create(:data_set, subject: subjects, contributor: contributors) }
# Service instance wrapping the work.
let(:service) { described_class.new(work: work) }

describe "#metadata" do
  it "returns #full_metadata" do
    stubbed = { titles: [{ title: 'stubbed title' }] }
    expect(service).to receive(:full_metadata).once.and_return(stubbed)
    # assert on the return value; a bare expect(...) without a matcher checks nothing
    expect(service.metadata).to eq stubbed
  end

  it "caches @metadata" do
    expect(service.instance_variable_get(:@metadata)).to be_nil
    first_result = service.metadata
    expect(first_result).not_to be_nil
    expect(service.instance_variable_get(:@metadata)).to equal first_result
    # a second call must be served from the cached value, not rebuilt
    expect(service).not_to receive(:full_metadata)
    expect(service.metadata).to equal first_result
  end
end

describe "#reload" do
  # The work's own reload is stubbed here; only the delegation is verified.
  it "reloads the work" do
    allow(work).to receive(:reload)
    service.reload
    expect(work).to have_received(:reload).once
  end

  # Populates the cache first so that clearing it is observable.
  it "clears out @metadata" do
    service.metadata
    cached_before = service.instance_variable_get(:@metadata)
    expect(cached_before).not_to be_nil

    service.reload
    cached_after = service.instance_variable_get(:@metadata)
    expect(cached_after).to be_nil
  end
end

describe "#basic_metadata" do
  # Checks the key set and the shape/value of each basic entry.
  it "returns a schema-compliant Hash" do
    result = service.basic_metadata

    expect(result).to be_a(Hash)
    expect(result.keys).to eq(described_class::BASIC_KEYS)
    expect(result[:creators].first.keys).to eq([:name])
    expect(result[:titles].first.keys).to eq([:title])
    expect(result[:publisher]).to eq(described_class::PUBLISHER)
    expect(result[:publicationYear]).to eq(Date.today.year.to_s)
    expect(result[:types]).to eq({ resourceTypeGeneral: described_class::RESOURCE_TYPE })
    expect(result[:url]).to match(/http.*#{work.id}/)
  end
end

describe "#expanded_metadata" do
  context "with implicit include_empty: false" do
    # Only populated keys are returned, and they carry the factory-supplied values.
    it "returns a populated Hash" do
      result = service.expanded_metadata
      subject_values = result[:subjects].flat_map(&:values).sort
      contributor_names = result[:contributors].map { |entry| entry[:name] }.sort
      alternate_id = result[:alternateIdentifiers]

      expect(result).to be_a(Hash)
      expect(result.values).to be_all(&:present?)
      expect(described_class::ADDITIONAL_KEYS).to include(*result.keys)
      expect(subject_values).to eq(subjects)
      expect(contributor_names).to eq(contributors)
      expect(alternate_id[:alternateIdentifierType]).to eq('DataCORE internal ID')
      expect(alternate_id[:alternateIdentifier]).to eq(work.id)
    end
  end

  context "with explicit include_empty: true" do
    # Keeping empty values must yield strictly more keys than the default call.
    it "returns a Hash including empty values" do
      populated_only = service.expanded_metadata
      everything = service.expanded_metadata(include_empty: true)

      expect(everything.size).to be > populated_only.size
      expect(everything.values.any?(&:empty?)).to be(true)
    end
  end
end

describe "#full_metadata" do
  # The full metadata is the basic Hash with the expanded Hash merged over it.
  it "returns basic and expanded metadata, merged" do
    merged = service.basic_metadata.merge(service.expanded_metadata)
    expect(service.full_metadata).to eq(merged)
  end
end

describe "#dates" do
  # dateType of every generated entry; evaluated lazily, after each context's before hook.
  let(:date_types) { service.dates.map { |entry| entry[:dateType] } }

  context "with a date_uploaded" do
    before { work.date_uploaded = DateTime.now }

    it "includes a Submitted entry" do
      expect(work.date_uploaded).to be_present
      expect(date_types).to include('Submitted')
    end
  end

  context "with a date_modified" do
    before { work.date_modified = DateTime.now }

    it "includes an Updated entry" do
      expect(work.date_modified).to be_present
      expect(date_types).to include('Updated')
    end
  end

  context "with date_coverage" do
    before { work.date_coverage = 'date coverage' }

    it "includes a Collected entry" do
      expect(date_types).to include('Collected')
    end
  end

  context "without date values" do
    it "returns an empty Array" do
      expect(work.date_uploaded).to be_nil
      expect(work.date_modified).to be_nil
      expect(work.date_coverage).to be_nil
      expect(service.dates).to be_empty
    end
  end
end

describe "#descriptions" do
  context "with work.description" do
    before { work.description = ['desc1', 'desc2'] }

    it "includes Other entries" do
      expect(work.description).to be_present
      expect(service.descriptions.map { |d| d[:descriptionType] }).to include 'Other'
    end
  end

  # Labels now name the Abstract case they exercise.
  context "with abstract description(s)" do
    before { work.description_abstract = ['abstract1', 'abstract2'] }

    it "includes Abstract entries" do
      expect(work.description_abstract).to be_present
      expect(service.descriptions.map { |d| d[:descriptionType] }).to include 'Abstract'
    end
  end

  context "with methodology" do
    before { work.methodology = 'methodology1' }

    it "includes a Methods entry" do
      expect(service.descriptions.map { |d| d[:descriptionType] }).to include 'Methods'
    end
  end

  context "without description values" do
    before { work.description = [] }
    before { work.methodology = nil }

    it "returns an empty Array" do
      expect(work.description).to be_empty
      expect(work.description_abstract).to be_empty
      expect(work.methodology).to be_nil
      expect(service.descriptions).to be_empty
    end
  end
end

describe "#funding" do
  # Funder names from the generated entries, sorted for order-independent comparison.
  let(:funder_names) { service.funding.flat_map(&:values).sort }

  context "with work.fundedby" do
    before { work.fundedby = ['fund1', 'fund2'] }

    it "returns funding sources" do
      expect(funder_names).to eq(work.fundedby.sort)
    end
  end

  context "with work.fundedby and work.fundedby_other" do
    before do
      work.fundedby = ['Other Funding Agency']
      work.fundedby_other = ['other1', 'other2']
    end

    it "returns funding sources, removing Other Funding Agency" do
      expect(funder_names).to eq(work.fundedby_other.sort)
    end
  end

  context "without fundedby.* fields" do
    it "returns an empty Array" do
      expect(work.fundedby).to be_empty
      expect(work.fundedby_other).to be_empty
      expect(service.funding).to be_empty
    end
  end
end

describe "#geo_location" do
  # Every key appearing across the generated geo location entries.
  let(:location_keys) { service.geo_location.flat_map(&:keys) }

  context "with geo_location_place" do
    before { work.geo_location_place = 'a place' }

    it "returns a geoLocationPlace" do
      expect(location_keys).to include(:geoLocationPlace)
    end
  end

  context "with geo_location_box" do
    before { work.geo_location_box = '1, 2, 3, 4' }

    it "returns a geoLocationBox" do
      expect(location_keys).to include(:geoLocationBox)
    end
  end

  context "without geo_location values" do
    it "returns an empty Array" do
      expect(work.geo_location_place).to be_nil
      expect(work.geo_location_box).to be_nil
      expect(service.geo_location).to be_empty
    end
  end
end

describe "#geo_location_box" do
  context "without work.geo_location_box present" do
    before { work.geo_location_box = nil }

    it "returns nil" do
      expect(service.geo_location_box).to be_nil
    end
  end

  # Three numbers are not enough to form a box.
  context "with invalid work.geo_location_box present" do
    before { work.geo_location_box = '1 2 3' }

    it "returns nil" do
      expect(service.geo_location_box).to be_nil
    end
  end

  context "with valid work.geo_location_box present" do
    let(:values) { %w[1 2.1 -3 4] }
    let(:expected_keys) { [:southBoundLatitude, :westBoundLongitude, :northBoundLatitude, :eastBoundLongitude] }

    before { work.geo_location_box = values.join(', ') }

    # The parsed values are returned as the original string tokens, in input order.
    it "returns a formatted Hash" do
      box = service.geo_location_box

      expect(box).to be_a(Hash)
      expect(box.keys).to eq(expected_keys)
      expect(box.values).to eq(values)
    end
  end
end

describe "#language" do
  # Resolved lazily, after each context's before hook has set work.language.
  let(:resolved_language) { service.language }

  context "with a work language" do
    before { work.language = ['lang1', 'lang2'] }

    it "returns the work language" do
      expect(resolved_language).to eq('lang1')
    end
  end

  context "without a work language" do
    before { work.language = nil }

    it "returns default value: en" do
      expect(resolved_language).to eq('en')
    end
  end
end

describe "#rights_license" do
  # Relies on the factory-built work carrying a rights_license value.
  it "returns a populated array" do
    expect(service.rights_license).to be_an Array
    expect(service.rights_license).not_to be_empty
  end

  context "without a rights_license_other" do
    it "returns a single value" do
      expect(work.rights_license_other).to be_nil
      expect(service.rights_license.size).to eq 1
    end
  end

  context "with a rights_license_other" do
    before { work.rights_license_other = 'other license' }

    it "returns a second value" do
      expect(service.rights_license.size).to eq 2
    end
  end
end
end
Loading