Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/termium.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
require "glossarist"

require "lutaml/model"
require "lutaml/model/xml_adapter/nokogiri_adapter"
require "lutaml/model/xml/nokogiri_adapter"

Lutaml::Model::Config.configure do |config|
config.xml_adapter = Lutaml::Model::XmlAdapter::NokogiriAdapter
config.xml_adapter = Lutaml::Model::Xml::NokogiriAdapter
end

module Termium
Expand Down
12 changes: 12 additions & 0 deletions lib/termium/abbreviation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,17 @@ def to_h

set
end

# Builds the Glossarist abbreviation designation for this entry.
#
# `designation` and `normative_status` are always supplied; `gender` and
# `part_of_speech` are only passed along when they hold a truthy value.
def to_designation
  status = deprecated ? "deprecated" : "preferred"
  optional = { gender: gender, part_of_speech: part_of_speech }
  present = optional.select { |_key, val| val }

  Glossarist::Designation::Abbreviation.new(
    { designation: value, normative_status: status }.merge(present)
  )
end
end
end
19 changes: 11 additions & 8 deletions lib/termium/core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,13 @@ def uuid(str = identification_number)
# details="Compartment - ISO/IEC JTC 1 Information Technology Vocabulary" />
def to_concept(options = {})
Glossarist::ManagedConcept.new.tap do |concept|
# The way to set the universal concept's identifier: data.identifier
concept.id = identification_number
# V2: Create new data object to ensure it's serialized (not marked as default)
concept.data = Glossarist::ManagedConceptData.new(
id: identification_number,
sources: concept_sources
)

concept.uuid = uuid
concept.id = uuid

# Assume no related concepts
concept.related = []
Expand All @@ -60,19 +63,19 @@ def to_concept(options = {})
concept.date_accepted = options[:date_accepted]
end

language_module.map do |lang_mod|
language_module.each do |lang_mod|
localized_concept = lang_mod.to_concept(options)

# TODO: This is needed to skip the empty french entries of 10031781 and 10031778
next if localized_concept.nil?

localized_concept.id = identification_number
localized_concept.uuid = uuid("#{identification_number}-#{lang_mod.language}")
localized_concept.data.id = identification_number
localized_concept.id = uuid("#{identification_number}-#{lang_mod.language}")

universal_entry.each do |entry|
localized_concept.notes << Glossarist::DetailedDefinition.new(content: entry.value)
localized_concept.data.notes << Glossarist::DetailedDefinition.new(content: entry.value)
end
localized_concept.sources = concept_sources
localized_concept.data.sources = concept_sources
concept.add_localization(localized_concept)
end
end
Expand Down
14 changes: 14 additions & 0 deletions lib/termium/entry_term.rb
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,19 @@ def to_h

set
end

# Builds the Glossarist expression designation for this entry term.
#
# `designation` and `normative_status` are always supplied; the remaining
# attributes are only passed along when they hold a truthy value.
def to_designation
  optional = {
    geographical_area: geographical_area,
    plurality: plurality,
    gender: gender,
    part_of_speech: part_of_speech,
  }
  present = optional.select { |_name, val| val }
  required = { designation: value, normative_status: normative_status }

  Glossarist::Designation::Expression.new(required.merge(present))
end
end
end
5 changes: 3 additions & 2 deletions lib/termium/extract.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ class Extract < Lutaml::Model::Serializable

def to_concept(options = {})
coll = Glossarist::ManagedConceptCollection.new
coll.managed_concepts = core.map do |managed_concept|
managed_concept.to_concept(options)
core.each do |managed_concept|
concept = managed_concept.to_concept(options)
coll.store(concept)
end
coll
end
Expand Down
21 changes: 13 additions & 8 deletions lib/termium/language_module.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def abbreviations

def designations
# NOTE: entry_term is a collection
entry_term + abbreviations
(entry_term + abbreviations).compact
end

def to_h
Expand All @@ -69,17 +69,22 @@ def to_h
end

def to_concept(options = {})
x = to_h
return nil unless x
return nil unless definition

Glossarist::LocalizedConcept.new.tap do |concept|
concept.data = Glossarist::ConceptData.new(
language_code: LANGUAGE_CODE_MAPPING[language.downcase],
terms: designations.map(&:to_designation),
definition: [Glossarist::DetailedDefinition.new(content: definition)],
notes: notes.map { |n| Glossarist::DetailedDefinition.new(content: n) },
examples: examples.map { |e| Glossarist::DetailedDefinition.new(content: e) },
entry_status: "valid",
domain: domain
)

Glossarist::LocalizedConcept.new(x).tap do |concept|
# Fill in register parameters
if options[:date_accepted]
puts options[:date_accepted].inspect
concept.date_accepted = options[:date_accepted]
end

puts concept.inspect
end
end
end
Expand Down
10 changes: 5 additions & 5 deletions lib/termium/source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ def content
end

def to_concept_source
Glossarist::ConceptSource.new({
"type" => "lineage",
"ref" => content,
"status" => "identical",
})
Glossarist::ConceptSource.new(
type: "lineage",
status: "identical",
origin: Glossarist::Citation.new(ref: content)
)
end
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
data:
identifier: '2123225'
localized_concepts:
eng: 33f0da1d-b8af-511c-8a7a-2f777419fa95
fre: 848965b7-2090-5f93-af30-8cba405c248c
sources:
- origin:
ref: ISO/IEC 2382-16:1996
status: identical
type: lineage
- origin:
ref: Ranger, Natalie * 2006 * Bureau de la traduction
status: identical
type: lineage
id: 4d2d4ac4-af47-545c-9d41-19285d785fc5
status: valid
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
data:
definition:
- content: quotient of the character mean entropy by the mean duration of a character
examples: []
id: '2123225'
notes:
- content: The average information rate may be expressed in shannons per second.
- content: 16.04.07 (2382)
sources:
- origin:
ref: ISO/IEC 2382-16:1996
status: identical
type: lineage
- origin:
ref: Ranger, Natalie * 2006 * Bureau de la traduction
status: identical
type: lineage
terms:
- type: expression
normative_status: preferred
designation: average information rate
language_code: eng
entry_status: valid
id: 33f0da1d-b8af-511c-8a7a-2f777419fa95
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
data:
definition:
- content: quotient de l'entropie moyenne par caractère par la durée moyenne d'un
caractère
examples: []
id: '2123225'
notes:
- content: Le débit moyen d'entropie peut s'exprimer en shannons par seconde.
- content: 16.04.07 (2382)
sources:
- origin:
ref: ISO/IEC 2382-16:1996
status: identical
type: lineage
- origin:
ref: Ranger, Natalie * 2006 * Bureau de la traduction
status: identical
type: lineage
terms:
- type: expression
normative_status: preferred
designation: débit moyen d'entropie
language_code: fre
entry_status: valid
id: 848965b7-2090-5f93-af30-8cba405c248c
45 changes: 45 additions & 0 deletions spec/fixtures/single_entry.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
<ns2:termium_extract xmlns:ns2="http://termium.tpsgc-pwgsc.gc.ca/schemas/2012/06/Termium" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" language="EN" xsi:schemaLocation="http://termium.tpsgc-pwgsc.gc.ca/schemas/2012/06/Termium http://termium.tpsgc-pwgsc.gc.ca/schemas/2012/06/Termium.xsd">
<extractLanguage language="EN" order="0"/>
<extractLanguage language="FR" order="1"/>
<core identificationNumber="2123225" disseminationLevel="P">
<languageModule language="EN">
<entryTerm order="1" value="average information rate">
<sourceRef order="1"/>
<parameter abbreviation="COR"/>
<parameter abbreviation="NORM"/>
</entryTerm>
<textualSupport order="1" type="DEF">
<value>quotient of the character mean entropy by the mean duration of a character</value>
<sourceRef order="1"/>
</textualSupport>
<textualSupport order="2" type="OBS">
<value>The average information rate may be expressed in shannons per second.</value>
<sourceRef order="1"/>
</textualSupport>
</languageModule>
<languageModule language="FR">
<entryTerm order="1" value="débit moyen d'entropie">
<sourceRef order="1"/>
<parameter abbreviation="COR"/>
<parameter abbreviation="M"/>
<parameter abbreviation="NORM"/>
</entryTerm>
<textualSupport order="1" type="DEF">
<value>quotient de l'entropie moyenne par caractère par la durée moyenne d'un caractère</value>
<sourceRef order="1"/>
</textualSupport>
<textualSupport order="2" type="OBS">
<value>Le débit moyen d'entropie peut s'exprimer en shannons par seconde.</value>
<sourceRef order="1"/>
</textualSupport>
</languageModule>
<universalEntry order="1">
<value>16.04.07 (2382)</value>
<sourceRef order="1"/>
<parameter abbreviation="CONUM"/>
</universalEntry>
<source order="1" details="ISO-IEC-2382-16 * 1996 * * * "/>
<source order="2" details="Ranger, Natalie * 2006 * Bureau de la traduction"/>
</core>
</ns2:termium_extract>
Loading
Loading