diff --git a/linkml_model/model/schema/array.yaml b/linkml_model/model/schema/array.yaml index 1d5f67fd..9cdd62c6 100644 --- a/linkml_model/model/schema/array.yaml +++ b/linkml_model/model/schema/array.yaml @@ -2,8 +2,7 @@ id: https://w3id.org/linkml/lib/arrays name: arrays title: LinkML Arrays description: >- - LinkML templates for storing one-dimensional series, two-dimensional arrays, - and arrays of higher dimensionality. + LinkML templates for storing arrays. Status: Experimental @@ -19,6 +18,9 @@ status: testing # - github:mavaylon1 # - github:ialarmedalien # - github:cmungall +# - github:sneakers-the-rat +# - github:bendichter +# - github:melonora prefixes: linkml: https://w3id.org/linkml/ @@ -39,141 +41,29 @@ classes: DataStructure: abstract: true - NDArray: + Array: description: >- - a data structure consisting of a collection of *elements*, each identified by at least one array index tuple. - abstract: true + A data structure where an N-dimensional array is represented as a class rather than an attribute. There + must be exactly one attribute that is an array. There may be other attributes associated with the array + but they must not be arrays themselves. is_a: DataStructure - slots: - - dimensions - - elements - - array_linearization_order - slot_usage: - elements: - description: >- - the collection of values that make up the array. The elements have a *direct* representation which is - an ordered sequence of values. The elements also have an *array interpretation*, where each - element has a unique index which is determined by array_linearization_order DataArray: description: >- - a data structure containing an NDArray and a set of one-dimensional series that are used to label - the elements of the array + A data structure containing an Array and a set of Arrays that are used to label the elements of the Array. + The set of Arrays are also known as coordinates. 
is_a: DataStructure - slots: - - axis - - array see_also: - https://docs.xarray.dev/en/stable/generated/xarray.DataArray.html - GroupingByArrayOrder: - mixin: true - description: >- - A mixin that describes an array whose elements are mapped from a linear sequence to an array index - via a specified mapping - - ColumnOrderedArray: - mixin: true - is_a: GroupingByArrayOrder - description: >- - An array ordering that is column-order - slots: - - array_linearization_order - slot_usage: - array_linearization_order: - equals_string: COLUMN_MAJOR_ARRAY_ORDER - - RowOrderedArray: - mixin: true - is_a: GroupingByArrayOrder - description: >- - An array ordering that is row-order or generalizations thereof - slots: - - array_linearization_order - slot_usage: - array_linearization_order: - equals_string: ROW_MAJOR_ARRAY_ORDER - -slots: - dimensions: - description: >- - The number of elements in the tuple used to access elements of an array - aliases: - - rank - - dimensionality - - number of axes - - number of elements - range: integer - axis: - range: NDArray - slot_usage: - dimensions: - equals_number: 1 - aliases: - - dimension - description: >- - A one-dimensional series that contains elements that form one part of a tuple used to access an array - required: true - axis_index: - range: integer - description: >- - The position of an axis in a tuple used to access an array - array: - range: NDArray - description: >- - An array that is labeled by a set of one-dimensional series - required: true - elements: - # this will be serialized as one big long list that should be interpreted as a 2D array - range: Any - aliases: - - values - required: true - multivalued: true - description: >- - A collection of values that make up the contents of an array. 
These elements may be interpreted - as a contiguous linear sequence (direct representation) or as elements to be accessed via an - array index - series_label: # the row label - key: true - description: >- - A name that uniquely identifiers a series - length: - description: >- - The number of elements in the array - range: integer - equals_expression: "length(elements)" - array_linearization_order: - range: ArrayLinearizationOrderOptions - ifabsent: "string(ROW_MAJOR_ARRAY_ORDER)" - - specified_input: - range: DataStructure - multivalued: true - specified_output: - range: DataStructure - multivalued: true - operation_parameters: - range: Any - multivalued: true - -enums: - ArrayLinearizationOrderOptions: + Dataset: description: >- - Determines how a linear contiguous representation of the elements of an array map - to array indices - permissible_values: - COLUMN_MAJOR_ARRAY_ORDER: - meaning: gom:columnMajorArray - description: >- - An array layout option in which the elements in each column is stored in consecutive positions, - or any generalization thereof to dimensionality greater than 2 - aliases: - - F order - ROW_MAJOR_ARRAY_ORDER: - meaning: gom:rowMajorArray - description: >- - An array layout option in which the elements in each row is stored in consecutive positions, - or any generalization thereof to dimensionality greater than 2 - aliases: - - C order + A data structure containing one or more main Arrays with aligned dimensions and a set of Arrays that are used to + label the elements of the Arrays. The set of Arrays are also known as coordinates. A Dataset with only one + main Array is equivalent to a DataArray. If there are multiple main Arrays, then all dimensions must refer to + points in the same shared coordinate system, i.e., if two Arrays have the same dimension "x", that dimension + must be identical in both Arrays. 
+ is_a: DataStructure + see_also: + - https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html + - https://docs.unidata.ucar.edu/netcdf-c/current/netcdf_data_model.html diff --git a/tests/input/examples/schema_definition-native-array-1.yaml b/tests/input/examples/schema_definition-native-array-1.yaml index 7b084924..05a0655e 100644 --- a/tests/input/examples/schema_definition-native-array-1.yaml +++ b/tests/input/examples/schema_definition-native-array-1.yaml @@ -3,6 +3,7 @@ name: arrays-temperature-example title: Array Temperature Example description: |- Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes + using array slots for the axes and data instead of classes containing arrays license: MIT prefixes: @@ -22,11 +23,11 @@ classes: annotations: array_data_mapping: data: temperatures_in_K - dims: [x, y, t] + dims: ["x", "y", "t"] # YAML 1.1 treats unquoted y as True coords: - latitude_in_deg: x - longitude_in_deg: y - time_in_d: t + latitude_in_deg: "x" + longitude_in_deg: "y" + time_in_d: "t" attributes: name: identifier: true @@ -65,4 +66,3 @@ classes: ucum_code: K array: exact_number_dimensions: 3 - diff --git a/tests/input/examples/schema_definition-native-array-2.yaml b/tests/input/examples/schema_definition-native-array-2.yaml new file mode 100644 index 00000000..fc4e50aa --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-2.yaml @@ -0,0 +1,104 @@ +id: https://example.org/arrays +name: arrays-temperature-example-2 +title: Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes + using classes containing arrays for the axes and data instead of using array slots +license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + 
TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + annotations: + array_data_mapping: + data: temperatures_in_K + dims: ["x", "y", "t"] # YAML 1.1 treats unquoted y as True + coords: + latitude_in_deg: "x" + longitude_in_deg: "y" + time_in_d: "t" + attributes: + name: + identifier: true + range: string + latitude_in_deg: + range: LatitudeSeries + required: true + longitude_in_deg: + range: LongitudeSeries + required: true + time_in_d: + range: DaySeries + required: true + temperatures_in_K: + range: TemperatureMatrix + required: true + + TemperatureMatrix: + description: A 3D array of temperatures + attributes: + values: + range: float + multivalued: true + implements: + - linkml:elements # signals to a containing DataArray that this has the data + required: true + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + + LatitudeSeries: + description: A series whose values represent latitude + attributes: + values: + range: float + multivalued: true + implements: + - linkml:elements + required: true + unit: + ucum_code: deg + array: + exact_number_dimensions: 1 + + LongitudeSeries: + description: A series whose values represent longitude + attributes: + values: + range: float + multivalued: true + implements: + - linkml:elements + required: true + unit: + ucum_code: deg + array: + exact_number_dimensions: 1 + + DaySeries: + description: A series whose values represent the days since the start of the measurement period + attributes: + values: + range: float + multivalued: true + implements: + - linkml:elements + required: true + unit: + ucum_code: d + array: + exact_number_dimensions: 1 diff --git a/tests/input/examples/schema_definition-native-array-2c.yaml b/tests/input/examples/schema_definition-native-array-2c.yaml new file mode 100644 index 00000000..04faa320 --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-2c.yaml @@ -0,0 +1,138 @@ +id: https://example.org/arrays +name: arrays-temperature-example-2 +title: 
Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes + using classes containing arrays for the axes and data instead of using array slots/attributes. + Creating separate types for the array slots enables reuse and extension. +license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + attributes: + name: + identifier: true + range: string + latitude_in_deg: + range: LatitudeSeries + required: true + longitude_in_deg: + range: LongitudeSeries + required: true + date: + range: DateSeries + required: true + day_in_d: + range: DaysSinceSeries + # one could define `reference_date` at this level but it really should be an attribute on `DaysSinceSeries`. + # however, this means `reference_date` cannot be a non-dimension (constant) coordinate of `temperatures_in_K` + # as structured in Xarray. + temperatures_in_K: + range: TemperatureMatrix + required: true + array: + # it does not make sense to put `labeled_by` on `TemperatureMatrix` because the index slots are only + # accessible from this DataArray class. 
+ labeled_by: + - alias: lat + label_slot: latitude_in_deg + labeled_dimensions: [0, 1] + - alias: lon + label_slot: longitude_in_deg + labeled_dimensions: [0, 1] + - alias: date + label_slot: date + labeled_dimensions: [2] + - alias: day + label_slot: day_in_d + labeled_dimensions: [2] + + LatitudeSeries: + description: A 2D array whose values represent latitude + attributes: + name: + identifier: true # an identifier is required for referencing in other classes + range: string + latitude_in_deg: # the name of the attribute does not matter when it is used within a DataArray or Dataset + required: true + range: float + unit: + ucum_code: deg + array: # exactly one attribute within this class must be an array + exact_number_dimensions: 2 + + LongitudeSeries: + description: A 2D array whose values represent longitude + attributes: + name: + identifier: true + range: string + longitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + + DateSeries: + description: A 1D series of dates + attributes: + name: + identifier: true + range: string + date: + required: true + range: date + array: + exact_number_dimensions: 1 + + DaysSinceSeries: + description: A 1D series whose values represent the number of days since a reference date + attributes: + name: + identifier: true + range: string + day_in_d: + required: true + range: integer + unit: + ucum_code: d + array: + exact_number_dimensions: 1 + reference_date: + description: The reference date for the `day_in_d` values + required: true + range: date + + TemperatureMatrix: + description: A 3D array of temperatures + attributes: + name: + identifier: true + range: string + temperatures_in_K: + required: true + range: float + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + dimensions: + - alias: "x" + - alias: "y" + - alias: "date" diff --git a/tests/input/examples/schema_definition-native-array-3a.yaml 
b/tests/input/examples/schema_definition-native-array-3a.yaml new file mode 100644 index 00000000..670b4336 --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-3a.yaml @@ -0,0 +1,118 @@ +id: https://example.org/arrays +name: arrays-temperature-example-3 +title: Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a complex 3D DataArray of temperature values + with labeled axes using array slots for the axes and data instead of classes containing + arrays +license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + annotations: + # See also the Xarray DataArray data structure + # https://docs.xarray.dev/en/latest/user-guide/data-structures.html#dataarray + # with the main differences being + # 1) the coordinates are not DataArray objects + # 2) coordinates are not named + array_data_mapping: + # The name of the array attribute within this class that contains the data. + # This maps to the "values" attribute of an Xarray DataArray. + data: temperatures_in_K + + # The 3 dimensions of the array attribute "temperatures_in_K" are named + # "x", "y", and "t" in the DataArray. These dims do not need to be the same as the + # dimensions of the array attribute. A name must be provided for each dimension + # of the data array. + # This maps to the "dims" attribute of an Xarray DataArray. + dims: ["x", "y", "t"] # NOTE: y without quotes is parsed as True in YAML 1.1 + + # An array attribute within this class that is not the data array above may + # serve as a coordinate for a set of dimensions in the data array. 
+ # In most cases, a 1D array is a coordinate for a single dimension in the + # data array, but it is possible to have an N-dimensional array that is a + # coordinate for N dimensions in the data array. The format is: + # array_attribute_name: dimension name or list of dimension names + # The number of dimensions of a coordinate must equal the length of the + # list specified here. Multiple coordinates can be specified for the same + # dimension or set of dimensions. + # This maps to the "coords" attribute of an Xarray DataArray. + coords: + # Here, the latitude for the temperature value at index (i,j,k) is equal to + # latitude_in_deg[i,j]. Similarly, the longitude for the temperature value at + # index (i,j,k) is equal to longitude_in_deg[i,j]. The date for the temperature + # value at index (i,j,k) is equal to date[k]. The day_in_d for the + # temperature value at index (i,j,k) is equal to day_in_d[k]. + latitude_in_deg: ["x", "y"] + longitude_in_deg: ["x", "y"] + date: "t" + day_in_d: "t" + # The reference date for the temperature values is the same for all values + # in the array. In Xarray terms, this is a non-dimension (constant) coordinate. + reference_date: False + + # Additional attributes for storing arbitrary metadata about the DataArray may + # be specified in the "attributes" below. These map to the "attrs" attribute of + # an Xarray DataArray.
+ attributes: + name: + identifier: true + range: string + latitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + longitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + date: + required: true + range: date + array: + exact_number_dimensions: 1 + day_in_d: + description: Number of days since `reference_date` + required: true + range: integer + unit: + ucum_code: d + array: + exact_number_dimensions: 1 + reference_date: + description: The reference date for the `day_in_d` values + required: true + range: date + unit: + ucum_code: d + temperatures_in_K: + required: true + range: float + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + dimensions: + - alias: "x" + - alias: "y" + - alias: "date" diff --git a/tests/input/examples/schema_definition-native-array-3b.yaml b/tests/input/examples/schema_definition-native-array-3b.yaml new file mode 100644 index 00000000..f1725519 --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-3b.yaml @@ -0,0 +1,97 @@ +id: https://example.org/arrays +name: arrays-temperature-example-3 +title: Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a complex 3D DataArray of temperature values + with labeled axes using array slots for the axes and data instead of classes containing + arrays +license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + attributes: + name: + identifier: true + range: string + latitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + indexes: + temperatures_in_K: + alias: lat + index_dims: [0, 1] + # The latitude for the temperature 
value at index (i,j,k) is equal to latitude_in_deg[i,j]. + # NOTE in xarray, multi-dimensional coordinates are referenced by name, but here we reference + # by index because dimensions are not required to have names. + longitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + indexes: + temperatures_in_K: + alias: lon + index_dims: [0, 1] + date: + required: true + range: date + array: + exact_number_dimensions: 1 + indexes: + temperatures_in_K: + alias: date + index_dims: [2] + day_in_d: + description: Number of days since `reference_date` + required: true + range: integer + unit: + ucum_code: d + array: + exact_number_dimensions: 1 + indexes: + temperatures_in_K: + alias: day + index_dims: [2] + reference_date: + description: The reference date for the `day_in_d` values + required: true + range: date + indexes: + temperatures_in_K: + alias: reference_date + index_dims: null + # this is a non-dimension (constant) coordinate for the entire array and is supported by xarray. + # the use case is not clear; this should just be an attribute on `day`. 
but we can support it by + # allowing index_dims: null + temperatures_in_K: + required: true + range: float + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + dimensions: + - alias: "x" + - alias: "y" + - alias: "date" diff --git a/tests/input/examples/schema_definition-native-array-3c.yaml b/tests/input/examples/schema_definition-native-array-3c.yaml new file mode 100644 index 00000000..6a97847b --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-3c.yaml @@ -0,0 +1,90 @@ +id: https://example.org/arrays +name: arrays-temperature-example-3 +title: Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a complex 3D DataArray of temperature values + with labeled axes using array slots for the axes and data instead of classes containing + arrays +license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + attributes: + name: + identifier: true + range: string + latitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + longitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + date: + required: true + range: date + array: + exact_number_dimensions: 1 + day_in_d: + description: Number of days since `reference_date` + required: true + range: integer + unit: + ucum_code: d + array: + exact_number_dimensions: 1 + reference_date: + description: The reference date for the `day_in_d` values + required: true + range: date + temperatures_in_K: + required: true + range: float + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + dimensions: + - alias: "x" + - alias: "y" + - alias: "date" + labeled_by: + - alias: lat + label_slot: latitude_in_deg + 
labeled_dimensions: [0, 1] + - alias: lon + label_slot: longitude_in_deg + labeled_dimensions: [0, 1] + - alias: date + label_slot: date + labeled_dimensions: [2] + - alias: day + label_slot: day_in_d + labeled_dimensions: [2] + - alias: reference_date + label_slot: reference_date + labeled_dimensions: null + # this is a non-dimension (constant) coordinate for the entire array and is supported by xarray. + # the use case is not clear; this should just be an attribute on `day_in_d`. but we can support it by + # allowing labeled_dimensions: null