|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +/// \file abi.h Arrow C Data Interface |
| 19 | +/// |
| 20 | +/// The Arrow C Data interface defines a very small, stable set |
| 21 | +/// of C definitions which can be easily copied into any project's |
| 22 | +/// source code and vendored to be used for columnar data interchange |
| 23 | +/// in the Arrow format. For non-C/C++ languages and runtimes, |
| 24 | +/// it should be almost as easy to translate the C definitions into |
| 25 | +/// the corresponding C FFI declarations. |
| 26 | +/// |
| 27 | +/// Applications and libraries can therefore work with Arrow memory |
| 28 | +/// without necessarily using the Arrow libraries or reinventing |
| 29 | +/// the wheel. Developers can choose between tight integration |
| 30 | +/// with the Arrow software project or minimal integration with |
| 31 | +/// the Arrow format only. |
| 32 | + |
| 33 | +#pragma once |
| 34 | + |
| 35 | +#include <stdint.h> |
| 36 | + |
| 37 | +// Spec and documentation: https://arrow.apache.org/docs/format/CDataInterface.html |
| 38 | + |
| 39 | +#ifdef __cplusplus |
| 40 | +extern "C" { |
| 41 | +#endif |
| 42 | + |
| 43 | +#ifndef ARROW_C_DATA_INTERFACE |
| 44 | +#define ARROW_C_DATA_INTERFACE |
| 45 | + |
// Flag bits. Each value is a distinct power of two (1, 2, 4), so any
// combination of them can be OR-ed into a single integer and tested
// individually with a bitwise AND.
// NOTE(review): presumably these are stored in the `flags` member of
// struct ArrowSchema; see the C Data Interface spec for the exact
// meaning of each flag.
#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4
| 49 | + |
// Type description of an array, exchanged between a producer and a
// consumer.
//
// The order and types of the members define the binary layout that both
// sides rely on; do not reorder, remove or retype them. The encoding of
// each member is defined by the C Data Interface spec linked near the top
// of this header.
struct ArrowSchema {
  // Array type description
  const char* format;
  const char* name;
  const char* metadata;
  int64_t flags;
  int64_t n_children;
  // NOTE(review): presumably `n_children` entries; confirm against the spec.
  struct ArrowSchema** children;
  struct ArrowSchema* dictionary;

  // Release callback; it receives a pointer to this structure.
  void (*release)(struct ArrowSchema*);
  // Opaque producer-specific data
  void* private_data;
};
| 65 | + |
// Data description of an array, exchanged between a producer and a
// consumer.
//
// The order and types of the members define the binary layout that both
// sides rely on; do not reorder, remove or retype them. The meaning of
// each member is defined by the C Data Interface spec linked near the top
// of this header.
struct ArrowArray {
  // Array data description
  int64_t length;
  int64_t null_count;
  int64_t offset;
  int64_t n_buffers;
  int64_t n_children;
  // NOTE(review): presumably `n_buffers` entries; confirm against the spec.
  const void** buffers;
  // NOTE(review): presumably `n_children` entries; confirm against the spec.
  struct ArrowArray** children;
  struct ArrowArray* dictionary;

  // Release callback; it receives a pointer to this structure.
  void (*release)(struct ArrowArray*);
  // Opaque producer-specific data
  void* private_data;
};
| 82 | + |
| 83 | +#endif // ARROW_C_DATA_INTERFACE |
| 84 | + |
| 85 | +#ifndef ARROW_C_DEVICE_DATA_INTERFACE |
| 86 | +#define ARROW_C_DEVICE_DATA_INTERFACE |
| 87 | + |
| 88 | +// Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html |
| 89 | + |
// DeviceType for the allocated memory.
//
// A 32-bit signed integer holding one of the ARROW_DEVICE_* values below.
// The values are not contiguous: 5 and 6 are not defined in this header.
typedef int32_t ArrowDeviceType;

// CPU device, same as using ArrowArray directly
#define ARROW_DEVICE_CPU 1
// CUDA GPU Device
#define ARROW_DEVICE_CUDA 2
// Pinned CUDA CPU memory by cudaMallocHost
#define ARROW_DEVICE_CUDA_HOST 3
// OpenCL Device
#define ARROW_DEVICE_OPENCL 4
// Vulkan buffer for next-gen graphics
#define ARROW_DEVICE_VULKAN 7
// Metal for Apple GPU
#define ARROW_DEVICE_METAL 8
// Verilog simulator buffer
#define ARROW_DEVICE_VPI 9
// ROCm GPUs for AMD GPUs
#define ARROW_DEVICE_ROCM 10
// Pinned ROCm CPU memory allocated by hipMallocHost
#define ARROW_DEVICE_ROCM_HOST 11
// Reserved for extension
#define ARROW_DEVICE_EXT_DEV 12
// CUDA managed/unified memory allocated by cudaMallocManaged
#define ARROW_DEVICE_CUDA_MANAGED 13
// Unified shared memory allocated on a oneAPI non-partitioned device.
#define ARROW_DEVICE_ONEAPI 14
// GPU support for next-gen WebGPU standard
#define ARROW_DEVICE_WEBGPU 15
// Qualcomm Hexagon DSP
#define ARROW_DEVICE_HEXAGON 16
| 121 | + |
| 122 | +struct ArrowDeviceArray { |
| 123 | + // the Allocated Array |
| 124 | + // |
| 125 | + // the buffers in the array (along with the buffers of any |
| 126 | + // children) are what is allocated on the device. |
| 127 | + struct ArrowArray array; |
| 128 | + // The device id to identify a specific device |
| 129 | + int64_t device_id; |
| 130 | + // The type of device which can access this memory. |
| 131 | + ArrowDeviceType device_type; |
| 132 | + // An event-like object to synchronize on if needed. |
| 133 | + void* sync_event; |
| 134 | + // Reserved bytes for future expansion. |
| 135 | + int64_t reserved[3]; |
| 136 | +}; |
| 137 | + |
| 138 | +#endif // ARROW_C_DEVICE_DATA_INTERFACE |
| 139 | + |
| 140 | +#ifndef ARROW_C_STREAM_INTERFACE |
| 141 | +#define ARROW_C_STREAM_INTERFACE |
| 142 | + |
// A source of ArrowArray chunks that all share one schema, exposed as a
// table of callbacks plus opaque producer state.
//
// The order and types of the members define the binary layout shared by
// producers and consumers; do not reorder, remove or retype them.
struct ArrowArrayStream {
  // Callback to get the stream schema
  // (will be the same for all arrays in the stream).
  //
  // Return value: 0 if successful, an `errno`-compatible error code otherwise.
  //
  // If successful, the ArrowSchema must be released independently from the stream.
  int (*get_schema)(struct ArrowArrayStream* self, struct ArrowSchema* out);

  // Callback to get the next array
  // (if no error and the array is released, the stream has ended)
  //
  // Return value: 0 if successful, an `errno`-compatible error code otherwise.
  //
  // If successful, the ArrowArray must be released independently from the stream.
  int (*get_next)(struct ArrowArrayStream* self, struct ArrowArray* out);

  // Callback to get optional detailed error information.
  // This must only be called if the last stream operation failed
  // with a non-0 return code.
  //
  // Return value: pointer to a null-terminated character array describing
  // the last error, or NULL if no description is available.
  //
  // The returned pointer is only valid until the next operation on this stream
  // (including release).
  const char* (*get_last_error)(struct ArrowArrayStream* self);

  // Release callback: release the stream's own resources.
  // Note that arrays returned by `get_next` must be individually released.
  void (*release)(struct ArrowArrayStream* self);

  // Opaque producer-specific data
  void* private_data;
};
| 178 | + |
| 179 | +#endif // ARROW_C_STREAM_INTERFACE |
| 180 | + |
| 181 | +#ifndef ARROW_C_DEVICE_STREAM_INTERFACE |
| 182 | +#define ARROW_C_DEVICE_STREAM_INTERFACE |
| 183 | + |
| 184 | +// Equivalent to ArrowArrayStream, but for ArrowDeviceArrays. |
| 185 | +// |
| 186 | +// This stream is intended to provide a stream of data on a single |
| 187 | +// device, if a producer wants data to be produced on multiple devices |
| 188 | +// then multiple streams should be provided. One per device. |
| 189 | +struct ArrowDeviceArrayStream { |
| 190 | + // The device that this stream produces data on. |
| 191 | + ArrowDeviceType device_type; |
| 192 | + |
| 193 | + // Callback to get the stream schema |
| 194 | + // (will be the same for all arrays in the stream). |
| 195 | + // |
| 196 | + // Return value 0 if successful, an `errno`-compatible error code otherwise. |
| 197 | + // |
| 198 | + // If successful, the ArrowSchema must be released independently from the stream. |
| 199 | + // The schema should be accessible via CPU memory. |
| 200 | + int (*get_schema)(struct ArrowDeviceArrayStream* self, struct ArrowSchema* out); |
| 201 | + |
| 202 | + // Callback to get the next array |
| 203 | + // (if no error and the array is released, the stream has ended) |
| 204 | + // |
| 205 | + // Return value: 0 if successful, an `errno`-compatible error code otherwise. |
| 206 | + // |
| 207 | + // If successful, the ArrowDeviceArray must be released independently from the stream. |
| 208 | + int (*get_next)(struct ArrowDeviceArrayStream* self, struct ArrowDeviceArray* out); |
| 209 | + |
| 210 | + // Callback to get optional detailed error information. |
| 211 | + // This must only be called if the last stream operation failed |
| 212 | + // with a non-0 return code. |
| 213 | + // |
| 214 | + // Return value: pointer to a null-terminated character array describing |
| 215 | + // the last error, or NULL if no description is available. |
| 216 | + // |
| 217 | + // The returned pointer is only valid until the next operation on this stream |
| 218 | + // (including release). |
| 219 | + const char* (*get_last_error)(struct ArrowDeviceArrayStream* self); |
| 220 | + |
| 221 | + // Release callback: release the stream's own resources. |
| 222 | + // Note that arrays returned by `get_next` must be individually released. |
| 223 | + void (*release)(struct ArrowDeviceArrayStream* self); |
| 224 | + |
| 225 | + // Opaque producer-specific data |
| 226 | + void* private_data; |
| 227 | +}; |
| 228 | + |
| 229 | +#endif // ARROW_C_DEVICE_STREAM_INTERFACE |
| 230 | + |
| 231 | +#ifdef __cplusplus |
| 232 | +} |
| 233 | +#endif |
0 commit comments