diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index ec8b6c1b32f..f759819bb2d 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -565,6 +565,12 @@ if(ARROW_WITH_ZSTD)
   list(APPEND ARROW_UTIL_SRCS util/compression_zstd.cc)
 endif()
 
+# ALP floating-point compression sources (used by the Parquet encoder/decoder)
+list(APPEND ARROW_UTIL_SRCS
+  util/alp/Alp.cc
+  util/alp/AlpSampler.cc
+  util/alp/AlpWrapper.cc)
+
 arrow_add_object_library(ARROW_UTIL ${ARROW_UTIL_SRCS})
 
 # Disable DLL exports in vendored uriparser library
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index a41b63f07b3..6e27e5af273 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -103,6 +103,13 @@ add_arrow_test(bit-utility-test
                rle_encoding_test.cc
                test_common.cc)
 
+add_arrow_test(alp-test
+               SOURCES
+               alp/alp_test.cc
+               alp/Alp.cc
+               alp/AlpSampler.cc
+               alp/AlpWrapper.cc)
+
 add_arrow_test(crc32-test
                SOURCES
                crc32_test.cc
diff --git a/cpp/src/arrow/util/alp/Alp.cc b/cpp/src/arrow/util/alp/Alp.cc
new file mode 100644
index 00000000000..777e4b2c72a
--- /dev/null
+++ b/cpp/src/arrow/util/alp/Alp.cc
@@ -0,0 +1,789 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/alp/Alp.h"
+
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <map>
+
+#include "arrow/util/alp/AlpConstants.h"
+#include "arrow/util/bit_stream_utils_internal.h"
+#include "arrow/util/bpacking_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/small_vector.h"
+#include "arrow/util/span.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace util {
+namespace alp {
+
+// ----------------------------------------------------------------------
+// AlpEncodedVectorInfo implementation
+
+bool AlpEncodedVectorInfo::operator==(const AlpEncodedVectorInfo& other) const {
+  if (!(exponent_and_factor == other.exponent_and_factor)) return false;  // member-wise
+  if (frame_of_reference != other.frame_of_reference) return false;
+  return bit_width == other.bit_width && bit_packed_size == other.bit_packed_size &&
+         num_elements == other.num_elements && num_exceptions == other.num_exceptions;
+}
+
+void AlpEncodedVectorInfo::Store(arrow::util::span<char> output_buffer) const {
+  const uint64_t stored_size = GetStoredSize();
+  ARROW_CHECK(output_buffer.size() >= stored_size)
+      << "alp_vector_info_output_too_small: " << output_buffer.size() << " vs "
+      << stored_size;
+  std::memcpy(output_buffer.data(), this, stored_size);  // raw byte image of the struct
+}
+
+AlpEncodedVectorInfo AlpEncodedVectorInfo::Load(
+    arrow::util::span<const char> input_buffer) {
+  ARROW_CHECK(input_buffer.size() >= GetStoredSize())
+      << "alp_vector_info_input_too_small: " << input_buffer.size() << " vs "
+      << GetStoredSize();
+  // Deserialize by copying the raw struct bytes back (the inverse of Store()).
+  AlpEncodedVectorInfo result;
+  std::memcpy(&result, input_buffer.data(), GetStoredSize());
+  ARROW_CHECK(result.num_elements <= AlpConstants::kAlpVectorSize)
+      << "alp_compression_state_element_count_too_large: " << result.num_elements
+      << " vs " << AlpConstants::kAlpVectorSize;
+  // NOTE(review): bit_width, bit_packed_size and num_exceptions are not checked here.
+  return result;
+}
+// Serialized size: Store()/Load() copy the raw struct bytes, hence sizeof().
+uint64_t AlpEncodedVectorInfo::GetStoredSize() { return sizeof(AlpEncodedVectorInfo); }
+
+// ----------------------------------------------------------------------
+// AlpEncodedVector implementation
+
+template <typename T>
+void AlpEncodedVector<T>::Store(arrow::util::span<char> output_buffer) const {
+  const uint64_t overall_size = GetStoredSize();
+  ARROW_CHECK(output_buffer.size() >= overall_size)
+      << "alp_bit_packed_vector_store_output_too_small: " << output_buffer.size()
+      << " vs " << overall_size;
+
+  // Serialized layout, written back to back:
+  //   [AlpEncodedVectorInfo][bit-packed values][exception positions][exception values]
+  char* const destination = output_buffer.data();
+  uint64_t bytes_written = 0;
+  // Copies num_bytes from source to the current write position and advances it.
+  const auto append = [&](const void* source, const uint64_t num_bytes) {
+    std::memcpy(destination + bytes_written, source, num_bytes);
+    bytes_written += num_bytes;
+  };
+
+  vector_info.Store(output_buffer);
+  bytes_written = AlpEncodedVectorInfo::GetStoredSize();
+  // All successfully compressed values come right after the header.
+  append(packed_values.data(), vector_info.bit_packed_size);
+
+  ARROW_CHECK(vector_info.num_exceptions == exceptions.size() &&
+              vector_info.num_exceptions == exception_positions.size())
+      << "alp_bit_packed_vector_store_num_exceptions_mismatch: "
+      << vector_info.num_exceptions << " vs " << exceptions.size() << " vs "
+      << exception_positions.size();
+
+  // Exceptions follow: every position first, then every value.
+  append(exception_positions.data(),
+         vector_info.num_exceptions * sizeof(AlpConstants::PositionType));
+  append(exceptions.data(), vector_info.num_exceptions * sizeof(T));
+  ARROW_CHECK(bytes_written == overall_size)
+      << "alp_bit_packed_vector_size_mismatch: " << bytes_written << " vs "
+      << overall_size;
+}
+
+template <typename T>
+AlpEncodedVector<T> AlpEncodedVector<T>::Load(
+    arrow::util::span<const char> input_buffer) {
+  AlpEncodedVector<T> result;
+  result.vector_info = AlpEncodedVectorInfo::Load(input_buffer);
+  const AlpEncodedVectorInfo& info = result.vector_info;
+  uint64_t input_offset = AlpEncodedVectorInfo::GetStoredSize();
+  const uint64_t overall_size = GetStoredSize(info);
+  ARROW_CHECK(input_buffer.size() >= overall_size)
+      << "alp_compression_state_input_too_small: " << input_buffer.size() << " vs "
+      << overall_size;
+  // Header fields come from serialized data: bound them before they size the
+  // unchecked resizes below (num_elements was already checked by the info Load).
+  ARROW_CHECK(info.num_exceptions <= info.num_elements)
+      << "alp_compression_state_exception_count_too_large: " << info.num_exceptions
+      << " vs " << info.num_elements;
+  ARROW_CHECK(info.bit_packed_size <= AlpConstants::kAlpVectorSize * sizeof(T))
+      << "alp_compression_state_bit_packed_size_too_large: " << info.bit_packed_size
+      << " vs " << AlpConstants::kAlpVectorSize * sizeof(T);
+
+  // UnsafeResize skips zero-initialization; memcpy overwrites every byte right after.
+  result.packed_values.UnsafeResize(info.bit_packed_size);
+  std::memcpy(result.packed_values.data(), input_buffer.data() + input_offset,
+              info.bit_packed_size);
+  input_offset += info.bit_packed_size;
+  const uint64_t exception_position_size =
+      info.num_exceptions * sizeof(AlpConstants::PositionType);
+  result.exception_positions.UnsafeResize(info.num_exceptions);
+  std::memcpy(result.exception_positions.data(), input_buffer.data() + input_offset,
+              exception_position_size);
+  input_offset += exception_position_size;
+  result.exceptions.UnsafeResize(info.num_exceptions);
+  std::memcpy(result.exceptions.data(), input_buffer.data() + input_offset,
+              info.num_exceptions * sizeof(T));
+  return result;
+}
+
+template <typename T>
+uint64_t AlpEncodedVector<T>::GetStoredSize() const {
+  // Header + packed values + one (position, value) pair per exception.
+  return GetStoredSize(vector_info);
+}
+
+// ----------------------------------------------------------------------
+// AlpEncodedVectorView implementation
+
+template <typename T>
+AlpEncodedVectorView<T> AlpEncodedVectorView<T>::LoadView(
+    arrow::util::span<const char> input_buffer) {
+  AlpEncodedVectorView<T> result;
+  result.vector_info = AlpEncodedVectorInfo::Load(input_buffer);
+  uint64_t input_offset = AlpEncodedVectorInfo::GetStoredSize();
+  // Total serialized size implied by the header: info + packed values + exceptions.
+  const uint64_t overall_size = AlpEncodedVector<T>::GetStoredSize(result.vector_info);
+
+  ARROW_CHECK(input_buffer.size() >= overall_size)
+      << "alp_view_input_too_small: " << input_buffer.size() << " vs " << overall_size;
+  ARROW_CHECK(result.vector_info.num_elements <= AlpConstants::kAlpVectorSize)
+      << "alp_view_element_count_too_large: " << result.vector_info.num_elements
+      << " vs " << AlpConstants::kAlpVectorSize;
+
+  // Zero-copy: the spans alias input_buffer, so it must outlive the returned view.
+  result.packed_values = {
+      reinterpret_cast<const uint8_t*>(input_buffer.data() + input_offset),
+      result.vector_info.bit_packed_size};
+  input_offset += result.vector_info.bit_packed_size;
+  // NOTE(review): nothing here aligns input_offset for the uint16_t/T spans below.
+  const uint64_t exception_position_size =
+      result.vector_info.num_exceptions * sizeof(AlpConstants::PositionType);
+  result.exception_positions = {
+      reinterpret_cast<const uint16_t*>(input_buffer.data() + input_offset),
+      result.vector_info.num_exceptions};
+  input_offset += exception_position_size;
+  // Exception values follow the positions; the span length is num_exceptions.
+  result.exceptions = {reinterpret_cast<const T*>(input_buffer.data() + input_offset),
+                       result.vector_info.num_exceptions};
+
+  return result;
+}
+
+template <typename T>
+uint64_t AlpEncodedVectorView<T>::GetStoredSize() const {
+  // Same serialized size as an owning AlpEncodedVector<T> with this header.
+  return AlpEncodedVector<T>::GetStoredSize(vector_info);
+}
+
+template struct AlpEncodedVectorView<float>;   // explicit instantiation (float)
+template struct AlpEncodedVectorView<double>;  // explicit instantiation (double)
+
+template <typename T>
+uint64_t AlpEncodedVector<T>::GetStoredSize(const AlpEncodedVectorInfo& info) {
+  return info.num_exceptions * (sizeof(T) + sizeof(AlpConstants::PositionType)) +
+         info.bit_packed_size + AlpEncodedVectorInfo::GetStoredSize();
+}
+
+template <typename T>
+bool AlpEncodedVector<T>::operator==(const AlpEncodedVector<T>& other) const {
+  // StaticVector doesn't have operator==, so size and contents are compared by hand.
+  const auto same_contents = [](const auto& lhs, const auto& rhs) {
+    return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+  };
+
+  // Two encoded vectors are equal when the header and all three payloads match.
+  if (!(vector_info == other.vector_info)) {
+    return false;
+  }
+  if (!same_contents(packed_values, other.packed_values)) {
+    return false;
+  }
+  return same_contents(exceptions, other.exceptions) &&
+         same_contents(exception_positions, other.exception_positions);
+}
+
+template class AlpEncodedVector<float>;   // explicit instantiation (float)
+template class AlpEncodedVector<double>;  // explicit instantiation (double)
+
+// ----------------------------------------------------------------------
+// Internal helper classes
+
+namespace {
+
+/// \brief Stateless per-value helpers used to encode and decode single values
+template <typename T>
+class AlpInlines : private AlpConstants {
+ public:
+  using Constants = AlpTypedConstants<T>;
+  using ExactType = typename Constants::FloatingToExact;
+  using SignedExactType = typename Constants::FloatingToSignedExact;
+
+  /// \brief True for NaN, values outside the encoding limits, and negative zero
+  static inline bool IsImpossibleToEncode(const T n) {
+    // We do not have to check for positive or negative infinity, since
+    // std::numeric_limits<T>::infinity() > std::numeric_limits<T>::max()
+    // and vice versa for negative infinity.
+    return std::isnan(n) || n > Constants::kEncodingUpperLimit ||
+           n < Constants::kEncodingLowerLimit ||
+           (n == 0.0 && std::signbit(n));  // Verification for -0.0
+  }
+
+  /// \brief Add then subtract kMagicNumber (presumably rounding n), then cast to int
+  static inline auto FastRound(T n) -> SignedExactType {
+    n = n + Constants::kMagicNumber - Constants::kMagicNumber;
+    return static_cast<SignedExactType>(n);
+  }
+
+  /// \brief FastRound(n), or kEncodingUpperLimit as a stand-in when n is unencodable
+  static inline auto NumberToInt(T n) -> SignedExactType {
+    if (IsImpossibleToEncode(n)) {
+      return static_cast<SignedExactType>(Constants::kEncodingUpperLimit);
+    }
+    return FastRound(n);
+  }
+
+  /// \brief Scale a float by the chosen exponent and factor, then convert it to an int
+  static inline SignedExactType EncodeValue(
+      const T value, const AlpExponentAndFactor exponent_and_factor) {
+    const T tmp_encoded_value = value *
+                                Constants::GetExponent(exponent_and_factor.exponent) *
+                                Constants::GetFactor(exponent_and_factor.factor);
+    return NumberToInt(tmp_encoded_value);
+  }
+
+  /// \brief Int -> float. NOTE(review): GetFactor is unqualified once, Constants:: once
+  static inline T DecodeValue(const SignedExactType encoded_value,
+                              const AlpExponentAndFactor exponent_and_factor) {
+    // The cast to T is needed to prevent a signed integer overflow.
+    return static_cast<T>(encoded_value) * GetFactor(exponent_and_factor.factor) *
+           Constants::GetFactor(exponent_and_factor.exponent);
+  }
+};
+
+/// \brief One candidate exponent/factor pair plus the metrics used to rank it
+struct AlpCombination {
+  AlpExponentAndFactor exponent_and_factor;  // candidate (exponent, factor) pair
+  uint64_t num_appearances{0};               // times picked as best for a sampled vector
+  uint64_t estimated_compression_size{0};    // estimated compressed size, in bits
+};
+
+/// \brief Strict ordering of ALP combinations, best first
+///
+/// Returns true when c1 should be preferred over c2. Tie-breakers, in order:
+/// 1. more appearances as the best combination,
+/// 2. smaller estimated compression size,
+/// 3. bigger exponent,
+/// 4. bigger factor.
+bool CompareAlpCombinations(const AlpCombination& c1, const AlpCombination& c2) {
+  if (c1.num_appearances != c2.num_appearances) {
+    return c1.num_appearances > c2.num_appearances;
+  }
+  if (c1.estimated_compression_size != c2.estimated_compression_size) {
+    return c1.estimated_compression_size < c2.estimated_compression_size;
+  }
+  const AlpExponentAndFactor& lhs = c1.exponent_and_factor;
+  const AlpExponentAndFactor& rhs = c2.exponent_and_factor;
+  if (lhs.exponent != rhs.exponent) return rhs.exponent < lhs.exponent;
+  return rhs.factor < lhs.factor;
+}
+
+}  // namespace
+
+// ----------------------------------------------------------------------
+// AlpCompression implementation
+
+template <typename T>
+std::optional<uint64_t> AlpCompression<T>::EstimateCompressedSize(
+    const std::vector<T>& input_vector,
+    const AlpExponentAndFactor exponent_and_factor,
+    const bool penalize_exceptions) {
+  // Dry-compress a vector (ideally a sample) and return the estimated ALP size in
+  // bits for the given exponent and factor; nullopt only if penalize_exceptions.
+  SignedExactType max_encoded_value = std::numeric_limits<SignedExactType>::min();
+  SignedExactType min_encoded_value = std::numeric_limits<SignedExactType>::max();
+  // Values that do not survive an encode/decode round trip count as exceptions.
+  uint64_t num_exceptions = 0;
+  uint64_t num_non_exceptions = 0;
+  for (const T& value : input_vector) {
+    const SignedExactType encoded_value =
+        AlpInlines<T>::EncodeValue(value, exponent_and_factor);
+    T decoded_value = AlpInlines<T>::DecodeValue(encoded_value, exponent_and_factor);
+    if (decoded_value == value) {
+      num_non_exceptions++;
+      max_encoded_value = std::max(encoded_value, max_encoded_value);
+      min_encoded_value = std::min(encoded_value, min_encoded_value);
+      continue;
+    }
+    num_exceptions++;
+  }
+
+  // We penalize combinations which yield almost all exceptions.
+  if (penalize_exceptions && num_non_exceptions < 2) {
+    return std::nullopt;
+  }
+
+  // Evaluate factor/exponent compression size (we optimize for FOR).
+  const ExactType delta = (static_cast<ExactType>(max_encoded_value) -
+                           static_cast<ExactType>(min_encoded_value));
+  // NOTE(review): if nothing encoded cleanly, min/max still hold their initial limits.
+  const uint32_t estimated_bits_per_value =
+      static_cast<uint32_t>(std::ceil(std::log2(delta + 1)));
+  uint64_t estimated_compression_size = input_vector.size() * estimated_bits_per_value;
+  estimated_compression_size +=
+      num_exceptions * (kExactTypeBitSize + (sizeof(PositionType) * 8));
+  return estimated_compression_size;
+}
+
+template <typename T>
+AlpEncodingPreset AlpCompression<T>::CreateEncodingPreset(
+    const std::vector<std::vector<T>>& vectors_sampled) {
+  // First-level sampling, run once per segment: for every sampled vector pick the
+  // exponent/factor pair with the smallest estimated size, then keep the pairs that
+  // win most often (at most kMaxCombinations) plus the best size seen, in bytes.
+  static constexpr uint64_t kMaxCombinationCount =
+      (Constants::kMaxExponent + 1) * (Constants::kMaxExponent + 2) / 2;
+
+  std::map<AlpExponentAndFactor, uint64_t> best_k_combinations_hash;
+
+  uint64_t best_compressed_size_bits = std::numeric_limits<uint64_t>::max();
+  // For each vector sampled.
+  for (const std::vector<T>& sampled_vector : vectors_sampled) {
+    const uint64_t num_samples = sampled_vector.size();
+    const AlpExponentAndFactor best_encoding_options{Constants::kMaxExponent,
+                                                     Constants::kMaxExponent};
+
+    // Start optimization with worst possible total bits from compression.
+    const uint64_t best_total_bits =
+        (num_samples * (kExactTypeBitSize + sizeof(PositionType) * 8)) +
+        (num_samples * kExactTypeBitSize);
+
+    // N of appearances is irrelevant at this phase; we search for best compression.
+    AlpCombination best_combination{best_encoding_options, 0, best_total_bits};
+    // Try all combinations to find the one which minimizes compression size.
+    for (uint8_t exp_idx = 0; exp_idx <= Constants::kMaxExponent; exp_idx++) {
+      for (uint8_t factor_idx = 0; factor_idx <= exp_idx; factor_idx++) {
+        const AlpExponentAndFactor current_exponent_and_factor{exp_idx, factor_idx};
+        std::optional<uint64_t> estimated_compression_size = EstimateCompressedSize(
+            sampled_vector, current_exponent_and_factor, /*penalize_exceptions=*/true);
+
+        // Skip comparison for values that are not compressible.
+        if (!estimated_compression_size.has_value()) {
+          continue;
+        }
+
+        const AlpCombination current_combination{current_exponent_and_factor, 0,
+                                                 *estimated_compression_size};
+        if (CompareAlpCombinations(current_combination, best_combination)) {
+          best_combination = current_combination;
+          best_compressed_size_bits =
+              std::min(best_compressed_size_bits, *estimated_compression_size);
+        }
+      }
+    }
+    best_k_combinations_hash[best_combination.exponent_and_factor]++;
+  }
+
+  // Convert our hash to a Combination vector to be able to sort.
+  // Note that this vector should mostly be small (< 10 combinations).
+  std::vector<AlpCombination> best_k_combinations;
+  best_k_combinations.reserve(
+      std::min<uint64_t>(best_k_combinations_hash.size(), kMaxCombinationCount));
+  for (const auto& combination : best_k_combinations_hash) {
+    best_k_combinations.emplace_back(AlpCombination{
+        combination.first,   // Encoding Indices
+        combination.second,  // N of times it appeared (hash value)
+        0  // Compression size is irrelevant since we compare different vectors.
+    });
+  }
+  std::sort(best_k_combinations.begin(), best_k_combinations.end(),
+            CompareAlpCombinations);
+
+  // Save k' best combinations; compare in 64 bits so the count is never truncated.
+  const uint64_t num_kept =
+      std::min<uint64_t>(kMaxCombinations, best_k_combinations.size());
+  std::vector<AlpExponentAndFactor> combinations;
+  for (uint64_t i = 0; i < num_kept; i++) {
+    combinations.push_back(best_k_combinations[i].exponent_and_factor);
+  }
+
+  const uint64_t best_compressed_size_bytes =
+      std::ceil(best_compressed_size_bits / 8.0);
+  return {combinations, best_compressed_size_bytes};
+}
+
+template <typename T>
+std::vector<T> AlpCompression<T>::CreateSample(arrow::util::span<const T> input) {
+  // Equidistant sampling: take every stride-th value, starting with the first one.
+  const double num_values = static_cast<double>(input.size());
+  const double ratio = num_values / AlpConstants::kSamplerSamplesPerVector;
+  const auto stride = std::max<uint32_t>(1, static_cast<uint32_t>(std::ceil(ratio)));
+  std::vector<T> sample;
+  sample.reserve(std::ceil(num_values / static_cast<double>(stride)));
+  for (uint64_t pos = 0; pos < input.size(); pos += stride) {
+    sample.push_back(input[pos]);
+  }
+  return sample;
+}
+
+template <typename T>
+AlpExponentAndFactor AlpCompression<T>::FindBestExponentAndFactor(
+    arrow::util::span<const T> input,
+    const std::vector<AlpExponentAndFactor>& combinations) {
+  // Second-level sampling: among the preset's k combinations, return the one with
+  // the smallest estimated size on a sample of this vector.
+  if (combinations.size() == 1) {
+    return combinations.front();
+  }
+
+  const std::vector<T> sample_vector = CreateSample(input);
+  // Value-initialized so an empty `combinations` never returns indeterminate fields.
+  AlpExponentAndFactor best_exponent_and_factor{};
+  uint64_t best_total_bits = std::numeric_limits<uint64_t>::max();
+  uint64_t worse_total_bits_counter = 0;
+
+  // Try each K combination to find the one which minimizes compression size.
+  for (const AlpExponentAndFactor& exponent_and_factor : combinations) {
+    std::optional<uint64_t> estimated_compression_size = EstimateCompressedSize(
+        sample_vector, exponent_and_factor, /*penalize_exceptions=*/false);
+
+    // Defensive: with penalize_exceptions=false no estimate is expected to be empty.
+    if (!estimated_compression_size.has_value()) {
+      continue;
+    }
+
+    // If current compression size is worse or equal than current best combination.
+    if (estimated_compression_size >= best_total_bits) {
+      worse_total_bits_counter += 1;
+      // Early exit strategy.
+      if (worse_total_bits_counter == kSamplingEarlyExitThreshold) {
+        break;
+      }
+      continue;
+    }
+    // Otherwise replace the best and continue trying with next combination.
+    best_total_bits = estimated_compression_size.value();
+    best_exponent_and_factor = exponent_and_factor;
+    worse_total_bits_counter = 0;
+  }
+  return best_exponent_and_factor;
+}
+
+template <typename T>
+auto AlpCompression<T>::EncodeVector(arrow::util::span<const T> input_vector,
+                                     AlpExponentAndFactor exponent_and_factor)
+    -> EncodingResult {
+  arrow::internal::StaticVector<SignedExactType, kAlpVectorSize> encoded_integers;
+  arrow::internal::StaticVector<T, kAlpVectorSize> exceptions;
+  arrow::internal::StaticVector<PositionType, kAlpVectorSize> exception_positions;
+  if (input_vector.size() == 0) {  // minmax_element below needs a non-empty range
+    return EncodingResult{encoded_integers, exceptions, exception_positions,
+                          ExactType{0}, ExactType{0}};
+  }
+
+  // Encoding Float/Double to SignedExactType(Int32, Int64).
+  // Encode all values regardless of correctness to recover original floating-point.
+  uint64_t input_offset = 0;
+  for (const T input : input_vector) {
+    const SignedExactType encoded_value =
+        AlpInlines<T>::EncodeValue(input, exponent_and_factor);
+    const T decoded_value = AlpInlines<T>::DecodeValue(encoded_value, exponent_and_factor);
+    encoded_integers.push_back(encoded_value);
+    if (decoded_value != input) {  // Round trip failed: record an exception.
+      exception_positions.push_back(input_offset);
+    }
+    input_offset++;
+  }
+
+  // First non-exception index: positions are ascending, so it is where a position
+  // first differs from its rank, or just past the exceptions when they are a prefix.
+  SignedExactType first_non_exception_value = 0;
+  PositionType exception_offset = 0;
+  for (const PositionType exception_position : exception_positions) {
+    if (exception_offset != exception_position) break;
+    exception_offset++;
+  }
+  if (exception_offset < encoded_integers.size()) {
+    first_non_exception_value = encoded_integers[exception_offset];
+  }
+
+  // Use first non-exception value as placeholder for all exception values.
+  for (const PositionType exception_position : exception_positions) {
+    const T actual_value = input_vector[exception_position];
+    encoded_integers[exception_position] = first_non_exception_value;
+    exceptions.push_back(actual_value);
+  }
+  // Analyze FOR; min/max are read before the in-place subtraction rewrites them.
+  const auto [min, max] =
+      std::minmax_element(encoded_integers.begin(), encoded_integers.end());
+  const auto frame_of_reference = static_cast<ExactType>(*min);
+  const ExactType min_max_diff = static_cast<ExactType>(*max) - frame_of_reference;
+  for (SignedExactType& encoded_integer : encoded_integers) {
+    ExactType& u_encoded_integer = *reinterpret_cast<ExactType*>(&encoded_integer);
+    u_encoded_integer -= frame_of_reference;
+  }
+  return EncodingResult{encoded_integers, exceptions, exception_positions, min_max_diff,
+                        frame_of_reference};
+}
+
+template <typename T>
+auto AlpCompression<T>::BitPackIntegers(
+    arrow::util::span<const SignedExactType> integers, const uint64_t min_max_diff)
+    -> BitPackingResult {
+  // Bit width = number of significant bits in min_max_diff (0 when it is 0).
+  // A plain shift loop keeps this portable: __builtin_clz/__builtin_clzll are
+  // GCC/Clang-only, and the loop runs once per vector for at most 64 iterations.
+  uint8_t bit_width = 0;
+  for (uint64_t remaining = min_max_diff; remaining != 0; remaining >>= 1) {
+    ++bit_width;
+  }
+  const uint64_t bit_packed_size = (bit_width * integers.size() + 7) / 8;
+
+  arrow::internal::StaticVector<uint8_t, kAlpVectorSize * sizeof(T)> packed_integers;
+  // Use unsafe resize to avoid zero-initialization. Zero initialization was
+  // resulting in around 2-3% degradation in compression speed.
+  packed_integers.UnsafeResize(bit_packed_size);
+  if (bit_width > 0) {  // Only execute BP if writing data.
+    // Use Arrow's BitWriter for packing (loop-based).
+    arrow::bit_util::BitWriter writer(packed_integers.data(),
+                                      static_cast<int>(bit_packed_size));
+    for (const SignedExactType integer : integers) {
+      // Convert via ExactType: casting a signed value straight to uint64_t would
+      // sign-extend it, handing BitWriter set bits above bit_width.
+      writer.PutValue(static_cast<uint64_t>(static_cast<ExactType>(integer)), bit_width);
+    }
+    writer.Flush(false);
+  }
+  return {packed_integers, bit_width, bit_packed_size};
+}
+
+template <typename T>
+AlpEncodedVector<T> AlpCompression<T>::CompressVector(const T* input_vector,
+                                                      uint16_t num_elements,
+                                                      const AlpEncodingPreset& preset) {
+  // Pipeline: pick the exponent/factor for this vector, encode it, then bit-pack.
+  const arrow::util::span<const T> input_span{input_vector, num_elements};
+  const AlpExponentAndFactor exponent_and_factor =
+      FindBestExponentAndFactor(input_span, preset.combinations);
+  const EncodingResult encoding_result = EncodeVector(input_span, exponent_and_factor);
+  BitPackingResult bitpacking_result;
+  switch (preset.bit_pack_layout) {
+    case AlpBitPackLayout::kNormal:
+      bitpacking_result =
+          BitPackIntegers(encoding_result.encoded_integers, encoding_result.min_max_diff);
+      break;
+    default:
+      ARROW_CHECK(false) << "invalid_bit_pack_layout: "
+                         << static_cast<int>(preset.bit_pack_layout);
+      break;
+  }
+
+  // Gather the header fields and payloads into the serializable AlpEncodedVector.
+  const AlpEncodedVectorInfo vector_info{
+      exponent_and_factor,
+      encoding_result.frame_of_reference,
+      bitpacking_result.bit_width,
+      bitpacking_result.bit_packed_size,
+      num_elements,
+      static_cast<uint16_t>(encoding_result.exceptions.size())};
+
+  return AlpEncodedVector<T>{vector_info, bitpacking_result.packed_integers,
+                             encoding_result.exceptions,
+                             encoding_result.exception_positions};
+}
+
+template <typename T>
+auto AlpCompression<T>::BitUnpackIntegers(
+    arrow::util::span<const uint8_t> packed_integers,
+    const AlpEncodedVectorInfo vector_info)
+    -> arrow::internal::StaticVector<ExactType, kAlpVectorSize> {
+  arrow::internal::StaticVector<ExactType, kAlpVectorSize> encoded_integers;
+  // Optimization: Use UnsafeResize to avoid zero-initialization.
+  // Safe because every element is written below (unpack, BitReader loop or memset).
+  encoded_integers.UnsafeResize(vector_info.num_elements);
+  // NOTE(review): bit_width and packed_integers.size() are used without validation.
+  if (vector_info.bit_width > 0) {
+    // Arrow's SIMD unpack works in fixed batch sizes. All SIMD implementations
+    // (SIMD128/NEON, SIMD256/AVX2, SIMD512/AVX512) have identical batch sizes:
+    // - uint32_t (float): Simd*UnpackerForWidth::kValuesUnpacked = 32
+    // - uint64_t (double): Simd*UnpackerForWidth::kValuesUnpacked = 64
+    // These constants are in anonymous namespaces (internal implementation detail),
+    // so we hardcode them here.
+    constexpr int kMinBatchSize = std::is_same_v<T, float> ? 32 : 64;
+    const int num_elements = static_cast<int>(vector_info.num_elements);
+    const int num_complete_batches = num_elements / kMinBatchSize;
+    const int num_complete_elements = num_complete_batches * kMinBatchSize;
+
+    // Use Arrow's SIMD-optimized unpack for complete batches.
+    if (num_complete_elements > 0) {
+      arrow::internal::unpack(packed_integers.data(), encoded_integers.data(),
+                              num_complete_elements, vector_info.bit_width);
+    }
+
+    // Handle the remaining (< kMinBatchSize) elements with BitReader (BitWriter format).
+    const int remaining = num_elements - num_complete_elements;
+    if (remaining > 0) {
+      // Calculate byte offset where SIMD unpack finished
+      const uint64_t bits_consumed_by_simd =
+          static_cast<uint64_t>(num_complete_elements) * vector_info.bit_width;
+      // Round up to next byte (the bit count is a multiple of 32, so this is exact)
+      const uint64_t bytes_consumed_by_simd = (bits_consumed_by_simd + 7) / 8;
+
+      // Use BitReader for remaining elements starting from where SIMD left off
+      arrow::bit_util::BitReader reader(
+          packed_integers.data() + bytes_consumed_by_simd,
+          static_cast<int>(packed_integers.size() - bytes_consumed_by_simd));
+
+      for (int i = 0; i < remaining; ++i) {
+        uint64_t value = 0;
+        if (reader.GetValue(vector_info.bit_width, &value)) {
+          encoded_integers[num_complete_elements + i] = static_cast<ExactType>(value);
+        } else {  // NOTE(review): a failed read is silently decoded as 0
+          encoded_integers[num_complete_elements + i] = 0;
+        }
+      }
+    }
+  } else {
+    std::memset(encoded_integers.data(), 0, vector_info.num_elements * sizeof(ExactType));
+  }
+  return encoded_integers;
+}
+
+template <typename T>
+template <typename TargetType>
+void AlpCompression<T>::DecodeVector(TargetType* output_vector,
+                                     arrow::util::span<ExactType> input_vector,
+                                     const AlpEncodedVectorInfo vector_info) {
+  // Fused unFOR + decode loop: writes one decoded value to output_vector per input
+  // element, avoiding an intermediate write-then-read of the unFOR'd values.
+  const size_t num_elements = input_vector.size();
+  const ExactType* data = input_vector.data();
+  const ExactType frame_of_ref = vector_info.frame_of_reference;
+  // NOTE(review): GCC-specific pragmas; other compilers may ignore or warn on them.
+#pragma GCC unroll AlpConstants::kLoopUnrolls
+#pragma GCC ivdep
+  for (size_t i = 0; i < num_elements; ++i) {
+    // 1. Apply frame of reference (unFOR) - unsigned arithmetic
+    const ExactType unfored_value = data[i] + frame_of_ref;
+    // 2. Reinterpret as signed integer for decode
+    SignedExactType signed_value;
+    std::memcpy(&signed_value, &unfored_value, sizeof(SignedExactType));
+    // 3. Decode using original function to preserve exact floating-point behavior
+    output_vector[i] =
+        AlpInlines<T>::DecodeValue(signed_value, vector_info.exponent_and_factor);
+  }
+}
+
+template <typename T>
+template <typename TargetType>
+void AlpCompression<T>::PatchExceptions(
+    TargetType* output, arrow::util::span<const T> exceptions,
+    arrow::util::span<const uint16_t> exception_positions) {
+  // Overwrite each placeholder with its stored original value; the k-th position
+  // is paired with the k-th exception.
+  const size_t num_patches = exception_positions.size();
+#pragma GCC unroll AlpConstants::kLoopUnrolls
+#pragma GCC ivdep
+  for (size_t k = 0; k < num_patches; ++k) {
+    output[exception_positions[k]] = static_cast<T>(exceptions[k]);
+  }
+}
+
+template <typename T>
+template <typename TargetType>
+void AlpCompression<T>::DecompressVector(const AlpEncodedVector<T>& packed_vector,
+                                         const AlpBitPackLayout bit_pack_layout,
+                                         TargetType* output) {
+  // TargetType must be at least as wide as T.
+  static_assert(sizeof(T) <= sizeof(TargetType));
+
+  // kNormal is the only layout defined so far; reject anything else up front.
+  if (bit_pack_layout != AlpBitPackLayout::kNormal) {
+    ARROW_CHECK(false) << "invalid_bit_pack_layout: "
+                       << static_cast<int>(bit_pack_layout);
+    return;
+  }
+
+  // Bit-unpack, then fused unFOR + decode, then patch the exceptions back in.
+  const AlpEncodedVectorInfo& info = packed_vector.vector_info;
+  arrow::internal::StaticVector<ExactType, kAlpVectorSize> unpacked =
+      BitUnpackIntegers(packed_vector.packed_values, info);
+  DecodeVector<TargetType>(output, {unpacked.data(), info.num_elements}, info);
+  PatchExceptions<TargetType>(output, packed_vector.exceptions,
+                              packed_vector.exception_positions);
+}
+
+template <typename T>
+template <typename TargetType>
+void AlpCompression<T>::DecompressVectorView(const AlpEncodedVectorView<T>& encoded_view,
+                                             const AlpBitPackLayout bit_pack_layout,
+                                             TargetType* output) {
+  // TargetType must be at least as wide as T.
+  static_assert(sizeof(T) <= sizeof(TargetType));
+
+  // kNormal is the only layout defined so far; reject anything else up front.
+  if (bit_pack_layout != AlpBitPackLayout::kNormal) {
+    ARROW_CHECK(false) << "invalid_bit_pack_layout: "
+                       << static_cast<int>(bit_pack_layout);
+    return;
+  }
+
+  // The view's spans are consumed in place; only the unpacked integers are
+  // materialized (in a fixed-capacity StaticVector).
+  const AlpEncodedVectorInfo& info = encoded_view.vector_info;
+  arrow::internal::StaticVector<ExactType, kAlpVectorSize> unpacked =
+      BitUnpackIntegers(encoded_view.packed_values, info);
+  DecodeVector<TargetType>(output, {unpacked.data(), info.num_elements}, info);
+  PatchExceptions<TargetType>(output, encoded_view.exceptions,
+                              encoded_view.exception_positions);
+}
+
+// ----------------------------------------------------------------------
+// Explicit template instantiations
+// (T -> TargetType pairs: float -> double, float -> float, double -> double)
+template void AlpCompression<float>::DecompressVector<double>(
+    const AlpEncodedVector<float>& packed_vector, AlpBitPackLayout bit_pack_layout,
+    double* output);
+template void AlpCompression<float>::DecompressVector<float>(
+    const AlpEncodedVector<float>& packed_vector, AlpBitPackLayout bit_pack_layout,
+    float* output);
+template void AlpCompression<double>::DecompressVector<double>(
+    const AlpEncodedVector<double>& packed_vector, AlpBitPackLayout bit_pack_layout,
+    double* output);
+
+template void AlpCompression<float>::DecompressVectorView<double>(
+    const AlpEncodedVectorView<float>& encoded_view, AlpBitPackLayout bit_pack_layout,
+    double* output);
+template void AlpCompression<float>::DecompressVectorView<float>(
+    const AlpEncodedVectorView<float>& encoded_view, AlpBitPackLayout bit_pack_layout,
+    float* output);
+template void AlpCompression<double>::DecompressVectorView<double>(
+    const AlpEncodedVectorView<double>& encoded_view, AlpBitPackLayout bit_pack_layout,
+    double* output);
+
+template class AlpCompression<float>;
+template class AlpCompression<double>;
+
+}  // namespace alp
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/alp/Alp.h b/cpp/src/arrow/util/alp/Alp.h
new file mode 100644
index 00000000000..e73ba5b6bac
--- /dev/null
+++ b/cpp/src/arrow/util/alp/Alp.h
@@ -0,0 +1,529 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Adaptive Lossless floating-Point (ALP) compression implementation
+#pragma once
+
+#include <optional>
+#include <vector>
+
+#include "arrow/util/alp/AlpConstants.h"
+#include "arrow/util/small_vector.h"
+#include "arrow/util/span.h"
+
+namespace arrow {
+namespace util {
+namespace alp {
+
+// ----------------------------------------------------------------------
+// ALP Overview
+//
+// IMPORTANT: For abstract interfaces or examples of how to use ALP, consult
+// AlpWrapper.h.
+// This is our implementation of the adaptive lossless floating-point
+// compression for decimals (ALP) (https://dl.acm.org/doi/10.1145/3626717).
+// It works by converting a float into a decimal (if possible). The exponent
+// and factor are chosen per vector. Each float is converted using
+// c(f) = int64(f * 10^exponent * 10^-factor). The converted floats are then
+// encoded via a delta frame of reference and bitpacked. Every exception,
+// where the conversion/reconversion changes the value of the float, is stored
+// separately and has to be patched into the decompressed vector afterwards.
+//
+// ==========================================================================
+//                    ALP COMPRESSION/DECOMPRESSION PIPELINE
+// ==========================================================================
+//
+// COMPRESSION FLOW:
+// -----------------
+//
+//   Input: float/double array
+//        |
+//        v
+//   +------------------------------------------------------------------+
+//   | 1. SAMPLING & PRESET GENERATION                                  |
+//   |    * Sample vectors from dataset                                 |
+//   |    * Try all exponent/factor combinations (e, f)                 |
+//   |    * Select best k combinations for preset                       |
+//   +------------------------------------+-----------------------------+
+//                                        | preset.combinations
+//                                        v
+//   +------------------------------------------------------------------+
+//   | 2. PER-VECTOR COMPRESSION                                        |
+//   |    a) Find best (e,f) from preset for this vector                |
+//   |    b) Encode: encoded[i] = int64(value[i] * 10^e * 10^-f)        |
+//   |    c) Verify: if decode(encoded[i]) != value[i] -> exception     |
+//   |    d) Replace exceptions with placeholder value                  |
+//   +------------------------------------+-----------------------------+
+//                                        | encoded integers + exceptions
+//                                        v
+//   +------------------------------------------------------------------+
+//   | 3. FRAME OF REFERENCE (FOR)                                      |
+//   |    * Find min value in encoded integers                          |
+//   |    * Subtract min from all values: delta[i] = encoded[i] - min   |
+//   +------------------------------------+-----------------------------+
+//                                        | delta values (smaller range)
+//                                        v
+//   +------------------------------------------------------------------+
+//   | 4. BIT PACKING                                                   |
+//   |    * Calculate bit_width = ceil(log2(max_delta + 1))             |
+//   |    * Pack each value into bit_width bits                         |
+//   |    * Result: tightly packed binary data                          |
+//   +------------------------------------+-----------------------------+
+//                                        | packed bytes
+//                                        v
+//   +------------------------------------------------------------------+
+//   | 5. SERIALIZATION (see AlpEncodedVector diagram below)            |
+//   |    [VectorInfo][PackedData][ExceptionPos][ExceptionValues]       |
+//   +------------------------------------------------------------------+
+//
+//
+// DECOMPRESSION FLOW:
+// -------------------
+//
+//   Serialized bytes -> AlpEncodedVector::Load()
+//        |
+//        v
+//   +------------------------------------------------------------------+
+//   | 1. BIT UNPACKING                                                 |
+//   |    * Extract bit_width from metadata                             |
+//   |    * Unpack each value from bit_width bits -> delta values       |
+//   +------------------------------------+-----------------------------+
+//                                        | delta values
+//                                        v
+//   +------------------------------------------------------------------+
+//   | 2. REVERSE FRAME OF REFERENCE (unFOR)                            |
+//   |    * Add back min: encoded[i] = delta[i] + frame_of_reference    |
+//   +------------------------------------+-----------------------------+
+//                                        | encoded integers
+//                                        v
+//   +------------------------------------------------------------------+
+//   | 3. DECODE                                                        |
+//   |    * Apply inverse formula: value[i] = encoded[i] * 10^-e * 10^f |
+//   +------------------------------------+-----------------------------+
+//                                        | decoded floats (with placeholders)
+//                                        v
+//   +------------------------------------------------------------------+
+//   | 4. PATCH EXCEPTIONS                                              |
+//   |    * Replace values at exception_positions[] with exceptions[]   |
+//   +------------------------------------+-----------------------------+
+//                                        |
+//                                        v
+//   Output: Original float/double array (lossless!)
+//
+// ==========================================================================
+
+// ----------------------------------------------------------------------
+// AlpMode
+
+/// \brief Selects the ALP compression scheme
+///
+/// kAlp (decimal compression) is the only mode defined so far.
+enum class AlpMode { kAlp };
+
+// ----------------------------------------------------------------------
+// AlpExponentAndFactor
+
+/// \brief The (exponent, factor) pair that parameterizes ALP's decimal encoding
+struct AlpExponentAndFactor {
+  uint8_t exponent{0};
+  uint8_t factor{0};
+
+  bool operator==(const AlpExponentAndFactor& other) const {
+    return factor == other.factor && exponent == other.exponent;
+  }
+
+  /// \brief Lexicographic order on (exponent, factor), e.g. for std::map keys
+  bool operator<(const AlpExponentAndFactor& other) const {
+    if (exponent == other.exponent) return factor < other.factor;
+    return exponent < other.exponent;
+  }
+};
+
+// ----------------------------------------------------------------------
+// AlpEncodedVectorInfo
+
+/// \brief Metadata for an encoded vector
+///
+/// Helper struct that bundles everything needed to locate, unpack and decode
+/// the sections of one encoded vector.
+///
+/// Serialization format (stored as raw binary struct):
+///
+///   +------------------------------------------+
+///   |  AlpEncodedVectorInfo (23+ bytes)        |
+///   +------------------------------------------+
+///   |  Offset |  Field              |  Size    |
+///   +---------+---------------------+----------+
+///   |    0    |  exponent (uint8_t) |  1 byte  |
+///   |    1    |  factor (uint8_t)   |  1 byte  |
+///   |    2    |  [padding]          |  6 bytes |
+///   |    8    |  frame_of_reference |  8 bytes |
+///   |   16    |  bit_width (uint8_t)|  1 byte  |
+///   |   17    |  [padding]          |  7 bytes |
+///   |   24    |  bit_packed_size    |  8 bytes |
+///   |   32    |  num_elements       |  2 bytes |
+///   |   34    |  num_exceptions     |  2 bytes |
+///   +------------------------------------------+
+struct AlpEncodedVectorInfo {
+  /// Exponent and factor used for compression
+  AlpExponentAndFactor exponent_and_factor;
+  /// Frame-of-reference base; added back to every unpacked integer on decode
+  uint64_t frame_of_reference = 0;
+  /// Number of bits each value occupies in the bit-packed data
+  uint8_t bit_width = 0;
+  /// Size in bytes of the bit-packed values section
+  uint64_t bit_packed_size = 0;
+  /// Number of elements encoded in this vector
+  uint16_t num_elements = 0;
+  /// Number of exceptions stored in this vector
+  uint16_t num_exceptions = 0;
+
+  /// \brief Serialize this metadata into an output buffer
+  ///
+  /// \param[out] output_buffer the buffer to write the serialized metadata into
+  void Store(arrow::util::span<char> output_buffer) const;
+
+  /// \brief Deserialize vector metadata from an input buffer
+  ///
+  /// \param[in] input_buffer the buffer to load from
+  /// \return the loaded AlpEncodedVectorInfo
+  static AlpEncodedVectorInfo Load(arrow::util::span<const char> input_buffer);
+
+  /// \brief Get serialized size of the encoded vector info
+  ///
+  /// \return the size in bytes
+  static uint64_t GetStoredSize();
+
+  bool operator==(const AlpEncodedVectorInfo& other) const;
+};
+
+// ----------------------------------------------------------------------
+// AlpEncodedVector
+
+/// \class AlpEncodedVector
+/// \brief A compressed ALP vector that owns its data, together with its metadata
+///
+/// Complete serialization format for an ALP compressed vector:
+///
+///   +------------------------------------------------------------+
+///   |  AlpEncodedVector<T> Serialized Layout                     |
+///   +------------------------------------------------------------+
+///   |  Section              |  Size (bytes)        | Description |
+///   +-----------------------+----------------------+-------------+
+///   |  1. VectorInfo        |  sizeof(VectorInfo)  |  Metadata   |
+///   |     (see above)       |  (~36 with padding)  |             |
+///   +-----------------------+----------------------+-------------+
+///   |  2. Packed Values     |  bit_packed_size     |  Bitpacked  |
+///   |     (compressed data) |  (variable)          |  integers   |
+///   +-----------------------+----------------------+-------------+
+///   |  3. Exception Pos     |  num_exceptions * 2  |  uint16_t[] |
+///   |     (indices)         |  (variable)          |  positions  |
+///   +-----------------------+----------------------+-------------+
+///   |  4. Exception Values  |  num_exceptions *    |  T[] (float/|
+///   |     (original floats) |  sizeof(T)           |  double)    |
+///   +------------------------------------------------------------+
+///
+/// Example for 1024 floats with 5 exceptions and bit_width=8:
+///   - VectorInfo:        36 bytes
+///   - Packed Values:    1024 bytes (1024 * 8 bits / 8)
+///   - Exception Pos:      10 bytes (5 * 2)
+///   - Exception Values:   20 bytes (5 * 4)
+///   Total:              1090 bytes
+template <typename T>
+class AlpEncodedVector {
+ public:
+  /// Metadata of the encoded vector
+  AlpEncodedVectorInfo vector_info;
+  /// Bit-packed encoded integers (section 2 of the layout above)
+  arrow::internal::StaticVector<uint8_t, AlpConstants::kAlpVectorSize * sizeof(T)>
+      packed_values;
+  /// Original values whose encode/decode round trip would change them
+  arrow::internal::StaticVector<T, AlpConstants::kAlpVectorSize> exceptions;
+  /// Positions of the exceptions in the decompressed vector (parallel to exceptions)
+  arrow::internal::StaticVector<uint16_t, AlpConstants::kAlpVectorSize> exception_positions;
+
+  /// \brief Get the size of the vector if stored into a sequential memory block
+  ///
+  /// \return the stored size in bytes
+  uint64_t GetStoredSize() const;
+
+  /// \brief Get the stored size for a given vector info
+  ///
+  /// \param[in] info the vector info to calculate size for
+  /// \return the stored size in bytes
+  static uint64_t GetStoredSize(const AlpEncodedVectorInfo& info);
+
+  /// \brief Get the number of elements in this vector
+  ///
+  /// \return vector_info.num_elements, widened to uint64_t
+  uint64_t GetNumElements() const { return vector_info.num_elements; }
+
+  /// \brief Store the compressed vector in a compact format into an output buffer
+  ///
+  /// \param[out] output_buffer the buffer to store the compressed data into
+  void Store(arrow::util::span<char> output_buffer) const;
+
+  /// \brief Load a compressed vector from a compact format from an input buffer
+  ///
+  /// \param[in] input_buffer the buffer to load from
+  /// \return the loaded AlpEncodedVector
+  static AlpEncodedVector Load(arrow::util::span<const char> input_buffer);
+
+  bool operator==(const AlpEncodedVector<T>& other) const;
+};
+
+// ----------------------------------------------------------------------
+// AlpEncodedVectorView
+
+/// \class AlpEncodedVectorView
+/// \brief A zero-copy view into compressed ALP data
+///
+/// Unlike AlpEncodedVector which copies data into internal buffers,
+/// AlpEncodedVectorView holds spans that point directly to the compressed
+/// data buffer. This avoids memory copies during decompression.
+///
+/// Use LoadView() to create a view, then pass it to
+/// AlpCompression<T>::DecompressVectorView(). The buffer must outlive the view.
+template <typename T>
+struct AlpEncodedVectorView {
+  /// Metadata of the encoded vector (copied, small fixed size)
+  AlpEncodedVectorInfo vector_info;
+  /// View into bitpacked data (no copy)
+  arrow::util::span<const uint8_t> packed_values;
+  /// View into exception values (no copy); NOTE(review): confirm alignment for T
+  arrow::util::span<const T> exceptions;
+  /// View into exception positions (no copy); NOTE(review): confirm alignment
+  arrow::util::span<const uint16_t> exception_positions;
+
+  /// \brief Create a zero-copy view from a compact format input buffer
+  ///
+  /// \param[in] input_buffer the buffer to create a view into
+  /// \return the view into the compressed data
+  static AlpEncodedVectorView LoadView(arrow::util::span<const char> input_buffer);
+
+  /// \brief Get the stored size of this vector in the buffer
+  ///
+  /// \return the stored size in bytes
+  uint64_t GetStoredSize() const;
+};
+
+// ----------------------------------------------------------------------
+// AlpBitPackLayout
+
+/// \brief Layout of the bit-packed integers inside an encoded vector
+///
+/// kNormal is the only layout defined so far.
+enum class AlpBitPackLayout { kNormal };
+
+// ----------------------------------------------------------------------
+// AlpEncodingPreset
+
+/// \brief Preset for ALP compression
+///
+/// Helper struct for compression. Before a larger amount of data is compressed,
+/// a preset is generated, which contains multiple combinations of exponents and
+/// factors. For each vector that is compressed, one of the combinations of this
+/// preset is chosen dynamically.
+struct AlpEncodingPreset {
+  /// Candidate (exponent, factor) combinations; one is chosen per vector
+  std::vector<AlpExponentAndFactor> combinations;
+  /// Best compressed size for the preset
+  uint64_t best_compressed_size = 0;
+  /// Bit packing layout used for bitpacking (defaults to kNormal)
+  AlpBitPackLayout bit_pack_layout = AlpBitPackLayout::kNormal;
+};
+
+template <typename T>
+class AlpSampler;
+
+// ----------------------------------------------------------------------
+// AlpCompression
+
+/// \class AlpCompression
+/// \brief ALP compression and decompression facilities
+///
+/// AlpCompression contains all facilities to compress and decompress data with
+/// ALP in a vectorized fashion. First build a preset from a sample of the input
+/// (CreateEncodingPreset() is protected; AlpSampler is its friend), then compress
+/// vector-wise via CompressVector(). Serialize with AlpEncodedVector's facilities.
+///
+/// \tparam T the type of data to be compressed. Currently float and double.
+template <typename T>
+class AlpCompression : private AlpConstants {
+ public:
+  using Constants = AlpTypedConstants<T>;
+  using ExactType = typename Constants::FloatingToExact;
+  using SignedExactType = typename Constants::FloatingToSignedExact;
+  static constexpr uint8_t kExactTypeBitSize = sizeof(T) * 8;  ///< Width of T in bits
+
+  /// \brief Compress a vector of floating point values via ALP
+  ///
+  /// \param[in] input_vector a vector of floats containing input to compress
+  /// \param[in] num_elements the number of values to be compressed
+  /// \param[in] preset the preset to be used for compression
+  /// \return an ALP encoded vector
+  static AlpEncodedVector<T> CompressVector(const T* input_vector,
+                                            uint16_t num_elements,
+                                            const AlpEncodingPreset& preset);
+
+  /// \brief Decompress a compressed vector with ALP
+  ///
+  /// \param[in] encoded_vector the ALP encoded vector to be decompressed
+  /// \param[in] bit_pack_layout the bit packing layout used
+  /// \param[out] output_vector the vector of floats to decompress into.
+  ///             Must be able to contain encoded_vector.GetNumElements().
+  /// \tparam TargetType the type that is used to store the output.
+  ///         May not be a narrowing conversion from T.
+  template <typename TargetType>
+  static void DecompressVector(const AlpEncodedVector<T>& encoded_vector,
+                               AlpBitPackLayout bit_pack_layout,
+                               TargetType* output_vector);
+
+  /// \brief Decompress using a zero-copy view (no copy of the compressed sections)
+  ///
+  /// \param[in] encoded_view the zero-copy view into compressed data
+  /// \param[in] bit_pack_layout the bit packing layout used
+  /// \param[out] output_vector the vector of floats to decompress into.
+  ///             Must be able to contain encoded_view.vector_info.num_elements.
+  /// \tparam TargetType the type that is used to store the output.
+  ///         May not be a narrowing conversion from T.
+  template <typename TargetType>
+  static void DecompressVectorView(const AlpEncodedVectorView<T>& encoded_view,
+                                   AlpBitPackLayout bit_pack_layout,
+                                   TargetType* output_vector);
+
+ protected:
+  /// \brief Creates an EncodingPreset consisting of multiple factors/exponents
+  ///
+  /// \param[in] vectors_sampled the sampled vectors to derive combinations from
+  /// \return the EncodingPreset
+  static AlpEncodingPreset CreateEncodingPreset(
+      const std::vector<std::vector<T>>& vectors_sampled);
+  friend AlpSampler<T>;
+
+ private:
+  /// \brief Create a subsample of floats from an input vector for preset gen
+  ///
+  /// \param[in] input the input vector to sample from
+  /// \return a vector containing a representative subsample of input values
+  static std::vector<T> CreateSample(arrow::util::span<const T> input);
+
+  /// \brief Perform a dry-compression to estimate the compressed size
+  ///
+  /// \param[in] input_vector the input vector to estimate compression for
+  /// \param[in] exponent_and_factor the exponent/factor combination to evaluate
+  /// \param[in] penalize_exceptions if true, applies a penalty for exceptions
+  /// \return the estimated compressed size in bytes, or std::nullopt if the
+  ///         data is not compressible using these settings
+  static std::optional<uint64_t> EstimateCompressedSize(
+      const std::vector<T>& input_vector,
+      AlpExponentAndFactor exponent_and_factor,
+      bool penalize_exceptions);
+
+  /// \brief Find the best exponent and factor combination for an input vector
+  ///
+  /// Iterates through all combinations in the preset and selects the one
+  /// that produces the smallest compressed size.
+  ///
+  /// \param[in] input the input vector to find the best combination for
+  /// \param[in] combinations candidate exponent/factor combinations from preset
+  /// \return the exponent and factor combination yielding best compression
+  static AlpExponentAndFactor FindBestExponentAndFactor(
+      arrow::util::span<const T> input,
+      const std::vector<AlpExponentAndFactor>& combinations);
+
+  /// \brief Helper struct to encapsulate the result from EncodeVector()
+  struct EncodingResult {
+    arrow::internal::StaticVector<SignedExactType, AlpConstants::kAlpVectorSize>
+        encoded_integers;
+    arrow::internal::StaticVector<T, AlpConstants::kAlpVectorSize> exceptions;
+    arrow::internal::StaticVector<uint16_t, AlpConstants::kAlpVectorSize>
+        exception_positions;
+    ExactType min_max_diff = 0;
+    ExactType frame_of_reference = 0;
+  };
+
+  /// \brief Encode a vector via decimal encoding and frame of reference (FOR)
+  ///
+  /// \param[in] input_vector the input vector of floating point values
+  /// \param[in] exponent_and_factor the exponent/factor for decimal encoding
+  /// \return an EncodingResult containing encoded integers, exceptions, etc.
+  static EncodingResult EncodeVector(arrow::util::span<const T> input_vector,
+                                     AlpExponentAndFactor exponent_and_factor);
+
+  /// \brief Undo frame of reference and decode integers back to floating point
+  ///
+  /// \param[out] output_vector output buffer to write decoded floats to
+  /// \param[in] input_vector bit-unpacked integers, frame of reference NOT yet undone
+  /// \param[in] vector_info metadata with frame of reference, exponent and factor
+  /// \tparam TargetType the type that is used to store the output.
+  ///         May not be a narrowing conversion from T.
+  template <typename TargetType>
+  static void DecodeVector(TargetType* output_vector,
+                           arrow::util::span<ExactType> input_vector,
+                           AlpEncodedVectorInfo vector_info);
+
+  /// \brief Helper struct to encapsulate the result from BitPackIntegers
+  struct BitPackingResult {
+    arrow::internal::StaticVector<uint8_t, AlpConstants::kAlpVectorSize * sizeof(T)>
+        packed_integers;
+    uint8_t bit_width = 0;
+    uint64_t bit_packed_size = 0;
+  };
+
+  /// \brief Bitpack the encoded integers as the final step of compression
+  ///
+  /// Calculates the minimum bit width required and packs each value
+  /// using that many bits, resulting in tightly packed binary data.
+  ///
+  /// \param[in] integers the encoded integers (after FOR subtraction)
+  /// \param[in] min_max_diff the difference between max and min values,
+  ///            used to determine the required bit width
+  /// \return a BitPackingResult with packed bytes, bit width, and packed size
+  static BitPackingResult BitPackIntegers(
+      arrow::util::span<const SignedExactType> integers, uint64_t min_max_diff);
+
+  /// \brief Unpack bitpacked integers back to their original representation
+  ///
+  /// The result is still encoded (FOR applied) and needs decoding to get floats.
+  ///
+  /// \param[in] packed_integers the bitpacked integer data to unpack
+  /// \param[in] vector_info metadata with bit width and unpacking parameters
+  /// \return a vector of unpacked integers (still FOR-encoded; see DecodeVector())
+  static arrow::internal::StaticVector<ExactType, kAlpVectorSize> BitUnpackIntegers(
+      arrow::util::span<const uint8_t> packed_integers,
+      AlpEncodedVectorInfo vector_info);
+
+  /// \brief Patch exceptions into the decoded output vector
+  ///
+  /// Replaces placeholder values at exception positions with the original
+  /// floating point values that could not be losslessly encoded.
+  ///
+  /// \param[out] output the decoded output vector to patch exceptions into
+  /// \param[in] exceptions the original floats stored as exceptions
+  /// \param[in] exception_positions indices where exceptions should be placed
+  /// \tparam TargetType the type that is used to store the output.
+  ///         May not be a narrowing conversion from T.
+  template <typename TargetType>
+  static void PatchExceptions(TargetType* output,
+                              arrow::util::span<const T> exceptions,
+                              arrow::util::span<const uint16_t> exception_positions);
+};
+
+}  // namespace alp
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/alp/AlpConstants.h b/cpp/src/arrow/util/alp/AlpConstants.h
new file mode 100644
index 00000000000..5a44e8f7a95
--- /dev/null
+++ b/cpp/src/arrow/util/alp/AlpConstants.h
@@ -0,0 +1,256 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Constants and type traits for ALP compression
+
+#pragma once
+
+#include <cstdint>
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace util {
+namespace alp {
+
+// ----------------------------------------------------------------------
+// AlpConstants
+
+/// \brief Constants used throughout ALP compression
+class AlpConstants {
+ public:
+  /// Number of elements compressed together as a unit. Fixed for compatibility.
+  static constexpr uint64_t kAlpVectorSize = 1024;
+
+  /// Number of elements to use when determining sampling parameters.
+  static constexpr uint64_t kSamplerVectorSize = 4096;
+
+  /// Total number of elements in a rowgroup for sampling purposes.
+  static constexpr uint64_t kSamplerRowgroupSize = 122880;
+
+  /// Number of samples to collect per vector during the sampling phase.
+  static constexpr uint64_t kSamplerSamplesPerVector = 256;
+
+  /// Number of sample vectors to collect per rowgroup.
+  static constexpr uint64_t kSamplerSampleVectorsPerRowgroup = 8;
+
+  /// Version number for the ALP compression format.
+  static constexpr uint64_t kAlpVersion = 1;
+
+  /// Type used to store exception positions within a compressed vector.
+  using PositionType = uint16_t;
+
+  /// Threshold for early exit during sampling when compression quality is poor.
+  static constexpr uint8_t kSamplingEarlyExitThreshold = 4;
+
+  /// Maximum number of exponent-factor combinations to try during compression.
+  static constexpr uint8_t kMaxCombinations = 5;
+
+  /// Loop unroll factor for tight loops in ALP compression/decompression.
+  /// ALP has multiple tight loops that profit from unrolling. Setting this
+  /// might affect performance, so benchmarking is recommended.
+  static constexpr uint64_t kLoopUnrolls = 4;
+
+  /// \brief Look up a power of ten in a 20-entry uint64_t table
+  ///
+  /// \param[in] power the exponent (must be <= 19)
+  /// \return 10^power as uint64_t (10^19 is the largest power that fits)
+  static uint64_t PowerOfTenUB8(const uint8_t power) {
+    ARROW_DCHECK(power <= 19) << "power_out_of_range: " << static_cast<int>(power);
+    static constexpr uint64_t kTable[20] = {1,
+                                            10,
+                                            100,
+                                            1'000,
+                                            10'000,
+                                            100'000,
+                                            1'000'000,
+                                            10'000'000,
+                                            100'000'000,
+                                            1'000'000'000,
+                                            10'000'000'000,
+                                            100'000'000'000,
+                                            1'000'000'000'000,
+                                            10'000'000'000'000,
+                                            100'000'000'000'000,
+                                            1'000'000'000'000'000,
+                                            10'000'000'000'000'000,
+                                            100'000'000'000'000'000,
+                                            1'000'000'000'000'000'000,
+                                            10'000'000'000'000'000'000ULL};
+
+    return kTable[power];
+  }
+
+  /// \brief Look up a power of ten in a 21-entry float table
+  ///
+  /// \param[in] power the exponent (must be in range [-10, 10])
+  /// \return 10^power as float
+  static float PowerOfTenFloat(int8_t power) {
+    ARROW_DCHECK(power >= -10 && power <= 10)
+        << "power_out_of_range: " << static_cast<int>(power);
+    static constexpr float kTable[21] = {
+        0.0000000001F, 0.000000001F,  0.00000001F,   0.0000001F, 0.000001F,
+        0.00001F,      0.0001F,       0.001F,        0.01F,      0.1F,
+        1.0F,          10.0F,         100.0F,        1000.0F,    10000.0F,
+        100000.0F,     1000000.0F,    10000000.0F,   100000000.0F,
+        1000000000.0F, 10000000000.0F};
+    const int table_index = power + 10;  // power == -10 is the first entry
+    return kTable[table_index];
+  }
+
+  /// \brief Look up a power of ten in a 41-entry double table
+  ///
+  /// \param[in] power the exponent (must be in range [-20, 20])
+  /// \return 10^power as double
+  static double PowerOfTenDouble(const int8_t power) {
+    ARROW_DCHECK(power >= -20 && power <= 20)
+        << "power_out_of_range: " << static_cast<int>(power);
+    static constexpr double kTable[41] = {
+        0.00000000000000000001,
+        0.0000000000000000001,
+        0.000000000000000001,
+        0.00000000000000001,
+        0.0000000000000001,
+        0.000000000000001,
+        0.00000000000001,
+        0.0000000000001,
+        0.000000000001,
+        0.00000000001,
+        0.0000000001,
+        0.000000001,
+        0.00000001,
+        0.0000001,
+        0.000001,
+        0.00001,
+        0.0001,
+        0.001,
+        0.01,
+        0.1,
+        1.0,
+        10.0,
+        100.0,
+        1000.0,
+        10000.0,
+        100000.0,
+        1000000.0,
+        10000000.0,
+        100000000.0,
+        1000000000.0,
+        10000000000.0,
+        100000000000.0,
+        1000000000000.0,
+        10000000000000.0,
+        100000000000000.0,
+        1000000000000000.0,
+        10000000000000000.0,
+        100000000000000000.0,
+        1000000000000000000.0,
+        10000000000000000000.0,
+        100000000000000000000.0,
+    };
+    return kTable[power + 20];  // offset 20: power == -20 is the first entry
+  }
+
+  /// \brief Get 10^power as int64_t (power must be in [0, 18]; 10^19 overflows)
+  static int64_t GetFactor(const int8_t power) {
+    ARROW_DCHECK(power >= 0 && power <= 18) << "power_out_of_range: " << int{power};
+    return static_cast<int64_t>(PowerOfTenUB8(static_cast<uint8_t>(power)));
+  }
+};
+
+// ----------------------------------------------------------------------
+// AlpTypedConstants
+
+/// \brief Type-specific constants for ALP compression (primary template is empty)
+/// \tparam FloatingPointType the floating point type (float or double)
+template <typename FloatingPointType>
+struct AlpTypedConstants {};
+
+/// \brief Type-specific constants for float
+template <>
+struct AlpTypedConstants<float> {
+  /// Magic number used for fast rounding of floats to nearest integer:
+  /// rounded(n) = static_cast<int32_t>(n + kMagicNumber - kMagicNumber).
+  static constexpr float kMagicNumber = 12582912.0f;  // 2^22 + 2^23
+
+  /// Matches the [-10, 10] exponent range of AlpConstants::PowerOfTenFloat().
+  static constexpr uint8_t kMaxExponent = 10;
+
+  /// Largest float value that can be safely converted to int32.
+  static constexpr float kEncodingUpperLimit = 2147483520.0f;
+  static constexpr float kEncodingLowerLimit = -2147483520.0f;
+
+  /// \brief Get exponent multiplier
+  ///
+  /// \param[in] power the exponent (must be <= 10)
+  /// \return 10^power as float
+  static float GetExponent(const uint8_t power) {
+    return AlpConstants::PowerOfTenFloat(static_cast<int8_t>(power));
+  }
+
+  /// \brief Get factor multiplier
+  ///
+  /// \param[in] power the factor (must be <= 10)
+  /// \return 10^(-power) as float
+  static float GetFactor(const uint8_t power) {
+    // Cast before and after negating: unary minus promotes int8_t to int.
+    return AlpConstants::PowerOfTenFloat(
+        static_cast<int8_t>(-static_cast<int8_t>(power)));
+  }
+
+  using FloatingToExact = uint32_t;
+  using FloatingToSignedExact = int32_t;
+};
+
+/// \brief Type-specific constants for double
+template <>
+struct AlpTypedConstants<double> {
+  // Declared as a struct to match the primary template's class-key.
+  /// Magic number used for fast rounding of doubles to nearest integer:
+  /// rounded(n) = static_cast<int64_t>(n + kMagicNumber - kMagicNumber).
+  static constexpr double kMagicNumber = 6755399441055744.0;  // 2^51 + 2^52
+
+  static constexpr uint8_t kMaxExponent = 18;  // largest power of ten fitting int64_t
+
+  /// Largest double value that can be safely converted to int64.
+  static constexpr double kEncodingUpperLimit = 9223372036854774784.0;
+  static constexpr double kEncodingLowerLimit = -9223372036854774784.0;
+
+  /// \brief Get exponent multiplier
+  ///
+  /// \param[in] power the exponent (must be <= 20)
+  /// \return 10^power as double
+  static double GetExponent(const uint8_t power) {
+    return AlpConstants::PowerOfTenDouble(static_cast<int8_t>(power));
+  }
+
+  /// \brief Get factor multiplier
+  ///
+  /// \param[in] power the factor (must be <= 20)
+  /// \return 10^(-power) as double
+  static double GetFactor(const uint8_t power) {
+    return AlpConstants::PowerOfTenDouble(
+        static_cast<int8_t>(-static_cast<int8_t>(power)));
+  }
+
+  using FloatingToExact = uint64_t;
+  using FloatingToSignedExact = int64_t;
+};
+
+}  // namespace alp
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/alp/AlpSampler.cc b/cpp/src/arrow/util/alp/AlpSampler.cc
new file mode 100644
index 00000000000..6a2c7a90dcf
--- /dev/null
+++ b/cpp/src/arrow/util/alp/AlpSampler.cc
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/alp/AlpSampler.h"
+
+#include <cmath>
+
+#include "arrow/util/alp/Alp.h"
+#include "arrow/util/alp/AlpConstants.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace util {
+namespace alp {
+
+// ----------------------------------------------------------------------
+// AlpSampler implementation
+
+template <typename T>
+AlpSampler<T>::AlpSampler()
+    : sample_vector_size_(AlpConstants::kSamplerVectorSize),
+      rowgroup_size_(AlpConstants::kSamplerRowgroupSize),
+      samples_per_vector_(AlpConstants::kSamplerSamplesPerVector),
+      sample_vectors_per_rowgroup_(AlpConstants::kSamplerSampleVectorsPerRowgroup),
+      rowgroup_sample_jump_((rowgroup_size_ / sample_vectors_per_rowgroup_) /
+                            sample_vector_size_) {}  // (122880 / 8) / 4096 == 3
+
+template <typename T>
+void AlpSampler<T>::AddSample(arrow::util::span<const T> input) {
+  for (uint64_t begin = 0; begin < input.size(); begin += sample_vector_size_) {
+    AddSampleVector({input.data() + begin,
+                     std::min(input.size() - begin, sample_vector_size_)});
+  }  // one AddSampleVector() call per chunk of <= sample_vector_size_ values
+}
+
+template <typename T>
+void AlpSampler<T>::AddSampleVector(arrow::util::span<const T> input) {
+  // Evaluate the skip rule with the counters as they were before this vector.
+  const bool must_skip_current_vector = MustSkipSamplingFromCurrentVector(
+      vectors_count_, vectors_sampled_count_, input.size());
+
+  vectors_count_ += 1;
+  total_values_count_ += input.size();
+  if (must_skip_current_vector) {
+    return;
+  }
+
+  const AlpSamplingParameters sampling_params = GetAlpSamplingParameters(input.size());
+
+  // Slice: keep only the first num_lookup_value elements.
+  const size_t lookup_count =
+      std::min<size_t>(sampling_params.num_lookup_value, input.size());
+  std::vector<T> current_vector_values(input.begin(), input.begin() + lookup_count);
+
+  // Stride: take every num_sampled_increments-th element (final size is known).
+  std::vector<T> current_vector_sample;
+  current_vector_sample.reserve(sampling_params.num_sampled_values);
+  for (size_t i = 0; i < lookup_count; i += sampling_params.num_sampled_increments) {
+    current_vector_sample.push_back(current_vector_values[i]);
+  }
+  sample_stored_ += current_vector_sample.size();
+
+  complete_vectors_sampled_.push_back(std::move(current_vector_values));
+  rowgroup_sample_.push_back(std::move(current_vector_sample));
+  vectors_sampled_count_++;
+}
+
+template <typename T>
+typename AlpSampler<T>::AlpSamplerResult AlpSampler<T>::Finalize() {
+  // Report how much of the input ended up in the sample.
+  ARROW_LOG(DEBUG) << "AlpSampler finalized: vectorsSampled=" << vectors_sampled_count_
+                   << "/" << vectors_count_ << " total"
+                   << ", valuesSampled=" << sample_stored_ << "/" << total_values_count_
+                   << " total";
+
+  // Derive the preset from the strided per-vector samples.
+  AlpSamplerResult finalized;
+  finalized.alp_preset = AlpCompression<T>::CreateEncodingPreset(rowgroup_sample_);
+  const auto& preset = finalized.alp_preset;
+  ARROW_LOG(DEBUG) << "AlpSampler preset: " << preset.combinations.size()
+                   << " exponent/factor combinations"
+                   << ", estimatedSize=" << preset.best_compressed_size << " bytes";
+  return finalized;
+}
+
+template <typename T>
+typename AlpSampler<T>::AlpSamplingParameters AlpSampler<T>::GetAlpSamplingParameters(
+    uint64_t num_current_vector_values) {
+  const uint64_t num_lookup_values =
+      std::min(num_current_vector_values,
+               static_cast<uint64_t>(AlpConstants::kAlpVectorSize));
+  // Sample equidistant values within a vector; jump a fixed number of values.
+  // Both quotients are rounded up with exact integer arithmetic rather than
+  // via a round trip through double and std::ceil.
+  const uint64_t num_sampled_increments = std::max<uint64_t>(
+      1, (num_lookup_values + samples_per_vector_ - 1) / samples_per_vector_);
+  const uint64_t num_sampled_values =
+      (num_lookup_values + num_sampled_increments - 1) / num_sampled_increments;
+
+  ARROW_CHECK(num_sampled_values < AlpConstants::kAlpVectorSize) << "alp_sample_too_large";
+
+  return AlpSamplingParameters{num_lookup_values, num_sampled_increments,
+                               num_sampled_values};
+}
+
+template <typename T>
+bool AlpSampler<T>::MustSkipSamplingFromCurrentVector(
+    const uint64_t vectors_count, const uint64_t vectors_sampled_count,
+    const uint64_t num_current_vector_values) {
+  // Sample equidistant vectors: only every rowgroup_sample_jump_-th vector
+  // (counting from zero) is a candidate, all others are skipped outright.
+  const bool is_candidate = (vectors_count % rowgroup_sample_jump_) == 0;
+  if (!is_candidate) {
+    return true;
+  }
+
+  // A candidate with fewer values than kSamplerSamplesPerVector (usually the
+  // last vector) is skipped too, unless nothing has been sampled yet, i.e.
+  // in the case of too little data.
+  const bool is_short_vector =
+      num_current_vector_values < AlpConstants::kSamplerSamplesPerVector;
+  const bool has_earlier_samples = vectors_sampled_count != 0;
+
+  return is_short_vector && has_earlier_samples;
+}
+
+// ----------------------------------------------------------------------
+// Template instantiations
+
+template class AlpSampler<float>;
+template class AlpSampler<double>;
+
+}  // namespace alp
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/alp/AlpSampler.h b/cpp/src/arrow/util/alp/AlpSampler.h
new file mode 100644
index 00000000000..5b9fdb47d44
--- /dev/null
+++ b/cpp/src/arrow/util/alp/AlpSampler.h
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// ALP sampler for collecting samples and creating encoding presets
+
+#pragma once
+
+#include <optional>
+#include <vector>
+
+#include "arrow/util/alp/Alp.h"
+#include "arrow/util/span.h"
+
+namespace arrow {
+namespace util {
+namespace alp {
+
+// ----------------------------------------------------------------------
+// AlpSampler
+
+/// \class AlpSampler
+/// \brief Collects samples from data to be compressed with ALP
+///
+/// Usage: Call AddSample() or AddSampleVector() multiple times to collect
+/// samples, then call Finalize() to retrieve the resulting preset.
+///
+/// \tparam T the floating point type (float or double) to sample
+template <typename T>
+class AlpSampler {
+ public:
+  /// \brief Construct a sampler configured from the AlpConstants::kSampler* values
+  AlpSampler();
+
+  /// \brief Helper struct containing the preset for ALP compression
+  struct AlpSamplerResult {
+    AlpEncodingPreset alp_preset;
+  };
+
+  /// \brief Add a sample of arbitrary size
+  ///
+  /// The input is cut into chunks of AlpConstants::kSamplerVectorSize values and
+  /// AddSampleVector() is called on each chunk.
+  ///
+  /// \param[in] input the input data to sample from
+  void AddSample(arrow::util::span<const T> input);
+
+  /// \brief Add a single vector as a sample
+  ///
+  /// \param[in] input the input vector to add.
+  ///            Only the first AlpConstants::kAlpVectorSize values are sampled.
+  void AddSampleVector(arrow::util::span<const T> input);
+
+  /// \brief Finalize sampling and generate the encoding preset
+  ///
+  /// \return an AlpSamplerResult containing the generated encoding preset
+  AlpSamplerResult Finalize();
+
+ private:
+  /// \brief Helper struct to encapsulate settings used for sampling
+  struct AlpSamplingParameters {
+    uint64_t num_lookup_value;
+    uint64_t num_sampled_increments;
+    uint64_t num_sampled_values;
+  };
+
+  /// \brief Calculate sampling parameters for the current vector
+  ///
+  /// \param[in] num_current_vector_values number of values in current vector
+  /// \return the sampling parameters to use
+  AlpSamplingParameters GetAlpSamplingParameters(uint64_t num_current_vector_values);
+
+  /// \brief Check if the current vector must be ignored for sampling
+  ///
+  /// \param[in] vectors_count the number of vectors processed before this one
+  /// \param[in] vectors_sampled_count the number of vectors sampled so far
+  /// \param[in] num_current_vector_values number of values in current vector
+  /// \return true if the current vector should be skipped, false otherwise
+  bool MustSkipSamplingFromCurrentVector(uint64_t vectors_count,
+                                         uint64_t vectors_sampled_count,
+                                         uint64_t num_current_vector_values);
+
+  /// Count of vectors that have been sampled
+  uint64_t vectors_sampled_count_ = 0;
+  /// Total count of values processed
+  uint64_t total_values_count_ = 0;
+  /// Total count of vectors processed
+  uint64_t vectors_count_ = 0;
+  /// Number of samples stored
+  uint64_t sample_stored_ = 0;
+  /// Samples collected from current rowgroup
+  std::vector<std::vector<T>> rowgroup_sample_;
+
+  /// Copy of the looked-up leading values of every sampled vector
+  std::vector<std::vector<T>> complete_vectors_sampled_;
+  /// Size of each sample vector
+  const uint64_t sample_vector_size_;
+  /// Size of each rowgroup
+  const uint64_t rowgroup_size_;
+  /// Number of samples to take per vector
+  const uint64_t samples_per_vector_;
+  /// Number of vectors to sample per rowgroup
+  const uint64_t sample_vectors_per_rowgroup_;
+  /// Jump interval for rowgroup sampling: only every n-th vector is a candidate
+  const uint64_t rowgroup_sample_jump_;
+};
+
+}  // namespace alp
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/alp/AlpWrapper.cc b/cpp/src/arrow/util/alp/AlpWrapper.cc
new file mode 100644
index 00000000000..363b046a388
--- /dev/null
+++ b/cpp/src/arrow/util/alp/AlpWrapper.cc
@@ -0,0 +1,310 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/alp/AlpWrapper.h"
+
+#include <cmath>
+#include <optional>
+
+#include "arrow/util/alp/Alp.h"
+#include "arrow/util/alp/AlpConstants.h"
+#include "arrow/util/alp/AlpSampler.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace util {
+namespace alp {
+
+namespace {
+
+// ----------------------------------------------------------------------
+// CompressionBlockHeader
+
+/// \brief Header structure for ALP compression blocks
+///
+/// Contains metadata required to decompress the data.
+///
+/// Serialization format (version 1):
+///
+///   +---------------------------------------------------+
+///   |  CompressionBlockHeader (40 bytes)                |
+///   +---------------------------------------------------+
+///   |  Offset |  Field              |  Size             |
+///   +---------+---------------------+-------------------+
+///   |    0    |  version            |  8 bytes (uint64) |
+///   |    8    |  compressed_size    |  8 bytes (uint64) |
+///   |   16    |  num_elements       |  8 bytes (uint64) |
+///   |   24    |  vector_size        |  8 bytes (uint64) |
+///   |   32    |  compression_mode   |  4 bytes (enum)   |
+///   |   36    |  bit_pack_layout    |  4 bytes (enum)   |
+///   +---------------------------------------------------+
+///
+/// \note version must remain the first field to allow reading the rest
+///       of the header based on version number.
+struct CompressionBlockHeader {
+  /// Version number. Must remain the first field for version-based parsing.
+  uint64_t version = 0;
+  /// Size of the compressed data in bytes (includes header).
+  uint64_t compressed_size = 0;
+  /// Number of elements in the compressed data.
+  uint64_t num_elements = 0;
+  /// Vector size used for compression.
+  /// Must be AlpConstants::kAlpVectorSize for decompression.
+  uint64_t vector_size = 0;
+  /// Compression mode (currently only kAlp is supported).
+  AlpMode compression_mode = AlpMode::kAlp;
+  /// Bit packing layout used for bitpacking.
+  AlpBitPackLayout bit_pack_layout = AlpBitPackLayout::kNormal;
+
+  /// \brief Get the size in bytes of the CompressionBlockHeader for a version
+  ///
+  /// \param[in] v the version number (only version 1 is known; others abort)
+  /// \return the size in bytes
+  static size_t GetSizeForVersion(uint64_t v) {
+    // Abort on versions this code does not know; no size can be derived for them.
+    ARROW_CHECK(v == 1) << "unknown_version: " << v;
+    // Version 1 stores all six fields back to back. The result is a compile-time
+    // constant, so no variable is left uninitialized on any path.
+    constexpr size_t kVersion1Size =
+        sizeof(version) + sizeof(compressed_size) + sizeof(num_elements) +
+        sizeof(vector_size) + sizeof(compression_mode) + sizeof(bit_pack_layout);
+    return kVersion1Size;
+  }
+
+  /// \brief Check whether the given version is valid
+  ///
+  /// \param[in] v the version to check
+  /// \return the version if valid; aborts via ARROW_CHECK otherwise
+  static uint64_t IsValidVersion(uint64_t v) {
+    // Despite the "Is" prefix the result is the version itself rather than a
+    // bool, so callers can validate and assign in a single expression.
+    // ARROW_CHECK aborts on failure, so no dummy return value is needed.
+    ARROW_CHECK(v == 1) << "invalid_version: " << v;
+    return v;
+  }
+};
+
+}  // namespace
+
+// ----------------------------------------------------------------------
+// AlpWrapper::CompressionBlockHeader definition
+
+template <typename T>
+struct AlpWrapper<T>::CompressionBlockHeader : public ::arrow::util::alp::CompressionBlockHeader {
+};  // No extra members: only nests the file-local header type inside AlpWrapper<T>.
+
+// ----------------------------------------------------------------------
+// AlpWrapper implementation
+
+template <typename T>
+typename AlpWrapper<T>::CompressionBlockHeader AlpWrapper<T>::LoadHeader(
+    const char* comp, size_t comp_size) {
+  using BaseHeader = ::arrow::util::alp::CompressionBlockHeader;
+  CompressionBlockHeader header{};
+  ARROW_CHECK(comp_size > sizeof(header.version))
+      << "alp_loadHeader_compSize_too_small_for_header_version";
+  uint64_t version;  // Leading field; selects the layout of everything after it.
+  std::memcpy(&version, comp, sizeof(header.version));
+  BaseHeader::IsValidVersion(version);
+  const size_t header_size = BaseHeader::GetSizeForVersion(version);
+  ARROW_CHECK(comp_size >= header_size) << "alp_loadHeader_compSize_too_small";
+  std::memcpy(&header, comp, header_size);
+  return header;
+}
+
+template <typename T>
+void AlpWrapper<T>::Encode(const T* decomp, size_t decomp_size, char* comp,
+                           size_t* comp_size, std::optional<AlpMode> enforce_mode) {
+  ARROW_CHECK(decomp_size % sizeof(T) == 0) << "alp_encode_input_must_be_multiple_of_T";
+  using BaseHeader = ::arrow::util::alp::CompressionBlockHeader;
+  const uint64_t element_count = decomp_size / sizeof(T);
+  const uint64_t version = BaseHeader::IsValidVersion(AlpConstants::kAlpVersion);
+  const size_t header_size = BaseHeader::GetSizeForVersion(version);
+  // Without room for the header, the size subtraction below would wrap around.
+  ARROW_CHECK(*comp_size >= header_size) << "alp_encode_output_too_small_for_header";
+
+  AlpSampler<T> sampler;
+  sampler.AddSample({decomp, element_count});
+  const auto sampling_result = sampler.Finalize();
+
+  // The body goes behind the header, which is written last once sizes are known.
+  const CompressionProgress progress =
+      EncodeAlp(decomp, element_count, comp + header_size, *comp_size - header_size,
+                sampling_result.alp_preset);
+  // No bytes for non-empty input means EncodeAlp() gave up; report that as size 0.
+  if (element_count > 0 && progress.num_compressed_bytes_produced == 0) {
+    *comp_size = 0;
+    return;
+  }
+
+  CompressionBlockHeader header{};
+  header.version = version;
+  header.compressed_size = header_size + progress.num_compressed_bytes_produced;
+  header.num_elements = element_count;
+  header.vector_size = AlpConstants::kAlpVectorSize;
+  header.compression_mode = AlpMode::kAlp;
+  header.bit_pack_layout = AlpBitPackLayout::kNormal;
+
+  std::memcpy(comp, &header, header_size);
+  *comp_size = header.compressed_size;
+}
+
+template <typename T>
+template <typename TargetType>
+void AlpWrapper<T>::Decode(TargetType* decomp, size_t* decomp_size, const char* comp,
+                           size_t comp_size) {
+  const CompressionBlockHeader header = LoadHeader(comp, comp_size);
+  ARROW_CHECK(header.vector_size == AlpConstants::kAlpVectorSize)
+      << "unsupported_vector_size: " << header.vector_size;
+
+  // num_elements is read from the input: compare by division, because the
+  // product num_elements * sizeof(TargetType) could wrap and pass the check.
+  if (header.num_elements > *decomp_size / sizeof(TargetType)) {
+    *decomp_size = 0;
+    return;
+  }
+
+  const size_t header_size =
+      ::arrow::util::alp::CompressionBlockHeader::GetSizeForVersion(header.version);
+  const char* compression_body = comp + header_size;
+  const uint64_t compression_body_size = comp_size - header_size;
+
+  ARROW_CHECK(header.compression_mode == AlpMode::kAlp) << "alp_decode_unsupported_mode";
+
+  const uint64_t elements_decoded =
+      DecodeAlp(decomp, header.num_elements, compression_body, compression_body_size,
+                header.bit_pack_layout)
+          .num_decompressed_elements_produced;
+  *decomp_size = elements_decoded * sizeof(TargetType);
+}
+
+template void AlpWrapper<float>::Decode(float* decomp, size_t* decomp_size,
+                                        const char* comp, size_t comp_size);
+template void AlpWrapper<float>::Decode(double* decomp, size_t* decomp_size,
+                                        const char* comp, size_t comp_size);
+template void AlpWrapper<double>::Decode(double* decomp, size_t* decomp_size,
+                                         const char* comp, size_t comp_size);
+
+template <typename T>
+template <typename TargetType>
+uint64_t AlpWrapper<T>::GetDecompressedSize(const char* comp, uint64_t comp_size) {
+  // Only the header is inspected; the size follows from its element count.
+  return LoadHeader(comp, comp_size).num_elements * sizeof(TargetType);
+}
+
+template uint64_t AlpWrapper<float>::GetDecompressedSize<float>(const char* comp,
+                                                                uint64_t comp_size);
+template uint64_t AlpWrapper<float>::GetDecompressedSize<double>(const char* comp,
+                                                                 uint64_t comp_size);
+template uint64_t AlpWrapper<double>::GetDecompressedSize<double>(const char* comp,
+                                                                  uint64_t comp_size);
+
+template <typename T>
+uint64_t AlpWrapper<T>::GetMaxCompressedSize(uint64_t decomp_size) {
+  ARROW_CHECK(decomp_size % sizeof(T) == 0)
+      << "alp_decompressed_size_not_multiple_of_T";
+  const uint64_t element_count = decomp_size / sizeof(T);
+  const uint64_t version = ::arrow::util::alp::CompressionBlockHeader::IsValidVersion(
+      AlpConstants::kAlpVersion);
+  uint64_t max_alp_size =
+      ::arrow::util::alp::CompressionBlockHeader::GetSizeForVersion(version);
+  // Add header sizes: one AlpEncodedVectorInfo per (possibly partial) vector.
+  const uint64_t vector_count =
+      (element_count + AlpConstants::kAlpVectorSize - 1) / AlpConstants::kAlpVectorSize;
+  max_alp_size += sizeof(AlpEncodedVectorInfo) * vector_count;
+  // Worst case: everything is an exception, except two values that are chosen
+  // with large difference to make FOR encoding for placeholders impossible.
+  // Values/placeholders.
+  max_alp_size += element_count * sizeof(T);
+  // Exceptions.
+  max_alp_size += element_count * sizeof(T);
+  // Exception positions.
+  max_alp_size += element_count * sizeof(AlpConstants::PositionType);
+
+  return max_alp_size;
+}
+
+template <typename T>
+auto AlpWrapper<T>::EncodeAlp(const T* decomp, uint64_t element_count, char* comp,
+                              size_t comp_size, const AlpEncodingPreset& combinations)
+    -> CompressionProgress {
+  uint64_t bytes_written = 0;
+  uint64_t elements_consumed = 0;
+
+  // Compress one vector of up to kAlpVectorSize elements per iteration.
+  while (elements_consumed < element_count) {
+    const uint64_t batch_size =
+        std::min(AlpConstants::kAlpVectorSize, element_count - elements_consumed);
+    const uint64_t bytes_available = comp_size - bytes_written;
+    const AlpEncodedVector<T> encoded_vector = AlpCompression<T>::CompressVector(
+        decomp + elements_consumed, batch_size, combinations);
+
+    // Give up (reporting no progress at all) as soon as a vector is empty or
+    // does not fit into what is left of the output buffer.
+    const uint64_t stored_size = encoded_vector.GetStoredSize();
+    if (stored_size == 0 || stored_size > bytes_available) {
+      return CompressionProgress{0, 0};
+    }
+
+    ARROW_CHECK(stored_size <= bytes_available)
+        << "alp_encode_cannot_store_compressed_vector";
+
+    encoded_vector.Store({comp + bytes_written, bytes_available});
+    bytes_written += stored_size;
+    elements_consumed += batch_size;
+  }
+  return CompressionProgress{bytes_written, elements_consumed};
+}
+
+template <typename T>
+template <typename TargetType>
+auto AlpWrapper<T>::DecodeAlp(TargetType* decomp, size_t decomp_element_count,
+                              const char* comp, size_t comp_size,
+                              AlpBitPackLayout bit_pack_layout) -> DecompressionProgress {
+  uint64_t input_offset = 0;
+  uint64_t output_offset = 0;
+  while (input_offset < comp_size && output_offset < decomp_element_count) {
+    // Use zero-copy view to avoid memory allocation and copying
+    const AlpEncodedVectorView<T> encoded_view =
+        AlpEncodedVectorView<T>::LoadView({comp + input_offset, comp_size - input_offset});
+    const uint64_t compressed_size = encoded_view.GetStoredSize();
+    const uint64_t element_count = encoded_view.vector_info.num_elements;
+
+    // Malformed data must not stall this loop or push the offset past comp_size.
+    ARROW_CHECK(compressed_size > 0 && compressed_size <= comp_size - input_offset)
+        << "alp_decode_invalid_vector_size: " << compressed_size;
+    ARROW_CHECK(output_offset + element_count <= decomp_element_count)
+        << "alp_decode_output_too_small: " << output_offset << " vs " << element_count
+        << " vs " << decomp_element_count;
+    AlpCompression<T>::DecompressVectorView(encoded_view, bit_pack_layout,
+                                            decomp + output_offset);
+    input_offset += compressed_size;
+    output_offset += element_count;
+  }
+  return DecompressionProgress{output_offset, input_offset};
+}
+
+// ----------------------------------------------------------------------
+// Template instantiations
+
+template class AlpWrapper<float>;
+template class AlpWrapper<double>;
+
+}  // namespace alp
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/alp/AlpWrapper.h b/cpp/src/arrow/util/alp/AlpWrapper.h
new file mode 100644
index 00000000000..19aa2a3a4bb
--- /dev/null
+++ b/cpp/src/arrow/util/alp/AlpWrapper.h
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// High-level wrapper interface for ALP compression
+
+#pragma once
+
+#include <cstddef>
+#include <optional>
+
+#include "arrow/util/alp/Alp.h"
+
+namespace arrow {
+namespace util {
+namespace alp {
+
+// ----------------------------------------------------------------------
+// AlpWrapper
+
+/// \class AlpWrapper
+/// \brief High-level interface for ALP compression
+///
+/// AlpWrapper is an interface for Adaptive Lossless floating-Point Compression
+/// (ALP) (https://dl.acm.org/doi/10.1145/3626717). For encoding, it samples
+/// the data and applies decimal compression (Alp) to floating point values.
+/// This class acts as a wrapper around the vector-based interfaces of
+/// AlpSampler and Alp.
+///
+/// \tparam T the floating point type (float or double)
+template <typename T>
+class AlpWrapper {
+ public:
+  /// \brief Encode floating point values using ALP decimal compression
+  ///
+  /// \param[in] decomp pointer to the input that is to be encoded
+  /// \param[in] decomp_size size of decomp in bytes.
+  ///            This needs to be a multiple of sizeof(T).
+  /// \param[out] comp pointer to the memory region we will encode into.
+  ///             The caller is responsible for ensuring this is big enough.
+  /// \param[in,out] comp_size the actual size of the encoded data in bytes,
+  ///                expects the size of comp as input. If this is too small,
+  ///                this is set to 0 and we bail out.
+  /// \param[in] enforce_mode reserved for future use.
+  ///            Currently only AlpMode::kAlp is supported.
+  static void Encode(const T* decomp, size_t decomp_size, char* comp,
+                     size_t* comp_size,
+                     std::optional<AlpMode> enforce_mode = std::nullopt);
+
+  /// \brief Decode floating point values
+  ///
+  /// \param[out] decomp pointer to the memory region we will decode into.
+  ///             The caller is responsible for ensuring this is big enough.
+  /// \param[in,out] decomp_size the actual size of decoded data in bytes,
+  ///                expects the decomp size as input.
+  /// \param[in] comp pointer to the input that is to be decoded
+  /// \param[in] comp_size size of the input in bytes.
+  /// \tparam TargetType the type that is used to store the output.
+  ///         May not be a narrowing conversion from T.
+  template <typename TargetType>
+  static void Decode(TargetType* decomp, size_t* decomp_size, const char* comp,
+                     size_t comp_size);
+
+  /// \brief Get the decompressed size of a compression block
+  ///
+  /// Get the size of a compression block encoded previously with
+  /// AlpWrapper::Encode().
+  ///
+  /// \param[in] comp start of the memory region containing the compression block
+  /// \param[in] comp_size size of the compression block
+  /// \return the decompressed size of the block, in bytes
+  /// \tparam TargetType the type that is used to store the output.
+  ///         May not be a narrowing conversion from T.
+  template <typename TargetType>
+  static uint64_t GetDecompressedSize(const char* comp, uint64_t comp_size);
+
+  /// \brief Get the maximum compressed size of an uncompressed buffer
+  ///
+  /// \param[in] decomp_size the size of the uncompressed buffer in bytes
+  /// \return the maximum size of the compressed buffer
+  static uint64_t GetMaxCompressedSize(uint64_t decomp_size);
+
+ private:
+  struct CompressionBlockHeader;
+
+  /// \brief Tracks the progress of a compression operation
+  ///
+  /// Used to report how much data was consumed and produced during encoding.
+  struct CompressionProgress {
+    /// Number of compressed bytes written to output
+    uint64_t num_compressed_bytes_produced = 0;
+    /// Number of input elements consumed
+    uint64_t num_uncompressed_elements_taken = 0;
+  };
+
+  /// \brief Tracks the progress of a decompression operation
+  ///
+  /// Used to report how much data was consumed and produced during decoding.
+  struct DecompressionProgress {
+    /// Number of decompressed elements written
+    uint64_t num_decompressed_elements_produced = 0;
+    /// Number of compressed bytes consumed
+    uint64_t num_compressed_bytes_taken = 0;
+  };
+
+  /// \brief Compress a buffer using the ALP variant
+  ///
+  /// \param[in] decomp array of floating point numbers to compress
+  /// \param[in] element_count the number of floating point numbers
+  /// \param[out] comp the buffer to be compressed into
+  /// \param[in] comp_size the size of the compression buffer
+  /// \param[in] combinations the encoding preset to use
+  /// \return the compressed bytes produced and the input elements consumed
+  static CompressionProgress EncodeAlp(const T* decomp, uint64_t element_count,
+                                       char* comp, size_t comp_size,
+                                       const AlpEncodingPreset& combinations);
+
+  /// \brief Decompress a buffer using the ALP variant
+  ///
+  /// \param[out] decomp the buffer to be decompressed into
+  /// \param[in] decomp_element_count the maximum number of elements to produce
+  /// \param[in] comp the compressed buffer to be decompressed
+  /// \param[in] comp_size the size of the compressed data in bytes
+  /// \param[in] bit_pack_layout the bit packing layout used
+  /// \return the elements produced and the compressed bytes consumed
+  /// \tparam TargetType the type that is used to store the output.
+  ///         May not be a narrowing conversion from T.
+  template <typename TargetType>
+  static DecompressionProgress DecodeAlp(TargetType* decomp, size_t decomp_element_count,
+                                         const char* comp, size_t comp_size,
+                                         AlpBitPackLayout bit_pack_layout);
+
+  /// \brief Load the CompressionBlockHeader from compressed data
+  ///
+  /// \param[in] comp the compressed buffer
+  /// \param[in] comp_size the size of the compressed data
+  /// \return the CompressionBlockHeader from comp
+  static CompressionBlockHeader LoadHeader(const char* comp, size_t comp_size);
+};
+
+}  // namespace alp
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/alp/alp_test.cc b/cpp/src/arrow/util/alp/alp_test.cc
new file mode 100644
index 00000000000..f3a1bfd2d4a
--- /dev/null
+++ b/cpp/src/arrow/util/alp/alp_test.cc
@@ -0,0 +1,213 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cmath>
+#include <cstdint>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/alp/Alp.h"
+#include "arrow/util/alp/AlpConstants.h"
+#include "arrow/util/alp/AlpSampler.h"
+#include "arrow/util/alp/CompressFloatAlp.h"
+#include "arrow/util/bit_stream_utils_internal.h"
+#include "arrow/util/bpacking_internal.h"
+
+namespace arrow {
+namespace util {
+namespace alp {
+
+// ============================================================================
+// ALP Constants Tests
+// ============================================================================
+
+TEST(AlpConstantsTest, SamplerConstants) {
+  EXPECT_GT(AlpConstants::kSamplerVectorSize, 0);  // sampler sizes must be positive
+  EXPECT_GT(AlpConstants::kSamplerRowgroupSize, 0);
+  EXPECT_GT(AlpConstants::kSamplerSamplesPerVector, 0);
+  EXPECT_EQ(AlpConstants::kAlpVersion, 1);  // this test pins the version to 1
+}
+
+// ============================================================================
+// ALP Compression Tests (Float)
+// ============================================================================
+
+class AlpCompressionFloatTest : public ::testing::Test {
+ protected:
+  // Round-trips `input` through CompressVector/DecompressVector using the
+  // default preset and kNormal layout, expecting every value back unchanged.
+  void TestCompressDecompressFloat(const std::vector<float>& input) {
+    AlpCompression<float> compressor;
+    AlpEncodingPreset preset{};  // Default preset
+    auto encoded = compressor.CompressVector(input.data(), input.size(), preset);
+
+    std::vector<float> output(input.size());
+    compressor.DecompressVector(encoded, AlpBitPackLayout::kNormal, output.data());
+
+    // ALP is lossless, so compare exactly: EXPECT_FLOAT_EQ accepts values up to
+    // 4 ULPs apart and would let a lossy round trip pass.
+    ASSERT_EQ(output.size(), input.size());
+    for (size_t i = 0; i < input.size(); ++i) {
+      EXPECT_EQ(output[i], input[i]) << "Mismatch at index " << i;
+    }
+  }
+};
+
+TEST_F(AlpCompressionFloatTest, SimpleSequence) {
+  std::vector<float> input;
+  for (size_t value = 1; value <= 64; ++value) {
+    input.push_back(static_cast<float>(value));  // 1, 2, ..., 64
+  }
+  TestCompressDecompressFloat(input);
+}
+
+TEST_F(AlpCompressionFloatTest, DecimalValues) {
+  std::vector<float> input;
+  for (size_t whole = 0; whole < 64; ++whole) {
+    input.push_back(static_cast<float>(whole) + 0.5f);  // 0.5, 1.5, ..., 63.5
+  }
+  TestCompressDecompressFloat(input);
+}
+
+TEST_F(AlpCompressionFloatTest, SmallValues) {
+  std::vector<float> input;
+  for (size_t step = 1; step <= 64; ++step) {
+    input.push_back(0.001f * step);  // 64 consecutive multiples of 0.001f
+  }
+  TestCompressDecompressFloat(input);
+}
+
+TEST_F(AlpCompressionFloatTest, MixedValues) {
+  // Element n (1-based) is written as n * 100 + 2^-n.
+  const std::vector<float> input{100.5f,     200.25f,     300.125f,     400.0625f,
+                                 500.03125f, 600.015625f, 700.0078125f, 800.00390625f};
+  TestCompressDecompressFloat(input);
+}
+
+TEST_F(AlpCompressionFloatTest, RandomValues) {
+  // A fixed seed makes the generator produce the same sequence on every run.
+  std::mt19937 rng(42);
+  std::uniform_real_distribution<float> dist(0.0f, 1000.0f);
+
+  std::vector<float> input(64);
+  for (size_t i = 0; i < input.size(); ++i) {
+    input[i] = dist(rng);
+  }
+  TestCompressDecompressFloat(input);
+}
+
+// ============================================================================
+// ALP Compression Tests (Double)
+// ============================================================================
+
+class AlpCompressionDoubleTest : public ::testing::Test {
+ protected:
+  // Round-trips `input` through CompressVector/DecompressVector using the
+  // default preset and kNormal layout, expecting every value back unchanged.
+  void TestCompressDecompressDouble(const std::vector<double>& input) {
+    AlpCompression<double> compressor;
+    AlpEncodingPreset preset{};  // Default preset
+    auto encoded = compressor.CompressVector(input.data(), input.size(), preset);
+
+    std::vector<double> output(input.size());
+    compressor.DecompressVector(encoded, AlpBitPackLayout::kNormal, output.data());
+
+    // ALP is lossless, so compare exactly: EXPECT_DOUBLE_EQ accepts values up to
+    // 4 ULPs apart and would let a lossy round trip pass.
+    ASSERT_EQ(output.size(), input.size());
+    for (size_t i = 0; i < input.size(); ++i) {
+      EXPECT_EQ(output[i], input[i]) << "Mismatch at index " << i;
+    }
+  }
+};
+
+TEST_F(AlpCompressionDoubleTest, SimpleSequence) {
+  std::vector<double> input;
+  for (size_t value = 1; value <= 64; ++value) {
+    input.push_back(static_cast<double>(value));  // 1, 2, ..., 64
+  }
+  TestCompressDecompressDouble(input);
+}
+
+TEST_F(AlpCompressionDoubleTest, HighPrecision) {
+  std::vector<double> input;
+  for (size_t step = 1; step <= 64; ++step) {
+    input.push_back(1.123456789 * step);  // 64 consecutive multiples of 1.123456789
+  }
+  TestCompressDecompressDouble(input);
+}
+
+TEST_F(AlpCompressionDoubleTest, VerySmallValues) {
+  std::vector<double> input;
+  for (size_t step = 1; step <= 64; ++step) {
+    input.push_back(1e-10 * step);  // 64 consecutive multiples of 1e-10
+  }
+  TestCompressDecompressDouble(input);
+}
+
+// ============================================================================
+// Integration Tests
+// ============================================================================
+
+TEST(AlpIntegrationTest, LargeFloatDataset) {
+  // 1024 pseudo-random floats in [-1000, 1000) drawn from a fixed seed.
+  std::mt19937 rng(12345);
+  std::uniform_real_distribution<float> dist(-1000.0f, 1000.0f);
+  std::vector<float> input(1024);
+  for (auto& v : input) {
+    v = dist(rng);
+  }
+
+  AlpCompression<float> compressor;
+  AlpEncodingPreset preset{};
+  auto encoded = compressor.CompressVector(input.data(), input.size(), preset);
+
+  std::vector<float> output(input.size());
+  compressor.DecompressVector(encoded, AlpBitPackLayout::kNormal, output.data());
+  // Lossless codec: require exact equality rather than EXPECT_FLOAT_EQ's 4 ULPs.
+  for (size_t i = 0; i < input.size(); ++i) {
+    EXPECT_EQ(output[i], input[i]) << "Mismatch at index " << i;
+  }
+}
+
+TEST(AlpIntegrationTest, LargeDoubleDataset) {
+  // 1024 pseudo-random doubles in [-1000, 1000) drawn from a fixed seed.
+  std::mt19937 rng(12345);
+  std::uniform_real_distribution<double> dist(-1000.0, 1000.0);
+  std::vector<double> input(1024);
+  for (auto& v : input) {
+    v = dist(rng);
+  }
+
+  AlpCompression<double> compressor;
+  AlpEncodingPreset preset{};
+  auto encoded = compressor.CompressVector(input.data(), input.size(), preset);
+
+  std::vector<double> output(input.size());
+  compressor.DecompressVector(encoded, AlpBitPackLayout::kNormal, output.data());
+  // Lossless codec: require exact equality rather than EXPECT_DOUBLE_EQ's 4 ULPs.
+  for (size_t i = 0; i < input.size(); ++i) {
+    EXPECT_EQ(output[i], input[i]) << "Mismatch at index " << i;
+  }
+}
+
+}  // namespace alp
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/small_vector.h b/cpp/src/arrow/util/small_vector.h
index f371e647152..90dcb111a41 100644
--- a/cpp/src/arrow/util/small_vector.h
+++ b/cpp/src/arrow/util/small_vector.h
@@ -457,6 +457,22 @@ class StaticVectorImpl {
     }
   }
 
+  // Unsafe resize: growing only bumps the size and constructs nothing, so the
+  // caller must overwrite the new slots right away (e.g. memcpy). POD types only.
+  void UnsafeResize(size_t n) {
+    const size_t current = storage_.size_;
+    if (n > current) {
+      storage_.bump_size(n - current);  // no construction - caller must initialize!
+      return;
+    }
+    // Shrinking (or n == current): destroy the trailing elements, then drop them.
+    auto* slots = storage_.storage_ptr();
+    for (size_t idx = n; idx < current; ++idx) {
+      slots[idx].destroy();
+    }
+    storage_.reduce_size(current - n);
+  }
+
  private:
   template <typename InputIt>
   void init_by_copying(size_t n, InputIt src) {
diff --git a/cpp/src/arrow/util/type_fwd.h b/cpp/src/arrow/util/type_fwd.h
index b8934ecbd4c..5ba696104bb 100644
--- a/cpp/src/arrow/util/type_fwd.h
+++ b/cpp/src/arrow/util/type_fwd.h
@@ -55,7 +55,8 @@ struct Compression {
     LZ4_FRAME,
     LZO,
     BZ2,
-    LZ4_HADOOP
+    LZ4_HADOOP,
+    ALP
   };
 };
 
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index dc7d40d2a38..92a75bcbd2e 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -442,6 +442,7 @@ add_parquet_benchmark(bloom_filter_benchmark SOURCES bloom_filter_benchmark.cc
 add_parquet_benchmark(column_reader_benchmark)
 add_parquet_benchmark(column_io_benchmark)
 add_parquet_benchmark(encoding_benchmark)
+add_parquet_benchmark(encoding_alp_benchmark)
 add_parquet_benchmark(level_conversion_benchmark)
 add_parquet_benchmark(metadata_benchmark)
 add_parquet_benchmark(page_index_benchmark SOURCES page_index_benchmark.cc
diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 79b837f755c..9dbdabe9b2f 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -859,6 +859,7 @@ class ColumnReaderImplBase {
       switch (encoding) {
         case Encoding::PLAIN:
         case Encoding::BYTE_STREAM_SPLIT:
+        case Encoding::ALP:
         case Encoding::RLE:
         case Encoding::DELTA_BINARY_PACKED:
         case Encoding::DELTA_BYTE_ARRAY:
diff --git a/cpp/src/parquet/decoder.cc b/cpp/src/parquet/decoder.cc
index 3ce2323d29a..885477e513c 100644
--- a/cpp/src/parquet/decoder.cc
+++ b/cpp/src/parquet/decoder.cc
@@ -21,6 +21,7 @@
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
+#include <iostream>
 #include <limits>
 #include <memory>
 #include <string>
@@ -40,6 +41,9 @@
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/byte_stream_split_internal.h"
+#include "arrow/util/alp/Alp.h"
+#include "arrow/util/alp/AlpConstants.h"
+#include "arrow/util/alp/AlpWrapper.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/int_util_overflow.h"
 #include "arrow/util/logging_internal.h"
@@ -2323,6 +2327,124 @@ class ByteStreamSplitDecoder<FLBAType> : public ByteStreamSplitDecoderBase<FLBAT
   }
 };
 
+// ----------------------------------------------------------------------
+// ALP decoder (Adaptive Lossless floating-Point)
+
+template <typename DType>
+class AlpDecoder : public TypedDecoderImpl<DType> {
+ public:
+  using Base = TypedDecoderImpl<DType>;
+  using T = typename DType::c_type;
+
+  explicit AlpDecoder(const ColumnDescriptor* descr)
+      : Base(descr, Encoding::ALP), current_offset_{0}, needs_decode_{false} {
+    static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
+                  "ALP only supports float and double types");
+  }
+
+  /// Attach a new page. Nothing is decoded until the first Decode/DecodeArrow
+  /// call; any values buffered from the previous page are dropped.
+  void SetData(int num_values, const uint8_t* data, int len) final {
+    Base::SetData(num_values, data, len);
+    current_offset_ = 0;
+    needs_decode_ = (len > 0 && num_values > 0);
+    decoded_buffer_.clear();
+  }
+
+  /// Write up to max_values decoded values to buffer; returns the count written.
+  int Decode(T* buffer, int max_values) override {
+    // Fast path: the caller asked for the whole page, so decode directly into
+    // the output buffer and skip the intermediate copy.
+    if (needs_decode_ && max_values >= this->num_values_) {
+      const int decoded = DecodeAllInto(buffer);
+      this->num_values_ = 0;
+      needs_decode_ = false;
+      return decoded;
+    }
+
+    // Slow path: partial read. The page is decoded once into decoded_buffer_
+    // (ALP Bit unpacker needs batches of 64) and then served in slices.
+    EnsureDecoded();
+    const int values_to_decode = std::min(
+        max_values, static_cast<int>(decoded_buffer_.size() - current_offset_));
+    if (values_to_decode > 0) {
+      std::memcpy(buffer, decoded_buffer_.data() + current_offset_,
+                  values_to_decode * sizeof(T));
+      current_offset_ += values_to_decode;
+      this->num_values_ -= values_to_decode;
+    }
+    return values_to_decode;
+  }
+
+  /// Append num_values slots to builder, null_count of them null per valid_bits.
+  /// \return the number of non-null values consumed
+  /// \throws ParquetException (EOF) if fewer decoded values remain than requested
+  int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+                  int64_t valid_bits_offset,
+                  typename EncodingTraits<DType>::Accumulator* builder) override {
+    const int values_to_decode = num_values - null_count;
+    EnsureDecoded();  // DecodeArrow always goes through the intermediate buffer
+    // Bound the request by what was really decoded, not just by the page count,
+    // so a short or empty stream raises EOF instead of reading past the buffer.
+    const int available = std::min(
+        this->num_values_, static_cast<int>(decoded_buffer_.size() - current_offset_));
+    if (ARROW_PREDICT_FALSE(available < values_to_decode)) {
+      ParquetException::EofException("ALP DecodeArrow: Not enough values available. "
+                                      "Available: " + std::to_string(available) +
+                                      ", Requested: " + std::to_string(values_to_decode));
+    }
+    const T* values = decoded_buffer_.data() + current_offset_;
+    if (null_count == 0) {
+      // Fast path: no nulls, append the whole run at once.
+      PARQUET_THROW_NOT_OK(builder->AppendValues(values, values_to_decode));
+    } else {
+      // Slow path: reserve once, then interleave values and nulls slot by slot.
+      PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+      int value_idx = 0;
+      for (int i = 0; i < num_values; ++i) {
+        if (::arrow::bit_util::GetBit(valid_bits, valid_bits_offset + i)) {
+          builder->UnsafeAppend(values[value_idx++]);
+        } else {
+          builder->UnsafeAppendNull();
+        }
+      }
+    }
+    current_offset_ += values_to_decode;
+    this->num_values_ -= values_to_decode;
+    return values_to_decode;
+  }
+
+  int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+                  int64_t valid_bits_offset,
+                  typename EncodingTraits<DType>::DictAccumulator* builder) override {
+    ParquetException::NYI("DecodeArrow to DictAccumulator for ALP");
+  }
+
+ private:
+  // Decode the whole page into out, which must have room for num_values_
+  // elements. Returns the element count AlpWrapper reports through its in/out
+  // size argument, clamped to num_values_.
+  int DecodeAllInto(T* out) {
+    const size_t capacity = static_cast<size_t>(this->num_values_);
+    size_t decomp_size = capacity * sizeof(T);
+    ::arrow::util::alp::AlpWrapper<T>::Decode(
+        out, &decomp_size, reinterpret_cast<const char*>(this->data_), this->len_);
+    return static_cast<int>(std::min(capacity, decomp_size / sizeof(T)));
+  }
+
+  // Decode the page into decoded_buffer_ on first use; later calls are no-ops.
+  void EnsureDecoded() {
+    if (!needs_decode_) return;
+    decoded_buffer_.resize(this->num_values_);
+    decoded_buffer_.resize(DecodeAllInto(decoded_buffer_.data()));
+    needs_decode_ = false;
+  }
+
+  std::vector<T> decoded_buffer_;
+  size_t current_offset_;
+  bool needs_decode_;
+};
+
 }  // namespace
 
 // ----------------------------------------------------------------------
@@ -2369,6 +2491,15 @@ std::unique_ptr<Decoder> MakeDecoder(Type::type type_num, Encoding::type encodin
             "BYTE_STREAM_SPLIT only supports FLOAT, DOUBLE, INT32, INT64 "
             "and FIXED_LEN_BYTE_ARRAY");
     }
+  } else if (encoding == Encoding::ALP) {
+    switch (type_num) {
+      case Type::FLOAT:
+        return std::make_unique<AlpDecoder<FloatType>>(descr);
+      case Type::DOUBLE:
+        return std::make_unique<AlpDecoder<DoubleType>>(descr);
+      default:
+        throw ParquetException("ALP encoding only supports FLOAT and DOUBLE");
+    }
   } else if (encoding == Encoding::DELTA_BINARY_PACKED) {
     switch (type_num) {
       case Type::INT32:
diff --git a/cpp/src/parquet/encoder.cc b/cpp/src/parquet/encoder.cc
index 04f079ce70c..ad9d1cd64eb 100644
--- a/cpp/src/parquet/encoder.cc
+++ b/cpp/src/parquet/encoder.cc
@@ -20,6 +20,7 @@
 #include <algorithm>
 #include <cstdint>
 #include <cstdlib>
+#include <iostream>
 #include <limits>
 #include <memory>
 #include <string>
@@ -35,6 +36,9 @@
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/byte_stream_split_internal.h"
+#include "arrow/util/alp/Alp.h"
+#include "arrow/util/alp/AlpConstants.h"
+#include "arrow/util/alp/AlpWrapper.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/hashing.h"
 #include "arrow/util/int_util_overflow.h"
@@ -995,6 +999,90 @@ class ByteStreamSplitEncoder<FLBAType> : public ByteStreamSplitEncoderBase<FLBAT
   }
 };
 
+// ----------------------------------------------------------------------
+// ALP encoder (Adaptive Lossless floating-Point)
+
+template <typename DType>
+class AlpEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
+ public:
+  using T = typename DType::c_type;
+  using ArrowType = typename EncodingTraits<DType>::ArrowType;
+  using TypedEncoder<DType>::Put;
+
+  explicit AlpEncoder(const ColumnDescriptor* descr,
+                      ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+      : EncoderImpl(descr, Encoding::ALP, pool), sink_{pool} {
+    static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
+                  "ALP only supports float and double types");
+  }
+
+  /// Size in bytes of the raw values buffered so far (before ALP compression).
+  int64_t EstimatedDataEncodedSize() override { return sink_.length(); }
+
+  /// Compress all buffered values with AlpWrapper::Encode() and reset the sink.
+  std::shared_ptr<Buffer> FlushValues() override {
+    if (sink_.length() == 0) {
+      // Empty buffer case
+      PARQUET_ASSIGN_OR_THROW(auto buf, sink_.Finish());
+      return buf;
+    }
+
+    // AlpWrapper::Encode() handles sampling, preset selection, and compression
+    const size_t decomp_size = sink_.length();
+    size_t comp_size =
+        ::arrow::util::alp::AlpWrapper<T>::GetMaxCompressedSize(decomp_size);
+    PARQUET_ASSIGN_OR_THROW(
+        auto compressed_buffer,
+        ::arrow::AllocateResizableBuffer(comp_size, this->memory_pool()));
+    ::arrow::util::alp::AlpWrapper<T>::Encode(
+        reinterpret_cast<const T*>(sink_.data()), decomp_size,
+        reinterpret_cast<char*>(compressed_buffer->mutable_data()), &comp_size);
+    // Encode() signals failure by zeroing comp_size; never emit that as a page.
+    if (comp_size == 0) {
+      throw ParquetException("ALP encoding failed: no output was produced");
+    }
+    PARQUET_THROW_NOT_OK(compressed_buffer->Resize(comp_size));
+    sink_.Reset();
+    return std::shared_ptr<Buffer>(std::move(compressed_buffer));
+  }
+
+  /// Buffer num_values raw values; compression is deferred to FlushValues().
+  void Put(const T* buffer, int num_values) override {
+    if (num_values <= 0) return;
+    PARQUET_THROW_NOT_OK(sink_.Append(reinterpret_cast<const uint8_t*>(buffer),
+                                      num_values * static_cast<int64_t>(sizeof(T))));
+  }
+
+  /// Buffer the values SpacedCompress keeps per valid_bits (all when it is null).
+  void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
+                 int64_t valid_bits_offset) override {
+    if (valid_bits != NULLPTR) {
+      PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
+                                                                   this->memory_pool()));
+      T* data = buffer->template mutable_data_as<T>();
+      const int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
+          src, num_values, valid_bits, valid_bits_offset, data);
+      Put(data, num_valid_values);
+    } else {
+      Put(src, num_values);
+    }
+  }
+
+  void Put(const ::arrow::Array& values) override {
+    if (values.type_id() != ArrowType::type_id) {
+      throw ParquetException(std::string() + "direct put from " +
+                             values.type()->ToString() + " not supported");
+    }
+    const auto& data = *values.data();
+    this->PutSpaced(data.GetValues<typename ArrowType::c_type>(1),
+                    static_cast<int>(data.length), data.GetValues<uint8_t>(0, 0),
+                    data.offset);
+  }
+
+ private:
+  ::arrow::BufferBuilder sink_;
+};
+
 // ----------------------------------------------------------------------
 // DELTA_BINARY_PACKED encoder
 
@@ -1816,6 +1904,15 @@ std::unique_ptr<Encoder> MakeEncoder(Type::type type_num, Encoding::type encodin
             "BYTE_STREAM_SPLIT only supports FLOAT, DOUBLE, INT32, INT64 "
             "and FIXED_LEN_BYTE_ARRAY");
     }
+  } else if (encoding == Encoding::ALP) {
+    switch (type_num) {
+      case Type::FLOAT:
+        return std::make_unique<AlpEncoder<FloatType>>(descr, pool);
+      case Type::DOUBLE:
+        return std::make_unique<AlpEncoder<DoubleType>>(descr, pool);
+      default:
+        throw ParquetException("ALP encoding only supports FLOAT and DOUBLE");
+    }
   } else if (encoding == Encoding::DELTA_BINARY_PACKED) {
     switch (type_num) {
       case Type::INT32:
diff --git a/cpp/src/parquet/encoding_alp_benchmark.cc b/cpp/src/parquet/encoding_alp_benchmark.cc
new file mode 100644
index 00000000000..7bea2a64914
--- /dev/null
+++ b/cpp/src/parquet/encoding_alp_benchmark.cc
@@ -0,0 +1,1824 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <chrono>
+#include <cmath>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <random>
+#include <sstream>
+#include <unistd.h>
+#include <unordered_set>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+#include "arrow/buffer.h"
+#include "arrow/util/alp/AlpWrapper.h"
+#include "arrow/util/compression.h"
+#include "parquet/encoding.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+// This file benchmarks multiple encoding schemes for floating point values in
+// Parquet. Structure mirrors Snowflake's FloatComprBenchmark.cpp
+//
+// It evaluates:
+// 1) Compression Ratio
+// 2) Encoding Speed
+// 3) Decoding Speed
+//
+// Encoding schemes:
+// 1) ALP encoding
+// 2) ByteStreamSplit encoding followed by ZSTD compression
+// 3) PLAIN encoding followed by ZSTD compression
+//
+// On synthetic datasets:
+// 1) Constant Value
+// 2) Increasing values
+// 3) Small Range decimal
+// 4) Range decimal
+// 5) Large Range decimal
+// 6) Random values
+//
+// And real-world datasets:
+// 1) floatingpoint_spotify1.csv (9 columns)
+// 2) floatingpoint_spotify2.csv (9 columns)
+// 3) floatingpoint_citytemperature.csv (1 column)
+// 4) floatingpoint_poi.csv (2 columns)
+// 5) floatingpoint_birdmigration.csv (1 column)
+// 6) floatingpoint_commongovernment.csv (3 columns)
+// 7) floatingpoint_arade.csv (4 columns)
+// 8) floatingpoint_num_brain.csv (1 column)
+// 9) floatingpoint_num_comet.csv (1 column)
+// 10) floatingpoint_num_control.csv (1 column)
+// 11) floatingpoint_num_plasma.csv (1 column)
+// 12) floatingpoint_obs_error.csv (1 column)
+// 13) floatingpoint_obs_info.csv (1 column)
+// 14) floatingpoint_obs_spitzer.csv (1 column)
+// 15) floatingpoint_obs_temp.csv (1 column)
+// 16) floatingpoint_msg_bt.csv (1 column)
+// 17) floatingpoint_msg_lu.csv (1 column)
+// 18) floatingpoint_msg_sp.csv (1 column)
+// 19) floatingpoint_msg_sppm.csv (1 column)
+// 20) floatingpoint_msg_sweep3d.csv (1 column)
+
+namespace parquet {
+
+using schema::PrimitiveNode;
+
+// Returns 10 raised to the power `exp` (counterpart of Snowflake's pow10 helper).
+// The arithmetic is unsigned, so results that do not fit into 64 bits wrap around.
+constexpr uint64_t Pow10(uint64_t exp) {
+  uint64_t power = 1;
+  for (; exp != 0; --exp) {
+    power *= 10;
+  }
+  return power;
+}
+
+// Encoding type enum (matching Snowflake's ComprEngine pattern).
+// Each enumerator names one benchmarked scheme; the mapping to a Parquet encoding and
+// an optional codec is done in RealComprBenchmarkData::PrepareBenchmarkData().
+enum class EncodingType {
+  // ALP encoding, no additional compression.
+  kALP,
+  // BYTE_STREAM_SPLIT encoding followed by ZSTD compression.
+  kByteStreamSplit,
+  // PLAIN encoding followed by ZSTD compression.
+  kZSTD,
+};
+
+// Creates the descriptor of a single REQUIRED primitive column named "column" whose
+// physical type is DType::type_num (used for float/double in this file).
+// NOTE(review): the two `false` arguments are presumably the max definition and
+// repetition levels (i.e. 0) - confirm against the ColumnDescriptor constructor.
+template <typename DType>
+std::shared_ptr<ColumnDescriptor> MakeColumnDescriptor() {
+  return std::make_shared<ColumnDescriptor>(
+      PrimitiveNode::Make("column", Repetition::REQUIRED, DType::type_num), false, false);
+}
+
+// ============================================================================
+// Benchmark data base class
+// ============================================================================
+
+/// \brief Base class holding the input, encoded and decoded data of one benchmark.
+///
+/// Matches Snowflake's RealComprBenchmarkData<T> structure with encoding parameter.
+/// Subclasses provide the input via FillUncompressedInput(); PrepareBenchmarkData()
+/// then encodes it once so that `encoded_data` and `encoded_size` are populated and
+/// `output_uncompressed` is sized to receive the decoded values.
+///
+/// \tparam T value type; float selects FloatType, any other type selects DoubleType
+template <typename T>
+struct RealComprBenchmarkData {
+  // Values handed to the encoder.
+  std::vector<T> input_uncompressed;
+  // Encoder output; ZSTD-compressed for the kByteStreamSplit and kZSTD schemes.
+  std::shared_ptr<Buffer> encoded_data;
+  // Destination for decoded values; resized to the input size by PrepareBenchmarkData().
+  std::vector<T> output_uncompressed;
+  // Size of `encoded_data` in bytes.
+  uint64_t encoded_size = 0;
+  // Parquet encoding chosen by PrepareBenchmarkData(). Initialized so that the member
+  // never holds an indeterminate value before that call.
+  Encoding::type current_encoding = Encoding::PLAIN;
+  // ZSTD codec; created by PrepareBenchmarkData() for kByteStreamSplit and kZSTD.
+  std::unique_ptr<::arrow::util::Codec> codec;
+
+  virtual ~RealComprBenchmarkData() = default;
+
+  /// \brief Fill the input and perform one encoding pass over it.
+  ///
+  /// \param element_count number of values requested from FillUncompressedInput()
+  /// \param encoding_type kALP encodes with ALP only; kByteStreamSplit and kZSTD encode
+  ///        with BYTE_STREAM_SPLIT / PLAIN respectively and then compress with ZSTD
+  void PrepareBenchmarkData(uint64_t element_count, EncodingType encoding_type) {
+    FillUncompressedInput(element_count);
+
+    using DType =
+        typename std::conditional<std::is_same<T, float>::value, FloatType,
+                                  DoubleType>::type;
+    auto descr = MakeColumnDescriptor<DType>();
+
+    // Select the Parquet encoding (and the codec, where one is used) for the scheme.
+    switch (encoding_type) {
+      case EncodingType::kALP:
+        current_encoding = Encoding::ALP;
+        break;
+      case EncodingType::kByteStreamSplit:
+        current_encoding = Encoding::BYTE_STREAM_SPLIT;
+        codec = ::arrow::util::Codec::Create(::arrow::Compression::ZSTD).ValueOrDie();
+        break;
+      case EncodingType::kZSTD:
+        // ZSTD uses PLAIN encoding + compression
+        current_encoding = Encoding::PLAIN;
+        codec = ::arrow::util::Codec::Create(::arrow::Compression::ZSTD).ValueOrDie();
+        break;
+    }
+
+    // All schemes share the encoder pass; only the encoding passed to it differs.
+    auto encoder = MakeTypedEncoder<DType>(current_encoding, false, descr.get());
+    encoder->Put(input_uncompressed.data(),
+                 static_cast<int>(input_uncompressed.size()));
+    std::shared_ptr<Buffer> encoded = encoder->FlushValues();
+
+    if (encoding_type == EncodingType::kALP) {
+      // ALP output is stored as produced by the encoder.
+      encoded_size = encoded->size();
+    } else {
+      // BYTE_STREAM_SPLIT and PLAIN output is additionally compressed with ZSTD:
+      // allocate for the worst case reported by the codec, then compress into it.
+      const int64_t max_compressed_len =
+          codec->MaxCompressedLen(encoded->size(), encoded->data());
+      auto compressed_buffer =
+          ::arrow::AllocateResizableBuffer(max_compressed_len).ValueOrDie();
+      const int64_t actual_size =
+          codec
+              ->Compress(encoded->size(), encoded->data(), max_compressed_len,
+                         compressed_buffer->mutable_data())
+              .ValueOrDie();
+      // Shrink to the bytes actually written; the returned status is deliberately
+      // ignored, as a shrinking resize is not expected to fail.
+      (void)compressed_buffer->Resize(actual_size);
+      encoded = std::shared_ptr<Buffer>(std::move(compressed_buffer));
+      encoded_size = actual_size;
+    }
+    encoded_data = std::move(encoded);
+
+    // Prepare output buffer
+    output_uncompressed.resize(input_uncompressed.size());
+  }
+
+  /// \brief Populate `input_uncompressed`; implemented by each dataset.
+  ///
+  /// \param element_count requested number of values
+  virtual void FillUncompressedInput(uint64_t element_count) = 0;
+};
+
+// ============================================================================
+// Synthetic Data Generators
+// ============================================================================
+
+// Synthetic dataset in which every element is the same value, 1.1 converted to T.
+template <typename T>
+struct ConstantValues : public RealComprBenchmarkData<T> {
+  void FillUncompressedInput(uint64_t element_count) override {
+    this->input_uncompressed.assign(element_count, static_cast<T>(1.1));
+  }
+};
+
+// Synthetic dataset that starts at 0 and grows by 1 from one element to the next.
+template <typename T>
+struct IncreasingValues : public RealComprBenchmarkData<T> {
+  void FillUncompressedInput(uint64_t element_count) override {
+    this->input_uncompressed.resize(element_count);
+    // The running value is accumulated in T rather than derived from the index.
+    T next_value = 0.0;
+    for (T& slot : this->input_uncompressed) {
+      slot = next_value;
+      next_value += 1.0;
+    }
+  }
+};
+
+// Synthetic dataset of random decimals in [100, 1000] with 2 fractional digits.
+template <typename T>
+struct DecimalSmallRange : public RealComprBenchmarkData<T> {
+  void FillUncompressedInput(uint64_t element_count) override {
+    // Integers are drawn in units of 10^-2 and divided back into decimal values.
+    constexpr uint64_t kScale = Pow10(2);
+    constexpr uint64_t kLowest = 100 * kScale;
+    constexpr uint64_t kHighest = 1000 * kScale;
+
+    std::uniform_int_distribution<uint64_t> dist(kLowest, kHighest);
+    std::default_random_engine engine;  // default-constructed, i.e. default seed
+
+    this->input_uncompressed.resize(element_count);
+    for (T& slot : this->input_uncompressed) {
+      slot = dist(engine) * 1.0 / kScale;
+    }
+  }
+};
+
+// Synthetic dataset of random decimals in [1000, 100000] with 6 fractional digits.
+template <typename T>
+struct DecimalRange : public RealComprBenchmarkData<T> {
+  void FillUncompressedInput(uint64_t element_count) override {
+    // Integers are drawn in units of 10^-6 and divided back into decimal values.
+    constexpr uint64_t kScale = Pow10(6);
+    constexpr uint64_t kLowest = 1000 * kScale;
+    constexpr uint64_t kHighest = 100000 * kScale;
+
+    std::uniform_int_distribution<uint64_t> dist(kLowest, kHighest);
+    std::default_random_engine engine;  // default-constructed, i.e. default seed
+
+    this->input_uncompressed.resize(element_count);
+    for (T& slot : this->input_uncompressed) {
+      slot = dist(engine) * 1.0 / kScale;
+    }
+  }
+};
+
+// Synthetic dataset of random decimals in [1000, 1000000] with 6 fractional digits.
+template <typename T>
+struct DecimalLargeRange : public RealComprBenchmarkData<T> {
+  void FillUncompressedInput(uint64_t element_count) override {
+    // Integers are drawn in units of 10^-6 and divided back into decimal values.
+    constexpr uint64_t kScale = Pow10(6);
+    constexpr uint64_t kLowest = 1000 * kScale;
+    constexpr uint64_t kHighest = 1000000 * kScale;
+
+    std::uniform_int_distribution<uint64_t> dist(kLowest, kHighest);
+    std::default_random_engine engine;  // default-constructed, i.e. default seed
+
+    this->input_uncompressed.resize(element_count);
+    for (T& slot : this->input_uncompressed) {
+      slot = dist(engine) * 1.0 / kScale;
+    }
+  }
+};
+
+// Synthetic dataset of random values drawn between std::numeric_limits<T>::min() and
+// std::numeric_limits<T>::max(). For floating-point T, min() is the smallest positive
+// normal value, so no negative numbers are generated.
+template <typename T>
+struct RandomValues : public RealComprBenchmarkData<T> {
+  void FillUncompressedInput(uint64_t element_count) override {
+    using Limits = std::numeric_limits<T>;
+    std::uniform_real_distribution<T> dist(Limits::min(), Limits::max());
+    std::default_random_engine engine;  // default-constructed, i.e. default seed
+
+    this->input_uncompressed.resize(element_count);
+    for (T& slot : this->input_uncompressed) {
+      slot = dist(engine);
+    }
+  }
+};
+
+// ============================================================================
+// CSV Loading Infrastructure (for real-world datasets)
+// ============================================================================
+
+// Returns the directory that holds the floating-point CSV datasets.
+//
+// The location is resolved once per process. If the parquet-testing tarball is found
+// relative to this source file, it is unpacked into a fixed directory under /tmp, which
+// is reused when it already contains the data. If the tarball is missing or cannot be
+// unpacked, the parquet-testing data directory itself is returned.
+std::string GetDataDirectory() {
+  // Initialization of a function-local static runs exactly once and is thread-safe.
+  static const std::string kDataDir = []() -> std::string {
+    // Strip the file name and one more path component from this source file's path.
+    std::string source_root = std::string(__FILE__);
+    source_root = source_root.substr(0, source_root.find_last_of("/\\"));
+    source_root = source_root.substr(0, source_root.find_last_of("/\\"));
+
+    const std::string fallback_dir = source_root + "/../submodules/parquet-testing/data";
+    const std::string tarball_path = fallback_dir + "/floatingpoint_data.tar.gz";
+    // Use a fixed extraction directory that can be reused across runs
+    const std::string extract_dir = "/tmp/parquet_alp_benchmark_data";
+
+    if (!std::ifstream(tarball_path).good()) {
+      // Fall back to original directory if tarball not found
+      return fallback_dir;
+    }
+    if (std::ifstream(extract_dir + "/floatingpoint_spotify1.csv").good()) {
+      // Directory already exists with data, reuse it
+      return extract_dir;
+    }
+
+    // Both paths are quoted so that directories containing spaces survive the shell.
+    const std::string mkdir_cmd = "mkdir -p \"" + extract_dir + "\"";
+    const std::string extract_cmd =
+        "tar -xzf \"" + tarball_path + "\" -C \"" + extract_dir + "\"";
+    const bool extracted =
+        system(mkdir_cmd.c_str()) == 0 && system(extract_cmd.c_str()) == 0;
+
+    // Extraction failed: fall back to original directory
+    return extracted ? extract_dir : fallback_dir;
+  }();
+
+  return kDataDir;
+}
+
+// Splits `line` at every `delimiter` and returns the pieces in order.
+//
+// Empty fields between two delimiters are kept. A delimiter at the very end of the
+// line does not add a trailing empty field, and an empty line yields no fields.
+// Quoting is not interpreted: a delimiter inside quotes still splits.
+std::vector<std::string> SplitCsvRow(const std::string& line, char delimiter = ',') {
+  std::vector<std::string> fields;
+  std::string::size_type start = 0;
+  while (start < line.size()) {
+    const std::string::size_type stop = line.find(delimiter, start);
+    if (stop == std::string::npos) {
+      // Last field: everything up to the end of the line.
+      fields.push_back(line.substr(start));
+      break;
+    }
+    fields.push_back(line.substr(start, stop - start));
+    start = stop + 1;
+  }
+  return fields;
+}
+
+// Loads one numeric column of a Spotify CSV file as doubles.
+//
+// `column_name` must be one of the known floating-point columns and must appear in the
+// first (header) line of `filename`, which is looked up in GetDataDirectory(). Header
+// cells are compared after trimming surrounding whitespace. Rows too short to contain
+// the column and cells that std::stod cannot parse are skipped. If the column is not
+// supported, the file cannot be opened, or the column is missing from the header, a
+// message is written to std::cerr and an empty vector is returned.
+std::vector<double> LoadSpotifyColumn(const std::string& column_name,
+                                      const std::string& filename) {
+  static const std::unordered_set<std::string> kValidFloatColumns = {
+      "danceability", "energy",   "loudness", "speechiness", "acousticness",
+      "instrumentalness", "liveness", "valence",  "tempo"};
+
+  std::vector<double> result;
+  if (kValidFloatColumns.count(column_name) == 0) {
+    std::cerr << "Column '" << column_name << "' is not a supported double column"
+              << std::endl;
+    return result;
+  }
+
+  const std::string csv_path = GetDataDirectory() + "/" + filename;
+  std::ifstream input(csv_path);
+  if (!input.is_open()) {
+    std::cerr << "Failed to open file: " << csv_path << std::endl;
+    return result;
+  }
+
+  // Read the whole file up front and parse it from memory.
+  const std::string contents((std::istreambuf_iterator<char>(input)),
+                             std::istreambuf_iterator<char>());
+  input.close();
+  std::istringstream rows(contents);
+
+  // Find the position of the requested column in the header line.
+  const char* const kWhitespace = " \t\r\n";
+  size_t target = SIZE_MAX;
+  std::string row;
+  if (std::getline(rows, row)) {
+    const std::vector<std::string> header_cells = SplitCsvRow(row);
+    for (size_t pos = 0; pos < header_cells.size() && target == SIZE_MAX; ++pos) {
+      std::string name = header_cells[pos];
+      name.erase(0, name.find_first_not_of(kWhitespace));
+      name.erase(name.find_last_not_of(kWhitespace) + 1);
+      if (name == column_name) {
+        target = pos;
+      }
+    }
+  }
+  if (target == SIZE_MAX) {
+    std::cerr << "Column '" << column_name << "' not found in header" << std::endl;
+    return result;
+  }
+
+  // Every remaining line is a data row.
+  while (std::getline(rows, row)) {
+    const std::vector<std::string> cells = SplitCsvRow(row);
+    if (target >= cells.size()) {
+      continue;
+    }
+    try {
+      result.push_back(std::stod(cells[target]));
+    } catch (const std::exception&) {
+      // Skip invalid values silently
+    }
+  }
+
+  return result;
+}
+
+// ============================================================================
+// Real-World Dataset Classes
+// ============================================================================
+
+// Real-world dataset: one column of floatingpoint_spotify1.csv, converted to T.
+// The requested element count is ignored; the size is whatever the file provides.
+template <typename T>
+struct SpotifyData : public RealComprBenchmarkData<T> {
+  std::string column_name;
+
+  explicit SpotifyData(const std::string& column) : column_name(column) {}
+
+  void FillUncompressedInput(uint64_t /*element_count*/) override {
+    const std::vector<double> loaded =
+        LoadSpotifyColumn(column_name, "floatingpoint_spotify1.csv");
+
+    auto& target = this->input_uncompressed;
+    target.clear();
+    target.reserve(loaded.size());
+    for (const double value : loaded) {
+      target.push_back(static_cast<T>(value));
+    }
+  }
+};
+
+// Real-world dataset: one column of floatingpoint_spotify2.csv, converted to T.
+// The requested element count is ignored; the size is whatever the file provides.
+template <typename T>
+struct SpotifyData2 : public RealComprBenchmarkData<T> {
+  std::string column_name;
+
+  explicit SpotifyData2(const std::string& column) : column_name(column) {}
+
+  void FillUncompressedInput(uint64_t /*element_count*/) override {
+    const std::vector<double> loaded =
+        LoadSpotifyColumn(column_name, "floatingpoint_spotify2.csv");
+
+    auto& target = this->input_uncompressed;
+    target.clear();
+    target.reserve(loaded.size());
+    for (const double value : loaded) {
+      target.push_back(static_cast<T>(value));
+    }
+  }
+};
+
+// Loads the AvgTemperature column of the City Temperature CSV data as doubles.
+//
+// The first line is treated as a header and discarded; each following line is parsed
+// as a single value with std::stod, and lines that fail to parse are skipped. If the
+// file cannot be opened, a message is written to std::cerr and an empty vector is
+// returned.
+std::vector<double> LoadCityTemperatureColumn() {
+  const std::string csv_path = GetDataDirectory() + "/floatingpoint_citytemperature.csv";
+  std::vector<double> temperatures;
+
+  std::ifstream input(csv_path);
+  if (!input.is_open()) {
+    std::cerr << "Failed to open file: " << csv_path << std::endl;
+    return temperatures;
+  }
+
+  std::string row;
+  if (!std::getline(input, row)) {
+    // Not even a header line: nothing to load.
+    input.close();
+    return temperatures;
+  }
+
+  while (std::getline(input, row)) {
+    try {
+      temperatures.push_back(std::stod(row));
+    } catch (const std::exception&) {
+      // Skip invalid values
+    }
+  }
+  input.close();
+
+  return temperatures;
+}
+
+// Loads one floating-point column ("latitude_radian" or "longitude_radian") of the POI
+// CSV data as doubles.
+//
+// The column is located by name in the header line (cells are compared after trimming
+// surrounding whitespace). Rows too short to contain the column and cells that
+// std::stod cannot parse are skipped. If the column is not supported, the file cannot
+// be opened, the header cannot be read, or the column is missing from it, a message is
+// written to std::cerr and an empty vector is returned.
+std::vector<double> LoadPoiColumn(const std::string& column_name) {
+  static const std::unordered_set<std::string> kValidFloatColumns = {"latitude_radian",
+                                                                     "longitude_radian"};
+
+  std::vector<double> result;
+  if (kValidFloatColumns.count(column_name) == 0) {
+    std::cerr << "Column '" << column_name << "' is not a supported double column"
+              << std::endl;
+    return result;
+  }
+
+  const std::string csv_path = GetDataDirectory() + "/floatingpoint_poi.csv";
+  std::ifstream input(csv_path);
+  if (!input.is_open()) {
+    std::cerr << "Failed to open file: " << csv_path << std::endl;
+    return result;
+  }
+
+  // Read header line to find column index
+  std::string row;
+  if (!std::getline(input, row)) {
+    std::cerr << "Failed to read header from POI CSV" << std::endl;
+    return result;
+  }
+
+  const char* const kWhitespace = " \t\r\n";
+  const std::vector<std::string> header_cells = SplitCsvRow(row);
+  int target = -1;
+  for (size_t pos = 0; pos < header_cells.size() && target == -1; ++pos) {
+    std::string name = header_cells[pos];
+    name.erase(0, name.find_first_not_of(kWhitespace));
+    name.erase(name.find_last_not_of(kWhitespace) + 1);
+    if (name == column_name) {
+      target = static_cast<int>(pos);
+    }
+  }
+  if (target == -1) {
+    std::cerr << "Column '" << column_name << "' not found in POI CSV header"
+              << std::endl;
+    return result;
+  }
+
+  // Process data lines
+  const size_t target_pos = static_cast<size_t>(target);
+  while (std::getline(input, row)) {
+    const std::vector<std::string> cells = SplitCsvRow(row);
+    if (cells.size() <= target_pos) {
+      continue;
+    }
+    try {
+      result.push_back(std::stod(cells[target_pos]));
+    } catch (const std::exception&) {
+      // Skip invalid values
+    }
+  }
+  input.close();
+
+  return result;
+}
+
+// Loads the single-column Bird Migration CSV data as doubles.
+//
+// The first line is treated as a header and discarded; each following line is parsed
+// with std::stod, and lines that fail to parse are skipped. If the file cannot be
+// opened or has no header line, a message is written to std::cerr and an empty vector
+// is returned.
+std::vector<double> LoadBirdMigrationData() {
+  const std::string csv_path = GetDataDirectory() + "/floatingpoint_birdmigration.csv";
+  std::vector<double> result;
+
+  std::ifstream input(csv_path);
+  if (!input.is_open()) {
+    std::cerr << "Failed to open file: " << csv_path << std::endl;
+    return result;
+  }
+
+  std::string row;
+  const bool has_header = static_cast<bool>(std::getline(input, row));
+  if (!has_header) {
+    std::cerr << "Failed to read header from bird-migration CSV" << std::endl;
+    return result;
+  }
+
+  while (std::getline(input, row)) {
+    try {
+      result.push_back(std::stod(row));
+    } catch (const std::exception&) {
+      // Skip invalid values
+    }
+  }
+  input.close();
+
+  return result;
+}
+
+// Loads one of the columns "amount1", "amount2" or "amount3" of the Common Government
+// data as doubles.
+//
+// The file is '|'-separated and the three names map to the first, second and third
+// field of a line. Every line is processed (no header line is skipped explicitly);
+// lines with too few fields and fields that std::stod cannot parse are skipped. For an
+// unsupported column name or an unreadable file, a message is written to std::cerr and
+// an empty vector is returned.
+std::vector<double> LoadCommonGovernmentColumn(const std::string& column_name) {
+  // The position of a name in this list is the position of its field in a line.
+  static const char* const kColumns[] = {"amount1", "amount2", "amount3"};
+
+  std::vector<double> result;
+  size_t target = SIZE_MAX;
+  for (size_t pos = 0; pos < sizeof(kColumns) / sizeof(kColumns[0]); ++pos) {
+    if (column_name == kColumns[pos]) {
+      target = pos;
+      break;
+    }
+  }
+  if (target == SIZE_MAX) {
+    std::cerr << "Column '" << column_name << "' is not a supported double column"
+              << std::endl;
+    return result;
+  }
+
+  const std::string csv_path =
+      GetDataDirectory() + "/floatingpoint_commongovernment.csv";
+  std::ifstream input(csv_path);
+  if (!input.is_open()) {
+    std::cerr << "Failed to open file: " << csv_path << std::endl;
+    return result;
+  }
+
+  std::string row;
+  while (std::getline(input, row)) {
+    const std::vector<std::string> cells = SplitCsvRow(row, '|');
+    if (target >= cells.size()) {
+      continue;
+    }
+    try {
+      result.push_back(std::stod(cells[target]));
+    } catch (const std::exception&) {
+      // Skip invalid values
+    }
+  }
+  input.close();
+
+  return result;
+}
+
+// Loads one of the columns "value1" .. "value4" of the Arade data as doubles.
+//
+// The file is '|'-separated and the four names map to the first to fourth field of a
+// line. Every line is processed (no header line is skipped explicitly); lines with too
+// few fields and fields that std::stod cannot parse are skipped. For an unsupported
+// column name or an unreadable file, a message is written to std::cerr and an empty
+// vector is returned.
+std::vector<double> LoadAradeColumn(const std::string& column_name) {
+  // The position of a name in this list is the position of its field in a line.
+  static const char* const kColumns[] = {"value1", "value2", "value3", "value4"};
+
+  std::vector<double> result;
+  size_t target = SIZE_MAX;
+  for (size_t pos = 0; pos < sizeof(kColumns) / sizeof(kColumns[0]); ++pos) {
+    if (column_name == kColumns[pos]) {
+      target = pos;
+      break;
+    }
+  }
+  if (target == SIZE_MAX) {
+    std::cerr << "Column '" << column_name << "' is not a supported double column"
+              << std::endl;
+    return result;
+  }
+
+  const std::string csv_path = GetDataDirectory() + "/floatingpoint_arade.csv";
+  std::ifstream input(csv_path);
+  if (!input.is_open()) {
+    std::cerr << "Failed to open file: " << csv_path << std::endl;
+    return result;
+  }
+
+  std::string row;
+  while (std::getline(input, row)) {
+    const std::vector<std::string> cells = SplitCsvRow(row, '|');
+    if (target >= cells.size()) {
+      continue;
+    }
+    try {
+      result.push_back(std::stod(cells[target]));
+    } catch (const std::exception&) {
+      // Skip invalid values
+    }
+  }
+  input.close();
+
+  return result;
+}
+
+// Generic loader for single-column FPC-format CSV files (with header).
+//
+// Reads "floatingpoint_<dataset_name>.csv" from GetDataDirectory(). The first line is
+// treated as a header and discarded; each following line is parsed with std::stod, and
+// lines that fail to parse are skipped. If the file cannot be opened or has no header
+// line, a message is written to std::cerr and an empty vector is returned.
+std::vector<double> LoadSingleColumnFpcData(const std::string& dataset_name) {
+  const std::string csv_path =
+      GetDataDirectory() + "/floatingpoint_" + dataset_name + ".csv";
+  std::vector<double> result;
+
+  std::ifstream input(csv_path);
+  if (!input.is_open()) {
+    std::cerr << "Failed to open file: " << csv_path << std::endl;
+    return result;
+  }
+
+  std::string row;
+  const bool has_header = static_cast<bool>(std::getline(input, row));
+  if (!has_header) {
+    std::cerr << "Failed to read header from " << dataset_name << " CSV" << std::endl;
+    return result;
+  }
+
+  while (std::getline(input, row)) {
+    try {
+      result.push_back(std::stod(row));
+    } catch (const std::exception&) {
+      // Skip invalid values
+    }
+  }
+  input.close();
+
+  return result;
+}
+
+// Individual loaders for FPC datasets.
+// Each function forwards a fixed dataset name to LoadSingleColumnFpcData(), which reads
+// "floatingpoint_<name>.csv". They take no parameters, so their addresses can be used
+// as the LoaderFunc template argument of FpcDataset.
+std::vector<double> LoadNumBrainData() { return LoadSingleColumnFpcData("num_brain"); }
+std::vector<double> LoadNumCometData() { return LoadSingleColumnFpcData("num_comet"); }
+std::vector<double> LoadNumControlData() {
+  return LoadSingleColumnFpcData("num_control");
+}
+std::vector<double> LoadNumPlasmaData() { return LoadSingleColumnFpcData("num_plasma"); }
+std::vector<double> LoadObsErrorData() { return LoadSingleColumnFpcData("obs_error"); }
+std::vector<double> LoadObsInfoData() { return LoadSingleColumnFpcData("obs_info"); }
+std::vector<double> LoadObsSpitzerData() {
+  return LoadSingleColumnFpcData("obs_spitzer");
+}
+std::vector<double> LoadObsTempData() { return LoadSingleColumnFpcData("obs_temp"); }
+std::vector<double> LoadMsgBtData() { return LoadSingleColumnFpcData("msg_bt"); }
+std::vector<double> LoadMsgLuData() { return LoadSingleColumnFpcData("msg_lu"); }
+std::vector<double> LoadMsgSpData() { return LoadSingleColumnFpcData("msg_sp"); }
+std::vector<double> LoadMsgSppmData() { return LoadSingleColumnFpcData("msg_sppm"); }
+std::vector<double> LoadMsgSweep3dData() {
+  return LoadSingleColumnFpcData("msg_sweep3d");
+}
+
+// Data classes for all additional datasets
+// Real-world dataset: the City Temperature values, converted to T.
+// The requested element count is ignored; the size is whatever the file provides.
+template <typename T>
+struct CityTemperatureData : public RealComprBenchmarkData<T> {
+  CityTemperatureData() = default;
+
+  void FillUncompressedInput(uint64_t /*element_count*/) override {
+    const std::vector<double> loaded = LoadCityTemperatureColumn();
+
+    auto& target = this->input_uncompressed;
+    target.clear();
+    target.reserve(loaded.size());
+    for (const double value : loaded) {
+      target.push_back(static_cast<T>(value));
+    }
+  }
+};
+
+// Real-world dataset: one column of the POI data, converted to T.
+// The requested element count is ignored; the size is whatever the file provides.
+template <typename T>
+struct PoiData : public RealComprBenchmarkData<T> {
+  std::string column_name;
+
+  explicit PoiData(const std::string& column) : column_name(column) {}
+
+  void FillUncompressedInput(uint64_t /*element_count*/) override {
+    const std::vector<double> loaded = LoadPoiColumn(column_name);
+
+    auto& target = this->input_uncompressed;
+    target.clear();
+    target.reserve(loaded.size());
+    for (const double value : loaded) {
+      target.push_back(static_cast<T>(value));
+    }
+  }
+};
+
+// Real-world dataset: the Bird Migration values, converted to T.
+// The requested element count is ignored; the size is whatever the file provides.
+template <typename T>
+struct BirdMigrationData : public RealComprBenchmarkData<T> {
+  explicit BirdMigrationData() {}
+
+  void FillUncompressedInput(uint64_t /*element_count*/) override {
+    const std::vector<double> loaded = LoadBirdMigrationData();
+
+    auto& target = this->input_uncompressed;
+    target.clear();
+    target.reserve(loaded.size());
+    for (const double value : loaded) {
+      target.push_back(static_cast<T>(value));
+    }
+  }
+};
+
+// Real-world dataset: one column of the Common Government data, converted to T.
+// The requested element count is ignored; the size is whatever the file provides.
+template <typename T>
+struct CommonGovernmentData : public RealComprBenchmarkData<T> {
+  std::string column_name;
+
+  explicit CommonGovernmentData(const std::string& column) : column_name(column) {}
+
+  void FillUncompressedInput(uint64_t /*element_count*/) override {
+    const std::vector<double> loaded = LoadCommonGovernmentColumn(column_name);
+
+    auto& target = this->input_uncompressed;
+    target.clear();
+    target.reserve(loaded.size());
+    for (const double value : loaded) {
+      target.push_back(static_cast<T>(value));
+    }
+  }
+};
+
+// Real-world dataset: one column of the Arade data, converted to T.
+// The requested element count is ignored; the size is whatever the file provides.
+template <typename T>
+struct AradeData : public RealComprBenchmarkData<T> {
+  std::string column_name;
+
+  explicit AradeData(const std::string& column) : column_name(column) {}
+
+  void FillUncompressedInput(uint64_t /*element_count*/) override {
+    const std::vector<double> loaded = LoadAradeColumn(column_name);
+
+    auto& target = this->input_uncompressed;
+    target.clear();
+    target.reserve(loaded.size());
+    for (const double value : loaded) {
+      target.push_back(static_cast<T>(value));
+    }
+  }
+};
+
+// Generic template for FPC single-column datasets.
+// LoaderFunc is a parameterless function returning the values as doubles; they are
+// converted to T. The requested element count is ignored.
+template <typename T, std::vector<double> (*LoaderFunc)()>
+struct FpcDataset : public RealComprBenchmarkData<T> {
+  explicit FpcDataset() {}
+
+  void FillUncompressedInput(uint64_t /*element_count*/) override {
+    const std::vector<double> loaded = LoaderFunc();
+
+    auto& target = this->input_uncompressed;
+    target.clear();
+    target.reserve(loaded.size());
+    for (const double value : loaded) {
+      target.push_back(static_cast<T>(value));
+    }
+  }
+};
+
+// Type aliases for each FPC dataset.
+// Every alias binds FpcDataset to the loader function of one dataset, leaving only the
+// value type T open.
+template <typename T>
+using NumBrainData = FpcDataset<T, LoadNumBrainData>;
+template <typename T>
+using NumCometData = FpcDataset<T, LoadNumCometData>;
+template <typename T>
+using NumControlData = FpcDataset<T, LoadNumControlData>;
+template <typename T>
+using NumPlasmaData = FpcDataset<T, LoadNumPlasmaData>;
+template <typename T>
+using ObsErrorData = FpcDataset<T, LoadObsErrorData>;
+template <typename T>
+using ObsInfoData = FpcDataset<T, LoadObsInfoData>;
+template <typename T>
+using ObsSpitzerData = FpcDataset<T, LoadObsSpitzerData>;
+template <typename T>
+using ObsTempData = FpcDataset<T, LoadObsTempData>;
+template <typename T>
+using MsgBtData = FpcDataset<T, LoadMsgBtData>;
+template <typename T>
+using MsgLuData = FpcDataset<T, LoadMsgLuData>;
+template <typename T>
+using MsgSpData = FpcDataset<T, LoadMsgSpData>;
+template <typename T>
+using MsgSppmData = FpcDataset<T, LoadMsgSppmData>;
+template <typename T>
+using MsgSweep3dData = FpcDataset<T, LoadMsgSweep3dData>;
+
+// ============================================================================
+// Benchmark Fixture (matching Snowflake's DoubleBenchmark structure)
+// ============================================================================
+
+template <typename T>
+class DoubleBenchmark : public benchmark::Fixture {
+ public:
+  static constexpr uint64_t kElementCount = 50000;  // Matches Snowflake exactly
+
+  // Takes ownership of the dataset `bd`, remembers the scheme to benchmark and lets
+  // the dataset fill and encode its input once.
+  void Setup(std::unique_ptr<RealComprBenchmarkData<T>> bd, uint64_t element_count,
+             EncodingType encoding_type) {
+    bd_ = std::move(bd);
+    encoding_type_ = encoding_type;
+    bd_->PrepareBenchmarkData(element_count, encoding_type_);
+  }
+
+  // Decodes the current encoded data and compares the decoded bytes with the input.
+  // A mismatch is only reported on std::cerr; it does not abort the benchmark.
+  void VerifyDataCompress() {
+    Decompress();
+
+    const auto& expected = bd_->input_uncompressed;
+    const auto& actual = bd_->output_uncompressed;
+    const bool identical =
+        memcmp(expected.data(), actual.data(), expected.size() * sizeof(T)) == 0;
+    if (!identical) {
+      std::cerr << "verificationFailed" << std::endl;
+    }
+  }
+
+  // Compares the already decoded output bytes with the input.
+  // A mismatch is only reported on std::cerr; it does not abort the benchmark.
+  void VerifyDataDecompress() {
+    const auto& expected = bd_->input_uncompressed;
+    const auto& actual = bd_->output_uncompressed;
+    const bool identical =
+        memcmp(expected.data(), actual.data(), expected.size() * sizeof(T)) == 0;
+    if (!identical) {
+      std::cerr << "verificationFailed" << std::endl;
+    }
+  }
+
+  // Encodes `bd_->input_uncompressed` with the scheme selected in Setup() and stores
+  // the result in `bd_->encoded_data` / `bd_->encoded_size`. kALP output is stored as
+  // produced by the encoder; for the other schemes the encoder output is compressed
+  // with `bd_->codec` first.
+  void Compress() {
+    using DType =
+        typename std::conditional<std::is_same<T, float>::value, FloatType,
+                                  DoubleType>::type;
+    auto descr = MakeColumnDescriptor<DType>();
+
+    // kALP and kZSTD use a fixed encoding; any other scheme (ByteStreamSplit) takes the
+    // encoding recorded in the data object.
+    Encoding::type encoding;
+    switch (encoding_type_) {
+      case EncodingType::kALP:
+        encoding = Encoding::ALP;
+        break;
+      case EncodingType::kZSTD:
+        // For ZSTD: Plain encode then compress
+        encoding = Encoding::PLAIN;
+        break;
+      default:
+        encoding = bd_->current_encoding;
+        break;
+    }
+
+    // The encoder pass is the same for every scheme.
+    auto encoder = MakeTypedEncoder<DType>(encoding, false, descr.get());
+    encoder->Put(bd_->input_uncompressed.data(),
+                 static_cast<int>(bd_->input_uncompressed.size()));
+    auto encoded = encoder->FlushValues();
+
+    if (encoding_type_ == EncodingType::kALP) {
+      bd_->encoded_data = encoded;
+      bd_->encoded_size = bd_->encoded_data->size();
+      return;
+    }
+
+    // Compress with ZSTD - use AllocateBuffer to properly manage memory
+    const int64_t max_compressed_len =
+        bd_->codec->MaxCompressedLen(encoded->size(), encoded->data());
+    auto compressed_buffer =
+        ::arrow::AllocateResizableBuffer(max_compressed_len).ValueOrDie();
+    const int64_t actual_size =
+        bd_->codec
+            ->Compress(encoded->size(), encoded->data(), max_compressed_len,
+                       compressed_buffer->mutable_data())
+            .ValueOrDie();
+    // Resize to actual compressed size and move to shared_ptr
+    (void)compressed_buffer->Resize(actual_size);  // Resize can't fail for shrinking
+    bd_->encoded_data = std::shared_ptr<Buffer>(std::move(compressed_buffer));
+    bd_->encoded_size = actual_size;
+  }
+
+  // Reverses Compress(): turns bd_->encoded_data back into values written to
+  // bd_->output_uncompressed. In kALP mode the bytes go straight to the ALP
+  // decoder; in every other mode they are first decompressed with bd_->codec and
+  // then decoded with Encoding::PLAIN (kZSTD) or bd_->current_encoding.
+  // The return value of Decode() is not inspected.
+  void Decompress() {
+    using DType = typename std::conditional<std::is_same<T, float>::value, FloatType,
+                                            DoubleType>::type;
+
+    auto descr = MakeColumnDescriptor<DType>();
+    // The value count announced to the decoder is the input length, while the
+    // number of values requested from Decode() is the output buffer's length.
+    const int num_values = static_cast<int>(bd_->input_uncompressed.size());
+    const int num_requested = static_cast<int>(bd_->output_uncompressed.size());
+
+    if (encoding_type_ == EncodingType::kALP) {
+      // ALP: the encoded buffer is decoder input as-is.
+      auto decoder = MakeTypedDecoder<DType>(Encoding::ALP, descr.get());
+      decoder->SetData(num_values, bd_->encoded_data->data(),
+                       static_cast<int>(bd_->encoded_data->size()));
+      decoder->Decode(bd_->output_uncompressed.data(), num_requested);
+      return;
+    }
+
+    // Codec modes: undo bd_->codec first. The scratch vector is sized for the
+    // raw input (element count * sizeof(T)); Decompress() returns the byte count
+    // it actually produced.
+    const int64_t decompressed_len = bd_->input_uncompressed.size() * sizeof(T);
+    std::vector<uint8_t> decompressed(decompressed_len);
+    const int64_t actual_size =
+        bd_->codec
+            ->Decompress(bd_->encoded_data->size(), bd_->encoded_data->data(),
+                         decompressed_len, decompressed.data())
+            .ValueOrDie();
+
+    // kZSTD stores PLAIN-encoded values; the remaining mode uses the data set's
+    // own encoding (byte-stream-split).
+    auto parquet_encoding = Encoding::PLAIN;
+    if (encoding_type_ != EncodingType::kZSTD) {
+      parquet_encoding = bd_->current_encoding;
+    }
+
+    // Decode the decompressed bytes into the output buffer.
+    auto decoder = MakeTypedDecoder<DType>(parquet_encoding, descr.get());
+    decoder->SetData(num_values, decompressed.data(), static_cast<int>(actual_size));
+    decoder->Decode(bd_->output_uncompressed.data(), num_requested);
+  }
+
+  // Times Compress() across the benchmark loop and records two counters:
+  // "MB/s" (raw bytes per elapsed microsecond) and "Compression Ratio Percent",
+  // i.e. 0.64 * 100 * encoded / raw bytes = 64 * ratio (bits per value for 8-byte T).
+  // NOTE(review): the second counter's name suggests a plain percentage - confirm.
+  void BenchmarkCompress(benchmark::State& state,
+                         std::unique_ptr<RealComprBenchmarkData<T>> bd,
+                         EncodingType encoding_type) {
+    using Clock = std::chrono::high_resolution_clock;
+    Setup(std::move(bd), kElementCount, encoding_type);
+
+    uint64_t runs = 0;
+    const auto begin = Clock::now();
+    for (auto _ : state) {
+      Compress();
+      ++runs;
+    }
+    const uint64_t elapsed_us =
+        std::chrono::duration_cast<std::chrono::microseconds>(Clock::now() - begin)
+            .count();
+    const auto raw_size = bd_->input_uncompressed.size() * sizeof(T);
+    state.counters["MB/s"] = static_cast<double>(raw_size * runs) / elapsed_us;
+    VerifyDataCompress();  // decodes once and logs a mismatch to stderr
+    state.counters["Compression Ratio Percent"] =
+        0.64 * (100 * bd_->encoded_size / (1.0 * raw_size));
+  }
+
+  // Times Decompress() across the benchmark loop, records "MB/s" (raw bytes per
+  // elapsed microsecond) and finally compares the decoded output with the input.
+  void BenchmarkDecompress(benchmark::State& state,
+                           std::unique_ptr<RealComprBenchmarkData<T>> bd,
+                           EncodingType encoding_type) {
+    using Clock = std::chrono::high_resolution_clock;
+    Setup(std::move(bd), kElementCount, encoding_type);
+
+    uint64_t runs = 0;
+    const auto begin = Clock::now();
+    for (auto _ : state) {
+      Decompress();
+      ++runs;
+    }
+    const uint64_t elapsed_us =
+        std::chrono::duration_cast<std::chrono::microseconds>(Clock::now() - begin)
+            .count();
+    const auto raw_size = bd_->input_uncompressed.size() * sizeof(T);
+    state.counters["MB/s"] = static_cast<double>(raw_size * runs) / elapsed_us;
+
+    VerifyDataDecompress();  // only logs; a mismatch does not fail the benchmark
+  }
+
+  std::unique_ptr<RealComprBenchmarkData<T>> bd_;  // Data set under test; set by Setup()
+  EncodingType encoding_type_;  // Mode chosen in Setup(); read by (De)Compress()
+};
+
+// ============================================================================
+// Column Lists (matching Snowflake's pattern)
+// ============================================================================
+// Entries are X(CapitalizedName, "column_name"); define X before expanding the list.
+#define COLUMN_LIST                      \
+  X(Valence, "valence")                  \
+  X(Acousticness, "acousticness")        \
+  X(Danceability, "danceability")        \
+  X(Energy, "energy")                    \
+  X(Instrumentalness, "instrumentalness")\
+  X(Liveness, "liveness")                \
+  X(Loudness, "loudness")                \
+  X(Tempo, "tempo")                      \
+  X(Speechiness, "speechiness")
+
+// Spotify2 dataset: lowercase X(column) identifiers, presumably stringized by #COLUMN
+#define COLUMN_LIST_NEW    \
+  X(valence)               \
+  X(acousticness)          \
+  X(danceability)          \
+  X(energy)                \
+  X(instrumentalness)      \
+  X(liveness)              \
+  X(loudness)              \
+  X(tempo)                 \
+  X(speechiness)
+
+// POI dataset columns: X(CapitalizedName, "column_name")
+#define POI_COLUMN_LIST                    \
+  X(LatitudeRadian, "latitude_radian")     \
+  X(LongitudeRadian, "longitude_radian")
+
+// Common Government dataset columns: X(CapitalizedName, "column_name")
+#define COMMON_GOVERNMENT_COLUMN_LIST \
+  X(Amount1, "amount1")               \
+  X(Amount2, "amount2")               \
+  X(Amount3, "amount3")
+
+// Arade dataset columns: X(CapitalizedName, "column_name")
+#define ARADE_COLUMN_LIST   \
+  X(Value1, "value1")       \
+  X(Value2, "value2")       \
+  X(Value3, "value3")       \
+  X(Value4, "value4")
+
+// Algorithms for all benchmarks (Snowflake's pattern): X(NAME, EncodingType enumerator)
+#define ALGORITHM_LIST                  \
+  X(ALP, kALP)                          \
+  X(BYTESTREAMSPLIT, kByteStreamSplit)  \
+  X(ZSTD, kZSTD)
+
+// ============================================================================
+// Benchmark Generation Macros (matching Snowflake's pattern)
+// ============================================================================
+
+// Synthetic data: defines <ALGO>(de)compress<NAME>Float benchmarks over CLASS<double>
+#define BENCHMARK_SYNTHETIC_COMPRESS(ALGO, NAME, CLASS, ENGINE)                      \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##NAME##Float, double)         \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<CLASS<double>>()),                                      \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_SYNTHETIC_DECOMPRESS(ALGO, NAME, CLASS, ENGINE)                    \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##NAME##Float, double)       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<CLASS<double>>()),                                      \
+        EncodingType::ENGINE);                                                       \
+  }
+
+// Original Spotify (Dataset 1): COLUMN_CAP -> name, COLUMN_LOWER -> SpotifyData ctor
+#define BENCHMARK_ORIGINAL_DATASET_COMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)  \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##Spotify##COLUMN_CAP##Float,  \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<SpotifyData<double>>(COLUMN_LOWER)),                    \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_ORIGINAL_DATASET_DECOMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER,        \
+                                              ENGINE)                                \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark,                                              \
+                       ALGO##decompress##Spotify##COLUMN_CAP##Float, double)         \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<SpotifyData<double>>(COLUMN_LOWER)),                    \
+        EncodingType::ENGINE);                                                       \
+  }
+
+// New Spotify (Dataset 2): COLUMN names the case and, as #COLUMN, the ctor argument
+#define BENCHMARK_NEW_DATASET_COMPRESS(ALGO, COLUMN, ENGINE)                         \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##Spotify##COLUMN##2Float,     \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<SpotifyData2<double>>(#COLUMN)),                        \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_NEW_DATASET_DECOMPRESS(ALGO, COLUMN, ENGINE)                       \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##Spotify##COLUMN##2Float,   \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<SpotifyData2<double>>(#COLUMN)),                        \
+        EncodingType::ENGINE);                                                       \
+  }
+
+// City Temperature dataset: <ALGO>(de)compressCityTemperatureFloat, CityTemperatureData
+#define BENCHMARK_CITY_TEMP_COMPRESS(ALGO, ENGINE)                                   \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##CityTemperatureFloat,        \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<CityTemperatureData<double>>()),                        \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_CITY_TEMP_DECOMPRESS(ALGO, ENGINE)                                 \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##CityTemperatureFloat,      \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<CityTemperatureData<double>>()),                        \
+        EncodingType::ENGINE);                                                       \
+  }
+
+// POI dataset: COLUMN_CAP -> benchmark name, COLUMN_LOWER -> PoiData<double> ctor
+#define BENCHMARK_POI_COMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)               \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##Poi##COLUMN_CAP##Float,      \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<PoiData<double>>(COLUMN_LOWER)),                        \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_POI_DECOMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)             \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##Poi##COLUMN_CAP##Float,    \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<PoiData<double>>(COLUMN_LOWER)),                        \
+        EncodingType::ENGINE);                                                       \
+  }
+
+// Bird Migration dataset: <ALGO>(de)compressBirdMigrationFloat, BirdMigrationData
+#define BENCHMARK_BIRD_MIGRATION_COMPRESS(ALGO, ENGINE)                              \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##BirdMigrationFloat, double)  \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<BirdMigrationData<double>>()),                          \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_BIRD_MIGRATION_DECOMPRESS(ALGO, ENGINE)                            \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##BirdMigrationFloat,        \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<BirdMigrationData<double>>()),                          \
+        EncodingType::ENGINE);                                                       \
+  }
+
+// Common Government: COLUMN_CAP -> name, COLUMN_LOWER -> CommonGovernmentData ctor
+#define BENCHMARK_COMMON_GOVERNMENT_COMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE) \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark,                                              \
+                       ALGO##compress##CommonGovernment##COLUMN_CAP##Float, double)  \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<CommonGovernmentData<double>>(COLUMN_LOWER)),           \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_COMMON_GOVERNMENT_DECOMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER,       \
+                                               ENGINE)                               \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark,                                              \
+                       ALGO##decompress##CommonGovernment##COLUMN_CAP##Float,        \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<CommonGovernmentData<double>>(COLUMN_LOWER)),           \
+        EncodingType::ENGINE);                                                       \
+  }
+
+// Arade dataset: COLUMN_CAP -> benchmark name, COLUMN_LOWER -> AradeData<double> ctor
+#define BENCHMARK_ARADE_COMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)             \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##Arade##COLUMN_CAP##Float,    \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<AradeData<double>>(COLUMN_LOWER)),                      \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_ARADE_DECOMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)           \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##Arade##COLUMN_CAP##Float,  \
+                       double)                                                       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<AradeData<double>>(COLUMN_LOWER)),                      \
+        EncodingType::ENGINE);                                                       \
+  }
+
+// FPC datasets (single column, no COLUMN argument); first: NumBrain (NumBrainData)
+#define BENCHMARK_NUM_BRAIN_COMPRESS(ALGO, ENGINE)                                   \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##NumBrainFloat, double)       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<NumBrainData<double>>()),                               \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_NUM_BRAIN_DECOMPRESS(ALGO, ENGINE)                                 \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##NumBrainFloat, double)     \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<NumBrainData<double>>()),                               \
+        EncodingType::ENGINE);                                                       \
+  }
+// NumComet dataset: <ALGO>(de)compressNumCometFloat over NumCometData<double>
+#define BENCHMARK_NUM_COMET_COMPRESS(ALGO, ENGINE)                                   \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##NumCometFloat, double)       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<NumCometData<double>>()),                               \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_NUM_COMET_DECOMPRESS(ALGO, ENGINE)                                 \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##NumCometFloat, double)     \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<NumCometData<double>>()),                               \
+        EncodingType::ENGINE);                                                       \
+  }
+// NumControl dataset: <ALGO>(de)compressNumControlFloat over NumControlData<double>
+#define BENCHMARK_NUM_CONTROL_COMPRESS(ALGO, ENGINE)                                 \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##NumControlFloat, double)     \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<NumControlData<double>>()),                             \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_NUM_CONTROL_DECOMPRESS(ALGO, ENGINE)                               \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##NumControlFloat, double)   \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<NumControlData<double>>()),                             \
+        EncodingType::ENGINE);                                                       \
+  }
+// NumPlasma dataset: <ALGO>(de)compressNumPlasmaFloat over NumPlasmaData<double>
+#define BENCHMARK_NUM_PLASMA_COMPRESS(ALGO, ENGINE)                                  \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##NumPlasmaFloat, double)      \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<NumPlasmaData<double>>()),                              \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_NUM_PLASMA_DECOMPRESS(ALGO, ENGINE)                                \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##NumPlasmaFloat, double)    \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<NumPlasmaData<double>>()),                              \
+        EncodingType::ENGINE);                                                       \
+  }
+// ObsError dataset: <ALGO>(de)compressObsErrorFloat over ObsErrorData<double>
+#define BENCHMARK_OBS_ERROR_COMPRESS(ALGO, ENGINE)                                   \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##ObsErrorFloat, double)       \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<ObsErrorData<double>>()),                               \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_OBS_ERROR_DECOMPRESS(ALGO, ENGINE)                                 \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##ObsErrorFloat, double)     \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<ObsErrorData<double>>()),                               \
+        EncodingType::ENGINE);                                                       \
+  }
+// ObsInfo dataset: <ALGO>(de)compressObsInfoFloat over ObsInfoData<double>
+#define BENCHMARK_OBS_INFO_COMPRESS(ALGO, ENGINE)                                    \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##ObsInfoFloat, double)        \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<ObsInfoData<double>>()),                                \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_OBS_INFO_DECOMPRESS(ALGO, ENGINE)                                  \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##ObsInfoFloat, double)      \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<ObsInfoData<double>>()),                                \
+        EncodingType::ENGINE);                                                       \
+  }
+// ObsSpitzer dataset: <ALGO>(de)compressObsSpitzerFloat over ObsSpitzerData<double>
+#define BENCHMARK_OBS_SPITZER_COMPRESS(ALGO, ENGINE)                                 \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##ObsSpitzerFloat, double)     \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<ObsSpitzerData<double>>()),                             \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_OBS_SPITZER_DECOMPRESS(ALGO, ENGINE)                               \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##ObsSpitzerFloat, double)   \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<ObsSpitzerData<double>>()),                             \
+        EncodingType::ENGINE);                                                       \
+  }
+// ObsTemp dataset: <ALGO>(de)compressObsTempFloat over ObsTempData<double>
+#define BENCHMARK_OBS_TEMP_COMPRESS(ALGO, ENGINE)                                    \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##ObsTempFloat, double)        \
+  (benchmark::State & state) {                                                       \
+    BenchmarkCompress(                                                               \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<ObsTempData<double>>()),                                \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_OBS_TEMP_DECOMPRESS(ALGO, ENGINE)                                  \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##ObsTempFloat, double)      \
+  (benchmark::State & state) {                                                       \
+    BenchmarkDecompress(                                                             \
+        state,                                                                       \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<ObsTempData<double>>()),                                \
+        EncodingType::ENGINE);                                                       \
+  }
+
+#define BENCHMARK_MSG_BT_COMPRESS(ALGO, ENGINE)                                      \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##MsgBtFloat, double)          \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkCompress( /* helper declared outside this excerpt */                    \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgBtData<double>>()),                                  \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>compressMsgBtFloat.
+
+#define BENCHMARK_MSG_BT_DECOMPRESS(ALGO, ENGINE)                                    \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##MsgBtFloat, double)        \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkDecompress( /* helper declared outside this excerpt */                  \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgBtData<double>>()),                                  \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>decompressMsgBtFloat.
+
+#define BENCHMARK_MSG_LU_COMPRESS(ALGO, ENGINE)                                      \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##MsgLuFloat, double)          \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkCompress( /* helper declared outside this excerpt */                    \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgLuData<double>>()),                                  \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>compressMsgLuFloat.
+
+#define BENCHMARK_MSG_LU_DECOMPRESS(ALGO, ENGINE)                                    \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##MsgLuFloat, double)        \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkDecompress( /* helper declared outside this excerpt */                  \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgLuData<double>>()),                                  \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>decompressMsgLuFloat.
+
+#define BENCHMARK_MSG_SP_COMPRESS(ALGO, ENGINE)                                      \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##MsgSpFloat, double)          \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkCompress( /* helper declared outside this excerpt */                    \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgSpData<double>>()),                                  \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>compressMsgSpFloat.
+
+#define BENCHMARK_MSG_SP_DECOMPRESS(ALGO, ENGINE)                                    \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##MsgSpFloat, double)        \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkDecompress( /* helper declared outside this excerpt */                  \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgSpData<double>>()),                                  \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>decompressMsgSpFloat.
+
+#define BENCHMARK_MSG_SPPM_COMPRESS(ALGO, ENGINE)                                    \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##MsgSppmFloat, double)        \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkCompress( /* helper declared outside this excerpt */                    \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgSppmData<double>>()),                                \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>compressMsgSppmFloat.
+
+#define BENCHMARK_MSG_SPPM_DECOMPRESS(ALGO, ENGINE)                                  \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##MsgSppmFloat, double)      \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkDecompress( /* helper declared outside this excerpt */                  \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgSppmData<double>>()),                                \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>decompressMsgSppmFloat.
+
+#define BENCHMARK_MSG_SWEEP3D_COMPRESS(ALGO, ENGINE)                                 \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##compress##MsgSweep3dFloat, double)     \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkCompress( /* helper declared outside this excerpt */                    \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgSweep3dData<double>>()),                             \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>compressMsgSweep3dFloat.
+
+#define BENCHMARK_MSG_SWEEP3D_DECOMPRESS(ALGO, ENGINE)                               \
+  BENCHMARK_TEMPLATE_F(DoubleBenchmark, ALGO##decompress##MsgSweep3dFloat, double)   \
+  (benchmark::State & state) { /* body of the generated DoubleBenchmark case */      \
+    BenchmarkDecompress( /* helper declared outside this excerpt */                  \
+        state, /* benchmark::State of the running case */                            \
+        std::unique_ptr<RealComprBenchmarkData<double>>(                             \
+            std::make_unique<MsgSweep3dData<double>>()),                             \
+        EncodingType::ENGINE); /* ENGINE picks the EncodingType value passed on */   \
+  }  // Expands to one benchmark named <ALGO>decompressMsgSweep3dFloat.
+
+// ============================================================================
+// Benchmark Registrations - Synthetic Data (All Algorithms)
+// DISABLED (#if 0) - only the real-world dataset benchmarks below are registered
+// ============================================================================
+
+#if 0  // never compiled: the synthetic-data registrations below are switched off
+#define GENERATE_SYNTHETIC_BENCHMARKS(ALGO, ENGINE)                \
+  BENCHMARK_SYNTHETIC_COMPRESS(ALGO, Constant, ConstantValues, ENGINE)     \
+  BENCHMARK_SYNTHETIC_DECOMPRESS(ALGO, Constant, ConstantValues, ENGINE)   \
+  BENCHMARK_SYNTHETIC_COMPRESS(ALGO, Increasing, IncreasingValues, ENGINE) \
+  BENCHMARK_SYNTHETIC_DECOMPRESS(ALGO, Increasing, IncreasingValues, ENGINE)     \
+  BENCHMARK_SYNTHETIC_COMPRESS(ALGO, SmallRange, DecimalSmallRange, ENGINE)      \
+  BENCHMARK_SYNTHETIC_DECOMPRESS(ALGO, SmallRange, DecimalSmallRange, ENGINE)    \
+  BENCHMARK_SYNTHETIC_COMPRESS(ALGO, Range, DecimalRange, ENGINE)                \
+  BENCHMARK_SYNTHETIC_DECOMPRESS(ALGO, Range, DecimalRange, ENGINE)              \
+  BENCHMARK_SYNTHETIC_COMPRESS(ALGO, LargeRange, DecimalLargeRange, ENGINE)      \
+  BENCHMARK_SYNTHETIC_DECOMPRESS(ALGO, LargeRange, DecimalLargeRange, ENGINE)    \
+  BENCHMARK_SYNTHETIC_COMPRESS(ALGO, Random, RandomValues, ENGINE)               \
+  BENCHMARK_SYNTHETIC_DECOMPRESS(ALGO, Random, RandomValues, ENGINE)
+
+#define X(ALGO, ENGINE) GENERATE_SYNTHETIC_BENCHMARKS(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per entry
+#undef X
+#endif  // end of the disabled synthetic-data registrations
+
+// ============================================================================
+// Benchmark Registrations - Spotify Dataset 1 (All Algorithms x 9 columns)
+// ============================================================================
+
+#define GENERATE_SPOTIFY_BENCHMARKS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE) \
+  BENCHMARK_ORIGINAL_DATASET_COMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)     \
+  BENCHMARK_ORIGINAL_DATASET_DECOMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)
+
+#define GENERATE_ALGORITHM_FOR_SPOTIFY(ALGO, ENGINE)                         \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Valence, "valence", ENGINE)              \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Acousticness, "acousticness", ENGINE)    \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Danceability, "danceability", ENGINE)    \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Energy, "energy", ENGINE)                \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Instrumentalness, "instrumentalness", ENGINE) \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Liveness, "liveness", ENGINE)            \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Loudness, "loudness", ENGINE)            \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Tempo, "tempo", ENGINE)                  \
+  GENERATE_SPOTIFY_BENCHMARKS(ALGO, Speechiness, "speechiness", ENGINE)  // 9 columns
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_SPOTIFY(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// ============================================================================
+// Benchmark Registrations - Spotify Dataset 2 (All Algorithms x 9 columns)
+// ============================================================================
+
+#define GENERATE_SPOTIFY2_BENCHMARKS(ALGO, COLUMN, ENGINE) \
+  BENCHMARK_NEW_DATASET_COMPRESS(ALGO, COLUMN, ENGINE)     \
+  BENCHMARK_NEW_DATASET_DECOMPRESS(ALGO, COLUMN, ENGINE)  // COLUMN is a bare identifier
+
+#define GENERATE_ALGORITHM_FOR_SPOTIFY2(ALGO, ENGINE)      \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, valence, ENGINE)      \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, acousticness, ENGINE) \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, danceability, ENGINE) \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, energy, ENGINE)       \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, instrumentalness, ENGINE) \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, liveness, ENGINE)     \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, loudness, ENGINE)     \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, tempo, ENGINE)        \
+  GENERATE_SPOTIFY2_BENCHMARKS(ALGO, speechiness, ENGINE)  // 9 columns
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_SPOTIFY2(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// ============================================================================
+// Benchmark Registrations - City Temperature Dataset (1 column x 3 algorithms)
+// ============================================================================
+
+#define GENERATE_ALGORITHM_FOR_CITY_TEMP(ALGO, ENGINE) \
+  BENCHMARK_CITY_TEMP_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_CITY_TEMP_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_CITY_TEMP(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// ============================================================================
+// Benchmark Registrations - POI Dataset (2 columns x 3 algorithms)
+// ============================================================================
+
+#define GENERATE_ALGORITHM_FOR_POI(COLUMN_CAP, COLUMN_LOWER, ALGO, ENGINE) \
+  BENCHMARK_POI_COMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)           \
+  BENCHMARK_POI_DECOMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)  // args reordered
+
+#define GENERATE_ALGORITHMS_FOR_POI_COLUMN(COLUMN_CAP, COLUMN_LOWER)           \
+  GENERATE_ALGORITHM_FOR_POI(COLUMN_CAP, COLUMN_LOWER, ALP, kALP)              \
+  GENERATE_ALGORITHM_FOR_POI(COLUMN_CAP, COLUMN_LOWER, BYTESTREAMSPLIT,        \
+                             kByteStreamSplit)                                 \
+  GENERATE_ALGORITHM_FOR_POI(COLUMN_CAP, COLUMN_LOWER, ZSTD, kZSTD)
+
+#define X(COLUMN_CAP, COLUMN_LOWER) \
+  GENERATE_ALGORITHMS_FOR_POI_COLUMN(COLUMN_CAP, COLUMN_LOWER)
+POI_COLUMN_LIST  // defined outside this excerpt; presumably invokes X per column
+#undef X  // NOTE(review): algorithms above are listed by hand, not via ALGORITHM_LIST
+
+// ============================================================================
+// Benchmark Registrations - Bird Migration Dataset (1 column x 3 algorithms)
+// ============================================================================
+
+#define GENERATE_ALGORITHM_FOR_BIRD_MIGRATION(ALGO, ENGINE) \
+  BENCHMARK_BIRD_MIGRATION_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_BIRD_MIGRATION_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_BIRD_MIGRATION(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// ============================================================================
+// Benchmark Registrations - Common Government Dataset (3 columns x 3 algorithms)
+// ============================================================================
+
+#define GENERATE_ALGORITHM_FOR_COMMON_GOVERNMENT(COLUMN_CAP, COLUMN_LOWER, ALGO, \
+                                                 ENGINE)                         \
+  BENCHMARK_COMMON_GOVERNMENT_COMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)   \
+  BENCHMARK_COMMON_GOVERNMENT_DECOMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)
+
+#define GENERATE_ALGORITHMS_FOR_COMMON_GOVERNMENT_COLUMN(COLUMN_CAP, COLUMN_LOWER) \
+  GENERATE_ALGORITHM_FOR_COMMON_GOVERNMENT(COLUMN_CAP, COLUMN_LOWER, ALP, kALP)    \
+  GENERATE_ALGORITHM_FOR_COMMON_GOVERNMENT(COLUMN_CAP, COLUMN_LOWER,               \
+                                           BYTESTREAMSPLIT, kByteStreamSplit)      \
+  GENERATE_ALGORITHM_FOR_COMMON_GOVERNMENT(COLUMN_CAP, COLUMN_LOWER, ZSTD, kZSTD)
+
+#define X(COLUMN_CAP, COLUMN_LOWER) \
+  GENERATE_ALGORITHMS_FOR_COMMON_GOVERNMENT_COLUMN(COLUMN_CAP, COLUMN_LOWER)
+COMMON_GOVERNMENT_COLUMN_LIST  // defined outside this excerpt; presumably X per column
+#undef X  // NOTE(review): algorithms above are listed by hand, not via ALGORITHM_LIST
+
+// ============================================================================
+// Benchmark Registrations - Arade Dataset (4 columns x 3 algorithms)
+// ============================================================================
+
+#define GENERATE_ALGORITHM_FOR_ARADE(COLUMN_CAP, COLUMN_LOWER, ALGO, ENGINE) \
+  BENCHMARK_ARADE_COMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)           \
+  BENCHMARK_ARADE_DECOMPRESS(ALGO, COLUMN_CAP, COLUMN_LOWER, ENGINE)  // args reordered
+
+#define GENERATE_ALGORITHMS_FOR_ARADE_COLUMN(COLUMN_CAP, COLUMN_LOWER)          \
+  GENERATE_ALGORITHM_FOR_ARADE(COLUMN_CAP, COLUMN_LOWER, ALP, kALP)             \
+  GENERATE_ALGORITHM_FOR_ARADE(COLUMN_CAP, COLUMN_LOWER, BYTESTREAMSPLIT,       \
+                               kByteStreamSplit)                                \
+  GENERATE_ALGORITHM_FOR_ARADE(COLUMN_CAP, COLUMN_LOWER, ZSTD, kZSTD)
+
+#define X(COLUMN_CAP, COLUMN_LOWER) \
+  GENERATE_ALGORITHMS_FOR_ARADE_COLUMN(COLUMN_CAP, COLUMN_LOWER)
+ARADE_COLUMN_LIST  // defined outside this excerpt; presumably invokes X per column
+#undef X  // NOTE(review): algorithms above are listed by hand, not via ALGORITHM_LIST
+
+// ============================================================================
+// Benchmark Registrations - FPC Datasets (13 single-column datasets x 3 each)
+// ============================================================================
+
+// NumBrain dataset
+#define GENERATE_ALGORITHM_FOR_NUM_BRAIN(ALGO, ENGINE) \
+  BENCHMARK_NUM_BRAIN_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_NUM_BRAIN_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_NUM_BRAIN(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// NumComet dataset
+#define GENERATE_ALGORITHM_FOR_NUM_COMET(ALGO, ENGINE) \
+  BENCHMARK_NUM_COMET_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_NUM_COMET_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_NUM_COMET(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// NumControl dataset
+#define GENERATE_ALGORITHM_FOR_NUM_CONTROL(ALGO, ENGINE) \
+  BENCHMARK_NUM_CONTROL_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_NUM_CONTROL_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_NUM_CONTROL(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// NumPlasma dataset
+#define GENERATE_ALGORITHM_FOR_NUM_PLASMA(ALGO, ENGINE) \
+  BENCHMARK_NUM_PLASMA_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_NUM_PLASMA_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_NUM_PLASMA(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// ObsError dataset
+#define GENERATE_ALGORITHM_FOR_OBS_ERROR(ALGO, ENGINE) \
+  BENCHMARK_OBS_ERROR_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_OBS_ERROR_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_OBS_ERROR(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// ObsInfo dataset
+#define GENERATE_ALGORITHM_FOR_OBS_INFO(ALGO, ENGINE) \
+  BENCHMARK_OBS_INFO_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_OBS_INFO_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_OBS_INFO(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// ObsSpitzer dataset
+#define GENERATE_ALGORITHM_FOR_OBS_SPITZER(ALGO, ENGINE) \
+  BENCHMARK_OBS_SPITZER_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_OBS_SPITZER_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_OBS_SPITZER(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// ObsTemp dataset
+#define GENERATE_ALGORITHM_FOR_OBS_TEMP(ALGO, ENGINE) \
+  BENCHMARK_OBS_TEMP_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_OBS_TEMP_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_OBS_TEMP(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// MsgBt dataset
+#define GENERATE_ALGORITHM_FOR_MSG_BT(ALGO, ENGINE) \
+  BENCHMARK_MSG_BT_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_MSG_BT_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_MSG_BT(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// MsgLu dataset
+#define GENERATE_ALGORITHM_FOR_MSG_LU(ALGO, ENGINE) \
+  BENCHMARK_MSG_LU_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_MSG_LU_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_MSG_LU(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// MsgSp dataset
+#define GENERATE_ALGORITHM_FOR_MSG_SP(ALGO, ENGINE) \
+  BENCHMARK_MSG_SP_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_MSG_SP_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_MSG_SP(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the sections that follow
+
+// MsgSppm dataset
+#define GENERATE_ALGORITHM_FOR_MSG_SPPM(ALGO, ENGINE) \
+  BENCHMARK_MSG_SPPM_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_MSG_SPPM_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_MSG_SPPM(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // X is redefined by the section that follows
+
+// MsgSweep3d dataset
+#define GENERATE_ALGORITHM_FOR_MSG_SWEEP3D(ALGO, ENGINE) \
+  BENCHMARK_MSG_SWEEP3D_COMPRESS(ALGO, ENGINE)           \
+  BENCHMARK_MSG_SWEEP3D_DECOMPRESS(ALGO, ENGINE)  // same (ALGO, ENGINE) for both
+
+#define X(ALGO, ENGINE) GENERATE_ALGORITHM_FOR_MSG_SWEEP3D(ALGO, ENGINE)
+ALGORITHM_LIST  // defined outside this excerpt; presumably invokes X per algorithm
+#undef X  // drop the temporary X once the list has been expanded
+
+}  // namespace parquet
+
+BENCHMARK_MAIN();
diff --git a/cpp/src/parquet/encoding_benchmark.cc b/cpp/src/parquet/encoding_benchmark.cc
index bea1a5807a2..48ee0558567 100644
--- a/cpp/src/parquet/encoding_benchmark.cc
+++ b/cpp/src/parquet/encoding_benchmark.cc
@@ -661,6 +661,78 @@ BENCHMARK(BM_ByteStreamSplitEncode_Float_Neon)->Apply(ByteStreamSplitApply);
 BENCHMARK(BM_ByteStreamSplitEncode_Double_Neon)->Apply(ByteStreamSplitApply);
 #endif
 
+// ----------------------------------------------------------------------
+// ALP encoding/decoding benchmarks
+
+static void BM_AlpEncodingFloat(benchmark::State& state) {  // times Put + FlushValues
+  std::vector<float> input(state.range(0), 64.0f);  // state.range(0) copies of 64.0f
+  auto alp_encoder = MakeTypedEncoder<FloatType>(Encoding::ALP);
+  for (auto _ : state) {
+    alp_encoder->Put(input.data(), static_cast<int>(input.size()));
+    alp_encoder->FlushValues();  // the returned buffer is discarded
+  }
+  state.SetItemsProcessed(state.iterations() * state.range(0));
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float));
+}
+
+BENCHMARK(BM_AlpEncodingFloat)->Range(MIN_RANGE, MAX_RANGE);
+
+static void BM_AlpDecodingFloat(benchmark::State& state) {  // times decoder setup + Decode
+  std::vector<float> values(state.range(0), 64.0f);  // state.range(0) copies of 64.0f
+  auto encoder = MakeTypedEncoder<FloatType>(Encoding::ALP);
+  encoder->Put(values.data(), static_cast<int>(values.size()));
+  std::shared_ptr<Buffer> buf = encoder->FlushValues();  // encoded once, outside timing
+  std::vector<float> output(values.size());  // allocated once, reused each iteration
+
+  for (auto _ : state) {
+    auto decoder = MakeTypedDecoder<FloatType>(Encoding::ALP);
+    decoder->SetData(static_cast<int>(values.size()), buf->data(),
+                     static_cast<int>(buf->size()));
+    decoder->Decode(output.data(), static_cast<int>(values.size()));
+    benchmark::ClobberMemory();  // keep the decoded writes from being optimized out
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float));
+  state.SetItemsProcessed(state.iterations() * state.range(0));
+}
+
+BENCHMARK(BM_AlpDecodingFloat)->Range(MIN_RANGE, MAX_RANGE);
+
+static void BM_AlpEncodingDouble(benchmark::State& state) {  // times Put + FlushValues
+  std::vector<double> input(state.range(0), 64.0);  // state.range(0) copies of 64.0
+  auto alp_encoder = MakeTypedEncoder<DoubleType>(Encoding::ALP);
+  for (auto _ : state) {
+    alp_encoder->Put(input.data(), static_cast<int>(input.size()));
+    alp_encoder->FlushValues();  // the returned buffer is discarded
+  }
+  state.SetItemsProcessed(state.iterations() * state.range(0));
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double));
+}
+
+BENCHMARK(BM_AlpEncodingDouble)->Range(MIN_RANGE, MAX_RANGE);
+
+static void BM_AlpDecodingDouble(benchmark::State& state) {  // times decoder setup + Decode
+  std::vector<double> values(state.range(0), 64.0);  // state.range(0) copies of 64.0
+  auto encoder = MakeTypedEncoder<DoubleType>(Encoding::ALP);
+  encoder->Put(values.data(), static_cast<int>(values.size()));
+  std::shared_ptr<Buffer> buf = encoder->FlushValues();  // encoded once, outside timing
+  std::vector<double> output(values.size());  // allocated once, reused each iteration
+
+  for (auto _ : state) {
+    auto decoder = MakeTypedDecoder<DoubleType>(Encoding::ALP);
+    decoder->SetData(static_cast<int>(values.size()), buf->data(),
+                     static_cast<int>(buf->size()));
+    decoder->Decode(output.data(), static_cast<int>(values.size()));
+    benchmark::ClobberMemory();  // keep the decoded writes from being optimized out
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double));
+  state.SetItemsProcessed(state.iterations() * state.range(0));
+}
+
+BENCHMARK(BM_AlpDecodingDouble)->Range(MIN_RANGE, MAX_RANGE);
+
+// ----------------------------------------------------------------------
+// DeltaBitPacking encoding/decoding benchmarks
+
 template <typename DType>
 static auto MakeDeltaBitPackingInputFixed(size_t length) {
   using T = typename DType::c_type;
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index fb4eb92a754..575d7e65726 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -259,6 +259,8 @@ std::string EncodingToString(Encoding::type t) {
       return "RLE_DICTIONARY";
     case Encoding::BYTE_STREAM_SPLIT:
       return "BYTE_STREAM_SPLIT";
+    case Encoding::ALP:
+      return "ALP";
     default:
       return "UNKNOWN";
   }
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index 7e8a18fc94d..ef64aa7f323 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -538,8 +538,9 @@ struct Encoding {
     DELTA_BYTE_ARRAY = 7,
     RLE_DICTIONARY = 8,
     BYTE_STREAM_SPLIT = 9,
+    ALP = 10,
     // Should always be last element (except UNKNOWN)
-    UNDEFINED = 10,
+    UNDEFINED = 11,
     UNKNOWN = 999
   };
 };
diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing
index a3d96a65e11..66dfde8b2a5 160000
--- a/cpp/submodules/parquet-testing
+++ b/cpp/submodules/parquet-testing
@@ -1 +1 @@
-Subproject commit a3d96a65e11e2bbca7d22a894e8313ede90a33a3
+Subproject commit 66dfde8b2a569e7cbc8e998153e8dd6f2b36f940