From 568165abc8d991f1916108b0ec8c47d681b7529b Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sat, 7 Oct 2023 13:22:46 +0200
Subject: [PATCH 01/14] prototype loading chunks on demand (wip)

---
 examples/9_read_blocks_on_demand.rs |  87 ++++++++++++
 src/block/reader.rs                 | 208 ++++++++++++++++++++++++----
 src/image/read/levels.rs            |   2 +-
 src/image/read/specific_channels.rs |  53 +++++--
 src/io.rs                           |  12 +-
 src/meta/attribute.rs               |  26 +++-
 src/meta/header.rs                  |  19 ++-
 7 files changed, 350 insertions(+), 57 deletions(-)
 create mode 100644 examples/9_read_blocks_on_demand.rs

diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
new file mode 100644
index 00000000..7f771899
--- /dev/null
+++ b/examples/9_read_blocks_on_demand.rs
@@ -0,0 +1,87 @@
+
+extern crate rand;
+extern crate half;
+
+
+// exr imports
+extern crate exr;
+
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::BufReader;
+use exr::block::UncompressedBlock;
+use exr::image::read::specific_channels::{read_specific_channels, RecursivePixelReader};
+use exr::prelude::{IntegerBounds, ReadSpecificChannel};
+
+/// Load only some specific pixel sections from the file, just when they are needed.
+fn main() {
+    let header_index = 0; // only load pixels from the first header
+    let mip_level = (0, 0); // only load largest mip map
+
+    // for this example, we use a hashmap instead of a real sparse texture.
+    // it stores blocks of rgba pixels, indexed by the position of the block (usize, usize)
+    let mut my_sparse_texture: HashMap<(usize, usize), Vec<[f32; 4]>> = Default::default();
+
+    let file = BufReader::new(
+        File::open("3GB.exr")
+            .expect("run example `7_write_raw_blocks` to generate this image file")
+    );
+
+    // initializes a lazy decoder (reads meta data immediately)
+    let mut chunk_reader = exr::block::read(file, true).unwrap()
+        .on_demand_chunks().unwrap();
+
+    // this object can decode packed exr blocks to simple rgb
+    let mut rgb_from_block_extractor = read_specific_channels()
+            .required("R").required("G").required("B")
+            .optional("A", 1.0)
+            .create_recursive_reader(&chunk_reader.header(header_index).channels).unwrap();
+
+    // later in your app, maybe when the view changed:
+    when_new_pixel_section_must_be_loaded(move |pixel_section| {
+
+        // todo: only load blocks that are not loaded yet. maybe an additional filter? or replace this with a more modular filtering architecture?
+        let compressed_chunks = chunk_reader
+            .load_all_chunks_for_display_space_section(header_index, mip_level, pixel_section);
+
+        // this could be done in parallel, e.g. by using rayon par_iter
+        // we use .flatten(), this simply discards all errors and only continues with the successfully loaded chunks
+        let packed_pixel_blocks = compressed_chunks.flatten()
+            .map(|chunk| UncompressedBlock::decompress_chunk(chunk, chunk_reader.meta_data(), chunk_reader.pedantic()))
+            .flatten();
+
+        // the exr blocks may contain arbitrary channels, but we are only interested in rgba.
+        // so we convert each exr block to an rgba block (vec of [f32; 4])
+        let rgba_blocks = packed_pixel_blocks.map(|block| {
+            let header = &chunk_reader.meta_data().headers[block.index.layer];
+
+            let position = block.index.pixel_position;
+            let size = block.index.pixel_size;
+            let mut rgba_buffer = vec![[0.0; 4]; size.area()]; // one rgba pixel (4 floats) for each pixel of the block
+
+            // decode individual pixels into our f32 buffer
+            // automatically converts f16 samples to f32 if required
+            // ignores all other channel data
+            rgb_from_block_extractor.read_block_pixels(header, block, |position, (r,g,b,a)|{
+                rgba_buffer[position.flat_index_for_size(size)] = [r,g,b,a];
+            });
+
+            (position.into(), rgba_buffer)
+        });
+
+        for (position, block) in rgba_blocks {
+            my_sparse_texture.insert(position, block);
+        }
+    })
+}
+
+/// Stand-in for an application event that requests a sub-rectangle of the image.
+/// As this is only a stub, it invokes the callback exactly once, with a fixed section.
+fn when_new_pixel_section_must_be_loaded(mut load_for_view: impl FnMut(IntegerBounds)){
+    let position = (800, 800);
+    let size = (600, 600);
+
+    // hand the one hard-coded section to the callback
+    let requested_section = IntegerBounds::new(position, size);
+    load_for_view(requested_section);
+}
\ No newline at end of file
diff --git a/src/block/reader.rs b/src/block/reader.rs
index bb9888ed..71c0646e 100644
--- a/src/block/reader.rs
+++ b/src/block/reader.rs
@@ -13,13 +13,16 @@ use crate::block::chunk::{Chunk, TileCoordinates};
 use crate::compression::Compression;
 use crate::error::{Error, Result, u64_to_usize, UnitResult};
 use crate::io::{PeekRead, Tracking};
-use crate::meta::{MetaData, OffsetTables};
+use crate::math::Vec2;
+use crate::meta::{MetaData, OffsetTables, TileIndices};
 use crate::meta::header::Header;
+use crate::prelude::{IntegerBounds, ReadSpecificChannel};
 
 /// Decode the meta data from a byte source, keeping the source ready for further reading.
 /// Continue decoding the remaining bytes by calling `filtered_chunks` or `all_chunks`.
 #[derive(Debug)]
 pub struct Reader<R> {
+    pedantic: bool,
     meta_data: MetaData,
     remaining_reader: PeekRead<Tracking<R>>, // TODO does R need to be Seek or is Tracking enough?
 }
@@ -32,7 +35,7 @@ impl<R: Read + Seek> Reader<R> {
     pub fn read_from_buffered(read: R, pedantic: bool) -> Result<Self> {
         let mut remaining_reader = PeekRead::new(Tracking::new(read));
         let meta_data = MetaData::read_validated_from_buffered_peekable(&mut remaining_reader, pedantic)?;
-        Ok(Self { meta_data, remaining_reader })
+        Ok(Self { meta_data, remaining_reader, pedantic })
     }
 
     // must not be mutable, as reading the file later on relies on the meta data
@@ -45,12 +48,20 @@ impl<R: Read + Seek> Reader<R> {
     /// Obtain the meta data ownership.
     pub fn into_meta_data(self) -> MetaData { self.meta_data }
 
+    /// Split this reader into the already decoded meta data and the underlying byte source.
+    /// The byte source is positioned directly after the meta data.
+    pub fn deconstruct(self) -> (MetaData, PeekRead<Tracking<R>>) {
+        // the pedantic flag is not part of the result and is dropped here
+        (self.meta_data, self.remaining_reader)
+    }
+
     /// Prepare to read all the chunks from the file.
     /// Does not decode the chunks now, but returns a decoder.
     /// Reading all chunks reduces seeking the file, but some chunks might be read without being used.
-    pub fn all_chunks(mut self, pedantic: bool) -> Result<AllChunksReader<R>> {
+    /// Reads and validates the offset tables if the reader was constructed with the pedantic flag.
+    pub fn all_chunks(mut self) -> Result<AllChunksReader<R>> {
         let total_chunk_count = {
-            if pedantic {
+            if self.pedantic {
                 let offset_tables = MetaData::read_offset_tables(&mut self.remaining_reader, &self.meta_data.headers)?;
                 validate_offset_tables(self.meta_data.headers.as_slice(), &offset_tables, self.remaining_reader.byte_position())?;
                 offset_tables.iter().map(|table| table.len()).sum()
@@ -72,12 +83,13 @@ impl<R: Read + Seek> Reader<R> {
     /// Prepare to read some the chunks from the file.
     /// Does not decode the chunks now, but returns a decoder.
     /// Reading only some chunks may seeking the file, potentially skipping many bytes.
+    /// Performs additional validation of the offset tables if the reader was constructed with the pedantic flag.
     // TODO tile indices add no new information to block index??
-    pub fn filter_chunks(mut self, pedantic: bool, mut filter: impl FnMut(&MetaData, TileCoordinates, BlockIndex) -> bool) -> Result<FilteredChunksReader<R>> {
+    pub fn filter_chunks(mut self, mut filter: impl FnMut(&MetaData, TileCoordinates, BlockIndex) -> bool) -> Result<FilteredChunksReader<R>> {
         let offset_tables = MetaData::read_offset_tables(&mut self.remaining_reader, &self.meta_data.headers)?;
 
         // TODO regardless of pedantic, if invalid, read all chunks instead, and filter after reading each chunk?
-        if pedantic {
+        if self.pedantic {
             validate_offset_tables(
                 self.meta_data.headers.as_slice(), &offset_tables,
                 self.remaining_reader.byte_position()
@@ -109,7 +121,7 @@ impl<R: Read + Seek> Reader<R> {
 
         filtered_offsets.sort_unstable(); // enables reading continuously if possible (already sorted where line order increasing)
 
-        if pedantic {
+        if self.pedantic {
             // table is sorted. if any two neighbours are equal, we have duplicates. this is invalid.
             if filtered_offsets.windows(2).any(|pair| pair[0] == pair[1]) {
                 return Err(Error::invalid("chunk offset table"))
@@ -120,20 +132,38 @@ impl<R: Read + Seek> Reader<R> {
             meta_data: self.meta_data,
             expected_filtered_chunk_count: filtered_offsets.len(),
             remaining_filtered_chunk_indices: filtered_offsets.into_iter(),
-            remaining_bytes: self.remaining_reader
+            remaining_bytes: self.remaining_reader,
+            pedantic: self.pedantic
+        })
+    }
+
+    /// Prepare to load individual chunks only when requested.
+    /// Does not decode any pixels just yet.
+    /// Seeks the file to load specific pixels.
+    pub fn on_demand_chunks(mut self) -> Result<OnDemandChunksReader<R>> {
+        let offset_tables = MetaData::read_offset_tables(&mut self.remaining_reader, &self.meta_data.headers)?;
+
+        if self.pedantic {
+            validate_offset_tables(self.meta_data.headers.as_slice(), &offset_tables, self.remaining_reader.byte_position())?;
+        }
+
+        Ok(OnDemandChunksReader {
+            offset_tables, pedantic: self.pedantic,
+            seekable_bytes: self.remaining_reader,
+            meta_data: self.meta_data,
         })
     }
 }
 
 
-fn validate_offset_tables(headers: &[Header], offset_tables: &OffsetTables, chunks_start_byte: usize) -> UnitResult {
-    let max_pixel_bytes: usize = headers.iter() // when compressed, chunks are smaller, but never larger than max
-        .map(|header| header.max_pixel_file_bytes())
+fn validate_offset_tables(headers: &[Header], offset_tables: &OffsetTables, chunks_start_byte: u64) -> UnitResult {
+    let max_pixel_bytes: u64 = headers.iter() // when compressed, chunks are smaller, but never larger than max
+        .map(|header| header.max_pixel_file_bytes() as u64) // usize has no `Into<u64>`; the cast is lossless, as usize is at most 64 bits wide
         .sum();
 
     // check that each offset is within the bounds
     let end_byte = chunks_start_byte + max_pixel_bytes;
-    let is_invalid = offset_tables.iter().flatten().map(|&u64| u64_to_usize(u64))
+    let is_invalid = offset_tables.iter().flatten().copied()
         .any(|chunk_start| chunk_start < chunks_start_byte || chunk_start > end_byte);
 
     if is_invalid { Err(Error::invalid("offset table")) }
@@ -154,6 +184,7 @@ pub struct FilteredChunksReader<R> {
     expected_filtered_chunk_count: usize,
     remaining_filtered_chunk_indices: std::vec::IntoIter<u64>,
     remaining_bytes: PeekRead<Tracking<R>>,
+    pedantic: bool,
 }
 
 /// Decode all chunks in the file without seeking.
@@ -169,8 +200,18 @@ pub struct AllChunksReader<R> {
     pedantic: bool,
 }
 
-/// Decode chunks in the file without seeking.
-/// Calls the supplied closure for each chunk.
+/// Decode individual chunks only when requested specifically, by seeking within the file.
+/// Also contains the image meta data.
+#[derive(Debug)]
+pub struct OnDemandChunksReader<R> {
+    meta_data: MetaData,
+    offset_tables: OffsetTables,
+    seekable_bytes: PeekRead<Tracking<R>>,
+    pedantic: bool,
+}
+
+/// While decoding chunks,
+/// calls the supplied closure for each chunk.
 /// The decoded chunks can be decompressed by calling
 /// `decompress_parallel`, `decompress_sequential`, or `sequential_decompressor`.
 /// Also contains the image meta data.
@@ -194,6 +235,9 @@ pub trait ChunksReader: Sized + Iterator<Item=Result<Chunk>> + ExactSizeIterator
     /// The decoded exr headers from the file.
     fn headers(&self) -> &[Header] { &self.meta_data().headers }
 
+    /// Whether to abort the file at the slightest hint of corruption.
+    fn pedantic(&self) -> bool;
+
     /// The number of chunks that this reader will return in total.
     /// Can be less than the total number of chunks in the file, if some chunks are skipped.
     fn expected_chunk_count(&self) -> usize;
@@ -217,12 +261,12 @@ pub trait ChunksReader: Sized + Iterator<Item=Result<Chunk>> + ExactSizeIterator
     /// Will fallback to sequential processing where threads are not available, or where it would not speed up the process.
     // FIXME try async + futures instead of rayon! Maybe even allows for external async decoding? (-> impl Stream<UncompressedBlock>)
     fn decompress_parallel(
-        self, pedantic: bool,
+        self,
         mut insert_block: impl FnMut(&MetaData, UncompressedBlock) -> UnitResult
     ) -> UnitResult
     {
-        let mut decompressor = match self.parallel_decompressor(pedantic) {
-            Err(old_self) => return old_self.decompress_sequential(pedantic, insert_block),
+        let mut decompressor = match self.parallel_decompressor() {
+            Err(old_self) => return old_self.decompress_sequential(insert_block),
             Ok(decompressor) => decompressor,
         };
 
@@ -239,18 +283,17 @@ pub trait ChunksReader: Sized + Iterator<Item=Result<Chunk>> + ExactSizeIterator
     /// Use `ParallelBlockDecompressor::new` if you want to use your own thread pool.
     /// By default, this uses as many threads as there are CPUs.
     /// Returns the `self` if there is no need for parallel decompression.
-    fn parallel_decompressor(self, pedantic: bool) -> std::result::Result<ParallelBlockDecompressor<Self>, Self> {
-        ParallelBlockDecompressor::new(self, pedantic)
+    fn parallel_decompressor(self) -> std::result::Result<ParallelBlockDecompressor<Self>, Self> {
+        let pedantic = self.pedantic(); ParallelBlockDecompressor::new(self, pedantic) // query the flag before `self` is moved
     }
 
     /// Return an iterator that decompresses the chunks in this thread.
     /// You can alternatively use `sequential_decompressor` if you prefer an external iterator.
     fn decompress_sequential(
-        self, pedantic: bool,
-        mut insert_block: impl FnMut(&MetaData, UncompressedBlock) -> UnitResult
+        self, mut insert_block: impl FnMut(&MetaData, UncompressedBlock) -> UnitResult
     ) -> UnitResult
     {
-        let mut decompressor = self.sequential_decompressor(pedantic);
+        let mut decompressor = self.sequential_decompressor();
         while let Some(block) = decompressor.next() {
             insert_block(decompressor.meta_data(), block?)?;
         }
@@ -260,13 +303,14 @@ pub trait ChunksReader: Sized + Iterator<Item=Result<Chunk>> + ExactSizeIterator
     }
 
     /// Prepare reading the chunks sequentially, only a single thread, but with less memory overhead.
-    fn sequential_decompressor(self, pedantic: bool) -> SequentialBlockDecompressor<Self> {
-        SequentialBlockDecompressor { remaining_chunks_reader: self, pedantic }
+    fn sequential_decompressor(self) -> SequentialBlockDecompressor<Self> {
+        SequentialBlockDecompressor { pedantic: self.pedantic(), remaining_chunks_reader: self } // fields are evaluated in order: read the flag before moving `self`
     }
 }
 
 impl<R, F> ChunksReader for OnProgressChunksReader<R, F> where R: ChunksReader, F: FnMut(f64) {
     fn meta_data(&self) -> &MetaData { self.chunks_reader.meta_data() }
+    fn pedantic(&self) -> bool { self.chunks_reader.pedantic() }
     fn expected_chunk_count(&self) -> usize { self.chunks_reader.expected_chunk_count() }
 }
 
@@ -304,6 +348,7 @@ impl<R, F> Iterator for OnProgressChunksReader<R, F> where R: ChunksReader, F: F
 
 impl<R: Read + Seek> ChunksReader for AllChunksReader<R> {
     fn meta_data(&self) -> &MetaData { &self.meta_data }
+    fn pedantic(&self) -> bool { self.pedantic }
     fn expected_chunk_count(&self) -> usize { self.remaining_chunks.end }
 }
 
@@ -331,6 +376,7 @@ impl<R: Read + Seek> Iterator for AllChunksReader<R> {
 
 impl<R: Read + Seek> ChunksReader for FilteredChunksReader<R> {
     fn meta_data(&self) -> &MetaData { &self.meta_data }
+    fn pedantic(&self) -> bool { self.pedantic }
     fn expected_chunk_count(&self) -> usize { self.expected_filtered_chunk_count }
 }
 
@@ -341,9 +387,9 @@ impl<R: Read + Seek> Iterator for FilteredChunksReader<R> {
     fn next(&mut self) -> Option<Self::Item> {
         // read as many chunks as we have desired chunk offsets
         self.remaining_filtered_chunk_indices.next().map(|next_chunk_location|{
-            self.remaining_bytes.skip_to( // no-op for seek at current position, uses skip_bytes for small amounts
-                                          usize::try_from(next_chunk_location)
-                                              .expect("too large chunk position for this machine")
+            self.remaining_bytes.skip_to(
+                // no-op for seek at current position, uses skip_bytes for small amounts
+                next_chunk_location
             )?;
 
             let meta_data = &self.meta_data;
@@ -523,5 +569,113 @@ impl<R: ChunksReader> Iterator for ParallelBlockDecompressor<R> {
 
 
 
+impl<R: Read + Seek> OnDemandChunksReader<R> {
+
+    pub fn meta_data(&self) -> &MetaData { &self.meta_data }
+
+    pub fn header(&self, header_index: usize) -> &Header { &self.meta_data().headers[header_index] }
+
+    /*pub fn specific_channels_block_decoder<Read>(&self, header_index: usize, reader: Read)
+        -> Read::RecursivePixelReader
+        where Read: ReadSpecificChannel
+    {
+        reader.create_recursive_reader(&self.header(header_index).channels)
+    }*/
+
+    pub fn load_all_chunks_for_display_space_section(
+        &mut self, header_index: usize, level: impl Into<Vec2<usize>>, display_window_section: IntegerBounds
+    ) -> impl '_ + Iterator<Item = Result<Chunk>>
+    {
+        self.load_chunks_for_blocks(|tile_index, block_index|{
+            if block_index.layer != header_index || block_index.level != level {
+                return false
+            }
+
+            let header = &self.meta_data.headers[block_index.layer];
+            let block_in_display_window = header
+                .get_block_display_window_pixel_coordinates(tile_index.location)
+                .expect("invalid tile index");
+
+            let should_load_block = display_window_section.intersects(block_in_display_window);
+            should_load_block
+        })
+    }
+
+    pub fn load_all_chunks_for_layer_space_section(
+        &mut self, header_index: usize, level: impl Into<Vec2<usize>>, data_window_section: IntegerBounds
+    ) -> impl '_ + Iterator<Item = Result<Chunk>>
+    {
+        self.load_chunks_for_blocks(|tile_index, block_index|{
+            if block_index.layer != header_index || block_index.level != level {
+                return false
+            }
+
+            let block_section = IntegerBounds::new(block_index.pixel_position, block_index.pixel_size);
+            let should_load_block = data_window_section.intersects(block_section);
+            should_load_block
+        })
+    }
+
+    /// Returned order is arbitrary (optimized for speed).
+    pub fn load_chunks_for_blocks(&mut self, filter_blocks: impl Fn(TileIndices, BlockIndex) -> bool) -> impl '_ + Iterator<Item = Result<Chunk>> {
+        let chunks_indices = self.find_seek_positions_for_blocks(filter_blocks).collect();
+        self.load_chunks(chunks_indices)
+    }
+
+    /// Computes which chunks to seek to in the file, based on the specified predicate.
+    /// Yields the byte position of each accepted chunk, header by header, in increasing-y block order.
+    pub fn find_seek_positions_for_blocks(&self, filter_blocks: impl Fn(TileIndices, BlockIndex) -> bool) -> impl '_ + Iterator<Item=u64> {
+        debug_assert_eq!(self.meta_data.headers.len(), self.offset_tables.len());
+        // collected eagerly: the predicate cannot be moved into a separate nested closure for every header
+        let mut seek_positions = Vec::new();
+        for (header_index, (header, offset_table)) in self.meta_data.headers.iter().zip(&self.offset_tables).enumerate() {
+            debug_assert_eq!(header.chunk_count, offset_table.len());
+
+            // todo: this iter allocates, save it in the reader later
+            for (tile_coordinates, &chunk_byte_position) in header.blocks_increasing_y_order().zip(offset_table) {
+                // TODO this algorithm should not know whether we need to make coordinates absolute?
+                // deduplicate with block::UncompressedBlock::decompress_chunk()?
+                let absolute_indices = header.get_absolute_block_pixel_coordinates(tile_coordinates.location)
+                    .expect("invalid tile index"); // the tile was enumerated from this very header
+                let absolute_position = absolute_indices.position.to_usize("coordinate calculation bug").unwrap();
+
+                let block_index = BlockIndex {
+                    layer: header_index, pixel_position: absolute_position,
+                    pixel_size: tile_coordinates.size, level: tile_coordinates.location.level_index,
+                };
+                if filter_blocks(tile_coordinates, block_index) { seek_positions.push(chunk_byte_position); }
+            }
+        }
+
+        seek_positions.into_iter()
+    }
+
+    /*pub fn find_seek_position_for_block(&self, layer_index: usize, filter_blocks: impl Fn(TileIndices) -> bool) -> impl Iterator<> {
+        let header = &self.meta_data.headers[layer_index];
+
+        // TODO: directly compute the block index based on mip level and resolution??
+        let increasing_y_block_index_in_header = header.blocks_increasing_y_order()
+            .position(filter_blocks); // todo: this is a vec internally, save it in the reader and look it up at this point
+
+        let offset_table = &self.offset_tables[layer_index];
+        offset_table[increasing_y_block_index_in_header]
+    }*/
+
+
+    /// Reads the specified chunks by seeking the file. Chunks are returned in the order in which they are stored in the file, which may differ from the requested order.
+    pub fn load_chunks(&mut self, mut chunks: Vec<u64>) -> impl '_ + Iterator<Item = Result<Chunk>> {
+        // sorting the file access should improve read performance, especially on HDDs
+        // since seeking can be skipped for blocks that are stored right after another in the file
+        chunks.sort_unstable();
+        chunks.into_iter().map(move |seek| self.load_chunk(seek))
+    }
+
+    /// Reads one individual chunk from the byte source by seeking.
+    pub fn load_chunk(&mut self, block_seek_position: u64) -> Result<Chunk> {
+        self.seekable_bytes.skip_to(block_seek_position)?;
+        Chunk::read(&mut self.seekable_bytes, &self.meta_data)
+    }
+}
+
 
 
diff --git a/src/image/read/levels.rs b/src/image/read/levels.rs
index 5705903c..09703a4c 100644
--- a/src/image/read/levels.rs
+++ b/src/image/read/levels.rs
@@ -93,7 +93,7 @@ impl<DeepOrFlatSamples> ReadLargestLevel<DeepOrFlatSamples> {
     ///
     /// Throws an error for images with deep data or subsampling.
     pub fn specific_channels(self) -> ReadZeroChannels {
-        ReadZeroChannels { }
+        read_specific_channels()
     }
 }
 
diff --git a/src/image/read/specific_channels.rs b/src/image/read/specific_channels.rs
index 375691c4..28c94e07 100644
--- a/src/image/read/specific_channels.rs
+++ b/src/image/read/specific_channels.rs
@@ -88,6 +88,29 @@ pub trait RecursivePixelReader {
         &self, bytes: &'s[u8], pixels: &mut [FullPixel],
         get_pixel: impl Fn(&mut FullPixel) -> &mut Self::RecursivePixel
     );
+
+    // TODO dedup with SpecificChannelsReader::read_block(..)?
+    /// Note: The (x,y) coordinates are in block space. You will have to add `block.index.pixel_position` for the pixel position in the layer.
+    fn read_block_pixels(
+        &self, header: &Header, block: UncompressedBlock,
+        mut set_pixel: impl FnMut(Vec2<usize>, <Self::RecursivePixel as IntoNonRecursive>::NonRecursive)
+    )
+        where Self::RecursivePixel: IntoNonRecursive
+    {
+        let mut one_line_of_recursive_pixels = vec![Self::RecursivePixel::default(); block.index.pixel_size.width()];
+
+        let byte_lines = block.data.chunks_exact(header.channels.bytes_per_pixel * block.index.pixel_size.width());
+        debug_assert_eq!(byte_lines.len(), block.index.pixel_size.height(), "invalid block lines split");
+
+        for (y_offset, line_bytes) in byte_lines.enumerate() { // TODO sampling
+            // this two-step copy method should be very cache friendly in theory, and also reduce sample_type lookup count
+            self.read_pixels(line_bytes, &mut one_line_of_recursive_pixels, |px| px);
+
+            for (x_offset, recursive_pixel) in one_line_of_recursive_pixels.iter().enumerate() {
+                set_pixel(Vec2(x_offset, y_offset), recursive_pixel.into_tuple());
+            }
+        }
+    }
 }
 
 // does not use the generic `Recursive` struct to reduce the number of angle brackets in the public api
@@ -185,20 +208,13 @@ ChannelsReader for SpecificChannelsReader<PixelStorage, SetPixel, PxReader, Pixe
     fn filter_block(&self, tile: TileCoordinates) -> bool { tile.is_largest_resolution_level() } // TODO all levels
 
     fn read_block(&mut self, header: &Header, block: UncompressedBlock) -> UnitResult {
-        let mut pixels = vec![PxReader::RecursivePixel::default(); block.index.pixel_size.width()]; // TODO allocate once in self
+        let (storage, set_pixel, reader) = (&mut self.pixel_storage, &mut self.set_pixel, &self.pixel_reader);
+        let block_position = block.index.pixel_position;
 
-        let byte_lines = block.data.chunks_exact(header.channels.bytes_per_pixel * block.index.pixel_size.width());
-        debug_assert_eq!(byte_lines.len(), block.index.pixel_size.height(), "invalid block lines split");
-
-        for (y_offset, line_bytes) in byte_lines.enumerate() { // TODO sampling
-            // this two-step copy method should be very cache friendly in theory, and also reduce sample_type lookup count
-            self.pixel_reader.read_pixels(line_bytes, &mut pixels, |px| px);
-
-            for (x_offset, pixel) in pixels.iter().enumerate() {
-                let set_pixel = &self.set_pixel;
-                set_pixel(&mut self.pixel_storage, block.index.pixel_position + Vec2(x_offset, y_offset), pixel.into_tuple());
-            }
-        }
+        reader.read_block_pixels(
+            header, block,
+            |pos, px| set_pixel(storage, pos + block_position, px)
+        );
 
         Ok(())
     }
@@ -213,6 +229,16 @@ ChannelsReader for SpecificChannelsReader<PixelStorage, SetPixel, PxReader, Pixe
 /// to read as many channels as desired.
 pub type ReadZeroChannels = NoneMore;
 
+/// Read only layers that contain the specified channels, skipping any other channels in the layer.
+/// Further specify which channels should be included by calling `.required("ChannelName")`
+/// or `.optional("ChannelName", default_value)` on the result of this function.
+/// Call `collect_pixels` afterwards to define the pixel container for your set of channels.
+///
+/// Throws an error for images with deep data or subsampling.
+pub fn read_specific_channels() -> ReadZeroChannels {
+    ReadZeroChannels { }
+}
+
 impl ReadSpecificChannel for NoneMore {
     type RecursivePixelReader = NoneMore;
     fn create_recursive_reader(&self, _: &ChannelList) -> Result<Self::RecursivePixelReader> { Ok(NoneMore) }
@@ -258,6 +284,7 @@ impl<Sample, ReadChannels> ReadSpecificChannel for ReadRequiredChannel<ReadChann
     }
 }
 
+
 /// Reader for a single channel. Generic over the concrete sample type (f16, f32, u32).
 #[derive(Clone, Debug)]
 pub struct SampleReader<Sample> {
diff --git a/src/io.rs b/src/io.rs
index 1fb863b3..cf9e2faa 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -156,7 +156,7 @@ impl<T: Read + Seek> PeekRead<Tracking<T>> {
 
     /// Seek this read to the specified byte position.
     /// Discards any previously peeked value.
-    pub fn skip_to(&mut self, position: usize) -> std::io::Result<()> {
+    pub fn skip_to(&mut self, position: u64) -> std::io::Result<()> {
         self.inner.seek_read_to(position)?;
         self.peeked = None;
         Ok(())
@@ -166,7 +166,7 @@ impl<T: Read + Seek> PeekRead<Tracking<T>> {
 impl<T: Read> PeekRead<Tracking<T>> {
 
     /// Current number of bytes read.
-    pub fn byte_position(&self) -> usize {
+    pub fn byte_position(&self) -> u64 {
         self.inner.byte_position()
     }
 }
@@ -179,7 +179,7 @@ pub struct Tracking<T> {
     /// Do not expose to prevent seeking without updating position
     inner: T,
 
-    position: usize,
+    position: u64,
 }
 
 impl<T: Read> Read for Tracking<T> {
@@ -211,7 +211,7 @@ impl<T> Tracking<T> {
     }
 
     /// Current number of bytes written or read.
-    pub fn byte_position(&self) -> usize {
+    pub fn byte_position(&self) -> u64 {
         self.position
     }
 }
@@ -220,7 +220,7 @@ impl<T: Read + Seek> Tracking<T> {
 
     /// Set the reader to the specified byte position.
     /// If it is only a couple of bytes, no seek system call is performed.
-    pub fn seek_read_to(&mut self, target_position: usize) -> std::io::Result<()> {
+    pub fn seek_read_to(&mut self, target_position: u64) -> std::io::Result<()> {
         let delta = target_position as i128 - self.position as i128; // FIXME  panicked at 'attempt to subtract with overflow'
         debug_assert!(delta.abs() < usize::MAX as i128);
 
@@ -241,7 +241,7 @@ impl<T: Write + Seek> Tracking<T> {
 
     /// Move the writing cursor to the specified target byte index.
     /// If seeking forward, this will write zeroes.
-    pub fn seek_write_to(&mut self, target_position: usize) -> std::io::Result<()> {
+    pub fn seek_write_to(&mut self, target_position: u64) -> std::io::Result<()> {
         if target_position < self.position {
             self.inner.seek(SeekFrom::Start(u64::try_from(target_position).unwrap()))?;
         }
diff --git a/src/meta/attribute.rs b/src/meta/attribute.rs
index 5b71e825..e8e67ca2 100644
--- a/src/meta/attribute.rs
+++ b/src/meta/attribute.rs
@@ -899,10 +899,19 @@ impl IntegerBounds {
 
     /// Returns whether the specified rectangle is equal to or inside this rectangle.
     pub fn contains(self, subset: Self) -> bool {
-           subset.position.x() >= self.position.x()
-        && subset.position.y() >= self.position.y()
-        && subset.end().x() <= self.end().x()
-        && subset.end().y() <= self.end().y()
+        subset.position.x() >= self.position.x()
+            && subset.position.y() >= self.position.y()
+            && subset.end().x() <= self.end().x()
+            && subset.end().y() <= self.end().y()
+    }
+
+    /// Returns whether the specified rectangle touches this rectangle.
+    pub fn intersects(self, other: Self) -> bool {
+        // https://stackoverflow.com/questions/2752349/fast-rectangle-to-rectangle-intersection
+        other.position.x() <= self.end().x()
+            && self.position.x() <= other.end().x()
+            && other.position.y() <= self.end().y()
+            && self.position.y() <= other.end().y()
     }
 }
 
@@ -2223,4 +2232,13 @@ mod test {
         }
     }
 
+    #[test]
+    fn rectangle_intersect(){
+        assert!(super::IntegerBounds::new((0, 0), (4, 4)).intersects(super::IntegerBounds::new((2, 2), (4, 4)))); // overlapping rectangles
+    }
+
+    #[test]
+    fn rectangle_contains(){
+        assert!(super::IntegerBounds::new((0, 0), (4, 4)).contains(super::IntegerBounds::new((1, 1), (2, 2)))); // strictly inside
+    }
 }
diff --git a/src/meta/header.rs b/src/meta/header.rs
index b322b18f..730df973 100644
--- a/src/meta/header.rs
+++ b/src/meta/header.rs
@@ -484,10 +484,17 @@ impl Header {
         }
     }
 
-    /// Calculate the position of a block in the global infinite 2D space of a file. May be negative.
-    pub fn get_block_data_window_pixel_coordinates(&self, tile: TileCoordinates) -> Result<IntegerBounds> {
-        let data = self.get_absolute_block_pixel_coordinates(tile)?;
-        Ok(data.with_origin(self.own_attributes.layer_position))
+    /// Calculate the position of a block in the global infinite 2D space of a file (the display space).
+    /// Position may be negative.
+    pub fn get_block_display_window_pixel_coordinates(&self, tile: TileCoordinates) -> Result<IntegerBounds> {
+        let data_coords = self.get_absolute_block_pixel_coordinates(tile)?;
+        Ok(self.get_display_window_from_data_window(data_coords))
+    }
+
+    /// Transform a data-space section to the display-space section (the global infinite 2D space of a file).
+    /// Position may be negative.
+    pub fn get_display_window_from_data_window(&self, data_window_pixels: IntegerBounds) -> IntegerBounds {
+        data_window_pixels.with_origin(self.own_attributes.layer_position)
     }
 
     /// Calculate the pixel index rectangle inside this header. Is not negative. Starts at `0`.
@@ -606,9 +613,9 @@ impl Header {
     }
 
     /// Approximates the maximum number of bytes that the pixels of this header will consume in a file.
-    /// Due to compression, the actual byte size may be smaller.
+    /// Due to compression, the actual file byte size may be smaller.
     pub fn max_pixel_file_bytes(&self) -> usize {
-        assert!(!self.deep);
+        assert!(!self.deep, "deep data not supported yet");
 
         self.chunk_count * 64 // at most 64 bytes overhead for each chunk (header index, tile description, chunk size, and more)
             + self.total_pixel_bytes()

From 642e3e7a5262e6c1b519464867a1aa79a62c0fb4 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sat, 7 Oct 2023 14:07:21 +0200
Subject: [PATCH 02/14] fix some compile time errors

---
 examples/8_read_raw_blocks.rs       |  4 +-
 examples/9_read_blocks_on_demand.rs | 14 +++---
 src/block/chunk.rs                  |  6 +--
 src/block/reader.rs                 | 67 +++++++++++++++++------------
 src/block/writer.rs                 |  8 ++--
 src/image/read/image.rs             |  8 ++--
 src/image/read/specific_channels.rs |  6 +--
 src/io.rs                           | 12 +++---
 8 files changed, 71 insertions(+), 54 deletions(-)

diff --git a/examples/8_read_raw_blocks.rs b/examples/8_read_raw_blocks.rs
index 4864c98a..410732fa 100644
--- a/examples/8_read_raw_blocks.rs
+++ b/examples/8_read_raw_blocks.rs
@@ -80,7 +80,7 @@ fn main() {
     let reader = reader
 
         // do not worry about multi-resolution levels or deep data
-        .filter_chunks(true, |meta_data, tile, block| {
+        .filter_chunks(|meta_data, tile, block| {
             let header = &meta_data.headers[block.layer];
             !header.deep && tile.is_largest_resolution_level()
         }).unwrap()
@@ -94,7 +94,7 @@ fn main() {
         });
 
     // read all pixel blocks from the image, decompressing in parallel
-    reader.decompress_parallel(true, |meta_data, block|{
+    reader.decompress_parallel(|meta_data, block|{
         let header = &meta_data.headers[block.index.layer];
 
         // collect all pixel values from the pixel block
diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
index 7f771899..c47fd3ed 100644
--- a/examples/9_read_blocks_on_demand.rs
+++ b/examples/9_read_blocks_on_demand.rs
@@ -9,6 +9,7 @@ extern crate exr;
 use std::collections::HashMap;
 use std::fs::File;
 use std::io::BufReader;
+use exr::block::chunk::Chunk;
 use exr::block::UncompressedBlock;
 use exr::image::read::specific_channels::{read_specific_channels, RecursivePixelReader};
 use exr::prelude::{IntegerBounds, ReadSpecificChannel};
@@ -42,12 +43,15 @@ fn main() {
 
         // todo: only load blocks that are not loaded yet. maybe an additional filter? or replace this with a more modular filtering architecture?
         let compressed_chunks = chunk_reader
-            .load_all_chunks_for_display_space_section(header_index, mip_level, pixel_section);
+            .load_all_chunks_for_display_space_section(header_index, mip_level, pixel_section)
+
+            // we use .flatten(), this simply discards all errors and only continues with the successfully loaded chunks
+            // we collect here due to borrowing meta data
+            .flatten().collect::<Vec<Chunk>>();
 
         // this could be done in parallel, e.g. by using rayon par_iter
-        // we use .flatten(), this simply discards all errors and only continues with the successfully loaded chunks
-        let packed_pixel_blocks = compressed_chunks.flatten()
-            .map(|chunk| UncompressedBlock::decompress_chunk(chunk, chunk_reader.meta_data(), chunk_reader.pedantic()))
+        let packed_pixel_blocks = compressed_chunks.into_iter()
+            .map(|chunk| UncompressedBlock::decompress_chunk(chunk, chunk_reader.meta_data(), true))
             .flatten();
 
         // the exr blocks may contain arbitrary channels, but we are only interested in rgba.
@@ -57,7 +61,7 @@ fn main() {
 
             let position = block.index.pixel_position;
             let size = block.index.pixel_size;
-            let rgba_buffer = vec![[0.0; 4], size.area()]; // rgba = 4 floats
+            let mut rgba_buffer = vec![[0.0; 4]; size.area()]; // rgba = 4 floats
 
             // decode individual pixels into our f32 buffer
             // automatically converts f16 samples to f32 if required
diff --git a/src/block/chunk.rs b/src/block/chunk.rs
index ff138f87..e4236ce3 100644
--- a/src/block/chunk.rs
+++ b/src/block/chunk.rs
@@ -365,11 +365,11 @@ impl Chunk {
             compressed_block: match header.blocks {
                 // flat data
                 BlockDescription::ScanLines if !header.deep => CompressedBlock::ScanLine(CompressedScanLineBlock::read(read, max_block_byte_size)?),
-                BlockDescription::Tiles(_) if !header.deep     => CompressedBlock::Tile(CompressedTileBlock::read(read, max_block_byte_size)?),
+                BlockDescription::Tiles(_) if !header.deep => CompressedBlock::Tile(CompressedTileBlock::read(read, max_block_byte_size)?),
 
                 // deep data
-                BlockDescription::ScanLines   => CompressedBlock::DeepScanLine(CompressedDeepScanLineBlock::read(read, max_block_byte_size)?),
-                BlockDescription::Tiles(_)    => CompressedBlock::DeepTile(CompressedDeepTileBlock::read(read, max_block_byte_size)?),
+                BlockDescription::ScanLines => CompressedBlock::DeepScanLine(CompressedDeepScanLineBlock::read(read, max_block_byte_size)?),
+                BlockDescription::Tiles(_) => CompressedBlock::DeepTile(CompressedDeepTileBlock::read(read, max_block_byte_size)?),
             },
         };
 
diff --git a/src/block/reader.rs b/src/block/reader.rs
index 71c0646e..a68c330b 100644
--- a/src/block/reader.rs
+++ b/src/block/reader.rs
@@ -1,7 +1,7 @@
 //! Composable structures to handle reading an image.
 
 
-use std::convert::TryFrom;
+use std::convert::{TryFrom};
 use std::fmt::Debug;
 use std::io::{Read, Seek};
 use rayon_core::{ThreadPool, ThreadPoolBuildError};
@@ -11,12 +11,12 @@ use smallvec::alloc::sync::Arc;
 use crate::block::{BlockIndex, UncompressedBlock};
 use crate::block::chunk::{Chunk, TileCoordinates};
 use crate::compression::Compression;
-use crate::error::{Error, Result, u64_to_usize, UnitResult};
+use crate::error::{Error, Result, UnitResult};
 use crate::io::{PeekRead, Tracking};
 use crate::math::Vec2;
 use crate::meta::{MetaData, OffsetTables, TileIndices};
 use crate::meta::header::Header;
-use crate::prelude::{IntegerBounds, ReadSpecificChannel};
+use crate::prelude::{IntegerBounds};
 
 /// Decode the meta data from a byte source, keeping the source ready for further reading.
 /// Continue decoding the remaining bytes by calling `filtered_chunks` or `all_chunks`.
@@ -76,7 +76,7 @@ impl<R: Read + Seek> Reader<R> {
             meta_data: self.meta_data,
             remaining_chunks: 0 .. total_chunk_count,
             remaining_bytes: self.remaining_reader,
-            pedantic
+            pedantic: self.pedantic
         })
     }
 
@@ -148,7 +148,7 @@ impl<R: Read + Seek> Reader<R> {
         }
 
         Ok(OnDemandChunksReader {
-            offset_tables, pedantic: self.pedantic,
+            offset_tables, 
             seekable_bytes: self.remaining_reader,
             meta_data: self.meta_data,
         })
@@ -158,7 +158,7 @@ impl<R: Read + Seek> Reader<R> {
 
 fn validate_offset_tables(headers: &[Header], offset_tables: &OffsetTables, chunks_start_byte: u64) -> UnitResult {
     let max_pixel_bytes: u64 = headers.iter() // when compressed, chunks are smaller, but never larger than max
-        .map(|header| header.max_pixel_file_bytes().into())
+        .map(|header| u64::try_from(header.max_pixel_file_bytes()).expect("failed to cast usize to u64"))
         .sum();
 
     // check that each offset is within the bounds
@@ -207,7 +207,6 @@ pub struct OnDemandChunksReader<R> {
     meta_data: MetaData,
     offset_tables: OffsetTables,
     seekable_bytes: PeekRead<Tracking<R>>,
-    pedantic: bool,
 }
 
 /// While decoding chunks,
@@ -284,7 +283,8 @@ pub trait ChunksReader: Sized + Iterator<Item=Result<Chunk>> + ExactSizeIterator
     /// By default, this uses as many threads as there are CPUs.
     /// Returns the `self` if there is no need for parallel decompression.
     fn parallel_decompressor(self) -> std::result::Result<ParallelBlockDecompressor<Self>, Self> {
-        ParallelBlockDecompressor::new(self, self.pedantic())
+        let pedantic = self.pedantic();
+        ParallelBlockDecompressor::new(self, pedantic)
     }
 
     /// Return an iterator that decompresses the chunks in this thread.
@@ -304,7 +304,8 @@ pub trait ChunksReader: Sized + Iterator<Item=Result<Chunk>> + ExactSizeIterator
 
     /// Prepare reading the chunks sequentially, only a single thread, but with less memory overhead.
     fn sequential_decompressor(self) -> SequentialBlockDecompressor<Self> {
-        SequentialBlockDecompressor { remaining_chunks_reader: self, pedantic: self.pedantic() }
+        let pedantic = self.pedantic();
+        SequentialBlockDecompressor { remaining_chunks_reader: self, pedantic }
     }
 }
 
@@ -586,12 +587,14 @@ impl<R: Read + Seek> OnDemandChunksReader<R> {
         &mut self, header_index: usize, level: impl Into<Vec2<usize>>, display_window_section: IntegerBounds
     ) -> impl '_ + Iterator<Item = Result<Chunk>>
     {
-        self.load_chunks_for_blocks(|tile_index, block_index|{
+        let level = level.into();
+
+        self.load_chunks_for_blocks(move |meta, tile_index, block_index|{
             if block_index.layer != header_index || block_index.level != level {
                 return false
             }
 
-            let header = &self.meta_data.headers[block_index.layer];
+            let header = &meta.headers[block_index.layer];
             let block_in_display_window = header
                 .get_block_display_window_pixel_coordinates(tile_index.location)
                 .expect("invalid tile index");
@@ -605,49 +608,59 @@ impl<R: Read + Seek> OnDemandChunksReader<R> {
         &mut self, header_index: usize, level: impl Into<Vec2<usize>>, data_window_section: IntegerBounds
     ) -> impl '_ + Iterator<Item = Result<Chunk>>
     {
-        self.load_chunks_for_blocks(|tile_index, block_index|{
+        let level = level.into();
+
+        self.load_chunks_for_blocks(move |_meta, _tile_index, block_index|{
             if block_index.layer != header_index || block_index.level != level {
                 return false
             }
 
-            let block_section = IntegerBounds::new(block_index.pixel_position, block_index.pixel_size);
+            let block_section = IntegerBounds::new(block_index.pixel_position.to_i32(), block_index.pixel_size);
             let should_load_block = data_window_section.intersects(block_section);
             should_load_block
         })
     }
 
     /// Returned order is arbitrary (optimized for speed).
-    pub fn load_chunks_for_blocks(&mut self, filter_blocks: impl Fn(TileIndices, BlockIndex) -> bool) -> impl '_ + Iterator<Item = Result<Chunk>> {
-        let chunks_indices = self.find_seek_positions_for_blocks(filter_blocks).collect();
+    pub fn load_chunks_for_blocks(&mut self, filter_blocks: impl Fn(&MetaData, TileIndices, BlockIndex) -> bool) -> impl '_ + Iterator<Item = Result<Chunk>> {
+        let chunks_indices = self.find_seek_positions_for_blocks(filter_blocks);
         self.load_chunks(chunks_indices)
     }
 
     /// Computes which chunks to seek to in the file, based on the specified predicate.
     /// Iterator returns block indices in increasing-y order.
-    pub fn find_seek_positions_for_blocks(&self, filter_blocks: impl Fn(TileIndices, BlockIndex) -> bool) -> impl '_ + Iterator<Item=u64> {
+    pub fn find_seek_positions_for_blocks(&self, filter_blocks: impl Fn(&MetaData, TileIndices, BlockIndex) -> bool) -> Vec<u64> {
         debug_assert_eq!(self.meta_data.headers.len(), self.offset_tables.len());
+        let filter_blocks = &filter_blocks;
 
         self.meta_data.headers.iter().zip(&self.offset_tables).enumerate()
             .flat_map(move |(header_index, (header, offset_table))| {
                 debug_assert_eq!(header.chunk_count, offset_table.len());
 
                 header.blocks_increasing_y_order().zip(offset_table) // todo: this iter allocates, save it in the reader later
-                    .filter(move |(tile_coordinates, &seek_position)|{
+                    .filter(move |(tile_coordinates, _seek_pos)|{
 
                         // TODO this algorithm should not now whether we need to make coordinates absolute?
                         // deduplicate with block::UncompressedBlock::decompress_chunk()?
-                        let absolute_indices = header.get_absolute_block_pixel_coordinates(tile_coordinates.location)?;
-                        let absolute_position = absolute_indices.position.to_usize("coordinate calculation bug").unwrap();
-
-                        filter_blocks(*tile_coordinates, BlockIndex {
-                            layer: header_index,
-                            pixel_position: absolute_position,
-                            pixel_size: tile_coordinates.size,
-                            level: tile_coordinates.location.level_index,
-                        })
+                        let absolute_indices = header.get_absolute_block_pixel_coordinates(tile_coordinates.location)
+                            .expect("tile index bug");
+
+                        let absolute_position = absolute_indices.position
+                            .to_usize("coordinate calculation bug").unwrap();
+
+                        filter_blocks(
+                            self.meta_data(), *tile_coordinates,
+                            BlockIndex {
+                                layer: header_index,
+                                pixel_position: absolute_position,
+                                pixel_size: tile_coordinates.size,
+                                level: tile_coordinates.location.level_index,
+                            }
+                        )
                     })
-                    .map(|(_, chunk_byte_position)| chunk_byte_position)
+                    .map(move |(_, &chunk_byte_position)| chunk_byte_position)
             })
+            .collect()
     }
 
     /*pub fn find_seek_position_for_block(&self, layer_index: usize, filter_blocks: impl Fn(TileIndices) -> bool) -> impl Iterator<> {
diff --git a/src/block/writer.rs b/src/block/writer.rs
index 1227c695..fde152bf 100644
--- a/src/block/writer.rs
+++ b/src/block/writer.rs
@@ -41,7 +41,7 @@ pub fn write_chunks_with<W: Write + Seek>(
 pub struct ChunkWriter<W> {
     header_count: usize,
     byte_writer: Tracking<W>,
-    chunk_indices_byte_location: std::ops::Range<usize>,
+    chunk_indices_byte_location: std::ops::Range<u64>,
     chunk_indices_increasing_y: OffsetTables,
     chunk_count: usize, // TODO compose?
 }
@@ -142,7 +142,7 @@ impl<W> ChunksWriter for ChunkWriter<W> where W: Write + Seek {
             return Err(Error::invalid(format!("chunk at index {} is already written", index_in_header_increasing_y)));
         }
 
-        *chunk_index_slot = usize_to_u64(self.byte_writer.byte_position());
+        *chunk_index_slot = self.byte_writer.byte_position();
         chunk.write(&mut self.byte_writer, self.header_count)?;
         Ok(())
     }
@@ -166,10 +166,10 @@ impl<W> ChunkWriter<W> where W: Write + Seek {
             }
         }*/
 
-        let offset_table_size: usize = headers.iter().map(|header| header.chunk_count).sum();
+        let offset_table_size = headers.iter().map(|header| header.chunk_count).sum();
 
         let offset_table_start_byte = write.byte_position();
-        let offset_table_end_byte = write.byte_position() + offset_table_size * u64::BYTE_SIZE;
+        let offset_table_end_byte = offset_table_start_byte + usize_to_u64(offset_table_size * u64::BYTE_SIZE);
 
         // skip offset tables, filling with 0, will be updated after the last chunk has been written
         write.seek_write_to(offset_table_end_byte)?;
diff --git a/src/image/read/image.rs b/src/image/read/image.rs
index fce2f527..a60815dd 100644
--- a/src/image/read/image.rs
+++ b/src/image/read/image.rs
@@ -107,25 +107,25 @@ impl<F, L> ReadImage<F, L> where F: FnMut(f64)
     pub fn from_chunks<Layers>(mut self, chunks_reader: crate::block::reader::Reader<impl Read + Seek>) -> Result<Image<Layers>>
         where for<'s> L: ReadLayers<'s, Layers = Layers>
     {
-        let Self { pedantic, parallel, ref mut on_progress, ref mut read_layers } = self;
+        let Self { parallel, ref mut on_progress, ref mut read_layers, .. } = self;
 
         let layers_reader = read_layers.create_layers_reader(chunks_reader.headers())?;
         let mut image_collector = ImageWithAttributesReader::new(chunks_reader.headers(), layers_reader)?;
 
         let block_reader = chunks_reader
-            .filter_chunks(pedantic, |meta, tile, block| {
+            .filter_chunks(|meta, tile, block| {
                 image_collector.filter_block(meta, tile, block)
             })?
             .on_progress(on_progress);
 
         // TODO propagate send requirement further upwards
         if parallel {
-            block_reader.decompress_parallel(pedantic, |meta_data, block|{
+            block_reader.decompress_parallel(|meta_data, block|{
                 image_collector.read_block(&meta_data.headers, block)
             })?;
         }
         else {
-            block_reader.decompress_sequential(pedantic, |meta_data, block|{
+            block_reader.decompress_sequential(|meta_data, block|{
                 image_collector.read_block(&meta_data.headers, block)
             })?;
         }
diff --git a/src/image/read/specific_channels.rs b/src/image/read/specific_channels.rs
index 28c94e07..7a143b63 100644
--- a/src/image/read/specific_channels.rs
+++ b/src/image/read/specific_channels.rs
@@ -91,11 +91,11 @@ pub trait RecursivePixelReader {
 
     // TODO dedup with SpecificChannelsReader::read_block(..)?
     /// Note: The (x,y) coordinates are in block space. You will have to add `block.index.pixel_position` for the pixel position in the layer.
-    fn read_block_pixels(
+    fn read_block_pixels<Pixel>(
         &self, header: &Header, block: UncompressedBlock,
-        mut set_pixel: impl FnMut(Vec2<usize>, <Self::RecursivePixel as IntoNonRecursive>::NonRecursive)
+        mut set_pixel: impl FnMut(Vec2<usize>, Pixel)
     )
-        where Self::RecursivePixel: IntoNonRecursive
+        where Self::RecursivePixel: IntoTuple<Pixel>
     {
         let mut one_line_of_recursive_pixels = vec![Self::RecursivePixel::default(); block.index.pixel_size.width()];
 
diff --git a/src/io.rs b/src/io.rs
index cf9e2faa..2712308a 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -8,7 +8,7 @@ pub use ::std::io::{Read, Write};
 use half::slice::{HalfFloatSliceExt};
 use lebe::prelude::*;
 use ::half::f16;
-use crate::error::{Error, Result, UnitResult, IoResult};
+use crate::error::{Error, Result, UnitResult, IoResult, usize_to_u64};
 use std::io::{Seek, SeekFrom};
 use std::path::Path;
 use std::fs::File;
@@ -185,7 +185,7 @@ pub struct Tracking<T> {
 impl<T: Read> Read for Tracking<T> {
     fn read(&mut self, buffer: &mut [u8]) -> std::io::Result<usize> {
         let count = self.inner.read(buffer)?;
-        self.position += count;
+        self.position += usize_to_u64(count);
         Ok(count)
     }
 }
@@ -193,7 +193,7 @@ impl<T: Read> Read for Tracking<T> {
 impl<T: Write> Write for Tracking<T> {
     fn write(&mut self, buffer: &[u8]) -> std::io::Result<usize> {
         let count = self.inner.write(buffer)?;
-        self.position += count;
+        self.position += usize_to_u64(count);
         Ok(count)
     }
 
@@ -222,14 +222,14 @@ impl<T: Read + Seek> Tracking<T> {
     /// If it is only a couple of bytes, no seek system call is performed.
     pub fn seek_read_to(&mut self, target_position: u64) -> std::io::Result<()> {
         let delta = target_position as i128 - self.position as i128; // FIXME  panicked at 'attempt to subtract with overflow'
-        debug_assert!(delta.abs() < usize::MAX as i128);
+        assert!(delta.abs() < u64::MAX as i128 && delta.abs() < usize::MAX as i128);
 
         if delta > 0 && delta < 16 { // TODO profile that this is indeed faster than a syscall! (should be because of bufread buffer discard)
             skip_bytes(self, delta as usize)?;
-            self.position += delta as usize;
+            self.position += delta as u64;
         }
         else if delta != 0 {
-            self.inner.seek(SeekFrom::Start(u64::try_from(target_position).unwrap()))?;
+            self.inner.seek(SeekFrom::Start(target_position))?;
             self.position = target_position;
         }
 

From b6e9d37fc6d70cad14d6ef10013cafa8053049ef Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sat, 7 Oct 2023 14:10:02 +0200
Subject: [PATCH 03/14] fix some warnings

---
 examples/9_read_blocks_on_demand.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
index c47fd3ed..5455f597 100644
--- a/examples/9_read_blocks_on_demand.rs
+++ b/examples/9_read_blocks_on_demand.rs
@@ -19,8 +19,8 @@ fn main() {
     let header_index = 0; // only load pixels from the first header
     let mip_level = (0, 0); // only load largest mip map
 
-    /// for this example, we use a hashmap instead of a real sparse texture.
-    /// it stores blocks of rgba pixels, indexed by the position of the block (usize, usize)
+    // for this example, we use a hashmap instead of a real sparse texture.
+    // it stores blocks of rgba pixels, indexed by the position of the block (usize, usize)
     let mut my_sparse_texture: HashMap<(usize, usize), Vec<[f32; 4]>> = Default::default();
 
     let file = BufReader::new(
@@ -32,8 +32,8 @@ fn main() {
     let mut chunk_reader = exr::block::read(file, true).unwrap()
         .on_demand_chunks().unwrap();
 
-    // this object can decode packed exr blocks to simple rgb
-    let mut rgb_from_block_extractor = read_specific_channels()
+    // this object can decode packed exr blocks to simple rgb (can be shared or cloned across threads)
+    let rgb_from_block_extractor = read_specific_channels()
             .required("R").required("G").required("B")
             .optional("A", 1.0)
             .create_recursive_reader(&chunk_reader.header(header_index).channels).unwrap();
@@ -45,8 +45,8 @@ fn main() {
         let compressed_chunks = chunk_reader
             .load_all_chunks_for_display_space_section(header_index, mip_level, pixel_section)
 
-            // we use .flatten(), this simply discards all errors and only continues with the successfully loaded chunks
-            // we collect here due to borrowing meta data
+            // in this example, we use .flatten(), this simply discards all errors and only continues with the successfully loaded chunks
+            // in this example, we collect here due to borrowing meta data
             .flatten().collect::<Vec<Chunk>>();
 
         // this could be done in parallel, e.g. by using rayon par_iter

From 0a910b36122b533491cc1bbee74ebb8f23a48e80 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sat, 7 Oct 2023 14:13:45 +0200
Subject: [PATCH 04/14] add more comments

---
 examples/9_read_blocks_on_demand.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
index 5455f597..2769008b 100644
--- a/examples/9_read_blocks_on_demand.rs
+++ b/examples/9_read_blocks_on_demand.rs
@@ -14,7 +14,15 @@ use exr::block::UncompressedBlock;
 use exr::image::read::specific_channels::{read_specific_channels, RecursivePixelReader};
 use exr::prelude::{IntegerBounds, ReadSpecificChannel};
 
-/// Load only some specific pixel sections from the file, just when they are needed.
+/// load only some specific pixel sections from the file, just when they are needed.
+/// load blocks of pixels into a sparse texture (illustrated with a hashmap in this example).
+/// the process is as follows:
+///
+/// 1. prepare some state (open the file, read meta data, define the channels we want to read)
+/// 2. when needed, load more pixel blocks from the file
+///    a. load compressed chunks for a specific pixel section
+///    b. decompress chunks and extract rgba pixels from the packed channel data in the block
+///    c. write the loaded rgba pixel blocks into the sparse texture
 fn main() {
     let header_index = 0; // only load pixels from the first header
     let mip_level = (0, 0); // only load largest mip map

From 5d0e9f7a37937610294b8ab6abe824dd5cbe7127 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sat, 7 Oct 2023 14:23:00 +0200
Subject: [PATCH 05/14] add more comments, remove commented-out code

---
 examples/9_read_blocks_on_demand.rs |  8 +++++---
 src/block/reader.rs                 | 13 +++++--------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
index 2769008b..36dafb02 100644
--- a/examples/9_read_blocks_on_demand.rs
+++ b/examples/9_read_blocks_on_demand.rs
@@ -24,8 +24,6 @@ use exr::prelude::{IntegerBounds, ReadSpecificChannel};
 ///    b. decompress chunks and extract rgba pixels from the packed channel data in the block
 ///    c. write the loaded rgba pixel blocks into the sparse texture
 fn main() {
-    let header_index = 0; // only load pixels from the first header
-    let mip_level = (0, 0); // only load largest mip map
 
     // for this example, we use a hashmap instead of a real sparse texture.
     // it stores blocks of rgba pixels, indexed by the position of the block (usize, usize)
@@ -40,11 +38,15 @@ fn main() {
     let mut chunk_reader = exr::block::read(file, true).unwrap()
         .on_demand_chunks().unwrap();
 
+    let header_index = 0; // only load pixels from the first header (assumes first layer has rgb channels)
+    let mip_level = (0, 0); // only load largest mip map
+    println!("loading header #0 from {:?}", chunk_reader.meta_data());
+
     // this object can decode packed exr blocks to simple rgb (can be shared or cloned across threads)
     let rgb_from_block_extractor = read_specific_channels()
             .required("R").required("G").required("B")
             .optional("A", 1.0)
-            .create_recursive_reader(&chunk_reader.header(header_index).channels).unwrap();
+            .create_recursive_reader(&chunk_reader.header(header_index).channels).unwrap(); // this will fail if the image does not contain rgb channels
 
     // later in your app, maybe when the view changed:
     when_new_pixel_section_must_be_loaded(move |pixel_section| {
diff --git a/src/block/reader.rs b/src/block/reader.rs
index a68c330b..5eb51025 100644
--- a/src/block/reader.rs
+++ b/src/block/reader.rs
@@ -148,7 +148,7 @@ impl<R: Read + Seek> Reader<R> {
         }
 
         Ok(OnDemandChunksReader {
-            offset_tables, 
+            offset_tables,
             seekable_bytes: self.remaining_reader,
             meta_data: self.meta_data,
         })
@@ -572,17 +572,13 @@ impl<R: ChunksReader> Iterator for ParallelBlockDecompressor<R> {
 
 impl<R: Read + Seek> OnDemandChunksReader<R> {
 
+    /// The meta data loaded from this file.
     pub fn meta_data(&self) -> &MetaData { &self.meta_data }
 
+    /// The meta data headers loaded from this file.
     pub fn header(&self, header_index: usize) -> &Header { &self.meta_data().headers[header_index] }
 
-    /*pub fn specific_channels_block_decoder<Read>(&self, header_index: usize, reader: Read)
-        -> Read::RecursivePixelReader
-        where Read: ReadSpecificChannel
-    {
-        reader.create_recursive_reader(&self.header(header_index).channels)
-    }*/
-
+    /// Load all chunks that intersect the specified display-space section (DisplayWindow).
     pub fn load_all_chunks_for_display_space_section(
         &mut self, header_index: usize, level: impl Into<Vec2<usize>>, display_window_section: IntegerBounds
     ) -> impl '_ + Iterator<Item = Result<Chunk>>
@@ -604,6 +600,7 @@ impl<R: Read + Seek> OnDemandChunksReader<R> {
         })
     }
 
+    /// Load all chunks that intersect the specified layer-space section (DataWindow).
     pub fn load_all_chunks_for_layer_space_section(
         &mut self, header_index: usize, level: impl Into<Vec2<usize>>, data_window_section: IntegerBounds
     ) -> impl '_ + Iterator<Item = Result<Chunk>>

From 6d40b22b1fd550aed1f8b6c81df71464228a9407 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sat, 7 Oct 2023 14:28:29 +0200
Subject: [PATCH 06/14] add some console printing

---
 examples/9_read_blocks_on_demand.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
index 36dafb02..89fe529c 100644
--- a/examples/9_read_blocks_on_demand.rs
+++ b/examples/9_read_blocks_on_demand.rs
@@ -40,7 +40,7 @@ fn main() {
 
     let header_index = 0; // only load pixels from the first header (assumes first layer has rgb channels)
     let mip_level = (0, 0); // only load largest mip map
-    println!("loading header #0 from {:?}", chunk_reader.meta_data());
+    println!("loading header #0 from {:#?}", chunk_reader.meta_data());
 
     // this object can decode packed exr blocks to simple rgb (can be shared or cloned across threads)
     let rgb_from_block_extractor = read_specific_channels()
@@ -86,6 +86,8 @@ fn main() {
         for (position, block) in rgba_blocks {
             my_sparse_texture.insert(position, block);
         }
+
+        println!("\n\nsparse texture now contains {} blocks", my_sparse_texture.len());
     })
 }
 
@@ -93,8 +95,8 @@ fn main() {
 /// (loads a single view once, as this is a stub implementation)
 fn when_new_pixel_section_must_be_loaded(mut load_for_view: impl FnMut(IntegerBounds)){
     let image_sub_section = IntegerBounds::new(
-        (800, 800), // position
-        (600, 600) // size
+        (831, 739), // position
+        (932, 561) // size
     );
 
     load_for_view(image_sub_section);

From 598e16b73a379cda0d67519fefc8f9d308a59b28 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sun, 8 Oct 2023 22:08:53 +0200
Subject: [PATCH 07/14] fix intersect bugs, add tests

---
 src/meta/attribute.rs | 138 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 130 insertions(+), 8 deletions(-)

diff --git a/src/meta/attribute.rs b/src/meta/attribute.rs
index e8e67ca2..68a92c4f 100644
--- a/src/meta/attribute.rs
+++ b/src/meta/attribute.rs
@@ -906,12 +906,14 @@ impl IntegerBounds {
     }
 
     /// Returns whether the specified rectangle touches this rectangle.
+    /// Empty rectangles are treated as points: They might touch the rectangle.
+    /// Only if both rectangles are empty, they don't intersect.
     pub fn intersects(self, other: Self) -> bool {
-        // https://stackoverflow.com/questions/2752349/fast-rectangle-to-rectangle-intersection
-        other.position.x() <= self.end().x()
-            && self.position.x() <= other.end().x()
-            && other.position.y() <= self.end().y()
-            && self.position.y() <= other.end().y()
+        // https://stackoverflow.com/a/2752369
+        other.position.x() <= self.max().x()
+            && self.position.x() <= other.max().x()
+            && other.position.y() <= self.max().y()
+            && self.position.y() <= other.max().y()
     }
 }
 
@@ -2233,12 +2235,132 @@ mod test {
     }
 
     #[test]
-    fn rectangle_intersect(){
-        unimplemented!()
+    fn rectangle_intersect_at_edge() {
+        assert_intersects(
+            false,
+            (0, 0), (10, 10),
+            (10, 0), (1, 1)
+        );
+
+        assert_intersects(
+            false,
+            (0, 0), (10, 10),
+            (0, 10), (1, 1)
+        );
+
+        assert_intersects(
+            true,
+            (0, 1), (10, 10),
+            (0, 10), (1, 1)
+        );
+    }
+
+    #[test]
+    fn rectangle_intersect_contained() {
+        assert_intersects(
+            true,
+            (0, 0), (10, 10),
+            (5, 5), (1, 1)
+        );
+
+        assert_intersects(
+            true,
+            (5, 5), (1, 1),
+            (0, 0), (10, 10),
+        );
+
+
+        assert_intersects(
+            true,
+            (5, 5), (1, 1),
+            (5, 5), (1, 1),
+        );
+
+        for (x,y) in [
+            (5,6), (6,5), (5,4), (4,5),
+            (4,4), (6,6), (4,6), (6,4),
+        ] {
+            assert_intersects(
+                false,
+                (5, 5), (1, 1),
+                (x, y), (1, 1),
+            );
+        }
+    }
+
+    #[test]
+    fn rectangle_intersect_zero_sized() {
+        assert_intersects( // point in a rectangle intersects
+            true,
+            (0, 0), (10, 10),
+            (5, 5), (0, 0)
+        );
+
+        assert_intersects( // two different points don't intersect
+            false,
+            (0, 0), (0, 0),
+            (-1, -1), (0, 0)
+        );
+
+        assert_intersects( // two equal points don't intersect
+            false,
+            (1, 1), (0, 0),
+            (1, 1), (0, 0)
+        );
+    }
+
+    fn assert_intersects(intersects: bool, a_pos: (i32,i32), a_size: (usize, usize), b_pos: (i32,i32), b_size: (usize,usize)) {
+        let a = IntegerBounds::new(a_pos, a_size);
+        let b = IntegerBounds::new(b_pos, b_size);
+        assert_eq!(
+            intersects, a.intersects(b),
+            "rectangles should {}: {:?}, {:?}",
+            if intersects { "intersect" } else { "not intersect" },
+            a, b,
+        );
     }
 
     #[test]
     fn rectangle_contains(){
-        unimplemented!()
+        assert_contains(
+            true,
+            (10,10), (100,100),
+            (50,50), (10,10),
+        );
+
+        assert_contains(
+            true,
+            (1,1), (2,2),
+            (1,1), (1,1),
+        );
+
+        assert_contains(
+            true,
+            (1,1), (2,2),
+            (1,1), (0,0),
+        );
+
+        assert_contains(
+            false,
+            (1,1), (2,2),
+            (1,1), (2,3),
+        );
+
+        assert_contains(
+            false,
+            (1,1), (2,2),
+            (0,0), (8,8),
+        );
+    }
+
+
+    fn assert_contains(contains: bool, a_pos: (i32, i32), a_size: (usize, usize), b_pos: (i32, i32), b_size: (usize, usize)) {
+        let a = IntegerBounds::new(a_pos, a_size);
+        let b = IntegerBounds::new(b_pos, b_size);
+        assert_eq!(
+            contains, a.contains(b),
+            "{:?} should {} {:?}",
+            a, if contains { "contain" } else { "not contain" }, b,
+        );
     }
 }

From 48fe94f3eab440002eecd495d9e7924d2a01c409 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sun, 8 Oct 2023 22:21:19 +0200
Subject: [PATCH 08/14] refactor: use convenience functions for casting
 primitives

---
 src/block/reader.rs            | 7 +++----
 src/compression/piz/huffman.rs | 6 +++---
 src/error.rs                   | 6 ++++++
 src/io.rs                      | 4 ++--
 src/meta/mod.rs                | 2 +-
 5 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/block/reader.rs b/src/block/reader.rs
index 5eb51025..27e07a10 100644
--- a/src/block/reader.rs
+++ b/src/block/reader.rs
@@ -11,7 +11,7 @@ use smallvec::alloc::sync::Arc;
 use crate::block::{BlockIndex, UncompressedBlock};
 use crate::block::chunk::{Chunk, TileCoordinates};
 use crate::compression::Compression;
-use crate::error::{Error, Result, UnitResult};
+use crate::error::{Error, Result, UnitResult, usize_to_u64};
 use crate::io::{PeekRead, Tracking};
 use crate::math::Vec2;
 use crate::meta::{MetaData, OffsetTables, TileIndices};
@@ -67,8 +67,7 @@ impl<R: Read + Seek> Reader<R> {
                 offset_tables.iter().map(|table| table.len()).sum()
             }
             else {
-                usize::try_from(MetaData::skip_offset_tables(&mut self.remaining_reader, &self.meta_data.headers)?)
-                    .expect("too large chunk count for this machine")
+                MetaData::skip_offset_tables(&mut self.remaining_reader, &self.meta_data.headers)?
             }
         };
 
@@ -158,7 +157,7 @@ impl<R: Read + Seek> Reader<R> {
 
 fn validate_offset_tables(headers: &[Header], offset_tables: &OffsetTables, chunks_start_byte: u64) -> UnitResult {
     let max_pixel_bytes: u64 = headers.iter() // when compressed, chunks are smaller, but never larger than max
-        .map(|header| u64::try_from(header.max_pixel_file_bytes()).expect("failed to cast usize to u64"))
+        .map(|header| usize_to_u64(header.max_pixel_file_bytes()))
         .sum();
 
     // check that each offset is within the bounds
diff --git a/src/compression/piz/huffman.rs b/src/compression/piz/huffman.rs
index a01cbf2d..d9df215d 100644
--- a/src/compression/piz/huffman.rs
+++ b/src/compression/piz/huffman.rs
@@ -24,7 +24,7 @@ pub fn decompress(compressed: &[u8], expected_size: usize) -> Result<Vec<u16>> {
     let bit_count = usize::try_from(u32::read(&mut remaining_compressed)?)?;
     let _skipped = u32::read(&mut remaining_compressed)?; // what is this
 
-    let max_code_index = usize::try_from(max_code_index_32).unwrap();
+    let max_code_index = u32_to_usize(max_code_index_32);
     if min_code_index >= ENCODING_TABLE_SIZE || max_code_index >= ENCODING_TABLE_SIZE {
         return Err(Error::invalid(INVALID_TABLE_SIZE));
     }
@@ -302,7 +302,7 @@ fn read_encoding_table(
 
         if code_len == LONG_ZEROCODE_RUN {
             let zerun_bits = read_bits(8, &mut code_bits, &mut code_bit_count, packed)?;
-            let zerun = usize::try_from(zerun_bits + SHORTEST_LONG_RUN).unwrap();
+            let zerun = u64_to_usize(zerun_bits + SHORTEST_LONG_RUN);
 
             if code_index + zerun > max_code_index + 1 {
                 return Err(Error::invalid(TABLE_TOO_LONG));
@@ -315,7 +315,7 @@ fn read_encoding_table(
             code_index += zerun;
         }
         else if code_len >= SHORT_ZEROCODE_RUN {
-            let duplication_count = usize::try_from(code_len - SHORT_ZEROCODE_RUN + 2).unwrap();
+            let duplication_count = u64_to_usize(code_len - SHORT_ZEROCODE_RUN + 2);
             if code_index + duplication_count > max_code_index + 1 {
                 return Err(Error::invalid(TABLE_TOO_LONG));
             }
diff --git a/src/error.rs b/src/error.rs
index 4ab141fa..7e0e41af 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -129,6 +129,12 @@ pub(crate) fn usize_to_i32(value: usize) -> i32 {
     i32::try_from(value).expect("(usize as i32) overflowed")
 }
 
+/// Panic on overflow.
+#[inline]
+pub(crate) fn usize_to_u32(value: usize) -> u32 {
+    u32::try_from(value).expect("(usize as u32) failed")
+}
+
 /// Panic on overflow.
 #[inline]
 pub(crate) fn usize_to_u64(value: usize) -> u64 {
diff --git a/src/io.rs b/src/io.rs
index 2712308a..8b39f1f8 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -243,11 +243,11 @@ impl<T: Write + Seek> Tracking<T> {
     /// If seeking forward, this will write zeroes.
     pub fn seek_write_to(&mut self, target_position: u64) -> std::io::Result<()> {
         if target_position < self.position {
-            self.inner.seek(SeekFrom::Start(u64::try_from(target_position).unwrap()))?;
+            self.inner.seek(SeekFrom::Start(target_position))?;
         }
         else if target_position > self.position {
             std::io::copy(
-                &mut std::io::repeat(0).take(u64::try_from(target_position - self.position).unwrap()),
+                &mut std::io::repeat(0).take(target_position - self.position),
                 self
             )?;
         }
diff --git a/src/meta/mod.rs b/src/meta/mod.rs
index 0c36af88..56ef5775 100644
--- a/src/meta/mod.rs
+++ b/src/meta/mod.rs
@@ -249,7 +249,7 @@ pub fn calculate_block_size(total_size: usize, block_size: usize, block_position
 /// Calculate number of mip levels in a given resolution.
 // TODO this should be cached? log2 may be very expensive
 pub fn compute_level_count(round: RoundingMode, full_res: usize) -> usize {
-    usize::try_from(round.log2(u32::try_from(full_res).unwrap())).unwrap() + 1
+    u32_to_usize(round.log2(usize_to_u32(full_res))) + 1
 }
 
 /// Calculate the size of a single mip level by index.

From 05670f56a98bd154a81b31f7ae047d05f7f9ccc3 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sun, 8 Oct 2023 22:45:41 +0200
Subject: [PATCH 09/14] make some minimal changes on the api

---
 examples/9_read_blocks_on_demand.rs | 20 +++++++++++++-------
 src/block/reader.rs                 | 12 ------------
 src/image/read/specific_channels.rs | 14 +++++++-------
 3 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
index 89fe529c..cb2f212f 100644
--- a/examples/9_read_blocks_on_demand.rs
+++ b/examples/9_read_blocks_on_demand.rs
@@ -38,22 +38,28 @@ fn main() {
     let mut chunk_reader = exr::block::read(file, true).unwrap()
         .on_demand_chunks().unwrap();
 
-    let header_index = 0; // only load pixels from the first header (assumes first layer has rgb channels)
+    let layer_index = 0; // only load pixels from the first "header" (assumes first layer has rgb channels)
     let mip_level = (0, 0); // only load largest mip map
-    println!("loading header #0 from {:#?}", chunk_reader.meta_data());
+
+    let exr_info = &chunk_reader.meta_data().clone();
+    let layer_info = &exr_info.headers[layer_index];
+    let channel_info = &layer_info.channels;
+    println!("loading header #0 from {:#?}", exr_info);
 
     // this object can decode packed exr blocks to simple rgb (can be shared or cloned across threads)
     let rgb_from_block_extractor = read_specific_channels()
             .required("R").required("G").required("B")
             .optional("A", 1.0)
-            .create_recursive_reader(&chunk_reader.header(header_index).channels).unwrap(); // this will fail if the image does not contain rgb channels
+            .create_recursive_reader(channel_info).unwrap(); // this will fail if the image does not contain rgb channels
+
 
+    // ...
     // later in your app, maybe when the view changed:
     when_new_pixel_section_must_be_loaded(move |pixel_section| {
 
         // todo: only load blocks that are not loaded yet. maybe an additional filter? or replace this with a more modular filtering architecture?
         let compressed_chunks = chunk_reader
-            .load_all_chunks_for_display_space_section(header_index, mip_level, pixel_section)
+            .load_all_chunks_for_display_space_section(layer_index, mip_level, pixel_section)
 
             // in this example, we use .flatten(), this simply discards all errors and only continues with the successfully loaded chunks
             // in this example, we collect here due to borrowing meta data
@@ -61,13 +67,13 @@ fn main() {
 
         // this could be done in parallel, e.g. by using rayon par_iter
         let packed_pixel_blocks = compressed_chunks.into_iter()
-            .map(|chunk| UncompressedBlock::decompress_chunk(chunk, chunk_reader.meta_data(), true))
+            .map(|chunk| UncompressedBlock::decompress_chunk(chunk, exr_info, true))
             .flatten();
 
         // the exr blocks may contain arbitrary channels, but we are only interested in rgba.
         // so we convert each exr block to an rgba block (vec of [f32; 4])
         let rgba_blocks = packed_pixel_blocks.map(|block| {
-            let header = &chunk_reader.meta_data().headers[block.index.layer];
+            assert_eq!(block.index.layer, layer_index);
 
             let position = block.index.pixel_position;
             let size = block.index.pixel_size;
@@ -76,7 +82,7 @@ fn main() {
             // decode individual pixels into our f32 buffer
             // automatically converts f16 samples to f32 if required
             // ignores all other channel data
-            rgb_from_block_extractor.read_block_pixels(header, block, |position, (r,g,b,a)|{
+            rgb_from_block_extractor.read_pixels_from_block(channel_info, block, |position, (r,g,b,a)|{
                 rgba_buffer[position.flat_index_for_size(size)] = [r,g,b,a];
             });
 
diff --git a/src/block/reader.rs b/src/block/reader.rs
index 27e07a10..37ce5c42 100644
--- a/src/block/reader.rs
+++ b/src/block/reader.rs
@@ -659,18 +659,6 @@ impl<R: Read + Seek> OnDemandChunksReader<R> {
             .collect()
     }
 
-    /*pub fn find_seek_position_for_block(&self, layer_index: usize, filter_blocks: impl Fn(TileIndices) -> bool) -> impl Iterator<> {
-        let header = &self.meta_data.headers[layer_index];
-
-        // TODO: directly compute the block index based on mip level and resolution??
-        let increasing_y_block_index_in_header = header.blocks_increasing_y_order()
-            .position(filter_blocks); // todo: this is a vec internally, save it in the reader and look it up at this point
-
-        let offset_table = &self.offset_tables[layer_index];
-        offset_table[increasing_y_block_index_in_header]
-    }*/
-
-
     /// Reads the specified chunks by seeking the file. In the order as they appear in the file, so it might be arbitrary.
     pub fn load_chunks(&mut self, mut chunks: Vec<u64>) -> impl '_ + Iterator<Item = Result<Chunk>> {
         // sorting the file access should improve read performance, especially on HDDs
diff --git a/src/image/read/specific_channels.rs b/src/image/read/specific_channels.rs
index 7a143b63..2fb7c107 100644
--- a/src/image/read/specific_channels.rs
+++ b/src/image/read/specific_channels.rs
@@ -91,15 +91,15 @@ pub trait RecursivePixelReader {
 
     // TODO dedup with SpecificChannelsReader::read_block(..)?
     /// Note: The (x,y) coordinates are in block space. You will have to add `block.index.pixel_position` for the pixel position in the layer.
-    fn read_block_pixels<Pixel>(
-        &self, header: &Header, block: UncompressedBlock,
-        mut set_pixel: impl FnMut(Vec2<usize>, Pixel)
+    fn read_pixels_from_block<PixelTuple>(
+        &self, channels: &ChannelList, block: UncompressedBlock,
+        mut set_pixel: impl FnMut(Vec2<usize>, PixelTuple)
     )
-        where Self::RecursivePixel: IntoTuple<Pixel>
+        where Self::RecursivePixel: IntoTuple<PixelTuple>
     {
         let mut one_line_of_recursive_pixels = vec![Self::RecursivePixel::default(); block.index.pixel_size.width()];
 
-        let byte_lines = block.data.chunks_exact(header.channels.bytes_per_pixel * block.index.pixel_size.width());
+        let byte_lines = block.data.chunks_exact(channels.bytes_per_pixel * block.index.pixel_size.width());
         debug_assert_eq!(byte_lines.len(), block.index.pixel_size.height(), "invalid block lines split");
 
         for (y_offset, line_bytes) in byte_lines.enumerate() { // TODO sampling
@@ -211,8 +211,8 @@ ChannelsReader for SpecificChannelsReader<PixelStorage, SetPixel, PxReader, Pixe
         let (storage, set_pixel, reader) = (&mut self.pixel_storage, &mut self.set_pixel, &self.pixel_reader);
         let block_position = block.index.pixel_position;
 
-        reader.read_block_pixels(
-            header, block,
+        reader.read_pixels_from_block(
+            &header.channels, block,
             |pos, px| set_pixel(storage, pos + block_position, px)
         );
 

From 6234024ca14b3d27bed40eeae90e31be55c6fabb Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sun, 8 Oct 2023 22:47:11 +0200
Subject: [PATCH 10/14] remove unused import

---
 src/block/reader.rs | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/block/reader.rs b/src/block/reader.rs
index 37ce5c42..5321a86f 100644
--- a/src/block/reader.rs
+++ b/src/block/reader.rs
@@ -1,13 +1,10 @@
 //! Composable structures to handle reading an image.
 
 
-use std::convert::{TryFrom};
 use std::fmt::Debug;
 use std::io::{Read, Seek};
 use rayon_core::{ThreadPool, ThreadPoolBuildError};
-
 use smallvec::alloc::sync::Arc;
-
 use crate::block::{BlockIndex, UncompressedBlock};
 use crate::block::chunk::{Chunk, TileCoordinates};
 use crate::compression::Compression;

From 4c23c41623b7606de8ce83779ff8c6347b1c3bf1 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sun, 8 Oct 2023 23:32:06 +0200
Subject: [PATCH 11/14] add flexibility to flat_index_for_size

---
 src/math.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/math.rs b/src/math.rs
index 9f21bf1a..d9fa8d74 100644
--- a/src/math.rs
+++ b/src/math.rs
@@ -60,9 +60,11 @@ impl<T> Vec2<T> {
     /// Convert this two-dimensional coordinate to an index suited for one-dimensional flattened image arrays.
     /// Works for images that store the pixels row by row, one after another, in a single array.
     /// In debug mode, panics for an index out of bounds.
-    #[inline] pub fn flat_index_for_size(self, resolution: Vec2<T>) -> T
+    #[inline] pub fn flat_index_for_size(self, resolution: impl Into<Vec2<T>>) -> T
         where T: Copy + Debug + Ord + Mul<Output=T> + Add<Output=T>
     {
+        let resolution = resolution.into();
+
         debug_assert!(
             self.x() < resolution.width() && self.y() < resolution.height(),
             "Vec2 index {:?} is invalid for resolution {:?}", self, resolution

From e171d5505784472f5180fc6a279c2ce05f789a66 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sun, 8 Oct 2023 23:32:23 +0200
Subject: [PATCH 12/14] debug output some blocks as pngs

---
 examples/9_read_blocks_on_demand.rs | 47 ++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
index cb2f212f..7a3d2e76 100644
--- a/examples/9_read_blocks_on_demand.rs
+++ b/examples/9_read_blocks_on_demand.rs
@@ -1,9 +1,4 @@
 
-extern crate rand;
-extern crate half;
-
-
-// exr imports
 extern crate exr;
 
 use std::collections::HashMap;
@@ -12,7 +7,7 @@ use std::io::BufReader;
 use exr::block::chunk::Chunk;
 use exr::block::UncompressedBlock;
 use exr::image::read::specific_channels::{read_specific_channels, RecursivePixelReader};
-use exr::prelude::{IntegerBounds, ReadSpecificChannel};
+use exr::prelude::{IntegerBounds, ReadSpecificChannel, Vec2};
 
 /// load only some specific pixel sections from the file, just when they are needed.
 /// load blocks of pixels into a sparse texture (illustrated with a hashmap in this example).
@@ -25,9 +20,13 @@ use exr::prelude::{IntegerBounds, ReadSpecificChannel};
 ///    c. write the loaded rgba pixel blocks into the sparse texture
 fn main() {
 
+    // this is where we will store our loaded data.
     // for this example, we use a hashmap instead of a real sparse texture.
-    // it stores blocks of rgba pixels, indexed by the position of the block (usize, usize)
-    let mut my_sparse_texture: HashMap<(usize, usize), Vec<[f32; 4]>> = Default::default();
+    // it stores blocks of rgba pixels, indexed by the position of the block (i32, i32) and its size
+    let mut my_sparse_texture: HashMap<(Pos, Size), Vec<[f32; 4]>> = Default::default();
+    type Pos = (i32, i32);
+    type Size = (usize, usize);
+
 
     let file = BufReader::new(
         File::open("3GB.exr")
@@ -55,7 +54,7 @@ fn main() {
 
     // ...
     // later in your app, maybe when the view changed:
-    when_new_pixel_section_must_be_loaded(move |pixel_section| {
+    when_new_pixel_section_must_be_loaded(|pixel_section| {
 
         // todo: only load blocks that are not loaded yet. maybe an additional filter? or replace this with a more modular filtering architecture?
         let compressed_chunks = chunk_reader
@@ -75,8 +74,8 @@ fn main() {
         let rgba_blocks = packed_pixel_blocks.map(|block| {
             assert_eq!(block.index.layer, layer_index);
 
-            let position = block.index.pixel_position;
             let size = block.index.pixel_size;
+            let position = block.index.pixel_position.to_i32() + layer_info.own_attributes.layer_position;
             let mut rgba_buffer = vec![[0.0; 4]; size.area()]; // rgba = 4 floats
 
             // decode individual pixels into our f32 buffer
@@ -86,23 +85,35 @@ fn main() {
                 rgba_buffer[position.flat_index_for_size(size)] = [r,g,b,a];
             });
 
-            (position.into(), rgba_buffer)
+            (position, size, rgba_buffer)
         });
 
-        for (position, block) in rgba_blocks {
-            my_sparse_texture.insert(position, block);
+        for (position, size, block) in rgba_blocks {
+            my_sparse_texture.insert((position.into(), size.into()), block);
         }
-
-        println!("\n\nsparse texture now contains {} blocks", my_sparse_texture.len());
-    })
+    });
+
+    // we're done! print something
+    println!("\n\nsparse texture now contains {} blocks", my_sparse_texture.len());
+
+    // write a png for each block
+    for (index, ((_pos, (width, height)), block)) in my_sparse_texture.into_iter().enumerate() {
+        exr::prelude::write_rgba_file(
+            format!("block #{}.png", index), width, height,
+            |x,y| {
+                let [r,g,b,a] = block[Vec2(x,y).flat_index_for_size((width, height))];
+                (r,g,b,a)
+            }
+        ).unwrap();
+    }
 }
 
 /// request to load a specific sub-rect into view
 /// (loads a single view once, as this is a stub implementation)
-fn when_new_pixel_section_must_be_loaded(mut load_for_view: impl FnMut(IntegerBounds)){
+fn when_new_pixel_section_must_be_loaded<'a>(mut load_for_view: impl 'a + FnMut(IntegerBounds)){
     let image_sub_section = IntegerBounds::new(
         (831, 739), // position
-        (932, 561) // size
+        (32, 91) // size
     );
 
     load_for_view(image_sub_section);

From 7f5d8b22362c7fc15da835390fb4ecf72fea8bea Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sun, 22 Oct 2023 18:05:43 +0200
Subject: [PATCH 13/14] add helper functions for dynamically examining the
 contents of a block and add an example for it

---
 examples/9_read_blocks_on_demand.rs         |   2 +-
 examples/9_read_blocks_on_demand_dynamic.rs | 111 ++++++++++++++++++++
 src/block/lines.rs                          |   4 +-
 src/block/mod.rs                            |  36 +++++++
 src/image/mod.rs                            |  10 ++
 src/image/read/samples.rs                   |   8 +-
 6 files changed, 162 insertions(+), 9 deletions(-)
 create mode 100644 examples/9_read_blocks_on_demand_dynamic.rs

diff --git a/examples/9_read_blocks_on_demand.rs b/examples/9_read_blocks_on_demand.rs
index 7a3d2e76..944c7a56 100644
--- a/examples/9_read_blocks_on_demand.rs
+++ b/examples/9_read_blocks_on_demand.rs
@@ -99,7 +99,7 @@ fn main() {
     // write a png for each block
     for (index, ((_pos, (width, height)), block)) in my_sparse_texture.into_iter().enumerate() {
         exr::prelude::write_rgba_file(
-            format!("block #{}.png", index), width, height,
+            format!("block #{}.exr", index), width, height,
             |x,y| {
                 let [r,g,b,a] = block[Vec2(x,y).flat_index_for_size((width, height))];
                 (r,g,b,a)
diff --git a/examples/9_read_blocks_on_demand_dynamic.rs b/examples/9_read_blocks_on_demand_dynamic.rs
new file mode 100644
index 00000000..82965944
--- /dev/null
+++ b/examples/9_read_blocks_on_demand_dynamic.rs
@@ -0,0 +1,111 @@
+
+extern crate exr;
+
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::BufReader;
+use exr::block::chunk::Chunk;
+use exr::block::UncompressedBlock;
+use exr::image::{AnyChannel, AnyChannels, FlatSamples};
+use exr::prelude::{IntegerBounds, WritableImage};
+
+/// load only some specific pixel sections from the file, just when they are needed.
+/// load blocks of pixels into a sparse texture (illustrated with a hashmap in this example).
+/// the process is as follows:
+///
+/// 1. prepare some state (open the file, read meta data)
+/// 2. when needed, load more pixel blocks from the file
+///    a. load compressed chunks for a specific pixel section
+///    b. decompress chunks and extract pixels from the packed channel data in the block
+///    c. write the loaded pixel blocks into the sparse texture
+fn main() {
+
+    // this is where we will store our loaded data.
+    // for this example, we use a hashmap instead of a real sparse texture.
+    // it stores a vector of channels, each containing either f32, f16, or u32 samples
+    let mut my_sparse_texture: HashMap<(Pos, Size), Vec<FlatSamples>> = Default::default();
+    type Pos = (i32, i32);
+    type Size = (usize, usize);
+
+
+    let file = BufReader::new(
+        File::open("3GB.exr")
+            .expect("run example `7_write_raw_blocks` to generate this image file")
+    );
+
+    // initializes a lazy decoder (reads meta data immediately)
+    let mut chunk_reader = exr::block::read(file, true).unwrap()
+        .on_demand_chunks().unwrap();
+
+    let layer_index = 0; // only load pixels from the first "header" (all of its channels are loaded, whatever they are)
+    let mip_level = (0, 0); // only load largest mip map
+
+    let exr_info = &chunk_reader.meta_data().clone();
+    let layer_info = &exr_info.headers[layer_index];
+    let channel_info = &layer_info.channels.list;
+    println!("loading header #0 from {:#?}", exr_info);
+
+    // ...
+    // later in your app, maybe when the view changed:
+    when_new_pixel_section_must_be_loaded(|pixel_section| {
+
+        // todo: only load blocks that are not loaded yet. maybe an additional filter? or replace this with a more modular filtering architecture?
+        let compressed_chunks = chunk_reader
+            .load_all_chunks_for_display_space_section(layer_index, mip_level, pixel_section)
+
+            // in this example, we use .flatten(), this simply discards all errors and only continues with the successfully loaded chunks
+            // in this example, we collect here due to borrowing meta data
+            .flatten().collect::<Vec<Chunk>>();
+
+        // this could be done in parallel, e.g. by using rayon par_iter
+        let packed_pixel_blocks = compressed_chunks.into_iter()
+            .map(|chunk| UncompressedBlock::decompress_chunk(chunk, exr_info, true))
+            .flatten();
+
+        // exr blocks store line by line, each line stores all the channels.
+        // what we might want instead is to store channel by channel, each channel containing all the lines for this block.
+        let unpacked_blocks = packed_pixel_blocks.map(|block|{
+            // obtain a vector of channels, where each channel contains the whole block
+            let channels = block.unpack_channels(layer_info);
+
+            let size = block.index.pixel_size;
+            let position = block.index.pixel_position.to_i32() + layer_info.own_attributes.layer_position;
+
+            (position, size, channels)
+        });
+
+        for (position, size, block) in unpacked_blocks {
+            my_sparse_texture.insert((position.into(), size.into()), block);
+        }
+    });
+
+
+    println!("\n\nsparse texture now contains {} blocks", my_sparse_texture.len());
+
+    // write an exr file for each block
+    for (index, ((_pos, (width, height)), channel_data)) in my_sparse_texture.into_iter().enumerate() {
+        let path = format!("block #{}.exr", index);
+        let channel_names = channel_info.iter().map(|c| c.name.clone());
+
+        let image = exr::image::Image::from_channels((width, height), AnyChannels::sort(
+            channel_names.zip(channel_data)
+                .map(|(chan, channel_data)| AnyChannel::new(chan, channel_data))
+                .collect()
+        ));
+
+        image.write().to_file(path).unwrap();
+    }
+
+    println!("Written the blocks as exr files.");
+}
+
+/// request to load a specific sub-rect into view
+/// (loads a single view once, as this is a stub implementation)
+fn when_new_pixel_section_must_be_loaded<'a>(mut load_for_view: impl 'a + FnMut(IntegerBounds)){
+    let image_sub_section = IntegerBounds::new(
+        (831, 739), // position
+        (32, 91) // size
+    );
+
+    load_for_view(image_sub_section);
+}
\ No newline at end of file
diff --git a/src/block/lines.rs b/src/block/lines.rs
index 1cdf8eeb..659e066d 100644
--- a/src/block/lines.rs
+++ b/src/block/lines.rs
@@ -54,7 +54,7 @@ pub struct LineIndex {
     /// Index of the mip or rip level in the image.
     pub level: Vec2<usize>,
 
-    /// Position of the most left pixel of the row.
+    /// Position of the leftmost pixel of the row, in data window space.
     pub position: Vec2<usize>,
 
     /// The width of the line; the number of samples in this row,
@@ -191,7 +191,7 @@ impl LineRef<'_> {
     pub fn read_samples<T: crate::io::Data>(&self) -> impl Iterator<Item = Result<T>> + '_ {
         debug_assert_eq!(self.value.len(), self.location.sample_count * T::BYTE_SIZE, "sample type size does not match line byte size");
 
-        let mut read = self.value.clone(); // FIXME deep data
+        let mut read = self.value; // FIXME deep data
         (0..self.location.sample_count).map(move |_| T::read(&mut read))
     }
 }
\ No newline at end of file
diff --git a/src/block/mod.rs b/src/block/mod.rs
index 1d20aa89..1f6d7755 100644
--- a/src/block/mod.rs
+++ b/src/block/mod.rs
@@ -24,6 +24,7 @@ use crate::compression::ByteVec;
 use crate::block::chunk::{CompressedBlock, CompressedTileBlock, CompressedScanLineBlock, Chunk, TileCoordinates};
 use crate::meta::header::Header;
 use crate::block::lines::{LineIndex, LineRef, LineSlice, LineRefMut};
+use crate::image::FlatSamples;
 use crate::meta::attribute::ChannelList;
 
 
@@ -254,4 +255,39 @@ impl UncompressedBlock {
             data: Self::collect_block_data_from_lines(channels, block_index, extract_line)
         }
     }
+
+    /// Unpacks the channel data from the raw bytes of this block.
+    /// Returns a vector with one entry for each channel in `header.channels.list`, in the same order.
+    /// Each entry contains the samples of that channel for this whole block.
+    /// The samples are typed to either `f32`, `f16`, or `u32`, depending on the sample type of the channel.
+    /// The samples are flattened, in row-major order, according to `Vec2::flat_index_for_size(block_size)`. Panics ("line indexing bug") if reading a line into its slice of the channel buffer fails.
+    pub fn unpack_channels(&self, header: &Header) -> Vec<FlatSamples> {
+        let block = self;
+        let layer_info = header;
+        let channel_info = &layer_info.channels;
+        let block_size = block.index.pixel_size;
+
+        // allocate one flat sample buffer per channel, each sized for this block
+        let mut channels: Vec<FlatSamples> = layer_info.channels.list.iter()
+            .map(|chan| FlatSamples::new(chan, block.index.pixel_size))
+            .collect();
+
+        for line in block.lines(channel_info) {
+            let all_lines_for_this_channel = &mut channels[line.location.channel];
+
+            // TODO sampling: the indices below use the full block size, while `FlatSamples::new` divides the size by `channel.sampling` -- NOTE(review): confirm subsampled channels cannot reach this code
+            let position_in_block = line.location.position - block.index.pixel_position;
+            let start = position_in_block.flat_index_for_size(block_size);
+            let end = start + block_size.width();
+
+            // read either f16, f32, or u32 samples, based on the sample type of the channel
+            match all_lines_for_this_channel {
+                FlatSamples::F16(samples) => line.read_samples_into_slice(&mut samples[start..end]),
+                FlatSamples::F32(samples) => line.read_samples_into_slice(&mut samples[start..end]),
+                FlatSamples::U32(samples) => line.read_samples_into_slice(&mut samples[start..end]),
+            }.expect("line indexing bug");
+        }
+
+        channels
+    }
 }
\ No newline at end of file
diff --git a/src/image/mod.rs b/src/image/mod.rs
index db75050a..3b6fcb6f 100644
--- a/src/image/mod.rs
+++ b/src/image/mod.rs
@@ -654,6 +654,16 @@ impl<Samples> RipMaps<Samples> {
 
 impl FlatSamples {
 
+    /// Creates zero-initialized `FlatSamples` for a channel, containing `(size / channel.sampling).area()` samples of the channel's sample type.
+    pub fn new(channel: &ChannelDescription, size: Vec2<usize>) -> Self {
+        let size = (size / channel.sampling).area();
+        match channel.sample_type {
+            SampleType::F16 => FlatSamples::F16(vec![f16::ZERO; size]),
+            SampleType::F32 => FlatSamples::F32(vec![0.0; size]),
+            SampleType::U32 => FlatSamples::U32(vec![0; size]),
+        }
+    }
+
     /// The number of samples in the image. Should be the width times the height.
     /// Might vary when subsampling is used.
     pub fn len(&self) -> usize {
diff --git a/src/image/read/samples.rs b/src/image/read/samples.rs
index e03c3ccb..3cfcb837 100644
--- a/src/image/read/samples.rs
+++ b/src/image/read/samples.rs
@@ -5,7 +5,7 @@ use crate::meta::header::{Header};
 use crate::error::{Result, UnitResult};
 use crate::block::lines::LineRef;
 use crate::math::Vec2;
-use crate::meta::attribute::{ChannelDescription, SampleType};
+use crate::meta::attribute::{ChannelDescription};
 use crate::image::read::any_channels::{SamplesReader, ReadSamples};
 use crate::image::read::levels::{ReadSamplesLevel, ReadAllLevels, ReadLargestLevel};
 use crate::block::chunk::TileCoordinates;
@@ -62,11 +62,7 @@ impl ReadSamplesLevel for ReadFlatSamples {
     fn create_samples_level_reader(&self, _header: &Header, channel: &ChannelDescription, level: Vec2<usize>, resolution: Vec2<usize>) -> Result<Self::Reader> {
         Ok(FlatSamplesReader {
             level, resolution, // TODO sampling
-            samples: match channel.sample_type {
-                SampleType::F16 => FlatSamples::F16(vec![f16::ZERO; resolution.area()]),
-                SampleType::F32 => FlatSamples::F32(vec![0.0; resolution.area()]),
-                SampleType::U32 => FlatSamples::U32(vec![0; resolution.area()]),
-            }
+            samples: FlatSamples::new(channel, resolution)
         })
     }
 }

From d768f0bbde0c6d593b8258eb7c8fcf8a1653f736 Mon Sep 17 00:00:00 2001
From: Johannes Vollmer <32042925+johannesvollmer@users.noreply.github.com>
Date: Sun, 22 Oct 2023 22:13:07 +0200
Subject: [PATCH 14/14] add helper functions to convert  f32 flatsamples

---
 examples/9_read_blocks_on_demand_dynamic.rs |  4 ++--
 src/block/samples.rs                        |  9 +++++---
 src/image/mod.rs                            | 25 +++++++++++++++++++++
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/examples/9_read_blocks_on_demand_dynamic.rs b/examples/9_read_blocks_on_demand_dynamic.rs
index 82965944..e2620a7c 100644
--- a/examples/9_read_blocks_on_demand_dynamic.rs
+++ b/examples/9_read_blocks_on_demand_dynamic.rs
@@ -6,7 +6,7 @@ use std::fs::File;
 use std::io::BufReader;
 use exr::block::chunk::Chunk;
 use exr::block::UncompressedBlock;
-use exr::image::{AnyChannel, AnyChannels, FlatSamples};
+use exr::image::{AnyChannel, AnyChannels, FlatSamples, Image};
 use exr::prelude::{IntegerBounds, WritableImage};
 
 /// load only some specific pixel sections from the file, just when they are needed.
@@ -87,7 +87,7 @@ fn main() {
         let path = format!("block #{}.exr", index);
         let channel_names = channel_info.iter().map(|c| c.name.clone());
 
-        let image = exr::image::Image::from_channels((width, height), AnyChannels::sort(
+        let image = Image::from_channels((width, height), AnyChannels::sort(
             channel_names.zip(channel_data)
                 .map(|(chan, channel_data)| AnyChannel::new(chan, channel_data))
                 .collect()
diff --git a/src/block/samples.rs b/src/block/samples.rs
index 4352b111..af3aad8c 100644
--- a/src/block/samples.rs
+++ b/src/block/samples.rs
@@ -127,7 +127,10 @@ pub trait FromNativeSample: Sized + Copy + Default + 'static {
 
     /// Convert all values from the slice into this type.
     /// This function exists to allow the compiler to perform a vectorization optimization.
-    /// Note that this default implementation will **not** be vectorized by the compiler automatically.
+    /// All implementations provided by this library are implemented in this efficient way.
+    ///
+    /// Note for implementors:
+    /// The default f16 implementations will **not** be vectorized by the compiler automatically.
     /// For maximum performance you will need to override this function and implement it via
     /// an explicit batched conversion such as [`convert_to_f32_slice`](https://docs.rs/half/2.3.1/half/slice/trait.HalfFloatSliceExt.html#tymethod.convert_to_f32_slice)
     #[inline]
@@ -140,7 +143,7 @@ pub trait FromNativeSample: Sized + Copy + Default + 'static {
 
     /// Convert all values from the slice into this type.
     /// This function exists to allow the compiler to perform a vectorization optimization.
-    /// Note that this default implementation will be vectorized by the compiler automatically.
+    /// Note that the default f32 implementation will be vectorized by the compiler automatically.
     #[inline]
     fn from_f32s(from: &[f32], to: &mut [Self]) {
         assert_eq!(from.len(), to.len(), "slices must have the same length");
@@ -151,7 +154,7 @@ pub trait FromNativeSample: Sized + Copy + Default + 'static {
 
     /// Convert all values from the slice into this type.
     /// This function exists to allow the compiler to perform a vectorization optimization.
-    /// Note that this default implementation will be vectorized by the compiler automatically,
+    /// Note that the default u32 implementation will be vectorized by the compiler automatically,
     /// provided that the CPU supports the necessary conversion instructions.
     /// For example, x86_64 lacks the instructions to convert `u32` to floats,
     /// so this will inevitably be slow on x86_64.
diff --git a/src/image/mod.rs b/src/image/mod.rs
index 3b6fcb6f..e747d6f9 100644
--- a/src/image/mod.rs
+++ b/src/image/mod.rs
@@ -698,6 +698,31 @@ impl FlatSamples {
             FlatSamples::U32(vec) => Sample::U32(vec[index]),
         }
     }
+
+    /// Converts all samples in this vector into a newly allocated vector of the desired sample type.
+    /// This uses vectorization and should therefore be rather efficient.
+    pub fn all_values_as<Sample>(&self) -> Vec<Sample> where Sample: FromNativeSample {
+        let mut target = vec![Sample::default(); self.len()];
+        self.all_values_into(&mut target);
+        target
+    }
+
+    /// Converts all samples in this vector to the desired sample type, writing them into the provided slice.
+    /// This uses vectorization and should therefore be rather efficient.
+    /// The provided slice must have the same length as this vector (see `len`).
+    pub fn all_values_into<Sample>(&self, out: &mut[Sample]) where Sample: FromNativeSample {
+        match self {
+            FlatSamples::F16(f16s) => Sample::from_f16s(f16s, out),
+            FlatSamples::F32(f32s) => Sample::from_f32s(f32s, out),
+            FlatSamples::U32(u32s) => Sample::from_u32s(u32s, out),
+        }
+    }
+
+    /// Returns a new `FlatSamples::F32` containing all samples of this vector converted to `f32`.
+    /// Use `all_values_as::<f32>()` instead if you don't need a `FlatSamples` object.
+    pub fn as_f32(&self) -> FlatSamples {
+        FlatSamples::F32(self.all_values_as())
+    }
 }