diff --git a/Makefile b/Makefile index f063f3e..c19593a 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,49 @@ +CPPC = g++ FC := gfortran -FFLAGS := -O3 -g -fbounds-check -Wall -Wextra -cpp -Wno-unused-dummy-argument +FFLAGS_BASIC = -g -fbacktrace -std=f2008 -pedantic -Wall -Wextra -cpp +FFLAGS_BASIC += -Werror -Werror=shadow -Werror=intrinsic-shadow -Wuninitialized +FFLAGS_BASIC += -Wunreachable-code -Wconversion +FFLAGS_BASIC += -Waliasing -Wampersand -Wc-binding-type -Wcharacter-truncation +FFLAGS_BASIC += -Wfunction-elimination -Wimplicit-interface -Wimplicit-procedure -Wintrinsic-shadow -Wintrinsics-std -Wline-truncation -Wno-tabs +FFLAGS_BASIC += -Wreal-q-constant -Wsurprising +FFLAGS_BASIC += -Wunused-parameter +FFLAGS_BASIC += -Wno-maybe-uninitialized -Wno-unused-dummy-argument -Wno-error=return-type +FFLAGS_BASIC += -Wno-unused-function +FFLAGS_BASIC += -Wno-conversion +FFLAGS_BASIC += -Wno-implicit-interface -Wno-strict-overflow # implicit interface is necessary for calling qsort with general types. Conversions from/to C ints are harmless. -.PHONY: all test clean ref +FFLAGS_DEVEL = -O0 -fcheck=all -fbounds-check -Warray-bounds -Wstrict-overflow=5 -Wunderflow -ffpe-trap=invalid,zero,overflow +# FFLAGS_DEVEL += -ftrapv +FFLAGS_RELEASE = -O3 + +# not yet in gfortran 4.8.5: +# FFLAGS_BASIC += -Wdo-subscript -std=f2018 -Wfrontend-loop-interchange +# FFLAGS_DEVEL += -fsanitize-address-use-after-scope + +# CPPC = icpc +# FC = ifort +# FFLAGS_BASIC = -g -traceback -cpp +# FFLAGS_DEVEL = -O0 +# FFLAGS_RELEASE = -Ofast + +FFLAGS = $(FFLAGS_DEVEL) $(FFLAGS_BASIC) + +.PHONY: all test clean all: test test: fhash_modules fhash_test.f90 - $(FC) $(FFLAGS) fhash_modules.f90 fhash_test.f90 -o fhash_test.out && ./fhash_test.out + $(FC) $(FFLAGS) fhash_modules.f90 fhash_test.f90 -o fhash_test.out \ + && ./fhash_test.out + +benchmark: fhash_benchmark.out stl_benchmark.out + ./fhash_benchmark.out && ./stl_benchmark.out + +fhash_benchmark.out: fhash.f90 fhash_modules.f90 benchmark.f90 + $(FC) $(FFLAGS_BASIC) $(FFLAGS_RELEASE) fhash_modules.f90 benchmark.f90 -o fhash_benchmark.out -ref: benchmark.cc - g++ -O3 -std=c++14 benchmark.cc -o ref.out && ./ref.out +stl_benchmark.out: benchmark.cc + $(CPPC) -std=c++11 -O3 $< -o $@ clean: rm -rf *.mod *.o diff --git a/README.md b/README.md index 90545bf..60deedf 100644 --- a/README.md +++ b/README.md @@ -2,44 +2,41 @@ Fast hash map implementation in fortran ## Description -Implemention of the GCC hashmap structure in Fortran. With the usage of macros, it can support any types of keys and values, as long as you implement (or the compiler provides) the corresponding equal operator(==), assignment operator(=) and the hash_value interface of the key type and the assignment operator of the value type. +Implemention of the GCC hashmap structure in Fortran. It supports any types of keys and values, as long as you set the following macros: -## Benchmarks +* `FHASH_NAME`; + +* `KEY_TYPE` and `VALUE_TYPE` with corresponding use statements `KEY_USE` and `VALUE_USE`, + +and, optionally, -Here are the benchmarks between my Fortran implementation and GCC 4.8 standard library: +* `KEYS_EQUAL_FUNC`: the comparison operator for the keys (defaults to either `a == b` or `all(a == b)`, depending on whether the key is a scalar; -For 14 integer array as the key, double precision floating point as the value, 10M entries: +* `HASH_FUNC`, which takes a key and returns a hash integer. There are defaults for integers and integer arrays; + +* `VALUE_POINTER`: when defined the values are assumed to be pointers. + +## Benchmarks -Fortran hash: +For -> Insert: 1.80 s -> -> Clean: 1.70 s -> -> 1.59 GB +* key: integer array of size 2; -GCC unordered_map: +* value: double precision (64-bit) floating point; -> Insert: 2.02 s -> -> Clean: 0.61 s -> -> 1.38 GB +* 10M entries, -For 2 integer array as the key, double precision floating point as the value, 20M entries: +on -Fortran hash: +* Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz; -> Insert: 2.66 s -> -> Clean: 2.54 s -> -> 2.57 GB +* Ubuntu 20.04.3 LTS, -GCC unordered_map: +I got -> Insert: 3.60 s -> -> Clean: 1.07 s -> -> 2.16 GB \ No newline at end of file +| | | ifort 2021 | gfortran 9 | +|-----------|---------------|------------|------------| +| *insert* | **fhash** | 2.99 | 2.38 | +| | **C++ (STL)** | 2.80 | 2.69 | +| *clear* | **fhash** | 1.24 | 0.391 | +| | **C++ (STL)** | 0.37 | 0.328 | diff --git a/benchmark.cc b/benchmark.cc index f6a9104..9413498 100644 --- a/benchmark.cc +++ b/benchmark.cc @@ -1,38 +1,46 @@ #include -#include -#include +#include #include #include -#include -#define N_INTS 2 -#define N_KEYS 20000000 +constexpr int N_INTS = 2; +constexpr int N_KEYS = 1e7; + +int main() { + std::cout << "Start C++ STL benchmark:\n"; + + typedef std::array KeyType; + std::unordered_map, double, boost::hash> h; -void benchmark() { - typedef std::vector KeyType; - const std::clock_t start = std::clock(); - std::unordered_map> h; h.reserve(N_KEYS * 2); - KeyType key(N_INTS); + const double t0 = std::clock(); + + KeyType key; for (int i = 1; i <= N_KEYS; i++) { for (int j = 1; j <= N_INTS; j++) { key[j - 1] = i + j; } h[key] = i * 0.5; } - const std::clock_t finish = std::clock(); - printf("Time insert: %.3g s\n", - static_cast(finish - start) / CLOCKS_PER_SEC); + const double t1 = std::clock(); + + double val; + for (int i = 1; i <= N_KEYS; i++) { + for (int j = 1; j <= N_INTS; j++) { + key[j - 1] = i + j; + } + val = h[key]; + } + const double t2 = std::clock(); + h.clear(); -} + const double t3 = std::clock(); -int main() { - typedef std::vector KeyType; - const std::clock_t start = std::clock(); - benchmark(); - const std::clock_t finish = std::clock(); - printf("Time finish: %.3g s\n", - static_cast(finish - start) / CLOCKS_PER_SEC); + std::cout << "Time to assemble / get / clear:" + << " " << (t1 - t0) / CLOCKS_PER_SEC + << " " << (t2 - t1) / CLOCKS_PER_SEC + << " " << (t3 - t2) / CLOCKS_PER_SEC + << "\n"; return 0; -} \ No newline at end of file +} diff --git a/benchmark.f90 b/benchmark.f90 new file mode 100644 index 0000000..169702e --- /dev/null +++ b/benchmark.f90 @@ -0,0 +1,55 @@ +#define KEY_ARRAY_SIZE 2 + +#define FHASH_NAME int2real +#define KEY_TYPE integer, dimension(KEY_ARRAY_SIZE) +#define VALUE_TYPE real(real64) +#define VALUE_USE use, intrinsic :: iso_fortran_env, only: real64 +#include "fhash.f90" + +program test_benchmark + implicit none + + call benchmark(n_ints=KEY_ARRAY_SIZE, n_keys=10**7) + +contains + subroutine benchmark(n_ints, n_keys) + use int2real_mod + use iso_fortran_env, only: real64 + + integer, intent(in) :: n_ints, n_keys + + type(int2real_t) :: h + integer :: key(n_ints) + integer :: i, j + real :: t0, t1, t2, t3 + real(real64), pointer :: val + + write(*,'(a)') "Start fhash benchmark:" + + write(*,'("n_ints: ", I0, ", n_keys: ", I0)') n_ints, n_keys + + call h%reserve(n_keys * 2) + call cpu_time(t0) + + do i = 1, n_keys + do j = 1, n_ints + key(j) = i + j + enddo + call h%set(key, i * 0.5d0) + enddo + call cpu_time(t1) + + do i = 1, n_keys + do j = 1, n_ints + key(j) = i + j + enddo + val => h%get_ptr(key) ! , autoval=3.0_real64) + enddo + call cpu_time(t2) + + call h%clear() + call cpu_time(t3) + + write(*,'(a,3(g15.3))') "Time to assemble/ get / clear: ", t1 - t0, t2 - t1, t3 - t2 + end subroutine +end program diff --git a/fhash.f90 b/fhash.f90 index c05af60..fe80937 100644 --- a/fhash.f90 +++ b/fhash.f90 @@ -3,71 +3,66 @@ ! DO NOT COMPILE THIS TEMPLATE FILE DIRECTLY. ! Use a wrapper module and include this file instead, e.g. fhash_modules.f90. ! Remove is not implemented since not needed currently. -! +! ! #define | meaning ! --------------------------------+----------------------------------------------------- -! SHORTNAME | (optional) The name of the type this FHASH table is -! | for. If set, it overrides all settings that have -! | have possibly been made for FHASH_MODULE_NAME, -! | FHASH_TYPE_NAME and FHASH_TYPE_ITERATOR_NAME. -! | -! FHASH_MODULE_NAME | The name of the module that encapsulates the FHASH -! | types and functionality -! FHASH_TYPE_NAME | The name of the actual FHASH type -! FHASH_TYPE_ITERATOR_NAME | The name of the FHASH type that can iterate through -! | the whole FHASH +! FHASH_NAME | The name of the type of FHASH table. ! | ! KEY_USE | (optional) A use statement that is required to use ! | a specific type as a key for the FHASH ! KEY_TYPE | The type of the keys. May require KEY_USE to be ! | accessible. +! KEYS_EQUAL_FUNC | (optional) function that returns whether two keys +! | are equal. Defaults to `a == b` or `all(a == b)`, +! | depending on whether the key is a scalar. ! | ! VALUE_USE | (optional) A use statement that is required to use ! | a specific type as a value for the FHASH ! VALUE_TYPE | The type of the values. May require VALUE_USE to be ! | accessible. +! +! HASH_FUNC | (optional) hash function name. Defaults to 'hash'. ! | -! VALUE_VALUE | Flag indicating that the values in FHASH are value -! | values. This is the default. (see VALUE_POINTER) -! VALUE_POINTER | Flag indicating that the values in FHASH are value -! | pointers. -! VALUE_ASSIGNMENT | (internal) The assignment operator, do not set it -! | anywhere, it is configured based on VALUE_VALUE or -! | VALUE_POINTER +! VALUE_POINTER | (optional) If defined, the values are pointers. +! VALUE_ASSIGNMENT | (internal) The assignment operator, do not set it +! | anywhere, it is configured based on VALUE_POINTER #endif -#ifdef SHORTNAME -#undef FHASH_MODULE_NAME -#undef FHASH_TYPE_NAME -#undef FHASH_TYPE_ITERATOR_NAME - #ifdef __GFORTRAN__ -#define PASTE(a) a -#define CONCAT(a,b) PASTE(a)b +# define PASTE(a) a +# define CONCAT(a,b) PASTE(a)b #else -#define PASTE(a,b) a ## b -#define CONCAT(a,b) PASTE(a,b) -#endif -#define FHASH_MODULE_NAME CONCAT(fhash_module__,SHORTNAME) -#define FHASH_TYPE_NAME CONCAT(fhash_type__,SHORTNAME) -#define FHASH_TYPE_ITERATOR_NAME CONCAT(fhash_type_iterator__,SHORTNAME) -#endif - -#undef VALUE_ASSIGNMENT -#ifndef VALUE_VALUE -#ifndef VALUE_POINTER -#define VALUE_VALUE -#endif +# define PASTE(a,b) a ## b +# define CONCAT(a,b) PASTE(a,b) #endif +#define FHASH_MODULE_NAME CONCAT(FHASH_NAME,_mod) +#define FHASH_TYPE_NAME CONCAT(FHASH_NAME,_t) +#define FHASH_TYPE_ITERATOR_NAME CONCAT(FHASH_NAME,_iter_t) +#define FHASH_TYPE_KV_TYPE_NAME CONCAT(FHASH_NAME,_kv_t) +#define FHASH_SORT_KV_NAME CONCAT(sort_,FHASH_NAME) + +! For some bizar reason both gfortran-10 and ifort-2021.4 fail to compile, unless +! this function has a unique name for every time that this file is included: +#define __COMPARE_AT_IDX CONCAT(fhash_type_compare__,FHASH_NAME) #ifdef VALUE_POINTER -#define VALUE_ASSIGNMENT => +# define VALUE_ASSIGNMENT => #else -#define VALUE_ASSIGNMENT = +# define VALUE_ASSIGNMENT = +#endif + +! Not all compilers implement finalization: +#if defined __GFORTRAN__ && __GNUC__ <= 5 +#else +# define _FINAL_IS_IMPLEMENTED +#endif +#ifdef _FINAL_IS_IMPLEMENTED +# define _FINAL_TYPEORCLASS type +#else +# define _FINAL_TYPEORCLASS class #endif module FHASH_MODULE_NAME -#undef FHASH_MODULE_NAME #ifdef KEY_USE KEY_USE @@ -84,77 +79,80 @@ module FHASH_MODULE_NAME public :: FHASH_TYPE_NAME public :: FHASH_TYPE_ITERATOR_NAME + public :: FHASH_TYPE_KV_TYPE_NAME + public :: FHASH_SORT_KV_NAME ! for convenience, because it's hard for the users to write a generic sort + ! (that circumvents the compiler bugs when passing pointers to internal functions to `qsort`) - type kv_type + type :: FHASH_TYPE_KV_TYPE_NAME KEY_TYPE :: key VALUE_TYPE :: value end type - type node_type - type(kv_type), allocatable :: kv + type :: node_type + type(FHASH_TYPE_KV_TYPE_NAME), allocatable :: kv type(node_type), pointer :: next => null() contains - ! If kv is not allocated, allocate and set to the key, value passed in. - ! If key is present and the same as the key passed in, overwrite the value. - ! Otherwise, defer to the next node (allocate if not allocated) - procedure :: node_set - - ! If kv is not allocated, fail and return 0. - ! If key is present and the same as the key passed in, return the value in kv. - ! If next pointer is associated, delegate to it. - ! Otherwise, fail and return 0. - procedure :: node_get - - ! If kv is not allocated, fail and return - ! If key is present and node is first in bucket, set first node in bucket to - ! the next node of first. Return success - ! If key is present and the node is another member of the linked list, link the - ! previous node's next node to this node's next node, deallocate this node, - ! return success - ! Otherwise, fail and return 0 - procedure :: node_remove - - ! Deallocate kv is allocated. - ! Call the clear method of the next node if the next pointer associated. - ! Deallocate and nullify the next pointer. - procedure :: node_clear - ! Return the length of the linked list start from the current node. - procedure :: node_depth + procedure, non_overridable :: node_depth + + ! No FINAL procedure here, because it would have to be recursive (at least + ! implicitly, because it finalizes the 'next' pointer), and a recursive + ! procedure is not performant. + ! Fortunately this type is not public, and it gets deallocated when finalizing the fhash. end type type FHASH_TYPE_NAME private - integer :: n_buckets = 0 integer :: n_keys = 0 - type(node_type), allocatable :: buckets(:) + type(node_type), contiguous, pointer :: buckets(:) => null() contains ! Returns the number of buckets. - procedure, public :: bucket_count + procedure, non_overridable, public :: bucket_count ! Return the number of collisions. - procedure, public :: n_collisions + procedure, non_overridable, public :: n_collisions ! Reserve certain number of buckets. - procedure, public :: reserve + procedure, non_overridable, public :: reserve ! Returns number of keys. - procedure, public :: key_count + procedure, non_overridable, public :: key_count ! Set the value at a given a key. - procedure, public :: set + procedure, non_overridable, public :: set ! Get the value at the given key. - procedure, public :: get + procedure, non_overridable, public :: get + +#ifndef VALUE_POINTER + generic :: get_ptr => get_ptr_or_autoval, get_ptr_or_null + procedure, non_overridable, public :: get_ptr_or_null + procedure, non_overridable, public :: get_ptr_or_autoval +#endif ! Remove the value with the given key. - procedure, public :: remove + procedure, non_overridable, public :: remove + + ! Get the key/value pairs as a list: + procedure, non_overridable, public :: as_list + procedure, non_overridable, public :: as_sorted_list - ! Clear all the allocated memory (must be called to prevent memory leak). - procedure, public :: clear + ! Return the accumalated storage size of an fhash, including the underlying pointers. + ! Takes the bit size of a key-value pair as an argument. + procedure, non_overridable, public :: deep_storage_size => fhash_deep_storage_size + + ! Clear all the allocated memory + procedure, non_overridable, public :: clear +#ifdef _FINAL_IS_IMPLEMENTED + final :: clear_final +#endif + generic, public :: assignment(=) => deepcopy_fhash + procedure, non_overridable, private :: deepcopy_fhash + + procedure, non_overridable, private :: key2bucket end type type FHASH_TYPE_ITERATOR_NAME @@ -166,34 +164,68 @@ module FHASH_MODULE_NAME contains ! Set the iterator to the beginning of a hash table. - procedure, public :: begin + procedure, non_overridable, public :: begin ! Get the key value of the next element and advance the iterator. - procedure, public :: next + procedure, non_overridable, public :: next end type - contains + interface default_hash + module procedure :: default_hash__int + module procedure :: default_hash__int_array + end interface + + interface all + module procedure :: scalar_all + end interface + + interface + integer function compare_keys_i(a, b) + import + implicit none + KEY_TYPE, intent(in) :: a, b + end function + end interface + procedure(compare_keys_i), pointer :: global_compare_ptr => null() + type(FHASH_TYPE_KV_TYPE_NAME), pointer :: global_sorted_kv_list_ptr(:) => null() + +contains + logical function keys_equal(a, b) + KEY_TYPE, intent(in) :: a, b + +#ifdef KEYS_EQUAL_FUNC + keys_equal = KEYS_EQUAL_FUNC(a, b) +#else + keys_equal = all(a == b) +#endif + end function function bucket_count(this) - class(FHASH_TYPE_NAME), intent(inout) :: this + class(FHASH_TYPE_NAME), intent(in) :: this integer :: bucket_count - bucket_count = this%n_buckets + if (.not. associated(this%buckets)) then + bucket_count = 0 + else + bucket_count = size(this%buckets) + endif end function function n_collisions(this) - class(FHASH_TYPE_NAME), intent(inout) :: this + class(FHASH_TYPE_NAME), intent(in) :: this integer :: n_collisions integer :: i + call assert(associated(this%buckets), "n_collisions: fhash has not been initialized") + n_collisions = 0 - do i = 1, this%n_buckets + do i = 1, size(this%buckets) n_collisions = n_collisions + node_depth(this%buckets(i)) - 1 enddo end function recursive function node_depth(this) result(depth) - class(node_type), intent(inout) :: this + class(node_type), intent(in) :: this integer :: depth if (.not. associated(this%next)) then @@ -203,29 +235,30 @@ recursive function node_depth(this) result(depth) endif end function - subroutine reserve(this, n_buckets) - class(FHASH_TYPE_NAME), intent(inout) :: this + impure elemental subroutine reserve(this, n_buckets) + class(FHASH_TYPE_NAME), intent(out) :: this integer, intent(in) :: n_buckets - integer, dimension(29) :: sizes + integer :: i + integer, parameter :: sizes(*) = [5, 11, 23, 47, 97, 199, 409, 823, 1741, 3469, 6949, 14033, & + & 28411, 57557, 116731, 236897, 480881, 976369,1982627, 4026031, & + & 8175383, 16601593, 33712729, 68460391, 139022417, 282312799, & + & 573292817, 1164186217, 2147483647] + integer, parameter :: n = size(sizes) - if (this%key_count() > 0) stop 'Cannot reserve when fhash is not empty.' + call assert(sizes(2:) - sizes(:n-1) > 0, "PROGRAMMING ERROR: sizes should be strictly increasing") + call assert(sizes(n) >= n_buckets, "Did not expect to need this many buckets.") - sizes = (/5, 11, 23, 47, 97, 199, 409, 823, 1741, 3469, 6949, 14033, & - & 28411, 57557, 116731, 236897, 480881, 976369,1982627, 4026031, & - & 8175383, 16601593, 33712729, 68460391, 139022417, 282312799, & - & 573292817, 1164186217, 2147483647/) - do i = 1, size(sizes) + do i = 1, n if (sizes(i) >= n_buckets) then - this%n_buckets = sizes(i) - allocate(this%buckets(this%n_buckets)) - return + allocate(this%buckets(sizes(i))) + exit endif enddo end subroutine - function key_count(this) - class(FHASH_TYPE_NAME), intent(inout) :: this + impure elemental function key_count(this) + class(FHASH_TYPE_NAME), intent(in) :: this integer :: key_count key_count = this%n_keys @@ -238,46 +271,57 @@ subroutine set(this, key, value) integer :: bucket_id logical :: is_new - bucket_id = modulo(hash_value(key), this%n_buckets) + 1 + call assert(associated(this%buckets), "set: fhash has not been initialized") - call this%buckets(bucket_id)%node_set(key, value, is_new) + bucket_id = this%key2bucket(key) + call node_set(this%buckets(bucket_id), key, value, is_new) if (is_new) this%n_keys = this%n_keys + 1 end subroutine recursive subroutine node_set(this, key, value, is_new) - class(node_type), intent(inout) :: this + ! If kv is not allocated, allocate and set to the key, value passed in. + ! If key is present and the same as the key passed in, overwrite the value. + ! Otherwise, defer to the next node (allocate if not allocated) + type(node_type), intent(inout) :: this KEY_TYPE, intent(in) :: key VALUE_TYPE, intent(in) :: value - logical, optional, intent(out) :: is_new + logical, intent(out) :: is_new if (.not. allocated(this%kv)) then allocate(this%kv) this%kv%key = key this%kv%value VALUE_ASSIGNMENT value - if (present(is_new)) is_new = .true. - else if (this%kv%key == key) then + is_new = .true. + else if (keys_equal(this%kv%key, key)) then this%kv%value VALUE_ASSIGNMENT value - if (present(is_new)) is_new = .false. + is_new = .false. else if (.not. associated(this%next)) allocate(this%next) - call this%next%node_set(key, value, is_new) + call node_set(this%next, key, value, is_new) endif end subroutine subroutine get(this, key, value, success) - class(FHASH_TYPE_NAME), intent(inout) :: this + class(FHASH_TYPE_NAME), intent(in) :: this KEY_TYPE, intent(in) :: key VALUE_TYPE, intent(out) :: value logical, optional, intent(out) :: success + integer :: bucket_id - bucket_id = modulo(hash_value(key), this%n_buckets) + 1 - call this%buckets(bucket_id)%node_get(key, value, success) + call assert(associated(this%buckets), "get: fhash has not been initialized") + + bucket_id = this%key2bucket(key) + call node_get(this%buckets(bucket_id), key, value, success) end subroutine recursive subroutine node_get(this, key, value, success) - class(node_type), intent(inout) :: this + ! If kv is not allocated, fail and return 0. + ! If key is present and the same as the key passed in, return the value in kv. + ! If next pointer is associated, delegate to it. + ! Otherwise, fail and return 0. + type(node_type), intent(in) :: this KEY_TYPE, intent(in) :: key VALUE_TYPE, intent(out) :: value logical, optional, intent(out) :: success @@ -285,103 +329,355 @@ recursive subroutine node_get(this, key, value, success) if (.not. allocated(this%kv)) then ! Not found. (Initial node in the bucket not set) if (present(success)) success = .false. - else if (this%kv%key == key) then + else if (keys_equal(this%kv%key, key)) then value VALUE_ASSIGNMENT this%kv%value if (present(success)) success = .true. - else if (associated(this%next)) then - call this%next%node_get(key, value, success) - else + elseif (.not. associated(this%next)) then if (present(success)) success = .false. + else + call node_get(this%next, key, value, success) endif end subroutine +#ifndef VALUE_POINTER + function get_ptr_or_null(this, key) result(value) + class(FHASH_TYPE_NAME), intent(in) :: this + KEY_TYPE, intent(in) :: key + VALUE_TYPE, pointer :: value + + integer :: bucket_id + type(node_type), pointer :: bucket + + call assert(associated(this%buckets), "get: fhash has not been initialized") + + bucket_id = this%key2bucket(key) + call assert(1 <= bucket_id .and. bucket_id <= size(this%buckets), "get: fhash has not been initialized") + bucket => this%buckets(bucket_id) + + value => node_get_ptr_or_null(bucket, key) + end function + + recursive function node_get_ptr_or_null(this, key) result(value) + type(node_type), target, intent(in) :: this + KEY_TYPE, intent(in) :: key + VALUE_TYPE, pointer :: value + + if (.not. allocated(this%kv)) then + value => null() + else if (keys_equal(this%kv%key, key)) then + value => this%kv%value + else if (.not. associated(this%next)) then + value => null() + else + value => node_get_ptr_or_null(this%next, key) + endif + end function + + function get_ptr_or_autoval(this, key, autoval) result(value) + class(FHASH_TYPE_NAME), intent(inout) :: this + KEY_TYPE, intent(in) :: key + VALUE_TYPE, intent(in) :: autoval + VALUE_TYPE, pointer :: value + + integer :: bucket_id + type(node_type), pointer :: bucket + logical :: is_new + + call assert(associated(this%buckets), "get: fhash has not been initialized") + + bucket_id = this%key2bucket(key) + call assert(1 <= bucket_id .and. bucket_id <= size(this%buckets), "get: fhash has not been initialized") + bucket => this%buckets(bucket_id) + + call node_get_ptr_or_autoval(bucket, key, value, is_new, autoval) + if (is_new) this%n_keys = this%n_keys + 1 + end function + + recursive subroutine node_get_ptr_or_autoval(this, key, value, is_new, autoval) + type(node_type), target, intent(inout) :: this + KEY_TYPE, intent(in) :: key + VALUE_TYPE, pointer, intent(out) :: value + logical, intent(out) :: is_new + VALUE_TYPE, intent(in) :: autoval + + if (.not. allocated(this%kv)) then + allocate(this%kv) + this%kv%key = key + this%kv%value = autoval + value => this%kv%value + is_new = .true. + else if (keys_equal(this%kv%key, key)) then + value => this%kv%value + is_new = .false. + else if (.not. associated(this%next)) then + allocate(this%next) + allocate(this%next%kv) + this%next%kv%key = key + this%next%kv%value = autoval + value => this%next%kv%value + is_new = .true. + else + call node_get_ptr_or_autoval(this%next, key, value, is_new, autoval) + endif + end subroutine +#endif + subroutine remove(this, key, success) class(FHASH_TYPE_NAME), intent(inout) :: this KEY_TYPE, intent(in) :: key logical, optional, intent(out) :: success integer :: bucket_id - type(node_type) :: first logical :: locSuccess + type(node_type), pointer :: first, temp - bucket_id = modulo(hash_value(key), this%n_buckets) + 1 - first = this%buckets(bucket_id) - - if (allocated(first%kv)) then - if (first%kv%key == key) then - if (associated(first%next)) then - this%buckets(bucket_id)%kv%key = this%buckets(bucket_id)%next%kv%key - this%buckets(bucket_id)%kv%value VALUE_ASSIGNMENT this%buckets(bucket_id)%next%kv%value - deallocate(first%next%kv) - this%buckets(bucket_id)%next => this%buckets(bucket_id)%next%next - else - deallocate(this%buckets(bucket_id)%kv) - endif - locSuccess = .true. - else - call node_remove(first%next, key, locSuccess, first) - end if + call assert(associated(this%buckets), "remove: fhash has not been initialized") + + bucket_id = this%key2bucket(key) + first => this%buckets(bucket_id) + + if (.not. allocated(first%kv)) then + locSuccess = .false. + elseif (.not. keys_equal(first%kv%key, key)) then + call node_remove(first, key, locSuccess) + elseif (associated(first%next)) then + call move_alloc(first%next%kv, first%kv) + temp => first%next + first%next => first%next%next + deallocate(temp) + locSuccess = .true. else - locSuccess = .false. + deallocate(first%kv) + locSuccess = .true. endif - + if (locSuccess) this%n_keys = this%n_keys - 1 if (present(success)) success = locSuccess - end subroutine - recursive subroutine node_remove(this, key, success, last) - class(node_type), intent(inout) :: this, last + recursive subroutine node_remove(last, key, success) + ! If kv is not allocated, fail and return + ! If key is present and node is first in bucket, set first node in bucket to + ! the next node of first. Return success + ! If key is present and the node is another member of the linked list, link the + ! previous node's next node to this node's next node, deallocate this node, + ! return success + ! Otherwise, fail and return 0 + type(node_type), intent(inout) :: last KEY_TYPE, intent(in) :: key logical, intent(out) :: success - if (.not. allocated(this%kv)) then - ! Not found. (Initial node in the bucket not set) + type(node_type), pointer :: next + + next => last%next + + if (.not. allocated(next%kv)) then success = .false. - else if (this%kv%key == key) then - last%next => this%next - nullify(this%next) - deallocate(this%kv) + else if (keys_equal(next%kv%key, key)) then + last%next => next%next + deallocate(next%kv) success = .true. - else if (associated(this%next)) then - call this%next%node_remove(key, success, this) - else + else if (.not. associated(next%next)) then success = .false. + else + call node_remove(next, key, success) endif end subroutine - subroutine clear(this) - class(FHASH_TYPE_NAME), intent(inout) :: this + subroutine as_list(this, kv_list) + class(FHASH_TYPE_NAME), target, intent(in) :: this + type(FHASH_TYPE_KV_TYPE_NAME), intent(out) :: kv_list(:) + + integer :: i, n + type(FHASH_TYPE_ITERATOR_NAME) :: iter + integer :: iter_stat + + n = this%key_count() + call assert(size(kv_list) == n, "as_list: kv_list has a bad size") + + call iter%begin(this) + do i = 1, n + call iter%next(kv_list(i)%key, kv_list(i)%value, iter_stat) + call assert(iter_stat == 0, "as_list: internal error: iterator stopped unexpectedly") + enddo + end subroutine + + subroutine as_sorted_list(this, kv_list, compare) + class(FHASH_TYPE_NAME), target, intent(in) :: this + type(FHASH_TYPE_KV_TYPE_NAME), target, intent(out) :: kv_list(:) + procedure(compare_keys_i) :: compare + + call this%as_list(kv_list) + call FHASH_SORT_KV_NAME(kv_list, compare) + end subroutine + + subroutine FHASH_SORT_KV_NAME(kv_list, compare) + type(FHASH_TYPE_KV_TYPE_NAME), target, intent(inout) :: kv_list(:) + procedure(compare_keys_i) :: compare + + call assert(.not. (associated(global_compare_ptr) .or. associated(global_sorted_kv_list_ptr)), & + "It looks like I am already sorting, and this is not thread-safe.") + global_compare_ptr => compare + global_sorted_kv_list_ptr => kv_list + + call permute(kv_list, sorting_perm()) + + global_compare_ptr => null() + global_sorted_kv_list_ptr => null() + end subroutine + + subroutine permute(x, perm) + ! Performs + ! x = x(perm) + ! but (i) this is more efficient, and (ii) ifort appears to put `x(perm)` on + ! the stack before copying, causing a segfault for large arrays. + use, intrinsic :: iso_c_binding, only: c_int + use, intrinsic :: iso_fortran_env, only: int8, int16 + + type(FHASH_TYPE_KV_TYPE_NAME), intent(inout) :: x(:) + integer(c_int), intent(in) :: perm(:) + + type(FHASH_TYPE_KV_TYPE_NAME) :: temp + integer :: i, n, j, jnew + integer, parameter :: smallest_int = merge(int8, int16, int8 > 0) + logical(smallest_int), allocatable :: done(:) + + call assert(size(x) == size(perm), "INTERNAL ERROR: permute: inconsistent sizes") + n = size(x) + + allocate(done(n)) + done = .false._smallest_int + do i = 1, n + ! Follow the permutations, which form a cycle: + j = i + temp = x(i) + do + if (done(j)) exit + jnew = perm(j) + if (jnew == i) then + x(j) = temp + else + x(j) = x(jnew) + endif + done(j) = .true._smallest_int + j = jnew + enddo + enddo + end subroutine + + impure elemental subroutine deepcopy_fhash(lhs, rhs) + class(FHASH_TYPE_NAME), intent(out) :: lhs + type(FHASH_TYPE_NAME), intent(in) :: rhs + integer :: i - if (.not. allocated(this%buckets)) return + if (.not. associated(rhs%buckets)) return - do i = 1, size(this%buckets) - if (associated(this%buckets(i)%next)) then - call this%buckets(i)%next%node_clear() - deallocate(this%buckets(i)%next) - endif + lhs%n_keys = rhs%n_keys + allocate(lhs%buckets(size(rhs%buckets))) + do i = 1, size(lhs%buckets) + call deepcopy_node(rhs%buckets(i), lhs%buckets(i)) enddo - deallocate(this%buckets) - this%n_keys = 0 - this%n_buckets = 0 end subroutine - recursive subroutine node_clear(this) - class(node_type), intent(inout) :: this + recursive subroutine deepcopy_node(this, copy) + class(node_type), intent(in) :: this + type(node_type), intent(out) :: copy + + if (.not. allocated(this%kv)) then + call assert(.not. associated(this%next), 'internal error: node has a "next" pointer, but it''s kv pair has not been set') + else + allocate(copy%kv, source=this%kv) + endif if (associated(this%next)) then - call this%next%node_clear() - deallocate(this%next) - nullify(this%next) + allocate(copy%next) + call deepcopy_node(this%next, copy%next) endif end subroutine + impure elemental integer function fhash_deep_storage_size(this, keyval_ss) result(s) + class(FHASH_TYPE_NAME), intent(in) :: this + integer, intent(in) :: keyval_ss + + integer :: i + + s = storage_size(this) + if (associated(this%buckets)) then + do i = 1, size(this%buckets) + s = s + node_deep_storage_size(this%buckets(i), keyval_ss) + enddo + endif + end function + + recursive integer function node_deep_storage_size(node, keyval_ss) result(s) + type(node_type), intent(in) :: node + integer, intent(in) :: keyval_ss + + s = storage_size(node) + keyval_ss + if (associated(node%next)) s = s + node_deep_storage_size(node%next, keyval_ss) + end function + + impure elemental subroutine clear(this) + class(FHASH_TYPE_NAME), intent(inout) :: this + + integer :: i + + this%n_keys = 0 + if (associated(this%buckets)) then + do i = 1, size(this%buckets) + call clear_children(this%buckets(i)) + if (allocated(this%buckets(i)%kv)) deallocate(this%buckets(i)%kv) + enddo + deallocate(this%buckets) + endif + end subroutine + +#ifdef _FINAL_IS_IMPLEMENTED + impure elemental subroutine clear_final(this) + type(FHASH_TYPE_NAME), intent(inout) :: this + + call this%clear() + end subroutine +#endif + + subroutine clear_children(node) + ! Not a recursive subroutine, because (i) this is much more performant, and + ! (ii) gfortran thinks that it cannot be both elemental and recursive. + _FINAL_TYPEORCLASS(node_type), intent(inout) :: node + + type(node_type), pointer :: prev, next + + next => node%next + do + if (.not. associated(next)) return + prev => next + next => prev%next + deallocate(prev) + enddo + end subroutine + + integer function key2bucket(this, key) result(bucket_id) + class(FHASH_TYPE_NAME), intent(in) :: this + KEY_TYPE, intent(in) :: key + + integer :: hash + +#ifdef HASH_FUNC + hash = HASH_FUNC(key) +#else + hash = default_hash(key) +#endif + bucket_id = modulo(hash, size(this%buckets)) + 1 + end function + subroutine begin(this, fhash_target) class(FHASH_TYPE_ITERATOR_NAME), intent(inout) :: this type(FHASH_TYPE_NAME), target, intent(in) :: fhash_target - this%bucket_id = 1 + call assert(associated(fhash_target%buckets), "cannot start iteration when fhash is empty") + + this%bucket_id = 1 this%node_ptr => fhash_target%buckets(1) this%fhash_ptr => fhash_target end subroutine @@ -392,15 +688,21 @@ subroutine next(this, key, value, status) VALUE_TYPE, intent(out) :: value integer, optional, intent(out) :: status - do while (.not. associated(this%node_ptr) .or. .not. allocated(this%node_ptr%kv)) - if (this%bucket_id < this%fhash_ptr%n_buckets) then + call assert(associated(this%fhash_ptr), "next: iterator has not been initialized") + + do + if (associated(this%node_ptr)) then + if (allocated(this%node_ptr%kv)) exit + endif + + if (this%bucket_id < size(this%fhash_ptr%buckets)) then this%bucket_id = this%bucket_id + 1 this%node_ptr => this%fhash_ptr%buckets(this%bucket_id) else if (present(status)) status = -1 #ifdef VALUE_TYPE_INIT value VALUE_ASSIGNMENT VALUE_TYPE_INIT -#endif +#endif return endif enddo @@ -409,17 +711,108 @@ subroutine next(this, key, value, status) value VALUE_ASSIGNMENT this%node_ptr%kv%value if (present(status)) status = 0 this%node_ptr => this%node_ptr%next + end subroutine + + integer function default_hash__int(key) result(hash) + integer, intent(in) :: key + + hash = key + end function + integer function default_hash__int_array(key) result(hash) + integer, intent(in) :: key(:) + + real(kind(1.0d0)), parameter :: phi = (sqrt(5.0d0) + 1) / 2 + ! Do not use `nint` intrinsic, because ifort claims that "Fortran 2018 specifies that + ! "an elemental intrinsic function here be of type integer or character and + ! each argument must be an initialization expr of type integer or character": + integer, parameter :: magic_number = 0.5d0 + 2.0d0**bit_size(hash) * (1 - 1 / phi) + integer :: i + + hash = 0 + do i = 1, size(key) + ! This triggers an error in `gfortran` (version 9.3.0) with the `-ftrapv` option. + ! Compiler bug? + hash = ieor(hash, key(i) + magic_number + ishft(hash, 6) + ishft(hash, -2)) + enddo + end function + + logical function scalar_all(scal) + logical, intent(in) :: scal + + scalar_all = scal + end function + + impure elemental subroutine assert(condition, msg) + use, intrinsic :: iso_fortran_env, only: error_unit + logical, intent(in) :: condition + character(*), intent(in) :: msg + + if (.not. condition) then + write(error_unit, '(a)') msg + error stop + endif end subroutine + integer(c_int) function __COMPARE_AT_IDX(c_a, c_b) bind(C) + use, intrinsic :: iso_c_binding, only: c_int, c_ptr, c_f_pointer + + type(c_ptr), value :: c_a, c_b + + integer(c_int), pointer :: f_a, f_b + + call c_f_pointer(c_a, f_a) + call c_f_pointer(c_b, f_b) + __COMPARE_AT_IDX = int(global_compare_ptr(global_sorted_kv_list_ptr(f_a)%key, & + global_sorted_kv_list_ptr(f_b)%key), kind=c_int) + end function + + function sorting_perm() result(perm) + use, intrinsic :: iso_c_binding + + integer(c_int), allocatable, target :: perm(:) + + integer(c_int) :: i, n + type(c_funptr) :: fun + interface + subroutine c_qsort(array, elem_count, elem_size, compare) bind(C, name="qsort") + ! The function pointer has the interface + ! int(*compar)(const void *, const void *) + use, intrinsic :: iso_c_binding + implicit none + type(c_ptr), value :: array + integer(c_size_t), value :: elem_count + integer(c_size_t), value :: elem_size + type(c_funptr), value :: compare + end subroutine + end interface + + call assert(associated(global_sorted_kv_list_ptr) .and. associated(global_compare_ptr), & + "internal error: global sorting state has not been set yet") + + n = size(global_sorted_kv_list_ptr, kind=c_int) + allocate(perm(n)) + do i = 1, n + perm(i) = i + enddo + fun = c_funloc(__COMPARE_AT_IDX) + if (n > 0_c_int) call c_qsort(c_loc(perm(1)), int(n, kind=c_size_t), c_sizeof(perm(1)), fun) + end function end module +#undef FHASH_NAME +#undef FHASH_MODULE_NAME +#undef FHASH_TYPE_NAME +#undef FHASH_TYPE_ITERATOR_NAME +#undef FHASH_TYPE_KV_TYPE_NAME +#undef HASH_FUNC +#undef _FINAL_IS_IMPLEMENTED +#undef _FINAL_TYPEORCLASS +#undef __COMPARE_AT_IDX #undef KEY_TYPE +#undef KEYS_EQUAL_FUNC #undef VALUE_TYPE #undef VALUE_TYPE_INIT #undef VALUE_ASSIGNMENT -#undef FHASH_TYPE_NAME -#undef FHASH_TYPE_ITERATOR_NAME -#undef SHORTNAME #undef CONCAT #undef PASTE diff --git a/fhash_modules.f90 b/fhash_modules.f90 index b82dd29..e97863f 100644 --- a/fhash_modules.f90 +++ b/fhash_modules.f90 @@ -1,5 +1,9 @@ +#define FHASH_NAME i2char +#define KEY_TYPE integer +#define VALUE_TYPE character(10) +#include "fhash.f90" + ! Define the module for the key type. -! Override the hash_value and == operator interface. module ints_module implicit none @@ -12,96 +16,68 @@ module ints_module module procedure hash_value_ints end interface - interface operator (==) - module procedure ints_equal - end interface - -#ifdef __GFORTRAN__ - interface assignment (=) - module procedure ints_ptr_assign - end interface -#endif - - contains - - function hash_value_ints(ints) result(hash) - type(ints_type), intent(in) :: ints - integer :: hash - integer :: i - - hash = 0 - do i = 1, size(ints%ints) - hash = xor(hash, ints%ints(i) + 1640531527 + ishft(hash, 6) + ishft(hash, -2)) - enddo - end function - - function ints_equal(lhs, rhs) - type(ints_type), intent(in) :: lhs, rhs - logical :: ints_equal - integer :: i - - if (size(lhs%ints) /= size(rhs%ints)) then +contains + + function hash_value_ints(ints) result(hash) + use, intrinsic :: iso_fortran_env, only: int64, real64 + type(ints_type), intent(in) :: ints + integer(kind(ints%ints)) :: hash + + real(real64), parameter :: phi = (sqrt(5.0_real64) + 1) / 2 + ! Do not use `nint` intrinsic, because ifort claims that "Fortran 2018 specifies that + ! "an elemental intrinsic function here be of type integer or character and + ! each argument must be an initialization expr of type integer or character": + integer, parameter :: magic_number = 0.5d0 + 2.0d0**bit_size(hash) * (1 - 1 / phi) + integer :: i + + hash = 0 + do i = 1, size(ints%ints) + ! This triggers an error in `gfortran` (version 9.3.0) with the `-ftrapv` option. + ! Compiler bug? + hash = ieor(hash, ints%ints(i) + magic_number + ishft(hash, 6) + ishft(hash, -2)) + enddo + end function + + function ints_equal(lhs, rhs) + type(ints_type), intent(in) :: lhs, rhs + logical :: ints_equal + integer :: i + + if (size(lhs%ints) /= size(rhs%ints)) then + ints_equal = .false. + return + endif + + do i = 1, size(lhs%ints) + if (lhs%ints(i) /= rhs%ints(i)) then ints_equal = .false. return endif + enddo - do i = 1, size(lhs%ints) - if (lhs%ints(i) /= rhs%ints(i)) then - ints_equal = .false. - return - endif - enddo - - ints_equal = .true. - - end function - -#ifdef __GFORTRAN__ - subroutine ints_ptr_assign(lhs, rhs) - type(ints_type), pointer, intent(inout) :: lhs - type(ints_type), pointer, intent(in) :: rhs - lhs => rhs - end subroutine -#endif + ints_equal = .true. + end function end module ints_module ! Define the macros needed by fhash and include fhash.f90 +#define FHASH_NAME ints_double #define KEY_USE use ints_module #define KEY_TYPE type(ints_type) +#define KEYS_EQUAL_FUNC ints_equal #define VALUE_USE use, intrinsic :: iso_fortran_env #define VALUE_TYPE real(real64) +#define HASH_FUNC hash_value #define VALUE_TYPE_INIT 0.0 -#define SHORTNAME ints_double #include "fhash.f90" -module int_module - implicit none - - interface hash_value - module procedure hash_value_int - end interface - - contains - - function hash_value_int(int) result(hash) - integer, intent(in) :: int - integer :: hash - - hash = int - end function -end module - ! Define the macros needed by fhash and include fhash.f90 -#define KEY_USE use int_module +#define FHASH_NAME int_ints_ptr #define KEY_TYPE integer #define VALUE_USE use ints_module #define VALUE_TYPE type(ints_type), pointer !#define VALUE_TYPE_INIT null() -#define SHORTNAME int_ints_ptr -#ifndef __GFORTRAN__ #define VALUE_POINTER -#endif #ifdef VALUE_TYPE_INIT #define CHECK_ITERATOR_VALUE #endif diff --git a/fhash_test.f90 b/fhash_test.f90 index eff155a..8e97814 100644 --- a/fhash_test.f90 +++ b/fhash_test.f90 @@ -1,54 +1,264 @@ +module tests_mod + use ints_module + use ints_double_mod + use, intrinsic :: iso_fortran_env + implicit none + +contains + subroutine test_as_list + use i2char_mod + + type(i2char_t) :: h + character(10) :: val + integer :: i + integer, parameter :: n_uniq = 4 + type(i2char_kv_t) :: kv_list(n_uniq) + logical :: success + + call h%reserve(3) + call h%set(1, "one (typo)") + call h%set(1, "one ") + call h%set(0, "zero ") + call h%set(4, "four ") + call h%set(7, "seven ") + call assert(h%get_ptr(1) == "one", 'expected h%get_ptr(1) == "one"') + + call h%as_list(kv_list) + call assert(size(kv_list) == n_uniq, "kv_list has bad size") + do i = 1, n_uniq + call h%get(kv_list(i)%key, val, success) + call assert(success, "key in list was not in hash") + call assert(val == kv_list(i)%value, "bad value in list") + enddo + + call h%as_sorted_list(kv_list, compare_ints) + call assert(size(kv_list) == n_uniq, "sorted kv_list has bad size") + do i = 1, n_uniq + call h%get(kv_list(i)%key, val, success) + call assert(success, "key in sorted list was not in hash") + call assert(val == kv_list(i)%value, "bad value in sorted list") + enddo + call assert(kv_list(2:)%key - kv_list(:size(kv_list)-1)%key > 0, "sorted list should be strictly increasing") + end subroutine + + subroutine test_large_sort() + ! Test with an array that's too big for the stack. + use i2char_mod + + real, parameter :: gigabytes = 0.001 ! make larger for expensive test + type(i2char_kv_t), allocatable :: kv_list(:) + integer, parameter :: max = 1000 + integer :: i, n, val + real :: x + + n = nint(gigabytes * 1024**3 / (storage_size(kv_list) / 8)) + + ! This list contains duplicate keys, which is not possible for lists + ! obtained from a hash, but it should work anyway: + allocate(kv_list(n)) + do i = 1, n + call random_number(x) + val = nint(x * max) + kv_list(i)%key = val + write(kv_list(i)%value, "(i0)") val + enddo + + call sort_i2char(kv_list, compare_ints) + + do i = 2, n + call assert(kv_list(i-1)%key <= kv_list(i)%key, "large sort: list should be increasing") + enddo + do i = 2, n + read(kv_list(i)%value, *) val + call assert(val == kv_list(i)%key, "large sort: bad value") + enddo + end subroutine + + subroutine test_get_ptr() + use i2char_mod + + type(i2char_t) :: h + character(:), pointer :: c + type(i2char_kv_t), allocatable :: kv_list(:) + integer :: i + + call h%reserve(1) + + call h%set(7, "seven ") + + c => h%get_ptr(0) + call assert(.not. associated(c), "expected .not. associated(c)") + c => h%get_ptr(1) + call assert(.not. associated(c), "expected .not. associated(c)") + c => h%get_ptr(7) + call assert(associated(c), "expected associated(c)") + call assert(c == "seven", "exptected c == 'seven'") + + c(:) = 'new seven' + c => h%get_ptr(7) + call assert(associated(c), "expected associated(c)") + call assert(c == 'new seven', "expected c == 'new seven'") + + do i = 1, 3 + c => h%get_ptr(2, autoval='auto two ') + call assert(associated(c), "expected associated(c)") + call assert(c == 'auto two', "expected c == 'auto two'") + call assert(h%key_count() == 2, 'expected two keys in h') + enddo + + allocate(kv_list(h%key_count())) + call h%as_sorted_list(kv_list, compare_ints) + call assert(size(kv_list) == 2, "expected size(kv_list) == 2") + call assert(kv_list%key == [2, 7], "keys should be [2, 7]") + call assert(kv_list%value == ['auto two ', 'new seven'], "test_get_ptr: bad values") + end subroutine + + integer function compare_ints(a, b) + integer, intent(in) :: a, b + + compare_ints = a - b + end function + + subroutine test_deep_storage_size() + type(ints_double_t) :: h + type(ints_type) :: key + + integer :: i + integer :: s + + s = h%deep_storage_size(0123) + + call h%reserve(10) + allocate(key%ints(2)) + + do i = 1, 3 + key%ints = i + call h%set(key, real(i, kind=real64)) + enddo + s = h%deep_storage_size(0123) + + do i = 1, 20 + key%ints = i + call h%set(key, real(i, kind=real64)) + enddo + s = h%deep_storage_size(0123) + end subroutine + + subroutine test_assignment() + type(ints_double_t) :: a, b, c + type(ints_type) :: keys(100) + real(real64) :: values(size(keys)) + + integer :: i + + do i = 1, size(keys) + allocate(keys(i)%ints(3)) + keys(i)%ints = i + values(i) = i + enddo + + call a%reserve(10) + do i = 1, size(keys) + call a%set(keys(i), values(i)) + enddo + call check_kv(a) + + c = a + call check_kv(a) + call check_kv(c) + + call b%reserve(1) + b = a + call check_kv(a) + call check_kv(b) + call a%clear() + call check_kv(b) + + a = b + call check_kv(a) + call check_kv(b) + call a%clear() + call check_kv(b) + contains + subroutine check_kv(fhash) + type(ints_double_t), intent(in) :: fhash + + type(ints_double_iter_t) :: iter + type(ints_type) :: key + real(real64) :: val + integer :: i + integer :: status + logical :: have_seen(size(keys)) + + have_seen = .false. + call iter%begin(fhash) + do + call iter%next(key, val, status) + if (status /= 0) exit + + i = nint(val) + call assert(abs(val - i) <= 10*epsilon(val), "check_kv: bad value") + call assert(key%ints == i, "check_kv: bad key") + call assert(.not. have_seen(i), "check_kv: found the same key twice") + have_seen(i) = .true. + enddo + call assert(all(have_seen), "check_kv: did not get all keys from the iterator") + end subroutine + end subroutine + + impure elemental subroutine assert(condition, msg) + use, intrinsic :: iso_fortran_env, only: error_unit + logical, intent(in) :: condition + character(*), intent(in) :: msg + + if (.not. condition) then + write(error_unit, '(a)') "FAILED A TEST: " // msg + error stop + endif + end subroutine +end module + program fhash_test use, intrinsic :: iso_fortran_env - use fhash_module__ints_double - use fhash_module__int_ints_ptr + use ints_double_mod + use int_ints_ptr_mod use ints_module - + use tests_mod implicit none - real :: start, finish - integer :: numKeys - + call test_get_ptr() call test_contructor() call test_reserve() call test_insert_and_get_ints_double() call test_insert_and_get_int_ints_ptr() call test_insert_get_and_remove_int_ints_ptr() call test_iterate() + call test_as_list() + call test_large_sort() + call test_deep_storage_size() + call test_assignment() print *, 'ALL TESTS PASSED.' - print *, 'Start benchmark:' - - ! Benchmark - numKeys = 10000000 -#ifdef __GFORTRAN__ - if (__SIZEOF_POINTER__ == 8) numKeys = numKeys * 2 -#else - if (int_ptr_kind() == 8) numKeys = numKeys * 2 -#endif - call cpu_time(start) - call benchmark(2, numKeys) - call cpu_time(finish) - print '("Time finish = ", G0.3," seconds.")', finish - start - contains subroutine test_contructor() - type(fhash_type__ints_double) h + type(ints_double_t) h if (h%key_count() /= 0) stop 'expect no keys' end subroutine subroutine test_reserve() - type(fhash_type__ints_double) h + type(ints_double_t) :: h + call h%reserve(3) - if (h%bucket_count() /= 5) stop 'expect to reserve 5 buckets' + call assert(h%bucket_count() == 5, 'expected to reserve 5 buckets') end subroutine subroutine test_insert_and_get_ints_double() - type(fhash_type__ints_double) :: h + type(ints_double_t) :: h type(ints_type) :: key real(real64) :: value + real(real64), pointer :: val_ptr integer :: i logical :: success call h%reserve(5) @@ -57,11 +267,20 @@ subroutine test_insert_and_get_ints_double() key%ints = 0 do i = 1, 10 key%ints(i) = i + call h%get(key, value, success) if (success) stop 'expect not found' + + val_ptr => h%get_ptr(key) + call assert(.not. associated(val_ptr), "expected a null pointer") + call h%set(key, i * 0.5_real64) call h%get(key, value) if (abs(value - i * 0.5_real64) > epsilon(value)) stop 'expect to get 0.5 i' + + val_ptr => h%get_ptr(key) + call assert(associated(val_ptr), "expected a, associated pointer") + call assert(abs(val_ptr - i * 0.5_real64) <= epsilon(val_ptr), 'expect to get pointer value of 0.5 i') enddo if (h%key_count() /= 10) stop 'expect key count to be 10' if (h%n_collisions() >= 10 .or. h%n_collisions() < 5) stop 'expect n_collisions in [5, 10)' @@ -72,7 +291,7 @@ subroutine test_insert_and_get_ints_double() end subroutine subroutine test_insert_and_get_int_ints_ptr() - type(fhash_type__int_ints_ptr) :: h + type(int_ints_ptr_t) :: h type(ints_type), target :: value type(ints_type), pointer :: value_ptr, value_ptr2, value_ptr3 logical :: success @@ -90,12 +309,13 @@ subroutine test_insert_and_get_int_ints_ptr() end subroutine subroutine test_insert_get_and_remove_int_ints_ptr() - type(fhash_type__int_ints_ptr) :: h + type(int_ints_ptr_t) :: h integer, parameter :: num_values = 50 - type(ints_type), pointer :: pValues(:), pValue + type(ints_type), pointer :: pValue + type(ints_type), target, allocatable :: pValues(:) logical :: success integer :: i, key, status - type(fhash_type_iterator__int_ints_ptr) :: it + type(int_ints_ptr_iter_t) :: it ! prepare allocate(pValues(num_values)) @@ -105,9 +325,9 @@ subroutine test_insert_get_and_remove_int_ints_ptr() ! add do i = 1, num_values + allocate(pValues(i)%ints(2)) + pValues(i)%ints(1) = i pValue => pValues(i) - allocate(pValue%ints(2)) - pValue%ints(1) = i call h%set(i, pValue) end do @@ -115,7 +335,6 @@ subroutine test_insert_get_and_remove_int_ints_ptr() ! get do i = num_values, i, -1 - nullify(pValue) call h%get(i, pValue, success) if (.not. success) stop 'expect a value for given key ' if (pValue%ints(1) .ne. pValues(i)%ints(1)) stop 'expect different value for given key' @@ -165,14 +384,11 @@ subroutine test_insert_get_and_remove_int_ints_ptr() #endif call h%clear() - - deallocate(pValues) - end subroutine subroutine test_iterate() - type(fhash_type__ints_double) :: h - type(fhash_type_iterator__ints_double) :: it + type(ints_double_t) :: h + type(ints_double_iter_t) :: it type(ints_type) :: key real(real64) :: value integer :: i, j @@ -217,28 +433,4 @@ subroutine test_iterate() call h%clear() end subroutine - - subroutine benchmark(n_ints, n_keys) - integer, intent(in) :: n_ints, n_keys - type(fhash_type__ints_double) :: h - type(ints_type) :: key - real :: start, finish - integer :: i, j - - print '("n_ints: ", I0, ", n_keys: ", I0)', n_ints, n_keys - - call cpu_time(start) - call h%reserve(n_keys * 2) - allocate(key%ints(n_ints)) - do i = 1, n_keys - do j = 1, n_ints - key%ints(j) = i + j - enddo - call h%set(key, (i + j) * 0.5_real64) - enddo - call cpu_time(finish) - print '("Time insert = ", G0.3," seconds.")', finish - start - call h%clear() - end subroutine - end program