diff --git a/ImCreate_cube.c b/ImCreate_cube.c index 4663450..5d1d344 100644 --- a/ImCreate_cube.c +++ b/ImCreate_cube.c @@ -121,7 +121,7 @@ int main() yc = y0 + r*sin(angle); - imarray.md->write = 1; // set this flag to 1 when writing data + SHMIM_WRITE_ACQUIRE(imarray.md); index = imarray.md->cnt1 +1; if(index == imarray.md->size[2]) @@ -146,13 +146,13 @@ int main() // imarray.array.F[jj*imarray.md->size[0]+ii] = 0.0f; } imarray.md->cnt1 = index; - imarray.md->cnt0++; + SHMIM_CNT0_INCREMENT(imarray.md); clock_gettime(CLOCK_ISIO, &imarray.md[0].lastaccesstime); // POST ALL SEMAPHORES ImageStreamIO_sempost(&imarray, -1); - imarray.md->write = 0; // Done writing data + SHMIM_WRITE_RELEASE(imarray.md); usleep(dtus); angle += dangle; diff --git a/ImCreate_img.c b/ImCreate_img.c index f2d19ce..9cceda9 100644 --- a/ImCreate_img.c +++ b/ImCreate_img.c @@ -71,17 +71,25 @@ int main() free(imsize); - strcpy(imarray.kw[0].name, "keyword_long"); + snprintf(imarray.kw[0].name, + sizeof(imarray.kw[0].name), + "%s", "keyword_long"); imarray.kw[0].type = 'L'; imarray.kw[0].value.numl = 42; - strcpy(imarray.kw[1].name, "keyword_float"); + snprintf(imarray.kw[1].name, + sizeof(imarray.kw[1].name), + "%s", "keyword_float"); imarray.kw[1].type = 'D'; imarray.kw[1].value.numf = 3.141592; - strcpy(imarray.kw[2].name, "keyword_string"); + snprintf(imarray.kw[2].name, + sizeof(imarray.kw[2].name), + "%s", "keyword_string"); imarray.kw[2].type = 'S'; - strcpy(imarray.kw[2].value.valstr, "Hello!"); + snprintf(imarray.kw[2].value.valstr, + sizeof(imarray.kw[2].value.valstr), + "%s", "Hello!"); float angle; float r; @@ -109,7 +117,7 @@ int main() yc = y0 + r*sin(angle); - imarray.md->write = 1; // set this flag to 1 when writing data + SHMIM_WRITE_ACQUIRE(imarray.md); for(ii=0; iisize[0]; ii++) for(jj=0; jjsize[1]; jj++) @@ -126,11 +134,11 @@ int main() // imarray.array.F[jj*imarray.md->size[0]+ii] = 0.0f; } imarray.md->cnt1 = 0; - imarray.md->cnt0++; + SHMIM_CNT0_INCREMENT(imarray.md); // POST ALL SEMAPHORES ImageStreamIO_sempost(&imarray, -1); - imarray.md->write = 0; // Done writing data + SHMIM_WRITE_RELEASE(imarray.md); usleep(dtus); angle += dangle; diff --git a/ImCreate_test.c b/ImCreate_test.c index afce7a4..3e0417a 100644 --- a/ImCreate_test.c +++ b/ImCreate_test.c @@ -68,7 +68,7 @@ int main() angle += dangle; if(angle > 2.0*M_PI) { angle -= 2.0 * M_PI; } - imarray->md->write = 1; // Poor-man's mutex when writing + SHMIM_WRITE_ACQUIRE(imarray->md); // ->array is union; ->array.F is float pointer to image float* dotF = imarray->array.F; @@ -88,8 +88,8 @@ int main() // Post all semaphores (index = -1) ImageStreamIO_sempost(imarray, -1); - imarray->md->write = 0; // Done writing; release mutex - imarray->md->cnt0++; + SHMIM_WRITE_RELEASE(imarray->md); + SHMIM_CNT0_INCREMENT(imarray->md); imarray->md->cnt1++; usleep(dtus); // Wait 1ms diff --git a/ImCreate_test_gpuipc.c b/ImCreate_test_gpuipc.c index 79e2cdc..d7dd613 100644 --- a/ImCreate_test_gpuipc.c +++ b/ImCreate_test_gpuipc.c @@ -96,7 +96,7 @@ int main() printf("ImCreate_test_gpuipc is waiting update\n"); ImageStreamIO_semwait(&imarray[0], 0); - while(imarray[0].md[0].write ); + while(SHMIM_WRITE_LOAD(&imarray[0].md[0])); printf("ImCreate_test_gpuipc reads in SHM\n"); cudaMemcpy(h_ptr, d_ptr, imsize[0]*imsize[1]*sizeof(float), diff --git a/ImCreate_test_gpuipc2.c b/ImCreate_test_gpuipc2.c index d388d00..b9457e9 100644 --- a/ImCreate_test_gpuipc2.c +++ b/ImCreate_test_gpuipc2.c @@ -74,8 +74,8 @@ int main() printf("ImCreate_test_gpuipc2 is sending update\n"); ImageStreamIO_sempost(&imarray[0], -1); - imarray[0].md[0].write = 0; // Done writing data - imarray[0].md[0].cnt0++; + SHMIM_WRITE_RELEASE(&imarray[0].md[0]); // Done writing data + SHMIM_CNT0_INCREMENT(&imarray[0].md[0]); imarray[0].md[0].cnt1++; free(imarray); diff --git a/ImageStreamIO.c b/ImageStreamIO.c index b425e32..c1b4ad5 100644 --- a/ImageStreamIO.c +++ b/ImageStreamIO.c @@ -1592,7 +1592,7 @@ __attribute__((cold)) errno_t ImageStreamIO_createIm_gpu( clock_gettime(CLOCK_ISIO, &image->md->lastaccesstime); clock_gettime(CLOCK_ISIO, &image->md->creationtime); - image->md->write = 0; + SHMIM_WRITE_RELEASE(image->md); image->md->cnt0 = 0; image->md->cnt1 = 0; @@ -2353,8 +2353,8 @@ long ImageStreamIO_UpdateIm_atime( IMAGE *image, image->md->atime = *atime; } - image->md->cnt0++; - image->md->write = 0; + SHMIM_CNT0_INCREMENT(image->md); + SHMIM_WRITE_RELEASE(image->md); #ifdef IMAGESTRUCT_WRITEHISTORY // Update image write history @@ -2414,12 +2414,12 @@ long ImageStreamIO_UpdateIm( IMAGE *image ) **/ long ImageStreamIO_BusywaitForNoWrite(IMAGE *image, int acquire) { - while(image->md->write) + while(SHMIM_WRITE_LOAD(image->md)) { } if(acquire) { - image->md->write = 1; + SHMIM_WRITE_ACQUIRE(image->md); } return IMAGESTREAMIO_SUCCESS; } diff --git a/ImageStreamIO_config.h.in b/ImageStreamIO_config.h.in index b664a10..47e32f1 100644 --- a/ImageStreamIO_config.h.in +++ b/ImageStreamIO_config.h.in @@ -1,6 +1,11 @@ +#undef PROJECT_NAME #define PROJECT_NAME "@PROJECT_NAME@" +#undef VERSION_MAJOR #define VERSION_MAJOR @VERSION_MAJOR@ +#undef VERSION_MINOR #define VERSION_MINOR @VERSION_MINOR@ +#undef VERSION_PATCH #define VERSION_PATCH @VERSION_PATCH@ +#undef VERSION_OPTION #define VERSION_OPTION "@VERSION_OPTION@" #define IMAGESTRUCT_VERSION "@VERSION_MAJOR@.@VERSION_MINOR@" diff --git a/ImageStruct.h b/ImageStruct.h index 620cc6b..06de0b5 100644 --- a/ImageStruct.h +++ b/ImageStruct.h @@ -345,7 +345,18 @@ typedef struct uint64_t cnt1; /**< in 3D rolling buffer image, this is the last slice written */ uint64_t cnt2; /**< in cnt2-based syncronization, proceed until cnt0=cnt2 */ - uint8_t write; /**< 1 if image is being written */ + /** + * Cross-process write mutex flag. + * + * Writers set to 1 before copying data, then to 0 + * after. Readers spin on it to avoid partial frames. + * + * volatile prevents the compiler from caching the + * value in a register (critical for spin loops). + * Use SHMIM_WRITE_* macros for proper CPU memory + * fences via atomic builtins. + */ + volatile uint8_t write; uint16_t NBkw; /**< number of keywords (max: 65536) */ @@ -563,6 +574,83 @@ typedef struct /**< structure used to store data arrays */ } IMAGE; +/* + * ========================================================= + * Atomic accessors for IMAGE_METADATA fields + * ========================================================= + * + * These macros provide proper CPU memory fences for + * cross-process synchronization of shared memory fields. + * + * On x86-64 (TSO), release/acquire compile to plain + * mov instructions plus a compiler barrier — zero + * runtime overhead. On ARM/other weak-memory + * architectures, appropriate dmb fences are emitted. + * + * Usage: + * SHMIM_WRITE_ACQUIRE(md) — set write=1 + * SHMIM_WRITE_RELEASE(md) — set write=0 + * SHMIM_WRITE_LOAD(md) — read write flag + * SHMIM_CNT0_INCREMENT(md) — atomically cnt0++ + * SHMIM_CNT0_LOAD(md) — read cnt0 + */ + +#if defined(__STDC_VERSION__) \ + && __STDC_VERSION__ >= 201112L \ + && !defined(__STDC_NO_ATOMICS__) +#include + +#define SHMIM_WRITE_ACQUIRE(md) \ + atomic_store_explicit( \ + (_Atomic uint8_t *)&(md)->write, \ + 1, memory_order_release) + +#define SHMIM_WRITE_RELEASE(md) \ + atomic_store_explicit( \ + (_Atomic uint8_t *)&(md)->write, \ + 0, memory_order_release) + +#define SHMIM_WRITE_LOAD(md) \ + atomic_load_explicit( \ + (_Atomic uint8_t *)&(md)->write, \ + memory_order_acquire) + +#define SHMIM_CNT0_INCREMENT(md) \ + atomic_fetch_add_explicit( \ + (_Atomic uint64_t *)&(md)->cnt0, \ + 1, memory_order_release) + +#define SHMIM_CNT0_LOAD(md) \ + atomic_load_explicit( \ + (_Atomic uint64_t *)&(md)->cnt0, \ + memory_order_acquire) + +#else +/* GCC/Clang __atomic builtins fallback */ + +#define SHMIM_WRITE_ACQUIRE(md) \ + __atomic_store_n( \ + &(md)->write, 1, __ATOMIC_RELEASE) + +#define SHMIM_WRITE_RELEASE(md) \ + __atomic_store_n( \ + &(md)->write, 0, __ATOMIC_RELEASE) + +#define SHMIM_WRITE_LOAD(md) \ + __atomic_load_n( \ + &(md)->write, __ATOMIC_ACQUIRE) + +#define SHMIM_CNT0_INCREMENT(md) \ + __atomic_add_fetch( \ + &(md)->cnt0, 1, __ATOMIC_RELEASE) + +#define SHMIM_CNT0_LOAD(md) \ + __atomic_load_n( \ + &(md)->cnt0, __ATOMIC_ACQUIRE) + +#endif + + #ifdef __cplusplus } // extern "C" #endif