diff --git a/Makefile b/Makefile
index 48a22f3..c6c01eb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,55 +1,86 @@
-CILK = /opt/intel/composer_xe_2013.5.198/compiler
-INCADD = -I$(CILK)/include -I$(CILK)/examples/include
-LIBADD = -L$(CILK)/lib/intel64
+# Build knobs; override on the command line, e.g. `make CILK=1 OPT=2`.
+#   CILK=1      add -fopencilk and pass -DCILK=1 (CXX must accept -fopencilk)
+#   NATIVE=1    add -march=native
+#   OPT=<n>     optimization level, passed as -O<n>
+#   SANITIZE=1  add -fsanitize=undefined,address (plus cilk when CILK=1)
+CILK?=0
+NATIVE?=1
+OPT?=3
+SANITIZE?=0
+
+CFLAGS := -Wall -Wextra -O$(OPT) -g  -std=c++20 -gdwarf-4 -fno-exceptions -Wno-unknown-pragmas -Wno-comment
+
+ifeq ($(NATIVE),1)
+CFLAGS += -march=native
+endif
+
+ifeq ($(CILK),1)
+CFLAGS += -fopencilk
+endif
+
+ifeq ($(SANITIZE),1)
+ifeq ($(CILK),1)
+CFLAGS += -fsanitize=cilk,undefined,address -fno-omit-frame-pointer
+else
+CFLAGS += -fsanitize=undefined,address -fno-omit-frame-pointer
+endif
+endif
+
+DEFINES := -DCILK=$(CILK)
+
+# These targets name commands, not files: none of them creates a file called like the target.
+.PHONY: all clean spmm_dall spmm_a spmm_sall
+
+all: parspmv both_d spmm_dall spmm_a spmm_sall
 
-GCCOPT = -O2 -fno-rtti -fno-exceptions # -ftree-vectorize
-INTELOPT = -O2 -no-ipo -fno-rtti -fno-exceptions -parallel -restrict -std=c++11 -xAVX -no-prec-div #-fno-inline-functions
-DEB = -g -DNOBM -O0 -parallel -restrict -std=c++11 
 
 seqsym: sym_spmv_test.cpp csbsym.cpp csbsym.h utility.h friends.h SSEspmv.o
-	icpc -cilk-serialize $(INCADD) $(INTELOPT) -o seqsym sym_spmv_test.cpp SSEspmv.o
+	$(CXX) $(CFLAGS) $(DEFINES) -o $@ $< SSEspmv.o
 
 parsym: sym_spmv_test.cpp csbsym.cpp csbsym.h utility.h friends.h SSEspmv.o
-	icpc $(INCADD) $(DEB) -o parsym sym_spmv_test.cpp SSEspmv.o 
+	$(CXX) $(CFLAGS) $(DEFINES) -o $@ $< SSEspmv.o
 
 symanal: sym_spmv_test.cpp csbsym.cpp csbsym.h utility.h friends.h SSEspmv.o
-	icpc -DSTATS $(INCADD) $(INTELOPT) -o symanal sym_spmv_test.cpp SSEspmv.o -lcilkutil
+	$(CXX) $(CFLAGS) $(DEFINES) -o $@ $< SSEspmv.o
 
 seqspmv: csb_spmv_test.cpp bicsb.cpp bicsb.h bmcsb.cpp bmcsb.h friends.h utility.h SSEspmv.o
-	icpc -cilk-serialize $(INCADD) $(INTELOPT) -o seqspmv csb_spmv_test.cpp SSEspmv.o
+	$(CXX) $(CFLAGS) $(DEFINES) -o $@ $< SSEspmv.o
 
 parspmv: csb_spmv_test.cpp bicsb.cpp bicsb.h bmcsb.cpp bmcsb.h friends.h utility.h SSEspmv.o 
-	icpc $(INCADD) $(INTELOPT) -o parspmv csb_spmv_test.cpp SSEspmv.o
+	$(CXX) $(CFLAGS) $(DEFINES) -o $@ $< SSEspmv.o
 
 parspmv_nobm: csb_spmv_test.cpp bicsb.cpp bicsb.h friends.h utility.h
-	icpc $(INCADD) $(INTELOPT) -DNOBM -o parspmv_nobm csb_spmv_test.cpp
+	$(CXX) $(CFLAGS) $(DEFINES) -DNOBM -o $@ $<
 
 parspmvt: csb_spmvt_test.cpp bicsb.cpp bicsb.h utility.h friends.h
-	icpc $(INCADD) $(INTELOPT) -o parspmvt csb_spmvt_test.cpp
+	$(CXX) $(CFLAGS) $(DEFINES) -o $@ $<
 
 both_d:	both_test.cpp bicsb.cpp bicsb.h utility.h friends.h
-	icpc $(INCADD) $(INTELOPT) -o both_d both_test.cpp
+	$(CXX) $(CFLAGS) $(DEFINES) -o $@ $<
 
 both_s:	both_test.cpp bicsb.cpp bicsb.h utility.h friends.h
-	icpc $(INCADD) $(INTELOPT) -DSINGLEPRECISION -o both_s both_test.cpp
+	$(CXX) $(CFLAGS) $(DEFINES) -DSINGLEPRECISION -o $@ $<
 
+# Builds one spmm_d<k> binary per RHS width; `|| exit 1` stops at the first failed compile.
 spmm_dall:	spmm_test.cpp bicsb.cpp bicsb.h utility.h friends.h
 	for number in 4 8 12 16 24 32 40 48 56 64; do \
-		echo "icpc $(INCADD) $(INTELOPT) -DRHSDIM=$$number -o spmm_d$$number spmm_test.cpp"; \
-		icpc $(INCADD) $(INTELOPT) -DRHSDIM=$$number -o spmm_d$$number spmm_test.cpp; \
+		echo "$(CXX) $(CFLAGS) $(DEFINES) -DRHSDIM=$$number -o spmm_d$$number $<"; \
+		$(CXX) $(CFLAGS) $(DEFINES) -DRHSDIM=$$number -o spmm_d$$number $< || exit 1; \
 	done;
 
+# Emits an assembly listing for spmm_test.cpp (-S); the icpc-only -fcode-asm/-vec_report6 flags are gone.
 spmm_a:	spmm_test.cpp bicsb.cpp bicsb.h utility.h friends.h
-	icpc $(INCADD) $(INTELOPT) -DSINGLEPRECISION -S -fcode-asm -vec_report6 spmm_test.cpp
+	$(CXX) $(CFLAGS) $(DEFINES) -DSINGLEPRECISION -S -fverbose-asm $<
 
+# Single-precision counterpart of spmm_dall (adds -DSINGLEPRECISION, produces spmm_s<k>).
 spmm_sall:	spmm_test.cpp bicsb.cpp bicsb.h utility.h friends.h
 	for number in 4 8 12 16 24 32 40 48 56 64; do \
-		echo "icpc $(INCADD) $(INTELOPT) -DSINGLEPRECISION -DRHSDIM=$$number -o spmm_s$$number spmm_test.cpp"; \
-		icpc $(INCADD) $(INTELOPT) -DSINGLEPRECISION -DRHSDIM=$$number -o spmm_s$$number spmm_test.cpp; \
+		echo "$(CXX) $(CFLAGS) $(DEFINES) -DSINGLEPRECISION -DRHSDIM=$$number -o spmm_s$$number $<"; \
+		$(CXX) $(CFLAGS) $(DEFINES) -DSINGLEPRECISION -DRHSDIM=$$number -o spmm_s$$number $< || exit 1; \
 	done;
 
 SSEspmv.o: SSEspmv.cpp
-	g++ -DAMD $(GCCOPT) -march=amdfam10 -c SSEspmv.cpp	
+	$(CXX) $(CFLAGS) $(DEFINES) -c $< -o $@
 
 clean:	
 	rm -f seqspmv
diff --git a/Semirings.h b/Semirings.h
index 1deca9c..62563d1 100644
--- a/Semirings.h
+++ b/Semirings.h
@@ -6,6 +6,7 @@
 #include <climits>
 #include <cmath>
 #include <tr1/array>
+#include <memory>
 #include "promote.h"
 
 template <typename T>
@@ -60,7 +61,7 @@ struct UnrollerL {
 template<int End, int Step>
 struct UnrollerL<End, End, Step> {
     template<typename Lambda>
-    static void step(Lambda& func) {
+    [[maybe_unused]] static void step(Lambda& /* func */) {
 		// base case is when Begin=End; do nothing
     }
 };
@@ -75,13 +76,13 @@ struct PTSRArray
 	// y <- a*x + y overload with a=1
 	static void axpy(const array<T2, D> & b, array<T_promote, D> & c)
 	{
+		// Alignment hint intentionally off; the C++20 form would be std::assume_aligned<ALIGN>(b.data()) and
+		// std::assume_aligned<ALIGN>(c.data()). NOTE(review): confirm both arrays really are ALIGN-aligned before enabling.
 		const T2 * __restrict barr =  b.data();
 		T_promote * __restrict carr = c.data();
-		__assume_aligned(barr, ALIGN);
-		__assume_aligned(carr, ALIGN);
 
 		#pragma simd
-		for(int i=0; i<D; ++i)
+		for(unsigned int i=0; i<D; ++i)
 		{
 			carr[i] +=  barr[i];
 		}
@@ -92,13 +93,13 @@ struct PTSRArray
 	// Todo: Do partial unrolling; this code will bloat for D > 32 
 	static void axpy(T1 a, const array<T2,D> & b, array<T_promote,D> & c)
 	{
+		// Alignment hint intentionally off; the C++20 form would be std::assume_aligned<ALIGN>(b.data()) and
+		// std::assume_aligned<ALIGN>(c.data()). NOTE(review): confirm both arrays really are ALIGN-aligned before enabling.
 		const T2 * __restrict barr =  b.data();
 		T_promote * __restrict carr = c.data();
-		__assume_aligned(barr, ALIGN);
-		__assume_aligned(carr, ALIGN);
 
 		#pragma simd
-		for(int i=0; i<D; ++i)
+		for(unsigned int i=0; i<D; ++i)
 		{
 			carr[i] +=  a* barr[i];
 		}	
diff --git a/aligned.h b/aligned.h
index bcf1ae6..27bda4a 100644
--- a/aligned.h
+++ b/aligned.h
@@ -2,6 +2,7 @@
 #include <malloc.h>
 #endif
 #include <cstdint>
+#include <cstdlib>
 #include <vector>
 #include <iostream>
 using namespace std;
@@ -71,7 +72,7 @@ class aligned_allocator
 		// Returns true if and only if storage allocated from *this
 		// can be deallocated from other, and vice versa.
 		// Always returns true for stateless allocators.
-		bool operator==(const aligned_allocator& other) const
+		bool operator==(const aligned_allocator& /* other */) const
 		{
 			return true;
 		}
@@ -110,7 +111,7 @@ class aligned_allocator
 			}
  
 			// Mallocator wraps malloc().
-			void * const pv = _mm_malloc(n * sizeof(T), Alignment);
+			void * const pv = std::aligned_alloc(Alignment, (n * sizeof(T) + Alignment - 1) / Alignment * Alignment);	// byte count rounded up: std::aligned_alloc requires a size that is a multiple of Alignment
  
 			// Allocators should throw std::bad_alloc in the case of memory allocation failure.
 			if (pv == NULL)
@@ -121,9 +122,9 @@ class aligned_allocator
 			return static_cast<T *>(pv);
 		}
  
-		void deallocate(T * const p, const std::size_t n) const
+		void deallocate(T * const p, const std::size_t /* n */) const
 		{
-			_mm_free(p);
+			std::free(p);
 		}
  
  
diff --git a/bicsb.cpp b/bicsb.cpp
index 974b50e..6542cd9 100644
--- a/bicsb.cpp
+++ b/bicsb.cpp
@@ -20,8 +20,8 @@ void BiCsb<NT, IT>::Init(int workers, IT forcelogbeta)
 	bool sizereq;
 	if (ispar)
 	{
-		sizereq = ((IntPower<2>(rowbits) > SLACKNESS * workers) 
-			&& (IntPower<2>(colbits) > SLACKNESS * workers));
+		sizereq = ((IntPower<2>(rowbits) > static_cast<unsigned int>(SLACKNESS) * workers)
+			&& (IntPower<2>(colbits) > static_cast<unsigned int>(SLACKNESS) * workers));
 	}
 	else
 	{
@@ -43,7 +43,7 @@ void BiCsb<NT, IT>::Init(int workers, IT forcelogbeta)
 	colhighbits = colbits-collowbits;	// # higher order bits for cols (has at least one bit)
 	if(ispar)
 	{
-		while(IntPower<2>(rowhighbits) < SLACKNESS * workers)
+		while(IntPower<2>(rowhighbits) < static_cast<unsigned int>(SLACKNESS) * workers)
 		{
 			rowhighbits++;
 			rowlowbits--;
@@ -869,8 +869,8 @@ void BiCsb<NT, IT>::SubSpMV(IT * __restrict btop, IT bstart, IT bend, const RHS
 	IT * __restrict r_bot = bot;
 	NT * __restrict r_num = num;
 
-	__m128i lcms = _mm_set1_epi32 (lowcolmask);
-	__m128i lrms = _mm_set1_epi32 (lowrowmask);
+	[[maybe_unused]] __m128i lcms = _mm_set1_epi32 (lowcolmask);	// lowcolmask broadcast to every 32-bit lane
+	[[maybe_unused]] __m128i lrms = _mm_set1_epi32 (lowrowmask);	// lowrowmask broadcast to every 32-bit lane
 
 	for (IT j = bstart ; j < bend ; ++j)		// for all blocks inside that block row
 	{
@@ -1350,8 +1350,9 @@ ofstream & BiCsb<NT, IT>::PrintStats(ofstream & outfile) const
 	outfile << "## Number of real blocks is "<< ntop << endl;
 	outfile << "## Row imbalance is " << RowImbalance(*this) << endl;
 	outfile << "## Col imbalance is " << ColImbalance(*this) << endl;
+	#ifdef STATS	// NOTE(review): blockparcalls presumably exists only in STATS builds — confirm
 	outfile << "## Block parallel calls is " << blockparcalls.get_value() << endl;
-	
+	#endif // STATS
 	std::vector<int> blocksizes(ntop);
 	for(IT i=0; i<nbr; ++i)
 	{
diff --git a/bmcsb.h b/bmcsb.h
index 351e790..71a8f30 100644
--- a/bmcsb.h
+++ b/bmcsb.h
@@ -31,6 +31,7 @@ class BmCsb
 	ofstream & PrintStats(ofstream & outfile) const;
 	IT colsize() const { return n;} 
 	IT rowsize() const { return m;} 
+	IT numnonzeros() const { return nz; }	// read-only accessor for the nz member
 	IT numregb() const { return nrb;}
 	bool isPar() const { return ispar; }
 	bool isPar() const { return ispar; }
 
@@ -66,12 +67,12 @@ class BmCsb
 	
 	IT rowlowbits;	// # lower order bits for rows
 	IT rowhighbits;
-	IT highrowmask; // mask with the first log(m)/2 bits = 1 and the other bits = 0  
+	IT highrowmask; // mask with the first log(m)/2 bits = 1 and the other bits = 0  
 	IT lowrowmask;
 
 	IT collowbits;	// # lower order bits for columns
 	IT colhighbits;
-	IT highcolmask; // mask with the first log(n)/2 bits = 1 and the other bits = 0  
+	IT highcolmask; // mask with the first log(n)/2 bits = 1 and the other bits = 0  
 	IT lowcolmask;
 
 	MortonCompare<IT> mortoncmp;	// comparison operator w.r.t. the (inverted N)-morton layout
diff --git a/both_test.cpp b/both_test.cpp
index 96de200..057dc00 100644
--- a/both_test.cpp
+++ b/both_test.cpp
@@ -31,7 +31,7 @@ using namespace std;
 
 int main(int argc, char* argv[])
 {
-#ifndef CILK_STUB
+#if CILK==1	// CILK is supplied by the build (-DCILK=...); when it is undefined, #if treats it as 0
 	int gl_nworkers = __cilkrts_get_nworkers();
 #else
 	int gl_nworkers = 0;
diff --git a/csb_spmv_test.cpp b/csb_spmv_test.cpp
index d73792f..4677dd5 100644
--- a/csb_spmv_test.cpp
+++ b/csb_spmv_test.cpp
@@ -12,6 +12,11 @@
 #include "cilk_util.h"
 #include "utility.h"
 
+#ifndef RHSDIM	// default value; a -DRHSDIM=<k> on the compiler command line takes precedence
+	#define RHSDIM 16
+#endif
+#define ALIGN 32	// NOTE(review): unguarded, unlike RHSDIM — a -DALIGN=<n> on the command line would be redefined here
+
 #include "triple.h"
 #include "csc.h"
 #include "bicsb.h"
@@ -32,7 +37,7 @@ using namespace std;
 
 int main(int argc, char* argv[])
 {
-#ifndef CILK_STUB
+#if CILK==1	// CILK is supplied by the build (-DCILK=...); when it is undefined, #if treats it as 0
 	int gl_nworkers = __cilkrts_get_nworkers();
 #else
 	int gl_nworkers = 0;
diff --git a/csb_spmvt_test.cpp b/csb_spmvt_test.cpp
index 8cd4b36..fc9d348 100644
--- a/csb_spmvt_test.cpp
+++ b/csb_spmvt_test.cpp
@@ -30,7 +30,7 @@ INDEXTYPE flops;
 
 int main(int argc, char* argv[])
 {
-#ifndef	CILK_STUB
+#if CILK==1	// CILK is supplied by the build (-DCILK=...); when it is undefined, #if treats it as 0
 	int gl_nworkers = __cilkrts_get_nworkers();
 #else
 	int gl_nworkers = 0;
diff --git a/csc.cpp b/csc.cpp
index b2cd265..878f05b 100644
--- a/csc.cpp
+++ b/csc.cpp
@@ -98,7 +98,7 @@ Csc<T,ITYPE>::~Csc()
 // (a) triples only contain the upper triangular part, or (b) the whole matrix
 template <class T, class ITYPE>
 Csc<T,ITYPE>::Csc(Triple<T, ITYPE> * triples, ITYPE size, ITYPE rows, ITYPE cols, bool isSym)
-:nz(size),m(rows),n(cols),issym(isSym)
+	: issym(isSym), nz(size), m(rows), n(cols)
 {
 	// Constructing empty Csc objects (size = 0) are not allowed.
 	assert(size != 0 && n != 0);
@@ -174,7 +174,7 @@ Csc<T,ITYPE>::Csc(Triple<T, ITYPE> * triples, ITYPE size, ITYPE rows, ITYPE cols
 // Construct a Csc object from parallel arrays
 template <class T, class ITYPE>
 Csc<T,ITYPE>::Csc(ITYPE * ri, ITYPE * ci, T * val, ITYPE size, ITYPE rows, ITYPE cols, bool isSym)
-:nz(size),m(rows),n(cols),issym(isSym)
+	: issym(isSym), nz(size), m(rows), n(cols)
 {
 	// Constructing empty Csc objects (size = 0) are not allowed.
 	assert(size != 0 && n != 0);
diff --git a/csc.h b/csc.h
index bce605d..5452afe 100644
--- a/csc.h
+++ b/csc.h
@@ -15,7 +15,7 @@ template <class T, class ITYPE>
 class Csc
 {
 public:
-	Csc ():nz(0), m(0), n(0), logicalnz(0), issym(false) {}				// default constructor
+	Csc () : issym(false), logicalnz(0), nz(0), m(0), n(0) { }	// default constructor: empty, non-symmetric matrix
 	Csc (ITYPE size,ITYPE rows, ITYPE cols, bool isSym=false);
 	Csc (const Csc<T, ITYPE> & rhs);		// copy constructor
 	~Csc();
diff --git a/friends.h b/friends.h
index 4bdbe6a..1d99710 100644
--- a/friends.h
+++ b/friends.h
@@ -35,7 +35,7 @@ void bmcsb_gespmv (const BmCsb<NT, IT, TTDIM> & A, const NT * __restrict x, NT *
 	double t0 = timer_seconds_since_init();
 	
 	unsigned * scansum = new unsigned[A.nrb];
-	unsigned sum = prescan(scansum, A.masks, A.nrb);
+	[[maybe_unused]] unsigned sum = prescan(scansum, A.masks, A.nrb);	// prescan fills scansum; NOTE(review): nothing visible here reads sum — confirm it is still needed
 	
 	double t1 = timer_seconds_since_init();
 	prescantime += (t1-t0);
@@ -128,14 +128,15 @@ void bicsb_gespmv (const BiCsb<NT, IT> & A, const RHS * __restrict x, LHS * __re
 				IT thsh = BREAKEVEN * ysize;
 				vector<IT*> chunks;
 				chunks.push_back(btop);
-				for(IT j =0; j < A.nbc; )
+				for(IT j =0; j+1 < A.nbc; )	// written as j+1 < nbc rather than j < nbc-1 so an unsigned IT cannot wrap when nbc is 0
 				{
 					IT count = btop[j+1] - btop[j];
 					if(count < thsh && j < A.nbc)
 					{
-						while(count < thsh && j < A.nbc)
+						while(count < thsh && j+1 < A.nbc)	// keeps the btop[j+1] read below within the first nbc+1 entries
 						{
-							count += btop[(++j)+1] - btop[j]; 
+							j+=1;	// advance first so both btop reads below use the same, already-incremented j
+							count += btop[j+1] - btop[j];
 						}
 						chunks.push_back(btop+j);	// push, but exclude the block that caused the overflow
 					}
diff --git a/spmm_test.cpp b/spmm_test.cpp
index 75f1a31..a4f847d 100644
--- a/spmm_test.cpp
+++ b/spmm_test.cpp
@@ -105,12 +105,12 @@ void VerifyMM (vector< array<NT,DIM>, ALLOC > & control, vector< array<NT,DIM>,
 
 int main(int argc, char* argv[])
 {
-#ifndef CILK_STUB
+#if CILK==1	// CILK is supplied by the build (-DCILK=...); when it is undefined, #if treats it as 0
 	int gl_nworkers = __cilkrts_get_nworkers();
 #else
 	int gl_nworkers = 0;
 #endif
-	bool syminput = false;
+	[[maybe_unused]] bool syminput = false;	// NOTE(review): apparently never read afterwards — confirm before removing
 	bool binary = false;
 	bool iscsc = false;
 	INDEXTYPE m = 0, n = 0, nnz = 0, forcelogbeta = 0;
diff --git a/spvec.cpp b/spvec.cpp
index 17f3bc4..1b2519b 100644
--- a/spvec.cpp
+++ b/spvec.cpp
@@ -141,7 +141,7 @@ void Spvec<T,ITYPE>::fillzero()
 }
 
 template <typename NT, typename IT>
-void Verify(Spvec<NT, IT> & control, Spvec<NT, IT> & test, string name, IT m)
+void Verify(Spvec<NT, IT> & control, Spvec<NT, IT> & test, [[maybe_unused]] string name, IT m)	// NOTE(review): name looks unused in the visible body — confirm
 {
     vector<NT>error(m);
     std::transform(&control[0], (&control[0])+m, &test[0], error.begin(), absdiff<NT>());
diff --git a/sym_spmv_test.cpp b/sym_spmv_test.cpp
index 5e6dfb6..4428f06 100644
--- a/sym_spmv_test.cpp
+++ b/sym_spmv_test.cpp
@@ -10,7 +10,11 @@
 
 #include "utility"
 #include "timer.gettimeofday.c"
-#include "cilk_util.h"
+
+#ifndef RHSDIM	// default value; a -DRHSDIM=<k> on the compiler command line takes precedence
+	#define RHSDIM 16
+#endif
+#define ALIGN 32	// NOTE(review): unguarded, unlike RHSDIM — a -DALIGN=<n> on the command line would be redefined here
 
 #include "triple.h"
 #include "csc.h"
@@ -33,7 +37,7 @@ using namespace std;
 
 int main(int argc, char* argv[])
 {
-#ifndef CILK_STUB
+#if CILK==1	// CILK is supplied by the build (-DCILK=...); when it is undefined, #if treats it as 0
 	int gl_nworkers = WORKERS;
 #else
 	int gl_nworkers = 0;
@@ -110,7 +114,7 @@ int main(int argc, char* argv[])
 			return 1;		
 		}
 
-		long tstart = cilk_get_time();	// start timer
+		long tstart = get_time();	// start timer; NOTE(review): the report below divides by 1000 to print seconds, so get_time() is expected to yield milliseconds — confirm
 		cout << "Reading matrix with dimensions: "<< m << "-by-" << n <<" having "<< nnz << " nonzeros" << endl;
 		
 		INDEXTYPE * rowindices = new INDEXTYPE[nnz];
@@ -127,7 +131,7 @@ int main(int argc, char* argv[])
 			return -1;
 		}
 
-		long tend = cilk_get_time();	// end timer	
+		long tend = get_time();	// end timer (same clock and unit as tstart)
 		cout<< "Reading matrix in binary took " << ((VALUETYPE) (tend-tstart)) /1000 << " seconds" <<endl;
 		fclose(f);
 		
@@ -152,7 +156,7 @@ int main(int argc, char* argv[])
 		infile.unget();
 		infile >> m >> n >> nnz;	// #{rows}-#{cols}-#{nonzeros}
 
-		long tstart = cilk_get_time();	// start timer	
+		long tstart = get_time();	// start timer; NOTE(review): the report below divides by 1000 to print seconds, so get_time() is expected to yield milliseconds — confirm
 		Triple<VALUETYPE, INDEXTYPE> * triples = new Triple<VALUETYPE, INDEXTYPE>[nnz];
 	
 		if (infile.is_open())
@@ -167,7 +171,7 @@ int main(int argc, char* argv[])
 			}
 			assert(cnz == nnz);	
 		}
-		long tend = cilk_get_time();	// end timer	
+		long tend = get_time();	// end timer (same clock and unit as tstart)
 		cout<< "Reading matrix in ascii took " << ((double) (tend-tstart)) /1000 << " seconds" <<endl;
 	
 		cout << "converting to csc ... " << endl;
diff --git a/utility.h b/utility.h
index 8bf02a5..5bb2533 100644
--- a/utility.h
+++ b/utility.h
@@ -16,12 +16,21 @@
 
 using namespace std;
 
+#if CILK==1	// Cilk build: pull in the Cilk headers and map the helper macros onto the runtime
 #include <cilk/cilk_api.h>
 #include <cilk/cilk.h>
 #define SYNCHED __cilkrts_synched()
 #define DETECT __cilkscreen_enable_checking()
 #define ENDDETECT __cilkscreen_disable_checking()
 #define WORKERS __cilkrts_get_nworkers()
+#else	// serial build: the Cilk keywords collapse to plain C++; NOTE(review): DETECT and ENDDETECT are not defined in this branch
+#define cilk_for for
+#define cilk_sync
+#define cilk_spawn 
+#define SYNCHED (true)
+#define WORKERS (1)
+#endif	// CILK==1
+
 
 #ifdef BWTEST
 	#define UNROLL 100
@@ -29,7 +38,7 @@ using namespace std;
 	#define UNROLL 1
 #endif
 
-#ifndef CILK_STUB
+#if CILK==1	// CILK is supplied by the build (-DCILK=...); when it is undefined, #if treats it as 0
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -41,16 +50,15 @@ extern "C" {
  * full frame to determine this.
  */
 
-CILK_EXPORT __CILKRTS_NOTHROW
-int __cilkrts_synched(void);
+#define __cilkrts_synched() (0)	/* stub replacing the runtime declaration: always evaluates to 0 */
 
 #ifdef __cplusplus
 } // extern "C"
 #endif
-#else /* CILK_STUB */
+#else /* CILK != 1 */
 /* Stubs for the api functions */
 #define __cilkrts_synched() (1)
-#endif /* CILK_STUB */
+#endif /* CILK==1 */
 
 #ifdef STATS
 	#include <cilk/reducer_opadd.h>
@@ -96,7 +104,7 @@ const unsigned char masktable4[4] = { 0x08, 0x04, 0x02, 0x01 };	// mask for 2x2
 
 
 template <typename MTYPE>
-MTYPE GetMaskTable(unsigned int index)
+MTYPE GetMaskTable(unsigned int /* index */)
 {
 	return 0;
 } 
@@ -503,5 +511,11 @@ inline unsigned int getDivident(unsigned int n, unsigned int d)
 	return n;
 }
 
+[[maybe_unused]] static long get_time() {	// wall-clock time in milliseconds; callers divide differences by 1000 to report seconds
+  struct timeval st;
+  gettimeofday(&st, NULL);
+  return st.tv_sec * 1000 + st.tv_usec / 1000;	// seconds -> ms plus microseconds -> ms
+}
+
 #endif