diff --git a/platforms/artic/runtime.impala b/platforms/artic/runtime.impala
index a0086adf..7a31755d 100644
--- a/platforms/artic/runtime.impala
+++ b/platforms/artic/runtime.impala
@@ -31,6 +31,13 @@
 #[import(cc = "C", name = "anydsl_print_string")] fn print_string(_: &[u8]) -> ();
 #[import(cc = "C", name = "anydsl_print_flush")] fn print_flush() -> ();
 
+#[import(cc = "C", name = "anydsl_create_event")] fn runtime_create_event(_device: i32) -> u64;
+#[import(cc = "C", name = "anydsl_destroy_event")] fn runtime_destroy_event(_device: i32, _event: u64) -> ();
+#[import(cc = "C", name = "anydsl_record_event")] fn runtime_record_event(_device: i32, _event: u64) -> ();
+#[import(cc = "C", name = "anydsl_check_event")] fn runtime_check_event(_device: i32, _event: u64) -> bool;
+#[import(cc = "C", name = "anydsl_query_us_event")] fn runtime_query_us_event(_device: i32, _event_start: u64, _event_end: u64) -> u64;
+#[import(cc = "C", name = "anydsl_sync_event")] fn runtime_sync_event(_device: i32, _event: u64) -> ();
+
 // TODO
 //struct Buffer[T] {
 //    data : &mut [T],
diff --git a/platforms/impala/runtime.impala b/platforms/impala/runtime.impala
index b3b9aad4..dcd5580d 100644
--- a/platforms/impala/runtime.impala
+++ b/platforms/impala/runtime.impala
@@ -31,6 +31,13 @@ extern "C" {
     fn "anydsl_print_char" print_char(u8) -> ();
     fn "anydsl_print_string" print_string(&[u8]) -> ();
     fn "anydsl_print_flush" print_flush() -> ();
+
+    fn "anydsl_create_event" runtime_create_event(_device: i32) -> u64;
+    fn "anydsl_destroy_event" runtime_destroy_event(_device: i32, _event: u64) -> ();
+    fn "anydsl_record_event" runtime_record_event(_device: i32, _event: u64) -> ();
+    fn "anydsl_check_event" runtime_check_event(_device: i32, _event: u64) -> bool;
+    fn "anydsl_query_us_event" runtime_query_us_event(_device: i32, _event_start: u64, _event_end: u64) -> u64;
+    fn "anydsl_sync_event" runtime_sync_event(_device: i32, _event: u64) -> ();
 }
 
 struct Buffer {
diff --git a/src/anydsl_runtime.cpp b/src/anydsl_runtime.cpp
index 42b392c1..386e09c7 100644
--- a/src/anydsl_runtime.cpp
+++ b/src/anydsl_runtime.cpp
@@ -209,6 +209,33 @@ uint64_t anydsl_random_val_u64() {
     return std_dist_u64(std_gen);
 }
 
+// Event stuff
+//----------------------------------------------
+
+anydsl_event_t anydsl_create_event(int32_t mask) {
+    return runtime().create_event(to_platform(mask), to_device(mask));
+}
+
+void anydsl_destroy_event(int32_t mask, anydsl_event_t event) {
+    runtime().destroy_event(to_platform(mask), to_device(mask), event);
+}
+
+void anydsl_record_event(int32_t mask, anydsl_event_t event) {
+    runtime().record_event(to_platform(mask), to_device(mask), event);
+}
+
+bool anydsl_check_event(int32_t mask, anydsl_event_t event) {
+    return runtime().check_event(to_platform(mask), to_device(mask), event);
+}
+
+uint64_t anydsl_query_us_event(int32_t mask, anydsl_event_t event_start, anydsl_event_t event_end) {
+    return runtime().query_us_event(to_platform(mask), to_device(mask), event_start, event_end);
+}
+
+void anydsl_sync_event(int32_t mask, anydsl_event_t event) {
+    runtime().sync_event(to_platform(mask), to_device(mask), event);
+}
+
 #ifndef AnyDSL_runtime_HAS_TBB_SUPPORT // C++11 threads version
 static std::unordered_map thread_pool;
 static std::vector free_ids;
diff --git a/src/anydsl_runtime.h b/src/anydsl_runtime.h
index 901626f1..27a0413d 100644
--- a/src/anydsl_runtime.h
+++ b/src/anydsl_runtime.h
@@ -84,6 +84,20 @@ AnyDSL_runtime_API int32_t anydsl_create_task(int32_t, Closure);
 AnyDSL_runtime_API void anydsl_create_edge(int32_t, int32_t);
 AnyDSL_runtime_API void anydsl_execute_graph(int32_t, int32_t);
 
+typedef uint64_t anydsl_event_t;
+/// Create event for device. Will return id of event
+AnyDSL_runtime_API anydsl_event_t anydsl_create_event(int32_t);
+/// Destroy event
+AnyDSL_runtime_API void anydsl_destroy_event(int32_t, anydsl_event_t);
+/// Record the event for the device
+AnyDSL_runtime_API void anydsl_record_event(int32_t, anydsl_event_t);
+/// Check if event has completed. True if the event is completed, false otherwise
+AnyDSL_runtime_API bool anydsl_check_event(int32_t, anydsl_event_t);
+/// Query time between two events in microseconds. Both events have to be completed, else UINT64_MAX is returned
+AnyDSL_runtime_API uint64_t anydsl_query_us_event(int32_t, anydsl_event_t, anydsl_event_t);
+/// Wait for the event to complete
+AnyDSL_runtime_API void anydsl_sync_event(int32_t, anydsl_event_t);
+
 #ifdef __cplusplus
 }
 #include "anydsl_runtime.hpp"
diff --git a/src/anydsl_runtime.hpp b/src/anydsl_runtime.hpp
index d2c63fbb..c4a43fff 100644
--- a/src/anydsl_runtime.hpp
+++ b/src/anydsl_runtime.hpp
@@ -123,6 +123,91 @@ void copy(const Array& a, int64_t offset_a, Array& b, int64_t offset_b, in
          size * sizeof(T));
 }
 
+
+/// Move-only owner of one runtime event: the constructor creates the event on
+/// device `dev` and the destructor destroys it.
+class Event {
+public:
+    inline Event(int32_t dev)
+        : dev_(dev),
+        event_(0)
+    {
+        create();
+    }
+
+    inline ~Event()
+    {
+        destroy();
+    }
+
+    inline Event(Event&& other) noexcept
+        : dev_(other.dev_),
+        event_(other.event_)
+    {
+        other.event_ = 0;
+    }
+
+    inline Event& operator=(Event&& other) noexcept
+    {
+        // Self-move must not destroy the event this object keeps owning.
+        if (this != &other) {
+            destroy();
+            dev_ = other.dev_;
+            event_ = other.event_;
+            other.event_ = 0;
+        }
+        return *this;
+    }
+
+    inline Event(const Event&) = delete;
+    inline Event& operator=(const Event&) = delete;
+
+    inline bool record()
+    {
+        anydsl_record_event(dev_, event_);
+        return true;
+    }
+
+    inline bool wait()
+    {
+        anydsl_sync_event(dev_, event_);
+        return true;
+    }
+
+    inline anydsl_event_t handle() const { return event_; }
+
+    /// Time from `start` to `end` in milliseconds, or -1 if either event has
+    /// not completed yet.
+    inline static float elapsedTimeMS(const Event& start, const Event& end)
+    {
+        if (!anydsl_check_event(start.dev_, start.handle()))
+            return -1;
+        if (!anydsl_check_event(end.dev_, end.handle()))
+            return -1;
+
+        const uint64_t us = anydsl_query_us_event(start.dev_, start.handle(), end.handle());
+        if (us == UINT64_MAX)
+            return -1;
+        return us / 1000.0f;
+    }
+
+private:
+    inline void create()
+    {
+        event_ = anydsl_create_event(dev_);
+    }
+
+    inline void destroy()
+    {
+        if (event_ != 0) {
+            anydsl_destroy_event(dev_, event_);
+            event_ = 0;
+        }
+    }
+
+    int32_t dev_;
+    anydsl_event_t event_;
+};
 } // namespace anydsl
 
 #endif
diff --git a/src/cpu_platform.cpp b/src/cpu_platform.cpp
index 069d264c..4dd54222 100644
--- a/src/cpu_platform.cpp
+++ b/src/cpu_platform.cpp
@@ -6,6 +6,11 @@
 #include 
 #include 
 #include 
+#include <string_view>
+
+#include <chrono>
+#include <condition_variable>
+#include <mutex>
 
 #if defined(__APPLE__)
 #include 
@@ -69,3 +74,65 @@ CpuPlatform::CpuPlatform(Runtime* runtime)
     std::getline(cpuinfo >> std::ws, device_name_);
 #endif
 }
+
+bool CpuPlatform::device_check_feature_support(DeviceId, const char* feature) const {
+    using namespace std::literals;
+
+    if (feature == "event"sv)
+        return true;
+    return false;
+}
+
+/// Host-side event: a completion flag and the time at which it was last recorded.
+struct CpuEvent {
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool recorded = false;
+    // steady_clock is monotonic, so the difference of two records is not skewed by wall-clock adjustments.
+    std::chrono::steady_clock::time_point pointOfRecord;
+};
+
+EventId CpuPlatform::create_event(DeviceId) {
+    CpuEvent* event = new CpuEvent;
+    return (EventId)reinterpret_cast<uintptr_t>(event);
+}
+
+void CpuPlatform::destroy_event(DeviceId, EventId event) {
+    auto eventPtr = reinterpret_cast<CpuEvent*>((uintptr_t)event);
+    delete eventPtr;
+}
+
+void CpuPlatform::record_event(DeviceId, EventId event) {
+    auto eventPtr = reinterpret_cast<CpuEvent*>((uintptr_t)event);
+
+    std::unique_lock<std::mutex> lk(eventPtr->mutex);
+    eventPtr->recorded = true;
+    eventPtr->pointOfRecord = std::chrono::steady_clock::now();
+    lk.unlock();
+
+    eventPtr->cv.notify_all();
+}
+
+bool CpuPlatform::check_event(DeviceId, EventId event) {
+    auto eventPtr = reinterpret_cast<CpuEvent*>((uintptr_t)event);
+
+    // 'recorded' is written under the mutex in record_event; read it under the same lock to avoid a data race.
+    std::lock_guard<std::mutex> lk(eventPtr->mutex);
+    return eventPtr->recorded;
+}
+
+uint64_t CpuPlatform::query_us_event(DeviceId dev, EventId event_start, EventId event_end) {
+    if (!check_event(dev, event_start) || !check_event(dev, event_end)) return UINT64_MAX;
+
+    auto eventStartPtr = reinterpret_cast<CpuEvent*>((uintptr_t)event_start);
+    auto eventEndPtr = reinterpret_cast<CpuEvent*>((uintptr_t)event_end);
+
+    return (uint64_t)std::chrono::duration_cast<std::chrono::microseconds>(eventEndPtr->pointOfRecord - eventStartPtr->pointOfRecord).count();
+}
+
+void CpuPlatform::sync_event(DeviceId, EventId event){
+    auto eventPtr = reinterpret_cast<CpuEvent*>((uintptr_t)event);
+
+    std::unique_lock<std::mutex> lk(eventPtr->mutex);
+    eventPtr->cv.wait(lk, [eventPtr]() { return eventPtr->recorded; });
+}
diff --git a/src/cpu_platform.h b/src/cpu_platform.h
index d52356e2..1c1792e6 100644
--- a/src/cpu_platform.h
+++ b/src/cpu_platform.h
@@ -67,7 +67,14 @@ class CpuPlatform : public Platform {
     size_t dev_count() const override { return 1; }
     std::string name() const override { return "CPU"; }
     const char* device_name(DeviceId) const override { return device_name_.c_str(); }
-    bool device_check_feature_support(DeviceId, const char*) const override { return false; }
+    bool device_check_feature_support(DeviceId, const char*) const override;
+
+    EventId create_event(DeviceId dev) override;
+    void destroy_event(DeviceId dev, EventId event) override;
+    void record_event(DeviceId dev, EventId event) override;
+    bool check_event(DeviceId dev, EventId event) override;
+    uint64_t query_us_event(DeviceId dev, EventId event_start, EventId event_end) override;
+    void sync_event(DeviceId dev, EventId event) override;
 };
 
 #endif
diff --git a/src/cuda_platform.cpp b/src/cuda_platform.cpp
index 75635148..755bc887 100644
--- a/src/cuda_platform.cpp
+++ b/src/cuda_platform.cpp
@@ -697,9 +697,56 @@ const char* CudaPlatform::device_name(DeviceId dev) const {
 bool CudaPlatform::device_check_feature_support(DeviceId dev, const char* feature) const {
     if (feature == "ITS"sv)
         return static_cast(devices_[dev].compute_capability) >= 70;
+    if (feature == "event"sv)
+        return true;
     return false;
 }
 
+EventId CudaPlatform::create_event(DeviceId) {
+    CUevent event;
+    CHECK_CUDA(cuEventCreate(&event, CU_EVENT_DEFAULT), "cuEventCreate()");
+    return (EventId)reinterpret_cast<uintptr_t>(event);
+}
+
+void CudaPlatform::destroy_event(DeviceId, EventId event) {
+    auto eventPtr = reinterpret_cast<CUevent>((uintptr_t)event);
+    CHECK_CUDA(cuEventDestroy(eventPtr), "cuEventDestroy");
+}
+
+void CudaPlatform::record_event(DeviceId, EventId event) {
+    auto eventPtr = reinterpret_cast<CUevent>((uintptr_t)event);
+    CHECK_CUDA(cuEventRecord(eventPtr, 0), "cuEventRecord");
+}
+
+bool CudaPlatform::check_event(DeviceId, EventId event) {
+    auto eventPtr = reinterpret_cast<CUevent>((uintptr_t)event);
+
+    CUresult err = cuEventQuery(eventPtr);
+    if (err == CUDA_ERROR_NOT_READY)
+        return false;
+
+    CHECK_CUDA(err, "cuEventQuery");
+    return err == CUDA_SUCCESS;
+}
+
+uint64_t CudaPlatform::query_us_event(DeviceId, EventId event_start, EventId event_end) {
+    auto eventStartPtr = reinterpret_cast<CUevent>((uintptr_t)event_start);
+    auto eventEndPtr = reinterpret_cast<CUevent>((uintptr_t)event_end);
+
+    float milliseconds;
+    CUresult err = cuEventElapsedTime(&milliseconds, eventStartPtr, eventEndPtr);
+    if (err == CUDA_ERROR_NOT_READY)
+        return UINT64_MAX;
+
+    CHECK_CUDA(err, "cuEventElapsedTime");
+    return static_cast<uint64_t>(milliseconds * 1000);
+}
+
+void CudaPlatform::sync_event(DeviceId, EventId event){
+    auto eventPtr = reinterpret_cast<CUevent>((uintptr_t)event);
+    CHECK_CUDA(cuEventSynchronize(eventPtr), "cuEventSynchronize");
+}
+
 void register_cuda_platform(Runtime* runtime) {
     runtime->register_platform();
 }
diff --git a/src/cuda_platform.h b/src/cuda_platform.h
index 0989c2e1..dbf745f1 100644
--- a/src/cuda_platform.h
+++ b/src/cuda_platform.h
@@ -101,6 +101,13 @@ class CudaPlatform : public Platform {
     std::string compile_nvvm(DeviceId dev, const std::string& filename, const std::string& program_string) const;
     std::string compile_cuda(DeviceId dev, const std::string& filename, const std::string& program_string) const;
     CUmodule create_module(DeviceId dev, const std::string& filename, const std::string& ptx_string) const;
+
+    EventId create_event(DeviceId dev) override;
+    void destroy_event(DeviceId dev, EventId event) override;
+    void record_event(DeviceId dev, EventId event) override;
+    bool check_event(DeviceId dev, EventId event) override;
+    uint64_t query_us_event(DeviceId dev, EventId event_start, EventId event_end) override;
+    void sync_event(DeviceId dev, EventId event) override;
 };
 
 #endif
diff --git a/src/dummy_platform.h b/src/dummy_platform.h
index 480a46be..f68ef0c3 100644
--- a/src/dummy_platform.h
+++ b/src/dummy_platform.h
@@ -34,6 +34,13 @@ class DummyPlatform : public Platform {
     bool device_check_feature_support(DeviceId, const char*) const override { return false; }
 
     std::string name_;
+
+    EventId create_event(DeviceId) override { platform_error(); return 0; }
+    void destroy_event(DeviceId, EventId) override { platform_error(); }
+    void record_event(DeviceId, EventId) override { platform_error(); }
+    bool check_event(DeviceId, EventId) override { platform_error(); return false; }
+    uint64_t query_us_event(DeviceId, EventId, EventId) override { platform_error(); return 0; }
+    void sync_event(DeviceId, EventId) override { platform_error(); }
 };
 
 #endif
diff --git a/src/hsa_platform.h b/src/hsa_platform.h
index eff4bdb9..ca402f7b 100644
--- a/src/hsa_platform.h
+++ b/src/hsa_platform.h
@@ -107,6 +107,13 @@ class HSAPlatform : public Platform {
     KernelInfo& load_kernel(DeviceId, const std::string&, const std::string&);
     std::string compile_gcn(DeviceId, const std::string&, const std::string&) const;
     std::string emit_gcn(const std::string&, const std::string&, const std::string&, llvm::OptimizationLevel) const;
+
+    EventId create_event(DeviceId) override { command_unavailable("create_event"); return 0; }
+    void destroy_event(DeviceId, EventId) override { command_unavailable("destroy_event"); }
+    void record_event(DeviceId, EventId) override { command_unavailable("record_event"); }
+    bool check_event(DeviceId, EventId) override { command_unavailable("check_event"); return false; }
+    uint64_t query_us_event(DeviceId, EventId, EventId) override { command_unavailable("query_us_event"); return 0; }
+    void sync_event(DeviceId, EventId) override { command_unavailable("sync_event"); }
 };
 
 #endif
diff --git a/src/opencl_platform.h b/src/opencl_platform.h
index 6f9d6c37..d589a11a 100644
--- a/src/opencl_platform.h
+++ b/src/opencl_platform.h
@@ -111,6 +111,13 @@ class OpenCLPlatform : public Platform {
     cl_program compile_program(DeviceId dev, cl_program program, const std::string& filename) const;
 
     friend void time_kernel_callback(cl_event, cl_int, void*);
+
+    EventId create_event(DeviceId) override { command_unavailable("create_event"); return 0; }
+    void destroy_event(DeviceId, EventId) override { command_unavailable("destroy_event"); }
+    void record_event(DeviceId, EventId) override { command_unavailable("record_event"); }
+    bool check_event(DeviceId, EventId) override { command_unavailable("check_event"); return false; }
+    uint64_t query_us_event(DeviceId, EventId, EventId) override { command_unavailable("query_us_event"); return 0; }
+    void sync_event(DeviceId, EventId) override { command_unavailable("sync_event"); }
 };
 
 #endif
diff --git a/src/platform.h b/src/platform.h
index c647a293..7fe007ee 100644
--- a/src/platform.h
+++ b/src/platform.h
@@ -57,6 +57,19 @@ class Platform {
     /// Checks whether the given platform-specific feature is supported on the given device.
     virtual bool device_check_feature_support(DeviceId dev, const char* feature) const = 0;
 
+    /// Creates an event on the given device and returns its id.
+    virtual EventId create_event(DeviceId) = 0;
+    /// Destroys the given event.
+    virtual void destroy_event(DeviceId, EventId) = 0;
+    /// Records the event on the given device.
+    virtual void record_event(DeviceId, EventId) = 0;
+    /// Checks whether the event has completed.
+    virtual bool check_event(DeviceId, EventId) = 0;
+    /// Returns the time between two events in microseconds, or UINT64_MAX if either has not completed.
+    virtual uint64_t query_us_event(DeviceId, EventId, EventId) = 0;
+    /// Waits for the event to complete.
+    virtual void sync_event(DeviceId, EventId) = 0;
+
 protected:
     [[noreturn]] void platform_error() {
         error("The selected '%' platform is not available", name());
diff --git a/src/runtime.cpp b/src/runtime.cpp
index a13cec2f..734a503e 100644
--- a/src/runtime.cpp
+++ b/src/runtime.cpp
@@ -111,6 +111,36 @@ void Runtime::synchronize(PlatformId plat, DeviceId dev) {
     platforms_[plat]->synchronize(dev);
 }
 
+EventId Runtime::create_event(PlatformId plat, DeviceId dev) {
+    check_device(plat, dev);
+    return platforms_[plat]->create_event(dev);
+}
+
+void Runtime::destroy_event(PlatformId plat, DeviceId dev, EventId event) {
+    check_device(plat, dev);
+    platforms_[plat]->destroy_event(dev, event);
+}
+
+void Runtime::record_event(PlatformId plat, DeviceId dev, EventId event) {
+    check_device(plat, dev);
+    platforms_[plat]->record_event(dev, event);
+}
+
+bool Runtime::check_event(PlatformId plat, DeviceId dev, EventId event) {
+    check_device(plat, dev);
+    return platforms_[plat]->check_event(dev, event);
+}
+
+uint64_t Runtime::query_us_event(PlatformId plat, DeviceId dev, EventId event_start, EventId event_end) {
+    check_device(plat, dev);
+    return platforms_[plat]->query_us_event(dev, event_start, event_end);
+}
+
+void Runtime::sync_event(PlatformId plat, DeviceId dev, EventId event) {
+    check_device(plat, dev);
+    platforms_[plat]->sync_event(dev, event);
+}
+
 #ifdef _WIN32
 #include 
 #define PATH_DIR_SEPARATOR '\\'
diff --git a/src/runtime.h b/src/runtime.h
index f975ac5d..06224bfe 100644
--- a/src/runtime.h
+++ b/src/runtime.h
@@ -17,6 +17,8 @@ enum DeviceId : uint32_t {};
 enum PlatformId : uint32_t {};
 enum class ProfileLevel : uint8_t { None = 0, Full, Fpga_dynamic };
 
+using EventId = uint64_t;
+
 class Platform;
 enum class KernelArgType : uint8_t { Val = 0, Ptr, Struct };
 
@@ -100,6 +102,13 @@ class Runtime {
     static void* aligned_malloc(size_t, size_t);
     static void aligned_free(void*);
 
+    EventId create_event(PlatformId plat, DeviceId dev);
+    void destroy_event(PlatformId plat, DeviceId dev, EventId event);
+    void record_event(PlatformId plat, DeviceId dev, EventId event);
+    bool check_event(PlatformId plat, DeviceId dev, EventId event);
+    uint64_t query_us_event(PlatformId plat, DeviceId dev, EventId event_start, EventId event_end);
+    void sync_event(PlatformId plat, DeviceId dev, EventId event);
+
 private:
     void check_device(PlatformId, DeviceId) const;
     std::string get_cached_filename(const std::string& str, const std::string& ext) const;