diff --git a/eigen b/eigen index dde02fce..36b95962 160000 --- a/eigen +++ b/eigen @@ -1 +1 @@ -Subproject commit dde02fceedfc1ba09d4d4f71a2b5dafcfcb85491 +Subproject commit 36b95962756c1fce8e29b1f8bc45967f30773c00 diff --git a/python/thundersvm/thundersvm.dll b/python/thundersvm/thundersvm.dll new file mode 100644 index 00000000..ad8cc526 Binary files /dev/null and b/python/thundersvm/thundersvm.dll differ diff --git a/src/test/googletest b/src/test/googletest index a325ad2d..df1544bc 160000 --- a/src/test/googletest +++ b/src/test/googletest @@ -1 +1 @@ -Subproject commit a325ad2db5deb623eab740527e559b81c0f39d65 +Subproject commit df1544bcee0c7ce35cd5ea0b3eb8cc81855a4140 diff --git a/src/thundersvm/kernel/kernelmatrix_kernel.cpp b/src/thundersvm/kernel/kernelmatrix_kernel.cpp index f4187f92..7bc011ba 100644 --- a/src/thundersvm/kernel/kernelmatrix_kernel.cpp +++ b/src/thundersvm/kernel/kernelmatrix_kernel.cpp @@ -146,6 +146,65 @@ namespace svm_kernel { } } + void sum_kernel_values_instant(const SyncArray& coef, int total_sv, const SyncArray& sv_start, + const SyncArray& sv_count, const SyncArray& rho, + const SyncArray& k_mat, + SyncArray& predict_instant, int n_classes, int n_instances, + SyncArray& vote_device) { + const int* sv_start_data = sv_start.host_data(); + const int* sv_count_data = sv_count.host_data(); + const float_type* coef_data = coef.host_data(); + const kernel_type* k_mat_data = k_mat.host_data(); + float_type* predict_instant_data = predict_instant.host_data(); + const float_type* rho_data = rho.host_data(); + float_type* vote_device_data = vote_device.host_data(); + + int n_binary_models = n_classes * (n_classes - 1) / 2; + +#pragma omp parallel for schedule(guided) + for (int idx = 0; idx < n_instances; idx++) { + int k = 0; + for (int i = 0; i < n_classes; ++i) { + for (int j = i + 1; j < n_classes; ++j) { + int si = sv_start_data[i]; + int sj = sv_start_data[j]; + int ci = sv_count_data[i]; + int cj = sv_count_data[j]; + + const float_type* coef1 = &coef_data[(j - 1) * total_sv]; + const float_type* coef2 = &coef_data[i * total_sv]; + const kernel_type* k_values = &k_mat_data[idx * total_sv]; + + double sum = 0.0; + +#pragma omp parallel for reduction(+:sum) + for (int l = 0; l < ci; ++l) { + sum += coef1[si + l] * k_values[si + l]; + } +#pragma omp parallel for reduction(+:sum) + for (int l = 0; l < cj; ++l) { + sum += coef2[sj + l] * k_values[sj + l]; + } + + // Store in predict_instant and adjust with rho + predict_instant_data[idx * n_binary_models + k] = sum - rho_data[k]; + + // Update vote_device based on the decision value + if (predict_instant_data[idx * n_binary_models + k] > 0) { +#pragma omp atomic + vote_device_data[idx * n_classes + i] += 1; + } + else { +#pragma omp atomic + vote_device_data[idx * n_classes + j] += 1; + } + + k++; + } + } + } + } + void dns_csr_mul(int m, int n, int k, const SyncArray &dense_mat, const SyncArray &csr_val, const SyncArray &csr_row_ptr, const SyncArray &csr_col_ind, int nnz, SyncArray &result) {