-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathblas.c
More file actions
186 lines (168 loc) · 4.48 KB
/
blas.c
File metadata and controls
186 lines (168 loc) · 4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#include "common.h"
#if NOACC
#include "simple_blas.h"
#elif CUDA
#include "cuda_blas.h"
#elif HIP
#include "hip_blas.h"
#elif OPENMP
#include "openmp_blas.h"
#endif
#include "blas.h"
/**
 * Backend dispatcher for the vector dot routine.
 *
 * Exactly one call is compiled in, chosen by the first non-zero macro in the
 * order NOACC, CUDA, OPENMP, HIP. Arguments are forwarded unchanged.
 * NOTE(review): presumably computes the inner product of v and w over n
 * entries -- confirm against the backend implementations.
 *
 * @param n  element count handed to the backend
 * @param v  first input vector
 * @param w  second input vector
 * @return   the value returned by the selected backend; 0 if no backend
 *           macro is enabled (misconfigured build).
 */
real_type dot (const int n, const real_type *v, const real_type *w){
#if NOACC
  return simple_dot (n, v, w);
#elif CUDA
  return cuda_dot (n, v, w);
#elif OPENMP
  return openmp_dot (n, v, w);
#elif HIP
  return hip_dot (n, v, w);
#else
  /* No backend selected. Previously this path returned an uninitialized
     local, which is undefined behavior; return a defined value instead. */
  (void) n;
  (void) v;
  (void) w;
  return 0;
#endif
}
/**
 * Backend dispatcher for the axpy routine.
 *
 * Forwards (n, alpha, x, y) unchanged to the implementation selected at
 * compile time; the first non-zero macro among NOACC, CUDA, OPENMP, HIP wins.
 * If none is enabled the function does nothing.
 * NOTE(review): by BLAS convention this would be y += alpha * x -- confirm
 * in the backend sources.
 *
 * @param n      element count handed to the backend
 * @param alpha  scalar handed to the backend
 * @param x      first vector argument
 * @param y      second vector argument
 */
void axpy (const int n,
           const real_type alpha,
           real_type *x,
           real_type *y){
#if NOACC
  simple_axpy(n, alpha, x, y);
#elif CUDA
  cuda_axpy(n, alpha, x, y);
#elif OPENMP
  openmp_axpy(n, alpha, x, y);
#elif HIP
  hip_axpy(n, alpha, x, y);
#endif
}
/**
 * Backend dispatcher for the scal routine.
 *
 * Passes (n, alpha, v) straight through to the implementation chosen by the
 * first non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend
 * enabled the call is a no-op.
 * NOTE(review): presumably scales v in place by alpha -- confirm in the
 * backend sources.
 *
 * @param n      element count handed to the backend
 * @param alpha  scalar handed to the backend
 * @param v      vector argument
 */
void scal (const int n,
           const real_type alpha,
           real_type *v){
#if NOACC
  simple_scal(n, alpha, v);
#elif CUDA
  cuda_scal(n, alpha, v);
#elif OPENMP
  openmp_scal(n, alpha, v);
#elif HIP
  hip_scal(n, alpha, v);
#endif
}
/**
 * Backend dispatcher for the sparse matrix-vector product routine.
 *
 * Forwards its arguments to the implementation chosen by the first non-zero
 * macro among NOACC, CUDA, OPENMP, HIP. Only the HIP backend receives `kind`;
 * the other backends are called without it. With no backend enabled the
 * function does nothing.
 * NOTE(review): ia/ja/a look like CSR row pointers, column indices and
 * values, and al/bet like the alpha/beta scalars of
 * result = alpha*A*x + beta*result -- confirm in the backend sources.
 *
 * @param n       size argument handed to the backend
 * @param nnz     nonzero count handed to the backend
 * @param ia      first index array of the matrix
 * @param ja      second index array of the matrix
 * @param a       matrix values
 * @param x       input vector
 * @param result  output vector
 * @param al      pointer to the first scalar
 * @param bet     pointer to the second scalar
 * @param kind    string tag, used by the HIP backend only
 */
void csr_matvec(const int n,
                const int nnz,
                const int *ia,
                const int *ja,
                const real_type *a,
                const real_type *x,
                real_type *result,
                const real_type *al,
                const real_type *bet,
                const char *kind){
#if NOACC
  (void) kind; /* not consumed by this backend */
  simple_csr_matvec(n, nnz, ia, ja, a, x, result, al, bet);
#elif CUDA
  (void) kind; /* not consumed by this backend */
  cuda_csr_matvec(n, nnz, ia, ja, a, x, result, al, bet);
#elif OPENMP
  (void) kind; /* not consumed by this backend */
  openmp_csr_matvec(n, nnz, ia, ja, a, x, result, al, bet);
#elif HIP
  hip_csr_matvec(n, nnz, ia, ja, a, x, result, al, bet, kind);
#endif
}
/**
 * Backend dispatcher for the lower-triangular solve routine.
 *
 * All eight arguments are handed unchanged to the implementation selected by
 * the first non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend
 * enabled the function does nothing.
 * NOTE(review): lia/lja/la look like a CSR description of the triangular
 * factor with the diagonal stored separately in diag -- confirm in the
 * backend sources.
 *
 * @param n       size argument handed to the backend
 * @param nnz     nonzero count handed to the backend
 * @param lia     first index array of the factor
 * @param lja     second index array of the factor
 * @param la      factor values
 * @param diag    diagonal values
 * @param x       input vector
 * @param result  output vector
 */
void lower_triangular_solve(const int n, const int nnz,
                            const int *lia, const int *lja,
                            const real_type *la, const real_type * diag,
                            const real_type *x, real_type *result){
#if NOACC
  simple_lower_triangular_solve(n, nnz, lia, lja, la, diag, x, result);
#elif CUDA
  cuda_lower_triangular_solve(n, nnz, lia, lja, la, diag, x, result);
#elif OPENMP
  openmp_lower_triangular_solve(n, nnz, lia, lja, la, diag, x, result);
#elif HIP
  hip_lower_triangular_solve(n, nnz, lia, lja, la, diag, x, result);
#endif
}
/**
 * Backend dispatcher for the upper-triangular solve routine.
 *
 * All eight arguments are handed unchanged to the implementation selected by
 * the first non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend
 * enabled the function does nothing.
 * NOTE(review): uia/uja/ua look like a CSR description of the triangular
 * factor with the diagonal stored separately in diag -- confirm in the
 * backend sources.
 *
 * @param n       size argument handed to the backend
 * @param nnz     nonzero count handed to the backend
 * @param uia     first index array of the factor
 * @param uja     second index array of the factor
 * @param ua      factor values
 * @param diag    diagonal values
 * @param x       input vector
 * @param result  output vector
 */
void upper_triangular_solve(const int n, const int nnz,
                            const int *uia, const int *uja,
                            const real_type *ua, const real_type *diag,
                            const real_type *x, real_type *result){
#if NOACC
  simple_upper_triangular_solve(n, nnz, uia, uja, ua, diag, x, result);
#elif CUDA
  cuda_upper_triangular_solve(n, nnz, uia, uja, ua, diag, x, result);
#elif OPENMP
  openmp_upper_triangular_solve(n, nnz, uia, uja, ua, diag, x, result);
#elif HIP
  hip_upper_triangular_solve(n, nnz, uia, uja, ua, diag, x, result);
#endif
}
/**
 * Backend dispatcher for the ichol routine.
 *
 * Forwards every argument unchanged to the implementation selected by the
 * first non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend
 * enabled the function does nothing.
 * NOTE(review): the name suggests an incomplete-Cholesky preconditioner
 * step using prec_data; the roles of x and y are not visible here --
 * confirm in the backend sources.
 *
 * @param ia         first index array of the matrix
 * @param ja         second index array of the matrix
 * @param a          matrix values (non-const: the backend may modify them)
 * @param nnzA       nonzero count handed to the backend
 * @param prec_data  preconditioner data structure handed to the backend
 * @param x          first vector argument
 * @param y          second vector argument
 */
void ichol(const int *ia,
           const int *ja,
           real_type *a,
           const int nnzA,
           pdata *prec_data,
           real_type *x,
           real_type *y){
#if NOACC
  simple_ichol(ia, ja, a, nnzA, prec_data, x, y);
#elif CUDA
  cuda_ichol(ia, ja, a, nnzA, prec_data, x, y);
#elif OPENMP
  openmp_ichol(ia, ja, a, nnzA, prec_data, x, y);
#elif HIP
  hip_ichol(ia, ja, a, nnzA, prec_data, x, y);
#endif
}
/**
 * Backend dispatcher for the vec_vec routine.
 *
 * Passes (n, x, y, res) unchanged to the implementation selected by the
 * first non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend
 * enabled the function does nothing.
 * NOTE(review): presumably an element-wise product of x and y written to
 * res -- confirm in the backend sources.
 *
 * @param n    element count handed to the backend
 * @param x    first vector argument
 * @param y    second vector argument
 * @param res  result vector argument
 */
void vec_vec(const int n,
             const real_type *x,
             real_type *y,
             real_type *res){
#if NOACC
  simple_vec_vec(n, x, y, res);
#elif CUDA
  cuda_vec_vec(n, x, y, res);
#elif OPENMP
  openmp_vec_vec(n, x, y, res);
#elif HIP
  hip_vec_vec(n, x, y, res);
#endif
}
/**
 * Backend dispatcher for the vector_reciprocal routine.
 *
 * Passes (n, v, res) unchanged to the implementation selected by the first
 * non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend enabled
 * the function does nothing.
 * NOTE(review): presumably writes 1/v[i] into res[i]; handling of zero
 * entries is up to the backend -- confirm there.
 *
 * @param n    element count handed to the backend
 * @param v    input vector
 * @param res  result vector
 */
void vector_reciprocal(const int n,
                       const real_type *v,
                       real_type *res){
#if NOACC
  simple_vector_reciprocal(n, v, res);
#elif CUDA
  cuda_vector_reciprocal(n, v, res);
#elif OPENMP
  openmp_vector_reciprocal(n, v, res);
#elif HIP
  hip_vector_reciprocal(n, v, res);
#endif
}
/**
 * Backend dispatcher for the vector_sqrt routine.
 *
 * Passes (n, v, res) unchanged to the implementation selected by the first
 * non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend enabled
 * the function does nothing.
 * NOTE(review): presumably writes sqrt(v[i]) into res[i] -- confirm in the
 * backend sources.
 *
 * @param n    element count handed to the backend
 * @param v    input vector
 * @param res  result vector
 */
void vector_sqrt(const int n,
                 const real_type *v,
                 real_type *res){
#if NOACC
  simple_vector_sqrt(n, v, res);
#elif CUDA
  cuda_vector_sqrt(n, v, res);
#elif OPENMP
  openmp_vector_sqrt(n, v, res);
#elif HIP
  hip_vector_sqrt(n, v, res);
#endif
}
/**
 * Backend dispatcher for the vec_copy routine.
 *
 * Passes (n, src, dest) unchanged to the implementation selected by the
 * first non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend
 * enabled the function does nothing.
 * NOTE(review): presumably copies n entries from src to dest -- confirm in
 * the backend sources.
 *
 * @param n     element count handed to the backend
 * @param src   source vector
 * @param dest  destination vector
 */
void vec_copy(const int n,
              real_type *src,
              real_type *dest){
#if NOACC
  simple_vec_copy(n, src, dest);
#elif CUDA
  cuda_vec_copy(n, src, dest);
#elif OPENMP
  openmp_vec_copy(n, src, dest);
#elif HIP
  hip_vec_copy(n, src, dest);
#endif
}
/**
 * Backend dispatcher for the vec_zero routine.
 *
 * Passes (n, vec) unchanged to the implementation selected by the first
 * non-zero macro among NOACC, CUDA, OPENMP, HIP. With no backend enabled
 * the function does nothing.
 * NOTE(review): presumably sets the first n entries of vec to zero --
 * confirm in the backend sources.
 *
 * @param n    element count handed to the backend
 * @param vec  vector argument
 */
void vec_zero(const int n,
              real_type *vec){
#if NOACC
  simple_vec_zero(n, vec);
#elif CUDA
  cuda_vec_zero(n, vec);
#elif OPENMP
  openmp_vec_zero(n, vec);
#elif HIP
  hip_vec_zero(n, vec);
#endif
}