1 #include <vtestbed/base/constants.hh> 2 #include <vtestbed/config/real_type.hh> 3 #include <vtestbed/core/math.hh> 4 #include <vtestbed/core/velocity_potential.hh> 5 #include <vtestbed/opencl/fourier_transform.hh> 6 #include <vtestbed/opencl/pipeline.hh> 24 using int2 = blitz::TinyVector<int,2>;
25 using kernel_type = clx::kernel;
38 using Context_base::context;
43 const Array<T,3>& wavy_surface,
44 Array<T,4>& velocity_potential
54 void context(
Context* rhs)
override {
55 Context_base::context(rhs);
56 this->_fft.context(rhs);
57 fft_type::precompile({10,10}, rhs);
58 auto prog = context()->compiler().compile(
"velocity_potential.cl");
68 typedef VTB_REAL_TYPE T;
69 using vtb::core::Policy;
75 #if defined(VTB_DEBUG_VELOCITY_POTENTIAL) 76 template <
class T,
int N>
79 const char* name,
int i=-1) {
80 blitz::Array<T,N> x(shape);
92 #if defined(VTB_DEBUG_VELOCITY_POTENTIAL) 93 #define VTB_DUMP(x, shape) ::dump(x, shape, #x) 94 #define VTB_DUMP2(x, shape, i) ::dump(x, shape, #x, i) 95 #define VTB_DUMP3(x, shape, name, i) ::dump(x, shape, name, i) 97 #define VTB_DUMP(x, shape) 98 #define VTB_DUMP2(x, shape, i) 99 #define VTB_DUMP3(x, shape, name, i) 107 const Grid<T,4>& grid_tzxy,
108 const Array<T,3>& zeta,
112 const int nt = grid_tzxy.extent(0);
113 const Grid<T,2> grid_xy(grid_tzxy.select(2,3));
114 const Range _ = Range::all();
115 Array<C,3> sf(zeta.shape());
116 this->second_function(grid_tzxy, zeta, sf);
119 this->_fft.shape(grid_xy.shape());
120 #if defined(VTB_DEBUG_VELOCITY_POTENTIAL) 122 this->_fft.dump(out);
125 auto wnrange = this->wave_number_grid();
126 wnrange.end() = grid_xy.shape();
127 Grid<T,3> new_wfgrid{grid_tzxy.select(1), wnrange.select(0), wnrange.select(1)};
128 if (wnrange.empty() || near(wnrange.ubound(), T{0}, T{1e-3})) {
133 Vector<T,2> wnmax = base::constants<T>::pi(4) / (grid_xy.ubound() - grid_xy.lbound());
134 wnrange.ubound() =
min(wnmax, wnrange.ubound());
135 wnrange.ubound() =
min(wnrange.lbound(), wnrange.ubound());
136 auto& ppl = context()->pipeline();
137 auto& d_window_function = this->_d_window_function;
138 if (!new_wfgrid.near(this->_wfgrid, T{1e-3})) {
139 this->_wfgrid = new_wfgrid;
140 const auto& window_function_shape = this->_wfgrid.shape();
141 this->_wf.resize(window_function_shape);
142 ppl.allocate(this->_wfgrid.num_elements(), d_window_function);
143 const T h = this->depth();
144 bool infinite_depth = is_positive_infinity(h);
145 auto lbound{this->_wfgrid.lbound()};
146 auto delta{this->_wfgrid.delta()};
147 auto kernel_name = infinite_depth ?
"lvps_wf_infinite_depth" :
"lvps_wf_finite_depth";
148 auto kernel = this->_prog.kernel(kernel_name);
149 kernel.argument(0, lbound(0));
150 kernel.argument(1, delta(0));
151 kernel.argument(2, lbound(1));
152 kernel.argument(3, delta(1));
153 kernel.argument(4, lbound(2));
154 kernel.argument(5, delta(2));
155 if (infinite_depth) {
156 kernel.argument(6, d_window_function);
158 kernel.argument(6, h);
159 kernel.argument(7, d_window_function);
161 ppl.kernel(kernel, window_function_shape);
164 auto& d_phi = this->_d_phi;
165 ppl.allocate(phi.size(), d_phi);
167 ppl.allocate(this->_wfgrid.num_elements(), d_result);
169 for (
int i=0; i<nt; ++i) {
170 Array<C,2> fft_sf(grid_xy.shape());
172 ppl.copy(fft_sf, d_fft_sf);
174 this->_fft.forward(d_fft_sf);
175 VTB_DUMP2(fft_sf, grid_xy.shape(), i);
176 const auto& window_function_shape = this->_wfgrid.shape();
178 auto kernel = this->_prog.kernel(
"lvps_multiply");
179 kernel.argument(0, d_fft_sf);
180 kernel.argument(1, d_window_function);
181 kernel.argument(2, d_result);
183 ppl.kernel(kernel, window_function_shape);
186 this->_fft.backward(d_result, window_function_shape(0));
188 auto kernel = this->_prog.kernel(
"lvps_copy_back");
189 kernel.argument(0, i);
190 kernel.argument(1, d_result);
191 kernel.argument(2, d_phi);
193 ppl.kernel(kernel, window_function_shape);
197 ppl.copy(d_phi, phi);
199 VTB_DUMP3(phi, phi.shape(),
"phi", -1);
204 vtb::core::make_velocity_potential_solver<T,3,Policy::OpenCL>() ->
205 std::unique_ptr<Velocity_potential_solver<T,3>> {
207 new Linear_velocity_potential_solver_opencl<T>
Base class for linear velocity potential solver.