advanced/fft_12.cpp
The file advanced/fft_12.cpp
shows complex time, complex frequency FFTs.
Note that the CMake configuration builds two binaries, one with and one without
support for dynamic symbol lookup.
In each case the binary is linked with the AVX2 libraries, but only the first
uses dlsym()
to find symbol addresses for AVX512 functions at run time.
On running either of these executables (on x86_64
with no dynamic loader
environment variables set), the output will contain the lines:
Using FftSeqFactoryCC<float32_t>(Architecture::avx2)
for single precision.
Using FftSeqFactoryCC<float64_t>(Architecture::avx2)
for double precision.
However, fft_12
has dlsym()
support. So, on hardware supporting AVX512,
running it as follows:
LD_LIBRARY_PATH=/opt/libhpk0/lib \
LD_PRELOAD=libhpk_fft_avx512_fp32.so \
./fft_12
results in:
Using FftSeqFactoryCC<float32_t>(Architecture::avx512)
for single precision.
Using FftSeqFactoryCC<float64_t>(Architecture::avx2)
for double precision.
In this example, the factories were made without specifying an Architecture,
thus implying Architecture::detect.
Note that fft_12_ndlsym does not use dlsym() and so cannot use the AVX512
code path regardless of LD_PRELOAD. Although Architecture::avx512 can be
detected based on the hardware’s capabilities, makeFactory() must fall back
to AVX2 as it is the only library available.
(If Architecture::avx512 had been explicitly set in the hpk::Configuration
passed to makeFactory(), then no fallback would occur and an empty
std::unique_ptr would be returned.)
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
* Copyright (C) 2023--2024, High Performance Kernels LLC *
* *
* This software and the related documents are provided as is, WITHOUT ANY *
* WARRANTY, without even the implied warranty of MERCHANTABILITY or FITNESS *
* FOR A PARTICULAR PURPOSE. *
\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include <complex>
#include <iostream>
#include <string>
#include <vector>
#include <hpk/fft/makeFactory.hpp>
// Writes 'label' followed by a colon, then the elements of 'data' (of type
// 'T', which may be complex) as 'rows' output rows of 'cols' elements each,
// reading 'data' in row-major order.  A row with more than eight elements is
// wrapped using a backslash line continuation after every eighth element.
// A blank line is written at the end and the stream is flushed.
template<class T>
void printData(std::string label, const T* data, int rows, int cols) {
    constexpr int kPerLine = 8;  // elements printed before a continuation

    std::cout << label << ":\n";

    int r = 0;
    while (r < rows) {
        std::cout << " ";
        for (int c = 0; c < cols; ++c) {
            // Never wrap before the first element of a row.
            const bool wrap = (c > 0) && (c % kPerLine == 0);
            if (wrap) {
                std::cout << " \\\n ";
            }
            const T& element = data[cols * r + c];
            std::cout << element << " ";
        }
        std::cout << '\n';
        ++r;
    }

    // std::endl both terminates the listing and flushes std::cout.
    std::cout << std::endl;
}
// Makes single and double precision factories for FFTs with complex time and
// complex frequency domains, then runs a 12-point in-place forward transform
// with each, printing the data before and after.  Returns -1 if either
// factory could not be made.
int main() {
    std::cout << "Setup: Making factories.\n";
    std::cout << "~~~~~ \n";

    // Both factories are made from the same single-threaded configuration.
    hpk::Configuration config{{hpk::Parameter::threads, 1}};

    // singleFactory is a std::unique_ptr<hpk::fft::FactoryCC<float>>; an
    // empty pointer means the factory could not be made.
    auto singleFactory = hpk::fft::makeFactory<float>(config);
    if (!singleFactory) {
        std::cout << "Error: makeFactory<float>() failed" << std::endl;
        return -1;
    }
    std::cout << "Using " << *singleFactory << " for single precision.\n";

    // doubleFactory is a std::unique_ptr<hpk::fft::FactoryCC<double>>.
    auto doubleFactory = hpk::fft::makeFactory<double>(config);
    if (!doubleFactory) {
        std::cout << "Error: makeFactory<double>() failed" << std::endl;
        return -1;
    }
    std::cout << "Using " << *doubleFactory << " for double precision.\n";

    std::cout << '\n';

    // Example #1: a one-dimensional 12-point FFT in single precision.
    std::cout << "Example #1: Twelve-point single precision example.\n";
    std::cout << "~~~~~~~~~~ \n";

    std::vector<std::complex<float>> samplesF(12);
    samplesF[0] = {1.0f, 2.0f};  // all other elements stay zero
    printData("input", samplesF.data(), 1, 12);

    // The length is passed as a long; in C++20, std::ssize(samplesF) would
    // avoid the cast.
    long lengthF = static_cast<long>(samplesF.size());
    // The transform overwrites samplesF with its forward FFT.
    singleFactory->makeInplace({lengthF})->forward(samplesF.data());
    printData("forward", samplesF.data(), 1, 12);

    // Example #2: a one-dimensional 12-point FFT in double precision.
    std::cout << "Example #2: Twelve-point double precision example.\n";
    std::cout << "~~~~~~~~~~ \n";

    std::vector<std::complex<double>> samplesD(12);
    samplesD[0] = {1.0, 2.0};  // all other elements stay zero
    printData("input", samplesD.data(), 1, 12);

    // As above; in C++20, std::ssize(samplesD) would avoid the cast.
    long lengthD = static_cast<long>(samplesD.size());
    doubleFactory->makeInplace({lengthD})->forward(samplesD.data());
    printData("forward", samplesD.data(), 1, 12);

    return 0;
}