advanced/fft_3x6.cpp
The file advanced/fft_3x6.cpp
shows a 2D FFT in which both the time domain and the frequency domain data are complex.
It illustrates dynamically loading a shared FFT library.
After compilation, fft_3x6
is linked with the target hpk::fft_avx2_fp32,
and the following code in main()
attempts to load the AVX512 library (only when the hardware supports AVX512)
before making the factory:
handle = dlopen(hpk::fft::avx512_fp32_so, RTLD_LAZY);
Thus, on hardware not supporting AVX512, running the executable shows:
Using FftSeqFactoryCC<float32_t>(Architecture::avx2)
but, on hardware with AVX512:
Using FftSeqFactoryCC<float32_t>(Architecture::avx512)
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
* Copyright (C) 2023--2024, High Performance Kernels LLC *
* *
* This software and the related documents are provided as is, WITHOUT ANY *
* WARRANTY, without even the implied warranty of MERCHANTABILITY or FITNESS *
* FOR A PARTICULAR PURPOSE. *
\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include <cassert>
#include <iostream>
#include <string>
#include <vector>
#include <hpk/fft/makeFactory.hpp>
// Writes 'label' and then a rows-by-cols table of complex values to std::cout.
// 'data' is read as interleaved (real, imaginary) pairs of type 'fp_t', stored
// row after row. Each value is printed as real part, explicitly signed
// imaginary part, and a trailing 'i'. A row with more than eight columns is
// continued on the following line after a backslash.
template<typename fp_t>
void printComplexData(std::string label, const fp_t* data, int rows, int cols) {
    constexpr int kValuesPerLine = 8;

    std::cout << label << ":\n";

    // Walks the buffer one complex element (two scalars) at a time.
    const fp_t* element = data;
    for (int row = 0; row < rows; ++row) {
        std::cout << " ";
        for (int col = 0; col < cols; ++col, element += 2) {
            const bool needsContinuation =
                    (col > 0) && (col % kValuesPerLine == 0);
            if (needsContinuation) std::cout << " \\\n ";

            const fp_t& realPart = element[0];
            const fp_t& imagPart = element[1];
            std::cout << realPart;
            // showpos applies to the imaginary part only; it is switched off
            // again so that the next real part prints without a forced sign.
            std::cout << std::showpos << imagPart << "i " << std::noshowpos;
        }
        std::cout << '\n';
    }
    std::cout << std::endl;
}
// Runs a toy 3x6 two dimensional, in-place, forward FFT on single precision
// complex data and prints the input and the result.
//
// This demonstrates how a factory can be passed to a function, with the
// function taking ownership of the factory: 'factory' is received by value,
// so it (and the FFT object made from it below) is destroyed when this
// function returns.
//
// If 'factory' is empty, or if makeInplace() does not yield an FFT object,
// an error message is printed and the function returns without computing.
void compute3x6(std::unique_ptr<hpk::fft::FactoryCC<float>> factory) {
    // An empty unique_ptr cannot make anything; report it rather than
    // dereferencing a null pointer.
    if (!factory) {
        std::cout << "Error: compute3x6() requires a non-null factory."
                  << std::endl;
        return;
    }

    // This is just a toy problem. Data layout is 3x6.
    constexpr long kRows = 3;
    constexpr long kCols = 6;
    std::cout << "Example #1: Two dimensional, In-place FFT.\n"
              << "~~~~~~~~~~ Rows=" << kRows << ", Cols=" << kCols << '\n';

    // Two floats (real, imaginary) per element; the vector constructor
    // zero-initializes everything, then the first element is set to 1+2i.
    std::vector<float> v(2 * kCols * kRows);
    v[0] = 1.0f;
    v[1] = 2.0f;
    printComplexData("input", v.data(), kRows, kCols);

    auto fft = factory->makeInplace<2>({kRows, kCols});
    assert(fft && "Error: makeInplace() failed for fft.");
    if (!fft) {
        // Reached only when assert() is compiled out (NDEBUG). Without this
        // check, the dereferences below would act on a null pointer.
        std::cout << "Error: makeInplace() failed for fft." << std::endl;
        return;
    }
    std::cout << *fft << '\n';

    // In-place transform: the result overwrites the contents of 'v'.
    fft->forward(v.data());
    printComplexData("forward", v.data(), kRows, kCols);
}
// Program entry point. Optionally loads the AVX512 FFT shared library at run
// time, makes a single-threaded factory, hands it to compute3x6(), and then
// unloads the library again. Returns -1 if no factory could be made.
int main() {
    // For demonstration purposes, this program is NOT linked with the shared
    // library libhpk_fft_avx512_fp32.so. Instead, on AVX512 hardware, we try
    // to load it here with dlopen(). If that succeeds, makeFactory() will
    // make an avx512 factory. Otherwise we stay with RTLD_DEFAULT.
    void* libHandle = RTLD_DEFAULT;
    const bool hasAvx512 =
            hpk::detectArchitecture() >= hpk::Architecture::avx512;
    if (hasAvx512) {
        void* const opened = dlopen(hpk::fft::avx512_fp32_so, RTLD_LAZY);
        if (opened) {
            libHandle = opened;
        } else {
            std::cout << "Warning: dlopen(" << hpk::fft::avx512_fp32_so
                      << ") failed.\n";
        }
    }

    // Make a factory for complex single precision time and frequency domains
    // that uses only one thread, passing along the handle chosen above.
    auto factory = hpk::fft::makeFactory<float>(
            {{hpk::Parameter::threads, 1}}, libHandle);
    if (!factory) {
        std::cout << "Error: makeFactory<float>() failed" << std::endl;
        return -1;
    }
    std::cout << "Using " << *factory << "\n\n";

    // Hand ownership of the factory to compute3x6(), which does the FFT work
    // and prints the results. That function receives the unique_ptr by value
    // (not by reference), so the factory is destroyed when the parameter goes
    // out of scope at the end of compute3x6(), and our pointer is left empty.
    compute3x6(std::move(factory));
    assert(!factory);

    // The FFT objects made by the factory are owned by unique_ptrs local to
    // compute3x6(), so they have been destroyed as well. At this point every
    // destructor that needs hpk::fft::avx512_fp32_so has completed, and the
    // library may be unloaded (only if we actually loaded it).
    if (libHandle != RTLD_DEFAULT) dlclose(libHandle);
}