Factory#
#include <hpk/fft/factory.hpp>
Declares a class template for a factory. A factory makes objects that compute FFTs.
TYPEDEFS
-
template<typename fp_t>
using hpk::fft::FactoryCC = Factory<fp_t, std::complex<fp_t>, std::complex<fp_t>>#
Convenience type alias for a Factory having complex time domain and complex frequency domain.
-
template<typename fp_t>
using hpk::fft::FactoryRC = Factory<fp_t, fp_t, std::complex<fp_t>>#
Convenience type alias for a Factory having real time domain and complex frequency domain.
CLASS
-
template<typename fp_t, typename time_t, typename freq_t>
class Factory#
Abstract base class for concrete FFT factories.
The function hpk::fft::makeFactory() returns a unique_ptr that owns a Factory. This is then used to make instances of either Inplace or Ooplace, which compute FFTs.
Note that all member functions of a factory are const-qualified, and instances of a Factory can be shared in a multithreaded environment.
Public Types
-
using realTimeType = remove_complex_t<timeType>#
Real timeType.
-
using realFreqType = remove_complex_t<freqType>#
Real freqType.
Public Functions
-
template<std::size_t rank, typename Allocator = AlignedAllocator<mathType>>
inline std::unique_ptr<Inplace<mathType, timeType, freqType>> makeInplace(const InplaceDim (&layout)[rank], const InplaceDim &batch = InplaceDim(), Allocator &&alloc = Allocator()) const#
Makes an instance of Inplace for computing in-place FFTs.
Note: Overloads of this function are provided that take a rank (the number of dimensions) and a layout pointer, but this one is recommended if rank is known at compile time.
Examples:
// Single precision, complex time domain, complex freq domain
auto factory = hpk::fft::makeFactory<float>();

// One dimensional, 1024 contiguous complex points, batch of 7.
// The first argument is an array, so the braces are necessary.
// We can also write {{1024, 0}} or {{1024, 2}} or {{1024}}.
auto fft1 = factory->makeInplace<1>({1024}, 7);

// Two dimensional, 50 rows x 100 columns
// The data in each row is contiguous, i.e., the last dimension
// of the layout could be written as {100, 2}.
// Each row ends with padding consisting of 4 complex numbers,
// so we must specify that the stride between rows is 208 floats.
// There is only one transform (no batching) so it is omitted.
hpk::fft::InplaceDim layout[2] = {{50, 208}, {100}};
auto fft2 = factory->makeInplace<2>(layout);

// Three dimensional, 4 slabs x 8 rows x 16 columns, batch 7
// The data for each transform is contiguous, 512 complex points.
// The default batch stride would be 1024, but suppose there's
// 32 floats of some other data between transforms.
hpk::fft::InplaceDim batch = {7, 1056};
auto fft3 = factory->makeInplace<3>({4, 8, 16}, batch);

// One dimensional, 128 strided complex points, batch 4
// The batch has stride 2; the points of the batch are packed
// into SIMD-style units. Otherwise, there is no extra padding.
// Also, we have the Inplace object owned by a shared_ptr so we
// can share it (e.g., using the shared_ptr copy constructor).
hpk::fft::InplaceDim colLayout[1] = {{128, 8}};
hpk::fft::InplaceDim simd = {4, 2};
std::shared_ptr fft4 = factory->makeInplace<1>(colLayout, simd);
- Parameters:
layout – An array of hpk::fft::InplaceDim, each element of which describes one dimension of the FFT.
batch – Optionally provides an hpk::fft::InplaceDim describing multiple transforms. If omitted, batch.n is one and the batch stride is irrelevant.
alloc – Optionally provides an Allocator for the trigonometric constants (“twiddle factors”) which are used in computing an FFT. If omitted, an instance of the function’s second template parameter is constructed. If that also is not specified, the default is hpk::AlignedAllocator.
- Returns:
std::unique_ptr<Inplace> that owns an Inplace or, in case of failure, is empty.
-
template<typename Allocator = AlignedAllocator<mathType>>
inline std::unique_ptr<Inplace<mathType, timeType, freqType>> makeInplace(std::size_t rank, const InplaceDim *layoutptr, const InplaceDim &batch = InplaceDim(), Allocator &&alloc = Allocator()) const#
Makes an instance of Inplace for computing in-place FFTs.
-
template<typename Allocator = AlignedAllocator<mathType>>
inline std::unique_ptr<Inplace<mathType, timeType, freqType>> makeInplace(std::size_t rank, const InplaceDim *layoutptr, const InplaceDim *batchptr, Allocator &&alloc = Allocator()) const#
Makes an instance of Inplace for computing in-place FFTs.
-
template<std::size_t rank, typename Allocator = AlignedAllocator<mathType>>
inline std::unique_ptr<Ooplace<mathType, timeType, freqType>> makeOoplace(const OoplaceDim (&layout)[rank], const OoplaceDim &batch = OoplaceDim(), Allocator &&alloc = Allocator()) const#
Makes an instance of Ooplace for computing out-of-place FFTs.
Note: Overloads of this function are provided that take a rank (the number of dimensions) and a layout pointer, but this one is recommended if rank is known at compile time.
Examples:
// Single precision, complex time domain, complex freq domain
auto factory = hpk::fft::makeFactory<float>();

// One dimensional, 1024 contiguous complex points, batch of 7.
// The first argument is an array, so the braces are necessary.
// We can also write {{1024, 2, 2}} or {{1024}}.
auto fft1 = factory->makeOoplace<1>({1024}, 7);

// Two dimensional, 50 rows x 100 columns
// The data in each row is contiguous, i.e., the last OoplaceDim
// of the layout could be written as {100, 2, 2}.
// Each row in the time domain ends with padding consisting of 4
// complex numbers, so we must specify that the stride between rows
// is 208 floats. In the frequency domain, there is no padding.
// There is only one transform (no batching) so it is omitted.
hpk::fft::OoplaceDim layout[2] = {{50, 208, 200}, {100}};
auto fft2 = factory->makeOoplace<2>(layout);

// Three dimensional, 4 slabs x 8 rows x 16 columns, batch 7
// The data for each transform is contiguous, 512 complex points.
// The default batch stride would be 1024, but suppose there's
// 32 floats of some other data between transforms both in the
// time domain and in the frequency domain.
hpk::fft::OoplaceDim batch = {7, 1056, 1056};
auto fft3 = factory->makeOoplace<3>({4, 8, 16}, batch);

// One dimensional, 128 strided complex points, batch 4
// The batch has stride 2; the 4 points of the batch are packed
// into SIMD-style arrays. Otherwise, there is no extra padding.
// Also, we have the Ooplace object owned by a shared_ptr so we
// can share it (e.g., using the shared_ptr copy constructor).
hpk::fft::OoplaceDim colLayout[1] = {{128, 8, 8}};
hpk::fft::OoplaceDim simd = {4, 2, 2};
std::shared_ptr fft4 = factory->makeOoplace<1>(colLayout, simd);
- Parameters:
layout – An array of hpk::fft::OoplaceDim, each element of which describes one dimension of the FFT.
batch – Optionally provides an hpk::fft::OoplaceDim describing multiple transforms. If omitted, batch.n is one and the batch strides are irrelevant.
alloc – Optionally provides an Allocator for the trigonometric constants (“twiddle factors”) which are used in computing an FFT. If omitted, an instance of the function’s second template parameter is constructed. If that also is not specified, the default is hpk::AlignedAllocator.
- Returns:
std::unique_ptr<Ooplace> that owns an Ooplace or, in case of failure, is empty.
-
template<typename Allocator = AlignedAllocator<mathType>>
inline std::unique_ptr<Ooplace<mathType, timeType, freqType>> makeOoplace(std::size_t rank, const OoplaceDim *layoutptr, const OoplaceDim &batch = OoplaceDim(), Allocator &&alloc = Allocator()) const#
Makes an instance of Ooplace for computing out-of-place FFTs.
-
template<typename Allocator = AlignedAllocator<mathType>>
inline std::unique_ptr<Ooplace<mathType, timeType, freqType>> makeOoplace(std::size_t rank, const OoplaceDim *layoutptr, const OoplaceDim *batchptr, Allocator &&alloc = Allocator()) const#
Makes an instance of Ooplace for computing out-of-place FFTs.
-
template<std::size_t rank>
inline bool nextFastLayout(InplaceDim (&layout)[rank]) const#
Modifies the layout array by increasing n in each dimension as necessary so each n has a natively supported factorization.
The computational complexity of an FFT is log-linear for any number of points, but it can be more than 4X slower without a natively supported factorization. This function can be used to guide layout decisions, but note that it only increases n; it does not change the strides. If non-zero strides were specified, they may require adjustment. A function overload is provided that takes a rank (the number of dimensions) and a layout pointer, but this one is recommended if rank is known at compile time.
- Parameters:
layout – An array of hpk::fft::InplaceDim, each element of which describes one dimension of the FFT.
- Returns:
true if the layout was modified, false if the input layout was already factorizable (and so was not modified).
-
virtual bool nextFastLayout(std::size_t rank, InplaceDim *layoutptr) const = 0#
Modifies the layout by increasing n in each dimension (as necessary) so each n has a natively supported factorization.
-
template<std::size_t rank>
inline bool nextFastLayout(OoplaceDim (&layout)[rank]) const#
Modifies the layout array by increasing n in each dimension as necessary so each n has a natively supported factorization.
The computational complexity of an FFT is log-linear for any number of points, but it can be more than 4X slower without a natively supported factorization. This function can be used to guide layout decisions, but note that it only increases n; it does not change the strides. If non-zero strides were specified, they may require adjustment. A function overload is provided that takes a rank (the number of dimensions) and a layout pointer, but this one is recommended if rank is known at compile time.
- Parameters:
layout – An array of hpk::fft::OoplaceDim, each element of which describes one dimension of the FFT.
- Returns:
true if the layout was modified, false if the input layout was already factorizable (and so was not modified).
-
virtual bool nextFastLayout(std::size_t rank, OoplaceDim *layoutptr) const = 0#
Modifies the layout by increasing n in each dimension (as necessary) so each n has a natively supported factorization.
-
virtual int maxThreads() const = 0#
Returns an upper bound on the number of threads that can be used by compute objects made by this factory.
-
virtual std::string toString() const = 0#
Returns a short string describing the factory.
-
using realTimeType = remove_complex_t<timeType>#