/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
*  Copyright (C) 2023--2026, High Performance Kernels LLC                     *
*                                                                             *
*  This software and the related documents are High Performance Kernels LLC   *
*  copyrighted materials, and your use of them is governed by the express     *
*  license under which they were provided to you (License).                   *
*  Unless the License provides otherwise, you may not use, copy, reproduce,   *
*  modify, disclose, transmit, publish, or distribute this software or the    *
*  related documents without prior written permission from High Performance   *
*  Kernels LLC.                                                               *
*                                                                             *
*    This software and the related documents are provided as is, WITHOUT ANY  *
*  WARRANTY, without even the implied warranty of MERCHANTABILITY or FITNESS  *
*  FOR A PARTICULAR PURPOSE.                                                  *
\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#ifndef HPK_FFT_FACTORY_HPP_INCLUDED
#define HPK_FFT_FACTORY_HPP_INCLUDED

/// \file
/// \brief This header declares classes InplaceDim, OoplaceDim, and Factory.

#include <memory>
#include <new>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include <hpk/alignedAllocator.hpp>
#include <hpk/complex_type_traits.hpp>
#include <hpk/fft/fft.hpp>
#include <hpk/visibility.hpp>

namespace hpk {
namespace fft {

/// \brief Common storage shared by InplaceDim and OoplaceDim.
///
/// The dimensions of an in-place FFT are described by `InplaceDim`, and the
/// dimensions of an out-of-place FFT are described by `OoplaceDim`.
/// The only constructor is protected, so this base class is meant to be
/// created through one of those derived types rather than directly.
struct Dimension {
    /// \brief Number of points (FFT dimension) or of transforms (batch)
    ///
    /// When describing a dimension of an FFT, `n` is the mathematical number
    /// of points in the discrete Fourier transform, i.e., the number of
    /// points in the time domain:
    /// the number of complex points if the time domain is complex, or
    /// the number of real points if the time domain is real.
    /// When describing a batch, `n` is the number of transforms; in that
    /// usage both `n == 0` and `n == 1` indicate a single transform.
    long n;

    /// \brief Stride in the time domain
    ///
    /// The distance from one time-domain point to the next, always counted
    /// in real elements.
    /// Adjacent complex points therefore have stride 2, and selecting every
    /// other point of a complex time domain is done with `timeStride = 4`.
    /// A stride of zero means the data for the (possibly multidimensional)
    /// transform is taken to be contiguous in memory (row-major order).
    /// For all forward transforms, `timeStride` is the input stride.
    /// For all backward transforms, `timeStride` is the output stride.
    /// For all in-place transforms the output overwrites the input, so
    /// `timeStride` is both the input stride and the output stride.
    long timeStride;

    /// \brief Stride in the frequency domain
    ///
    /// Used by out-of-place transforms: the stride in the frequency domain,
    /// counted in real elements.
    /// A stride of zero means the frequency data for the (possibly
    /// multidimensional) out-of-place transform is taken to be contiguous
    /// in memory (row-major order).
    /// For all `forwardCopy` functions, `freqStride` is the output stride.
    /// For all `backwardCopy` functions, `freqStride` is the input stride.
    long freqStride;

 protected:
    /// Stores the three values as given; no validation is performed here.
    Dimension(long count, long strideInTime, long strideInFreq)
        : n{count}, timeStride{strideInTime}, freqStride{strideInFreq} {}
};

/// \brief Data layout of one dimension of a (possibly multidimensional)
///        in-place FFT
///
/// An InplaceDim describes either one dimension of an FFT or a batch of FFTs.
/// It is a standard-layout class, and it is both trivially-copyable and
/// trivially-destructible.  Every constructor leaves `freqStride` at zero.
struct InplaceDim final : Dimension {
    /// \brief Default constructor
    ///
    /// Sets `n` to one and `timeStride` to zero, exactly like
    /// `InplaceDim(1, 0)`.  This is useful as the batch argument when only a
    /// single transform is wanted.
    InplaceDim() : InplaceDim(1, 0) {}

    /// \brief Constructor that sets `timeStride` to zero
    ///
    /// `n` takes the given value and `timeStride` is set to zero.
    /// The constructor is intentionally implicit so that a plain number can
    /// stand for a dimension.  For example, a 2D complex to complex transform
    /// of 3 rows and 4 columns with contiguous row-major data layout can be
    /// written in any of these ways:
    ///
    ///     hpk::fft::InplaceDim layout[2] = { 3    ,  4    };
    ///     hpk::fft::InplaceDim layout[2] = {{3, 0}, {4, 0}};
    ///     hpk::fft::InplaceDim layout[2] = {{3, 8}, {4, 2}};
    ///
    InplaceDim(long n) : InplaceDim(n, 0) {}  // NOLINT

    /// \brief Constructor
    ///
    /// `n` and `timeStride` take the given values.
    InplaceDim(long n, long stride) : Dimension(n, stride, 0) {}
};

// Compile-time checks of the properties promised in the InplaceDim
// documentation, plus the guarantee that it adds no storage to Dimension.
static_assert(std::is_standard_layout_v<InplaceDim>,
              "InplaceDim must be a standard-layout type");
static_assert(std::is_trivially_copyable_v<InplaceDim>,
              "InplaceDim must be trivially copyable");
static_assert(std::is_trivially_destructible_v<InplaceDim>,
              "InplaceDim must be trivially destructible");
static_assert(sizeof(InplaceDim) == sizeof(Dimension),
              "InplaceDim must not add storage to Dimension");

/// Two `InplaceDim` values are equal when their `n` members match and their
/// `timeStride` members match; `freqStride` is not compared.
/// \related InplaceDim
inline bool operator==(const InplaceDim& lhs, const InplaceDim& rhs) {
    if (lhs.n != rhs.n) {
        return false;
    }
    return lhs.timeStride == rhs.timeStride;
}

/// Two `InplaceDim` values are unequal when `n` or `timeStride` differs;
/// this is the exact negation of `operator==`.
/// \related InplaceDim
inline bool operator!=(const InplaceDim& lhs, const InplaceDim& rhs) {
    const bool same = (lhs == rhs);
    return !same;
}

/// \brief Returns a string description of an `InplaceDim`.
/// \param d The dimension to describe.
/// \return The description as a `std::string`.
///
/// Only the declaration appears in this header; the definition is provided
/// elsewhere.
/// \related InplaceDim
HPK_API std::string toString(const InplaceDim& d);

/// Writes the text produced by `toString(d)` to `os` and returns `os`, so
/// an `InplaceDim` can be used in a chain of `<<` insertions.
/// \related InplaceDim
inline std::ostream& operator<<(std::ostream& os, const InplaceDim& d) {
    os << toString(d);
    return os;
}

/// \brief Data layout of one dimension of a (possibly multidimensional)
///        out-of-place FFT
///
/// An OoplaceDim describes either one dimension of an FFT or a batch of FFTs.
/// It is a standard-layout class, and it is both trivially-copyable and
/// trivially-destructible.
struct OoplaceDim final : Dimension {
    /// \brief Default constructor
    ///
    /// Sets `n` to one and both strides to zero, exactly like
    /// `OoplaceDim(1, 0, 0)`.  This is useful as the batch argument when only
    /// a single transform is wanted.
    OoplaceDim() : OoplaceDim(1, 0, 0) {}

    /// \brief Constructor that sets both strides to zero
    ///
    /// `n` takes the given value and both strides are set to zero.
    /// The constructor is intentionally implicit so that a plain number can
    /// stand for a dimension.  For example, a 2D complex to complex transform
    /// of 3 rows and 4 columns with contiguous row-major data layout can be
    /// written in any of these ways:
    ///
    ///     hpk::fft::OoplaceDim layout[2] = { 3       ,  4       };
    ///     hpk::fft::OoplaceDim layout[2] = {{3, 0, 0}, {4, 0, 0}};
    ///     hpk::fft::OoplaceDim layout[2] = {{3, 8, 8}, {4, 2, 2}};
    ///
    OoplaceDim(long n) : OoplaceDim(n, 0, 0) {}  // NOLINT

    /// \brief Constructor
    ///
    /// `n`, `timeStride`, and `freqStride` all take the given values.
    OoplaceDim(long n, long timeStride, long freqStride)
        : Dimension(n, timeStride, freqStride) {}
};

// Compile-time checks of the properties promised in the OoplaceDim
// documentation, plus the guarantee that it adds no storage to Dimension.
static_assert(std::is_standard_layout_v<OoplaceDim>,
              "OoplaceDim must be a standard-layout type");
static_assert(std::is_trivially_copyable_v<OoplaceDim>,
              "OoplaceDim must be trivially copyable");
static_assert(std::is_trivially_destructible_v<OoplaceDim>,
              "OoplaceDim must be trivially destructible");
static_assert(sizeof(OoplaceDim) == sizeof(Dimension),
              "OoplaceDim must not add storage to Dimension");

/// Two `OoplaceDim` values are equal when `n`, `timeStride`, and
/// `freqStride` all match.
/// \related OoplaceDim
inline bool operator==(const OoplaceDim& lhs, const OoplaceDim& rhs) {
    if (lhs.n != rhs.n || lhs.timeStride != rhs.timeStride) {
        return false;
    }
    return lhs.freqStride == rhs.freqStride;
}

/// Two `OoplaceDim` values are unequal when any of `n`, `timeStride`, or
/// `freqStride` differs; this is the exact negation of `operator==`.
/// \related OoplaceDim
inline bool operator!=(const OoplaceDim& lhs, const OoplaceDim& rhs) {
    const bool same = (lhs == rhs);
    return !same;
}

/// \brief Returns a string description of an `OoplaceDim`.
/// \param d The dimension to describe.
/// \return The description as a `std::string`.
///
/// Only the declaration appears in this header; the definition is provided
/// elsewhere.
/// \related OoplaceDim
HPK_API std::string toString(const OoplaceDim& d);

/// Writes the text produced by `toString(d)` to `os` and returns `os`, so
/// an `OoplaceDim` can be used in a chain of `<<` insertions.
/// \related OoplaceDim
inline std::ostream& operator<<(std::ostream& os, const OoplaceDim& d) {
    os << toString(d);
    return os;
}

/// \brief Abstract base class for concrete FFT factories.
///
/// The function `hpk::fft::makeFactory()` returns a `unique_ptr` that owns
/// a `Factory`.  This is then used to make instances of either `Inplace` or
/// `Ooplace`, which compute FFTs.
///
/// Note that all member functions of a factory are `const`-qualified, and
/// instances of a `Factory` can be shared in a multithreaded environment.
///
/// A `Factory` is not copyable, and only a derived class can construct one.
///
/// \tparam fp_t   Real (not complex) type used for math computations and the
///                scaling factor.
/// \tparam time_t Type of data in the time domain.
/// \tparam freq_t Type of data in the frequency domain.
template<typename fp_t, typename time_t, typename freq_t> class Factory {
 public:
    static_assert(!is_complex_v<fp_t>,
                  "The first template parameter must be real, not complex.");

    using mathType = fp_t;    ///< Type of math computations and scaling factor
    using timeType = time_t;  ///< Type of data in the time domain
    using freqType = freq_t;  ///< Type of data in the frequency domain
    using realTimeType = remove_complex_t<timeType>;  ///< Real timeType
    using realFreqType = remove_complex_t<freqType>;  ///< Real freqType

 protected:
    Factory() = default;

 public:
    virtual ~Factory() = default;
    Factory(const Factory& f) = delete;
    Factory& operator=(const Factory& f) = delete;

 private:
    static constexpr bool checkCache = true;   // Do check for reusing twiddles
    static constexpr bool updateCache = true;  // Cache twiddles for next time

    /// \brief Allocates storage for `n` twiddle factors using `alloc`.
    /// \param alloc Allocator whose `value_type` must be `mathType`.
    /// \param n     Number of `mathType` elements to allocate.
    /// \return A `shared_ptr` owning the storage, or an empty `shared_ptr`
    ///         if the allocator returned a null pointer.
    ///
    /// This is the allocation step shared by `makeInplace` and `makeOoplace`.
    /// The returned pointer's deleter holds its own copy of the allocator
    /// and of `n`, and releases the storage through that copy.
    template<typename Allocator>
    static std::shared_ptr<mathType> allocateTwiddle(Allocator&& alloc,
                                                     std::size_t n) {
        using ATraits = std::allocator_traits<std::decay_t<Allocator>>;
        static_assert(std::is_same_v<typename ATraits::value_type, mathType>,
                      "Allocator::value_type must be the factory's mathType");
        // The deleter is built (copying the allocator) before allocating.
        auto deleter = [alloc, n](mathType* ptr) mutable {
            ATraits::deallocate(alloc, ptr, n);
        };
        auto buf = ATraits::allocate(alloc, n);
        if (!buf) {
            return {};
        }
        // Begin the lifetime of the array in the allocator-provided storage.
        mathType* ptr = new (static_cast<void*>(buf)) mathType[n];
        std::shared_ptr<mathType> twiddle(ptr, deleter);
        // NOTE(review): the first element is set to -1, presumably as a
        // marker that the concrete factory reads as "twiddles not yet
        // computed" -- confirm against the derived implementation.
        ptr[0] = mathType{-1.0};
        return twiddle;
    }

 public:
    /// \brief Makes an instance of `Inplace` for computing in-place FFTs.
    /// \param layout An array of `hpk::fft::InplaceDim`, each element of which
    ///               describes one dimension of the FFT.
    /// \param batch  Optionally provides an `hpk::fft::InplaceDim` describing
    ///               multiple transforms.  If omitted, `batch.n` is one and
    ///               the batch stride is irrelevant.
    /// \param alloc  Optionally provides an Allocator for the trigonometric
    ///               constants ("twiddle factors") which are used in computing
    ///               an FFT.  If omitted, an instance of the function's second
    ///               template parameter is constructed.  If that also is not
    ///               specified, the default is `hpk::AlignedAllocator`.
    /// \return `std::unique_ptr<Inplace>` that owns an `Inplace` or, in case
    ///         of failure, is empty.
    ///
    /// Note: Overloads of this function are provided that take a rank (the
    /// number of dimensions) and a layout pointer, but this one is recommended
    /// if `rank` is known at compile time.
    ///
    /// Examples:
    ///
    ///     // Single precision, complex time domain, complex freq domain
    ///     auto factory = hpk::fft::makeFactory<float>();
    ///
    ///     // One dimensional, 1024 contiguous complex points, batch of 7.
    ///     // The first argument is an array, so the braces are necessary.
    ///     // We can also write {{1024, 0}} or {{1024, 2}} or {{1024}}.
    ///     auto fft1 = factory->makeInplace<1>({1024}, 7);
    ///
    ///     // Two dimensional, 50 rows x 100 columns
    ///     // The data in each row is contiguous, i.e., the last dimension
    ///     // of the layout could be written as {100, 2}.
    ///     // Each row ends with padding consisting of 4 complex numbers,
    ///     // so we must specify that the stride between rows is 208 floats.
    ///     // There is only one transform (no batching) so it is omitted.
    ///     hpk::fft::InplaceDim layout[2] = {{50, 208}, {100}};
    ///     auto fft2 = factory->makeInplace<2>(layout);
    ///
    ///     // Three dimensional, 4 slabs x 8 rows x 16 columns, batch 7
    ///     // The data for each transform is contiguous, 512 complex points.
    ///     // The default batch stride would be 1024, but suppose there's
    ///     // 32 floats of some other data between transforms.
    ///     hpk::fft::InplaceDim batch = {7, 1056};
    ///     auto fft3 = factory->makeInplace<3>({4, 8, 16}, batch);
    ///
    ///     // One dimensional, 128 strided complex points, batch 4
    ///     // The batch has stride 2; the points of the batch are packed
    ///     // into SIMD-style units.  Otherwise, there is no extra padding.
    ///     // Also, we have the Inplace object owned by a shared_ptr so we
    ///     // can share it (e.g., using the shared_ptr copy constructor).
    ///     hpk::fft::InplaceDim colLayout[1] = {{128, 8}};
    ///     hpk::fft::InplaceDim simd = {4, 2};
    ///     std::shared_ptr fft4 = factory->makeInplace<1>(colLayout, simd);
    ///
    template<std::size_t rank, typename Allocator = AlignedAllocator<mathType>>
    [[nodiscard]] std::unique_ptr<Inplace<mathType, timeType, freqType>>
    makeInplace(const InplaceDim (&layout)[rank],
                const InplaceDim& batch = InplaceDim(),
                Allocator&& alloc = Allocator()) const {
        return makeInplace(rank, layout, &batch,
                           std::forward<Allocator>(alloc));
    }

    /// \brief Makes an instance of `Inplace` for computing in-place FFTs.
    /// \param rank      The number of dimensions of the FFT.
    /// \param layoutptr Pointer to the layout, one `InplaceDim` per dimension.
    /// \param batch     Optional description of multiple transforms.
    /// \param alloc     Optional Allocator for the twiddle factors.
    /// \return `std::unique_ptr<Inplace>` that owns an `Inplace` or, in case
    ///         of failure, is empty.
    ///
    /// Forwards to the overload that takes the batch by pointer.
    template<typename Allocator = AlignedAllocator<mathType>>
    [[nodiscard]] std::unique_ptr<Inplace<mathType, timeType, freqType>>
    makeInplace(std::size_t rank, const InplaceDim* layoutptr,
                const InplaceDim& batch = InplaceDim(),
                Allocator&& alloc = Allocator()) const {
        return makeInplace(rank, layoutptr, &batch,
                           std::forward<Allocator>(alloc));
    }

    /// \brief Makes an instance of `Inplace` for computing in-place FFTs.
    /// \param rank      The number of dimensions of the FFT.
    /// \param layoutptr Pointer to the layout, one `InplaceDim` per dimension.
    /// \param batchptr  Pointer to an `InplaceDim` describing the batch; it is
    ///                  passed on unchanged to the concrete factory.
    /// \param alloc     Optional Allocator for the twiddle factors; its
    ///                  `value_type` must be `mathType`.
    /// \return `std::unique_ptr<Inplace>` that owns an `Inplace` or, in case
    ///         of failure, is empty.  In particular the result is empty when
    ///         twiddle storage is needed and `alloc` returns a null pointer.
    template<typename Allocator = AlignedAllocator<mathType>>
    [[nodiscard]] std::unique_ptr<Inplace<mathType, timeType, freqType>>
    makeInplace(std::size_t rank, const InplaceDim* layoutptr,
                const InplaceDim* batchptr,
                Allocator&& alloc = Allocator()) const {
        std::vector<int> workspace;
        workspace.reserve(4 * rank);
        std::shared_ptr<mathType> twiddle;
        // A nonzero result is the number of elements we must allocate here.
        const std::size_t n =
                getTwiddle(rank, layoutptr, checkCache, workspace, twiddle);
        if (n != 0) {
            twiddle = allocateTwiddle(std::forward<Allocator>(alloc), n);
            if (!twiddle) {
                return {};
            }
        }
        return makeInplace(rank, layoutptr, batchptr, updateCache,
                           workspace.data(), std::move(twiddle));
    }

 private:
    /// \brief Implementation hook queried before an `Inplace` is made.
    /// \return The number of `mathType` elements the caller must allocate
    ///         for twiddle factors, or zero if no allocation is required.
    ///
    /// NOTE(review): presumably, when `check` is true and suitable twiddles
    /// are cached, `twiddle` is set and zero is returned; and `workspace` is
    /// filled with data later handed to `makeInplace` -- confirm against the
    /// concrete factory.
    virtual std::size_t
    getTwiddle(std::size_t rank, const InplaceDim* layoutptr, bool check,
               std::vector<int>& workspace,
               std::shared_ptr<mathType>& twiddle) const = 0;

    /// \brief Implementation hook that builds the `Inplace` object from the
    ///        workspace and twiddle storage prepared by the public overload.
    virtual std::unique_ptr<Inplace<mathType, timeType, freqType>>
    makeInplace(std::size_t rank, const InplaceDim* layoutptr,
                const InplaceDim* batchptr, bool update, int* workspace,
                std::shared_ptr<mathType>&& twiddle) const = 0;

 public:
    /// \brief Makes an instance of `Ooplace` for computing out-of-place FFTs.
    /// \param layout An array of `hpk::fft::OoplaceDim`, each element of
    ///               which describes one dimension of the FFT.
    /// \param batch  Optionally provides an `hpk::fft::OoplaceDim` describing
    ///               multiple transforms.  If omitted, `batch.n` is one and
    ///               the batch strides are irrelevant.
    /// \param alloc  Optionally provides an Allocator for the trigonometric
    ///               constants ("twiddle factors") which are used in computing
    ///               an FFT.  If omitted, an instance of the function's second
    ///               template parameter is constructed.  If that also is not
    ///               specified, the default is `hpk::AlignedAllocator`.
    /// \return `std::unique_ptr<Ooplace>` that owns an `Ooplace` or, in case
    ///         of failure, is empty.
    ///
    /// Note: Overloads of this function are provided that take a rank (the
    /// number of dimensions) and a layout pointer, but this one is recommended
    /// if `rank` is known at compile time.
    ///
    /// Examples:
    ///
    ///     // Single precision, complex time domain, complex freq domain
    ///     auto factory = hpk::fft::makeFactory<float>();
    ///
    ///     // One dimensional, 1024 contiguous complex points, batch of 7.
    ///     // The first argument is an array, so the braces are necessary.
    ///     // We can also write {{1024, 2, 2}} or {{1024}}.
    ///     auto fft1 = factory->makeOoplace<1>({1024}, 7);
    ///
    ///     // Two dimensional, 50 rows x 100 columns
    ///     // The data in each row is contiguous, i.e., the last OoplaceDim
    ///     // of the layout could be written as {100, 2, 2}.
    ///     // Each row in the time domain ends with padding consisting of 4
    ///     // complex numbers, so we must specify that the stride between rows
    ///     // is 208 floats.  In the frequency domain, there is no padding.
    ///     // There is only one transform (no batching) so it is omitted.
    ///     hpk::fft::OoplaceDim layout[2] = {{50, 208, 200}, {100}};
    ///     auto fft2 = factory->makeOoplace<2>(layout);
    ///
    ///     // Three dimensional, 4 slabs x 8 rows x 16 columns, batch 7
    ///     // The data for each transform is contiguous, 512 complex points.
    ///     // The default batch stride would be 1024, but suppose there's
    ///     // 32 floats of some other data between transforms both in the
    ///     // time domain and in the frequency domain.
    ///     hpk::fft::OoplaceDim batch = {7, 1056, 1056};
    ///     auto fft3 = factory->makeOoplace<3>({4, 8, 16}, batch);
    ///
    ///     // One dimensional, 128 strided complex points, batch 4
    ///     // The batch has stride 2; the 4 points of the batch are packed
    ///     // into SIMD-style arrays.  Otherwise, there is no extra padding.
    ///     // Also, we have the Ooplace object owned by a shared_ptr so we
    ///     // can share it (e.g., using the shared_ptr copy constructor).
    ///     hpk::fft::OoplaceDim colLayout[1] = {{128, 8, 8}};
    ///     hpk::fft::OoplaceDim simd = {4, 2, 2};
    ///     std::shared_ptr fft4 = factory->makeOoplace<1>(colLayout, simd);
    ///
    template<std::size_t rank, typename Allocator = AlignedAllocator<mathType>>
    [[nodiscard]] std::unique_ptr<Ooplace<mathType, timeType, freqType>>
    makeOoplace(const OoplaceDim (&layout)[rank],
                const OoplaceDim& batch = OoplaceDim(),
                Allocator&& alloc = Allocator()) const {
        return makeOoplace(rank, layout, &batch,
                           std::forward<Allocator>(alloc));
    }

    /// \brief Makes an instance of `Ooplace` for computing out-of-place FFTs.
    /// \param rank      The number of dimensions of the FFT.
    /// \param layoutptr Pointer to the layout, one `OoplaceDim` per dimension.
    /// \param batch     Optional description of multiple transforms.
    /// \param alloc     Optional Allocator for the twiddle factors.
    /// \return `std::unique_ptr<Ooplace>` that owns an `Ooplace` or, in case
    ///         of failure, is empty.
    ///
    /// Forwards to the overload that takes the batch by pointer.
    template<typename Allocator = AlignedAllocator<mathType>>
    [[nodiscard]] std::unique_ptr<Ooplace<mathType, timeType, freqType>>
    makeOoplace(std::size_t rank, const OoplaceDim* layoutptr,
                const OoplaceDim& batch = OoplaceDim(),
                Allocator&& alloc = Allocator()) const {
        return makeOoplace(rank, layoutptr, &batch,
                           std::forward<Allocator>(alloc));
    }

    /// \brief Makes an instance of `Ooplace` for computing out-of-place FFTs.
    /// \param rank      The number of dimensions of the FFT.
    /// \param layoutptr Pointer to the layout, one `OoplaceDim` per dimension.
    /// \param batchptr  Pointer to an `OoplaceDim` describing the batch; it is
    ///                  passed on unchanged to the concrete factory.
    /// \param alloc     Optional Allocator for the twiddle factors; its
    ///                  `value_type` must be `mathType`.
    /// \return `std::unique_ptr<Ooplace>` that owns an `Ooplace` or, in case
    ///         of failure, is empty.  In particular the result is empty when
    ///         twiddle storage is needed and `alloc` returns a null pointer.
    template<typename Allocator = AlignedAllocator<mathType>>
    [[nodiscard]] std::unique_ptr<Ooplace<mathType, timeType, freqType>>
    makeOoplace(std::size_t rank, const OoplaceDim* layoutptr,
                const OoplaceDim* batchptr,
                Allocator&& alloc = Allocator()) const {
        std::vector<int> workspace;
        workspace.reserve(4 * rank);
        std::shared_ptr<mathType> twiddle;
        // A nonzero result is the number of elements we must allocate here.
        const std::size_t n =
                getTwiddle(rank, layoutptr, checkCache, workspace, twiddle);
        if (n != 0) {
            twiddle = allocateTwiddle(std::forward<Allocator>(alloc), n);
            if (!twiddle) {
                return {};
            }
        }
        return makeOoplace(rank, layoutptr, batchptr, updateCache,
                           workspace.data(), std::move(twiddle));
    }

 private:
    /// \brief Implementation hook queried before an `Ooplace` is made.
    /// \return The number of `mathType` elements the caller must allocate
    ///         for twiddle factors, or zero if no allocation is required.
    ///
    /// NOTE(review): presumably, when `check` is true and suitable twiddles
    /// are cached, `twiddle` is set and zero is returned; and `workspace` is
    /// filled with data later handed to `makeOoplace` -- confirm against the
    /// concrete factory.
    virtual std::size_t
    getTwiddle(std::size_t rank, const OoplaceDim* layoutptr, bool check,
               std::vector<int>& workspace,
               std::shared_ptr<mathType>& twiddle) const = 0;

    /// \brief Implementation hook that builds the `Ooplace` object from the
    ///        workspace and twiddle storage prepared by the public overload.
    virtual std::unique_ptr<Ooplace<mathType, timeType, freqType>>
    makeOoplace(std::size_t rank, const OoplaceDim* layoutptr,
                const OoplaceDim* batchptr, bool update, int* workspace,
                std::shared_ptr<mathType>&& twiddle) const = 0;

 public:
    /// \brief Modifies the layout array by increasing `n` in each dimension as
    ///        necessary so each `n` has a natively supported factorization.
    /// \param layout An array of `hpk::fft::InplaceDim`, each element of which
    ///               describes one dimension of the FFT.
    /// \return `true` if the layout was modified, `false` if the input layout
    ///         was already factorizable (and so was not modified).
    ///
    /// The computational complexity of an FFT is log-linear for any number of
    /// points, but it can be more than 4X slower without a natively supported
    /// factorization.  This function can be used to guide layout decisions,
    /// but note that it only increases `n`; it does not change the strides.
    /// If non-zero strides were specified, they may require adjustment.
    /// A function overload is provided that takes a rank (the number of
    /// dimensions) and a layout pointer, but this one is recommended if `rank`
    /// is known at compile time.
    template<std::size_t rank>
    bool nextFastLayout(InplaceDim (&layout)[rank]) const {
        return nextFastLayout(rank, layout);
    }

    /// \brief Modifies the layout by increasing `n` in each dimension (as
    ///        necessary) so each `n` has a natively supported factorization.
    /// \param rank      The number of dimensions of the FFT.
    /// \param layoutptr Pointer to the layout, one `InplaceDim` per dimension.
    /// \return `true` if the layout was modified, `false` otherwise.
    virtual bool nextFastLayout(std::size_t rank,
                                InplaceDim* layoutptr) const = 0;

    /// \brief Modifies the layout array by increasing `n` in each dimension as
    ///        necessary so each `n` has a natively supported factorization.
    /// \param layout An array of `hpk::fft::OoplaceDim`, each element of which
    ///               describes one dimension of the FFT.
    /// \return `true` if the layout was modified, `false` if the input layout
    ///         was already factorizable (and so was not modified).
    ///
    /// The computational complexity of an FFT is log-linear for any number of
    /// points, but it can be more than 4X slower without a natively supported
    /// factorization.  This function can be used to guide layout decisions,
    /// but note that it only increases `n`; it does not change the strides.
    /// If non-zero strides were specified, they may require adjustment.
    /// A function overload is provided that takes a rank (the number of
    /// dimensions) and a layout pointer, but this one is recommended if `rank`
    /// is known at compile time.
    template<std::size_t rank>
    bool nextFastLayout(OoplaceDim (&layout)[rank]) const {
        return nextFastLayout(rank, layout);
    }

    /// \brief Modifies the layout by increasing `n` in each dimension (as
    ///        necessary) so each `n` has a natively supported factorization.
    /// \param rank      The number of dimensions of the FFT.
    /// \param layoutptr Pointer to the layout, one `OoplaceDim` per dimension.
    /// \return `true` if the layout was modified, `false` otherwise.
    virtual bool nextFastLayout(std::size_t rank,
                                OoplaceDim* layoutptr) const = 0;

    /// \brief Returns an upper bound on the number of threads that can be used
    ///        by compute objects made by this factory.
    virtual int maxThreads() const = 0;

    /// Returns a short string describing the factory.
    virtual std::string toString() const = 0;
};

/// Writes the text returned by `factory.toString()` to `os` and returns
/// `os`, so a `Factory` can be used in a chain of `<<` insertions.
/// \related Factory
template<typename fp_t, typename time_t, typename freq_t>
inline std::ostream& operator<<(std::ostream& os,
                                const Factory<fp_t, time_t, freq_t>& factory) {
    os << factory.toString();
    return os;
}

/// \brief Convenience type alias for a `Factory` having complex time domain
///        and complex frequency domain.
/// \tparam fp_t Real floating-point type; both domains use
///              `std::complex<fp_t>`.
template<typename fp_t>
using FactoryCC = Factory<fp_t, std::complex<fp_t>, std::complex<fp_t>>;

/// \brief Convenience type alias for a `Factory` having real time domain
///        and complex frequency domain.
/// \tparam fp_t Real floating-point type; the time domain uses `fp_t` and
///              the frequency domain uses `std::complex<fp_t>`.
template<typename fp_t>
using FactoryRC = Factory<fp_t, fp_t, std::complex<fp_t>>;

}  // namespace fft
}  // namespace hpk

#endif  // HPK_FFT_FACTORY_HPP_INCLUDED
