Program Listing for File cuda_helpers.hpp

Return to documentation for file (pennylane_lightning/core/src/utils/cuda_utils/cuda_helpers.hpp)

// Copyright 2022-2023 Xanadu Quantum Technologies Inc.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//     http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Adapted from JET: https://github.com/XanaduAI/jet.git

#pragma once
#include <algorithm>
#include <complex>
#include <functional>
#include <memory>
#include <mutex>
#include <numeric>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include <cuComplex.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <cusparse_v2.h>

#include "DevTag.hpp"
#include "cuError.hpp"

namespace Pennylane::LightningGPU::Util {

// SFINAE check for existence of real() method in complex type.
// This overload is viable only when `t.real()` is well-formed (e.g.
// std::complex); it then wins overload resolution and reports true.
template <typename ComplexT>
constexpr auto is_cxx_complex(const ComplexT &t) -> decltype(t.real(), bool()) {
    return true;
}

// Catch-all fallback for CUDA complex types.
// Selected when the templated overload is removed by SFINAE (no `real()`
// member, e.g. cuComplex/cuDoubleComplex/float2/double2), reporting false.
constexpr bool is_cxx_complex(...) { return false; }

// Unary negation for single-precision CUDA complex values.
inline cuFloatComplex operator-(const cuFloatComplex &a) {
    return cuFloatComplex{-a.x, -a.y};
}
// Unary negation for double-precision CUDA complex values.
inline cuDoubleComplex operator-(const cuDoubleComplex &a) {
    return cuDoubleComplex{-a.x, -a.y};
}

/**
 * @brief Divide two CUDA complex numbers: returns a / b.
 *
 * @tparam CFP_t_T Numerator type: cuComplex/float2 or cuDoubleComplex/double2.
 * @tparam CFP_t_U Denominator type (defaults to CFP_t_T).
 * @param a Numerator.
 * @param b Denominator.
 * @return CFP_t_T Quotient a / b.
 */
template <class CFP_t_T, class CFP_t_U = CFP_t_T>
inline static auto Div(const CFP_t_T &a, const CFP_t_U &b) -> CFP_t_T {
    if constexpr (std::is_same_v<CFP_t_T, cuComplex> ||
                  std::is_same_v<CFP_t_T, float2>) {
        return cuCdivf(a, b);
    } else if constexpr (std::is_same_v<CFP_t_T, cuDoubleComplex> ||
                         std::is_same_v<CFP_t_T, double2>) {
        // `constexpr` was missing on this branch, which instantiated
        // cuCdiv() for every type and fell off the end (UB) for
        // unsupported ones.
        return cuCdiv(a, b);
    } else {
        // Unsupported type: fail loudly at compile time instead of
        // silently returning an indeterminate value.
        static_assert(std::is_same_v<CFP_t_T, cuComplex> ||
                          std::is_same_v<CFP_t_T, float2> ||
                          std::is_same_v<CFP_t_T, cuDoubleComplex> ||
                          std::is_same_v<CFP_t_T, double2>,
                      "Div: unsupported CUDA complex type");
    }
}

// Complex conjugate of a CUDA complex value; dispatches to the
// single- or double-precision intrinsic at compile time.
template <class CFP_t>
__host__ __device__ inline static constexpr auto Conj(CFP_t a) -> CFP_t {
    constexpr bool is_single_prec =
        std::is_same_v<CFP_t, cuComplex> || std::is_same_v<CFP_t, float2>;
    if constexpr (is_single_prec) {
        return cuConjf(a);
    } else {
        return cuConj(a);
    }
}

// Product of two CUDA complex values of the same type; dispatches to the
// single- or double-precision intrinsic at compile time.
template <class CFP_t>
__host__ __device__ inline static constexpr auto Cmul(CFP_t a, CFP_t b)
    -> CFP_t {
    constexpr bool is_single_prec =
        std::is_same_v<CFP_t, cuComplex> || std::is_same_v<CFP_t, float2>;
    if constexpr (is_single_prec) {
        return cuCmulf(a, b);
    } else {
        return cuCmul(a, b);
    }
}

// Scale a CUDA complex value by a real scalar: returns a * b.
template <class Real_t, class CFP_t = cuDoubleComplex>
inline static constexpr auto ConstMultSC(Real_t a, CFP_t b) -> CFP_t {
    const auto re = a * b.x;
    const auto im = a * b.y;
    if constexpr (std::is_same_v<CFP_t, cuDoubleComplex>) {
        return make_cuDoubleComplex(re, im);
    } else {
        return make_cuFloatComplex(re, im);
    }
}

// Convert a CUDA complex value (with .x/.y members) into the
// corresponding std::complex with matching precision.
template <class CFP_t = cuDoubleComplex>
inline static constexpr auto cuToComplex(CFP_t a)
    -> std::complex<decltype(a.x)> {
    using RealT = decltype(a.x);
    return std::complex<RealT>{a.x, a.y};
}

// Convert a std::complex value into the corresponding CUDA complex type
// (cuDoubleComplex for double precision, cuFloatComplex otherwise).
template <class CFP_t = std::complex<double>>
inline static constexpr auto complexToCu(CFP_t a) {
    constexpr bool is_double_prec =
        std::is_same_v<CFP_t, std::complex<double>>;
    if constexpr (is_double_prec) {
        return make_cuDoubleComplex(a.real(), a.imag());
    } else {
        return make_cuFloatComplex(a.real(), a.imag());
    }
}

// Complex product a * b for either std::complex (real()/imag() members)
// or CUDA complex (.x/.y members) operands, selected at compile time.
template <class CFP_t_T, class CFP_t_U = CFP_t_T>
inline static constexpr auto ConstMult(CFP_t_T a, CFP_t_U b) -> CFP_t_T {
    if constexpr (is_cxx_complex(b)) {
        const auto re = a.real() * b.real() - a.imag() * b.imag();
        const auto im = a.real() * b.imag() + a.imag() * b.real();
        return {re, im};
    } else {
        const auto re = a.x * b.x - a.y * b.y;
        const auto im = a.x * b.y + a.y * b.x;
        return {re, im};
    }
}

// Sum of two CUDA complex values; dispatches to the single- or
// double-precision intrinsic at compile time.
template <class CFP_t_T, class CFP_t_U = CFP_t_T>
inline static constexpr auto ConstSum(CFP_t_T a, CFP_t_U b) -> CFP_t_T {
    constexpr bool is_single_prec =
        std::is_same_v<CFP_t_T, cuComplex> || std::is_same_v<CFP_t_T, float2>;
    if constexpr (is_single_prec) {
        return cuCaddf(a, b);
    } else {
        return cuCadd(a, b);
    }
}

// Multiplicative identity (1 + 0i) for any two-component complex type.
template <class CFP_t> inline static constexpr auto ONE() -> CFP_t {
    return CFP_t{1, 0};
}

// Additive identity (0 + 0i) for any two-component complex type.
template <class CFP_t> inline static constexpr auto ZERO() -> CFP_t {
    return CFP_t{0, 0};
}

// Imaginary unit (0 + 1i) for any two-component complex type.
template <class CFP_t> inline static constexpr auto IMAG() -> CFP_t {
    return CFP_t{0, 1};
}

// sqrt(2) as either a CUDA complex value (real part sqrt(2), zero
// imaginary part) or a plain float/double scalar, chosen by CFP_t.
template <class CFP_t> inline static constexpr auto SQRT2() {
    constexpr bool is_float_cplx = std::is_same_v<CFP_t, float2> ||
                                   std::is_same_v<CFP_t, cuFloatComplex>;
    constexpr bool is_double_cplx = std::is_same_v<CFP_t, double2> ||
                                    std::is_same_v<CFP_t, cuDoubleComplex>;
    if constexpr (is_float_cplx) {
        return CFP_t{0x1.6a09e6p+0F, 0}; // NOLINT: To be replaced in C++20
    } else if constexpr (is_double_cplx) {
        return CFP_t{0x1.6a09e667f3bcdp+0,
                     0}; // NOLINT: To be replaced in C++20
    } else if constexpr (std::is_same_v<CFP_t, double>) {
        return 0x1.6a09e667f3bcdp+0; // NOLINT: To be replaced in C++20
    } else {
        return 0x1.6a09e6p+0F; // NOLINT: To be replaced in C++20
    }
}

// 1/sqrt(2) as CFP_t: built directly for std::complex types, otherwise
// computed as 1 / SQRT2 via the CUDA complex division helper.
template <class CFP_t> inline static constexpr auto INVSQRT2() -> CFP_t {
    constexpr bool is_std_cplx = std::is_same_v<CFP_t, std::complex<float>> ||
                                 std::is_same_v<CFP_t, std::complex<double>>;
    if constexpr (is_std_cplx) {
        return CFP_t(1 / M_SQRT2, 0);
    } else {
        return Div(CFP_t{1, 0}, SQRT2<CFP_t>());
    }
}

// Compile-time predicate: true only for the CUDA complex value types the
// helpers in this file operate on (single- and double-precision pairs).
template <class T>
constexpr bool is_supported_data_type =
    std::is_same_v<T, cuComplex> || std::is_same_v<T, float2> ||
    std::is_same_v<T, cuDoubleComplex> || std::is_same_v<T, double2>;

// Overload set mapping a real precision type to the corresponding CUDA
// complex type via the return type; the argument's value is never used.
inline cuDoubleComplex getCudaType([[maybe_unused]] const double &t) {
    return cuDoubleComplex{};
}
inline cuFloatComplex getCudaType([[maybe_unused]] const float &t) {
    return cuFloatComplex{};
}

// Number of CUDA-capable devices visible to the runtime; throws through
// PL_CUDA_IS_SUCCESS on API failure.
inline int getGPUCount() {
    int device_count{0};
    PL_CUDA_IS_SUCCESS(cudaGetDeviceCount(&device_count));
    return device_count;
}

// Index of the currently active CUDA device; throws through
// PL_CUDA_IS_SUCCESS on API failure.
inline int getGPUIdx() {
    int device_idx{0};
    PL_CUDA_IS_SUCCESS(cudaGetDevice(&device_idx));
    return device_idx;
}

inline static void deviceReset() { PL_CUDA_IS_SUCCESS(cudaDeviceReset()); }

/**
 * @brief Check whether the given device can run cuQuantum
 * (compute capability 7.0 or newer).
 *
 * @param device_number CUDA device index to query (default 0).
 * @return true if the device's SM major version is >= 7.
 */
static bool isCuQuantumSupported(int device_number = 0) {
    cudaDeviceProp deviceProp{};
    // Check the API status (consistent with the other helpers here):
    // an invalid device index would otherwise leave deviceProp
    // uninitialized and silently return garbage.
    PL_CUDA_IS_SUCCESS(cudaGetDeviceProperties(&deviceProp, device_number));
    return deviceProp.major >= 7;
}

/**
 * @brief Query the compute capability (SM architecture) of a device.
 *
 * @param device_number CUDA device index to query (default 0).
 * @return {major, minor} compute-capability pair.
 */
static std::pair<int, int> getGPUArch(int device_number = 0) {
    cudaDeviceProp deviceProp{};
    // Check the API status (consistent with the other helpers here):
    // an invalid device index would otherwise leave deviceProp
    // uninitialized and silently return garbage.
    PL_CUDA_IS_SUCCESS(cudaGetDeviceProperties(&deviceProp, device_number));
    return std::make_pair(deviceProp.major, deviceProp.minor);
}

// Free device memory in bytes on the current device; throws through
// PL_CUDA_IS_SUCCESS on API failure. The total size is queried but unused.
inline std::size_t getFreeMemorySize() {
    std::size_t free_bytes{0};
    std::size_t total_bytes{0};
    PL_CUDA_IS_SUCCESS(cudaMemGetInfo(&free_bytes, &total_bytes));
    return free_bytes;
}

/**
 * @brief Hash functor for matrices stored as vectors of std::complex.
 *
 * Uses an order-sensitive combiner (boost::hash_combine style). The
 * previous XOR-only combining was order-insensitive and self-cancelling:
 * permutations of the same entries collided, and any matrix made of
 * repeated pairs of identical values hashed to just its size.
 */
struct MatrixHasher {
    template <class Precision = double>
    std::size_t
    operator()(const std::vector<std::complex<Precision>> &matrix) const {
        std::size_t hash_val = matrix.size();
        const std::hash<Precision> hasher{};
        // Fold real then imaginary part of each entry into the running
        // hash; the shifts/golden-ratio constant mix in position info.
        for (const auto &c_val : matrix) {
            hash_val ^= hasher(c_val.real()) + 0x9e3779b9U +
                        (hash_val << 6) + (hash_val >> 2);
            hash_val ^= hasher(c_val.imag()) + 0x9e3779b9U +
                        (hash_val << 6) + (hash_val >> 2);
        }
        return hash_val;
    }
};

} // namespace Pennylane::LightningGPU::Util