From cdcc9adb4474616beb487504bd24862597b1c722 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 21 Mar 2025 15:48:22 -0300 Subject: [PATCH 001/157] Initial commit --- include/nbl/builtin/hlsl/math/morton.hlsl | 36 +++++++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 37 insertions(+) create mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl new file mode 100644 index 0000000000..22c56f8999 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +template) +struct code +{ + using this_t = code; + using U = make_unsigned; + + static this_t create(vector cartesian) + { + //... TODO ... + return this_t(); + } + + //operator+, operator-, operator>>, operator<<, and other bitwise ops + + U value; +}; + +} //namespace morton +} //namespace hlsl +} //namespace nbl + + + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 291ee64bad..14e5fe67db 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -289,6 +289,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl" LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") From 
5fe6c0837ff53d156b9fc0500f3899c6c1c546c6 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Sun, 23 Mar 2025 19:30:10 -0300 Subject: [PATCH 002/157] CHeckpoint before master merge --- examples_tests | 2 +- include/nbl/builtin/hlsl/math/morton.hlsl | 54 ++++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 91dc3afe4c..f2ea51d0b3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 91dc3afe4c66e5bdfd313ec37e7e1863daa52116 +Subproject commit f2ea51d0b3e3388c0f9bae03602ec3b1f658c124 diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 22c56f8999..bf339f4d6f 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -10,12 +10,64 @@ namespace hlsl namespace morton { -template) +namespace impl +{ + +template +struct decode_mask; + +template +struct decode_mask : integral_constant {}; + +template +struct decode_mask : integral_constant::value << Dim) | T(1)> {}; + +template +NBL_CONSTEXPR T decode_mask_v = decode_mask::value; + +// Compile-time still a bit primitive in HLSL, we can support arbitrary-dimensional morton codes in C++ but HLSL's have to be hand coded +template +struct decode_masks_array; + +#ifndef __HLSL_VERSION + +template +struct decode_masks_array +{ + static consteval vector generateMasks() + { + vector masks; + for (auto i = 0u; i < Dim; i++) + { + masks[i] = decode_mask_v << T(i); + } + return masks; + } + + NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); +}; + +#else +template +struct decode_masks_array +{ + NBL_CONSTEXPR_STATIC_INLINE vector Masks = vector(decode_mask_v, decode_mask_v << T(1)); +}; +//template +//NBL_CONSTEXPR_STATIC_INLINE vector decode_masks_array::Masks = vector(decode_mask_v, decode_mask_v << T(1)); +#endif + +} //namespace impl + + +template && 1 < D && D < 5) struct code { using this_t = code; using U = make_unsigned; + + 
static this_t create(vector cartesian) { //... TODO ... From f18b2fa2925cd7f5c5cc94a808cc518b0bd9baaa Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 17:21:37 -0300 Subject: [PATCH 003/157] Checkpoint before merging new type_traits change --- include/nbl/builtin/hlsl/math/morton.hlsl | 56 +++++++++++++++-------- include/nbl/builtin/hlsl/type_traits.hlsl | 6 +++ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index bf339f4d6f..22081e2b7f 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,7 +13,7 @@ namespace morton namespace impl { -template +template struct decode_mask; template @@ -22,15 +22,11 @@ struct decode_mask : integral_constant {}; template struct decode_mask : integral_constant::value << Dim) | T(1)> {}; +#ifndef __HLSL_VERSION + template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -// Compile-time still a bit primitive in HLSL, we can support arbitrary-dimensional morton codes in C++ but HLSL's have to be hand coded -template -struct decode_masks_array; - -#ifndef __HLSL_VERSION - template struct decode_masks_array { @@ -47,31 +43,50 @@ struct decode_masks_array NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); }; -#else -template -struct decode_masks_array -{ - NBL_CONSTEXPR_STATIC_INLINE vector Masks = vector(decode_mask_v, decode_mask_v << T(1)); -}; -//template -//NBL_CONSTEXPR_STATIC_INLINE vector decode_masks_array::Masks = vector(decode_mask_v, decode_mask_v << T(1)); +template +NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; + #endif } //namespace impl +// HLSL only supports up to D = 4, and even then having this in a more generic manner is blocked by a DXC issue targeting SPIR-V +#ifndef __HLSL_VERSION + +#define NBL_HLSL_MORTON_MASKS(U, D) impl::decode_masks< U , D > + +#else + +// Up to D = 4 supported +#define NBL_HLSL_MORTON_MASKS(U, 
D) vector< U , 4 >(impl::decode_mask< U , D >::value,\ + impl::decode_mask< U , D >::value << U (1),\ + impl::decode_mask< U , D >::value << U (2),\ + impl::decode_mask< U , D >::value << U (3)\ + ) +#endif + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it template && 1 < D && D < 5) struct code { using this_t = code; - using U = make_unsigned; - + using U = make_unsigned_t; +#ifdef __HLSL_VERSION + _Static_assert(is_same_v, + "make_signed requires that T shall be a (possibly cv-qualified) " + "integral type or enumeration but not a bool type."); +#endif static this_t create(vector cartesian) { - //... TODO ... - return this_t(); + NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(I, D); + printf("%d %d %d %d", Masks[0], Masks[1], Masks[2], Masks[3]); + this_t foo; + foo.value = U(0); + return foo; } //operator+, operator-, operator>>, operator<<, and other bitwise ops @@ -79,6 +94,9 @@ struct code U value; }; +// Don't forget to delete this macro after usage +#undef NBL_HLSL_MORTON_MASKS + } //namespace morton } //namespace hlsl } //namespace nbl diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 708f643ab0..222dbcdb7c 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -688,6 +688,12 @@ NBL_CONSTEXPR uint64_t extent_v = extent::value; template using make_void_t = typename make_void::type; +template +using make_signed_t = typename make_signed::type; + +template +using make_unsigned_t = typename make_unsigned::type; + template struct conditional_value { From 4ebc555d320cc3e678095d72437e07721dc1441b Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 19:18:49 -0300 Subject: [PATCH 004/157] Works, but throws DXC warning --- 
include/nbl/builtin/hlsl/math/morton.hlsl | 25 +++++++++-------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 22081e2b7f..058bdad862 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -22,11 +22,11 @@ struct decode_mask : integral_constant {}; template struct decode_mask : integral_constant::value << Dim) | T(1)> {}; -#ifndef __HLSL_VERSION - template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +#ifndef __HLSL_VERSION + template struct decode_masks_array { @@ -58,10 +58,11 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; #else // Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask< U , D >::value,\ - impl::decode_mask< U , D >::value << U (1),\ - impl::decode_mask< U , D >::value << U (2),\ - impl::decode_mask< U , D >::value << U (3)\ +// This will throw a DXC warning about the vector being truncated - no way around that +#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask_v< U , D >,\ + impl::decode_mask_v< U , D > << U (1),\ + impl::decode_mask_v< U , D > << U (2),\ + impl::decode_mask_v< U , D > << U (3)\ ) #endif @@ -74,18 +75,12 @@ struct code using this_t = code; using U = make_unsigned_t; -#ifdef __HLSL_VERSION - _Static_assert(is_same_v, - "make_signed requires that T shall be a (possibly cv-qualified) " - "integral type or enumeration but not a bool type."); -#endif - static this_t create(vector cartesian) { - NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(I, D); - printf("%d %d %d %d", Masks[0], Masks[1], Masks[2], Masks[3]); + NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + printf("%u %u %u %u", Masks[0], Masks[1], Masks[2]); this_t foo; - foo.value = U(0); + foo.value = Masks[0]; return foo; } From 55a2ef637ca12c6c35b6f8001db6f619acfc2315 Mon Sep 17 00:00:00 
2001 From: Fletterio Date: Mon, 24 Mar 2025 19:41:14 -0300 Subject: [PATCH 005/157] Added concept for valid morton dimensions --- include/nbl/builtin/hlsl/math/morton.hlsl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 058bdad862..99980284e9 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,6 +13,19 @@ namespace morton namespace impl { +// Valid dimension for a morton code +#ifndef __HLSL_VERSION + +template +NBL_BOOL_CONCEPT MortonDimension = D > 1; + +#else + +template +NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; + +#endif + template struct decode_mask; @@ -69,7 +82,7 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && 1 < D && D < 5) +template && impl::MortonDimension) struct code { using this_t = code; @@ -78,7 +91,6 @@ struct code static this_t create(vector cartesian) { NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - printf("%u %u %u %u", Masks[0], Masks[1], Masks[2]); this_t foo; foo.value = Masks[0]; return foo; From f5162561ee2203aa51c8c600aed225d679c9408d Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 21:28:07 -0300 Subject: [PATCH 006/157] Creation from vector working as intended --- include/nbl/builtin/hlsl/math/morton.hlsl | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 99980284e9..aab8511b95 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -2,6 +2,7 @@ #define 
_NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ #include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" namespace nbl { @@ -88,12 +89,22 @@ struct code using this_t = code; using U = make_unsigned_t; - static this_t create(vector cartesian) + static this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t foo; - foo.value = Masks[0]; - return foo; + NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + const vector unsignedCartesian = bit_cast, vector >(cartesian); + U val = U(0); + [[unroll]] + // We want to interleave the bits of each number in `unsignedCartesian`. We do this by enumerating + // val[0] = bit 0 of unsignedCartesian[0], val[1] = bit 0 of unsignedCartesian[1], ..., val[D-1] = bit 0 of unsignedCartesian[D-1], + // val[D] = bit 1 of unsignedCartesian[0], val[D+1] = bit 1 of unsignedCartesian[1], ..., val[2D-1] = bit 1 of unsignedCartesian[D-1] + // and so on until we get val[BitDwidth - 1] and stop. 
+ for (U i = U(0); i < BitWidth; i++) + { + val |= (unsignedCartesian[i % D] & (U(1) << (i / D))) << (i - (i / D)); + } + this_t retVal = {val}; + return retVal; } //operator+, operator-, operator>>, operator<<, and other bitwise ops From 534d81bfc2ab1136d959a41ecee521990115d7bb Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 26 Mar 2025 13:05:20 -0300 Subject: [PATCH 007/157] Added some extra macro specifiers, vector truncation with no warnings on HLSL side by specializing , a bunch of morton operators --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 8 +- .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 30 +++ include/nbl/builtin/hlsl/math/morton.hlsl | 181 ++++++++++++++++-- 3 files changed, 198 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 3802bd69ea..a93727815b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -40,8 +40,11 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline +#define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr +#define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline +#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_STATIC #define NBL_CONST_MEMBER_FUNC const namespace nbl::hlsl @@ -70,8 +73,11 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static +#define NBL_CONSTEXPR_STATIC_FUNC static #define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline +#define 
NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_CONSTEXPR_STATIC_INLINE_FUNC #define NBL_CONST_MEMBER_FUNC namespace nbl diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index 354937427a..f6ced52db1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -1,6 +1,8 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat/basic.h" + // stuff for C++ #ifndef __HLSL_VERSION #include @@ -92,4 +94,32 @@ struct blake3_hasher::update_impl,Dummy> } #endif } + +// To prevent implicit truncation warnings +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template +struct static_cast_helper, vector > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) + { + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < N; i++) + { + retVal[i] = val[i]; + } + return retVal; + } +}; + +} +} +} + #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index aab8511b95..ecd94ce69e 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -3,6 +3,8 @@ #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" namespace nbl { @@ -15,18 +17,9 @@ namespace impl { // Valid dimension for a morton code -#ifndef __HLSL_VERSION - -template -NBL_BOOL_CONCEPT MortonDimension = D > 1; - -#else - template NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; -#endif - template struct decode_mask; @@ -73,11 +66,12 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; // Up to D = 4 supported // This will throw a DXC warning about the vector being truncated - no way around that -#define NBL_HLSL_MORTON_MASKS(U, 
D) vector< U , 4 >(impl::decode_mask_v< U , D >,\ +// The only way to avoid this atm (until they fix issue 7006 below) is to wrap the whole class in a macro and expand it for each possible value of `D` +#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ impl::decode_mask_v< U , D > << U (1),\ impl::decode_mask_v< U , D > << U (2),\ impl::decode_mask_v< U , D > << U (3)\ - ) + )) #endif @@ -88,25 +82,134 @@ struct code { using this_t = code; using U = make_unsigned_t; + NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION - static this_t create(NBL_CONST_REF_ARG(vector) cartesian) + code() = default; + + // To immediately get compound operators and functional structs in CPP side + code(const I _value) : value(bit_cast(_value)){} + + #endif + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); const vector unsignedCartesian = bit_cast, vector >(cartesian); U val = U(0); + [[unroll]] - // We want to interleave the bits of each number in `unsignedCartesian`. We do this by enumerating - // val[0] = bit 0 of unsignedCartesian[0], val[1] = bit 0 of unsignedCartesian[1], ..., val[D-1] = bit 0 of unsignedCartesian[D-1], - // val[D] = bit 1 of unsignedCartesian[0], val[D+1] = bit 1 of unsignedCartesian[1], ..., val[2D-1] = bit 1 of unsignedCartesian[D-1] - // and so on until we get val[BitDwidth - 1] and stop. 
- for (U i = U(0); i < BitWidth; i++) + for (U dim = 0; dim < U(D); dim++) { - val |= (unsignedCartesian[i % D] & (U(1) << (i / D))) << (i - (i / D)); + [[unroll]] + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer + for (U valBit = dim, coordBit = U(1), shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= 1, shift += U(D) - 1) + { + val |= (unsignedCartesian[dim] & coordBit) << shift; + } } - this_t retVal = {val}; + + this_t retVal; + retVal.value = val; return retVal; } + // CPP can also have a constructor + #ifndef __HLSL_VERSION + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + code(NBL_CONST_REF_ARG(vector) cartesian) + { + *this = create(cartesian); + } + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + explicit operator vector() const noexcept + { + // Definition below, we override `impl::static_cast_helper` to have this conversion in both CPP/HLSL + return _static_cast, this_t>(*this); + } + + #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + 
NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value << bits; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value >> bits; + return retVal; + } + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC + { + this_t allOnes; + // allOnes encodes a cartesian coordinate with all values set to 1 + allOnes.value = (U(1) << D) - U(1); + // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 + return operator~() + allOnes; + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + + //operator+, operator-, operator>>, operator<<, and other bitwise ops U value; @@ -116,6 +219,44 @@ struct code #undef NBL_HLSL_MORTON_MASKS } //namespace morton + +namespace impl +{ + +template +struct static_cast_helper, morton::code > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) + { + using U = typename morton::code::U; + NBL_CONSTEXPR_STATIC U BitWidth = morton::code::BitWidth; + // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits + // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift + // at the end to preserve sign. + // To this end, we first notice that the coordinate/dimension of index `dim` gets + // `bits(dim) = ceil((BitWidth - dim)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not + // divide `BitWidth perfectly`). 
+ // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones + // by shifting everything `BitWidth - bits(dim)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. + + vector cartesian; + for (U dim = 0; dim < U(D); dim++) + { + const U bitsDim = (BitWidth - dim + U(D) - 1) / U(D); // <- this computes the ceil + U coordVal = U(0); + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer + for (U valBit = dim, coordBit = U(1) << dim, shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= U(D), shift += U(D) - 1) + { + coordVal |= (val.value & coordBit) << (BitWidth - bitsDim - shift); + } + cartesian[dim] = (bit_cast(coordVal) >> (BitWidth - bitsDim)); + } + return cartesian; + } +}; + +} // namespace impl + } //namespace hlsl } //namespace nbl From 625639031599374d44e8f8a6a79570471f0f4a9c Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 26 Mar 2025 14:53:42 -0300 Subject: [PATCH 008/157] Add safe copile-time vector truncation and some function specifiers for both cpp and hlsl --- include/nbl/builtin/hlsl/cpp_compat.hlsl | 3 + include/nbl/builtin/hlsl/cpp_compat/basic.h | 66 +++++++++---------- .../hlsl/cpp_compat/impl/vector_impl.hlsl | 35 ++++++++++ .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 30 --------- include/nbl/builtin/hlsl/math/morton.hlsl | 34 ---------- src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 72 insertions(+), 97 deletions(-) create mode 100644 include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index 175a3e76c1..cb06447aa1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -6,4 +6,7 @@ #include #include +// Had to push some stuff here to avoid circular dependencies +#include + #endif \ No newline at end of file 
diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index a93727815b..41e920e41e 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -2,35 +2,7 @@ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_BASIC_INCLUDED_ #include - -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} +#include #ifndef __HLSL_VERSION #include @@ -43,8 +15,7 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline -#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_FUNC -#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_STATIC +#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const namespace nbl::hlsl @@ -68,6 +39,7 @@ namespace nbl::hlsl #else + #define ARROW .arrow(). 
#define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC @@ -77,8 +49,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_CONSTEXPR_STATIC_INLINE_FUNC -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC namespace nbl { @@ -106,4 +77,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ +return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl new file mode 100644 index 0000000000..524d1fa45e --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl @@ -0,0 +1,35 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ + +#include +#include +#include + +// To prevent implicit truncation warnings +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(N <= M) +struct static_cast_helper, vector NBL_PARTIAL_REQ_BOT(N <= M) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) + { + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < N; i++) + { + retVal[i] = val[i]; + } + return retVal; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index f6ced52db1..354937427a 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ 
b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -1,8 +1,6 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat/basic.h" - // stuff for C++ #ifndef __HLSL_VERSION #include @@ -94,32 +92,4 @@ struct blake3_hasher::update_impl,Dummy> } #endif } - -// To prevent implicit truncation warnings -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ - -template -struct static_cast_helper, vector > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) - { - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < N; i++) - { - retVal[i] = val[i]; - } - return retVal; - } -}; - -} -} -} - #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index ecd94ce69e..50cf78caae 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -32,49 +32,15 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -#ifndef __HLSL_VERSION - -template -struct decode_masks_array -{ - static consteval vector generateMasks() - { - vector masks; - for (auto i = 0u; i < Dim; i++) - { - masks[i] = decode_mask_v << T(i); - } - return masks; - } - - NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); -}; - -template -NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; - -#endif - } //namespace impl -// HLSL only supports up to D = 4, and even then having this in a more generic manner is blocked by a DXC issue targeting SPIR-V -#ifndef __HLSL_VERSION - -#define NBL_HLSL_MORTON_MASKS(U, D) impl::decode_masks< U , D > - -#else - // Up to D = 4 supported -// This will throw a DXC warning about the vector being truncated - no way around that -// The only way to avoid this atm (until they fix issue 7006 below) is to wrap the whole class in a macro and expand it for each possible value of 
`D` #define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ impl::decode_mask_v< U , D > << U (1),\ impl::decode_mask_v< U , D > << U (2),\ impl::decode_mask_v< U , D > << U (3)\ )) -#endif - // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it template && impl::MortonDimension) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 2e68d1fdf7..fa548e210a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -248,6 +248,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/vector_impl.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 246cefc422e8ef7b36cd22c90a1f695d643c3b45 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:44:44 -0300 Subject: [PATCH 009/157] Morton class done! 
--- include/nbl/builtin/hlsl/math/morton.hlsl | 241 +++++++++++++++++++--- 1 file changed, 215 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 50cf78caae..dfe53c3446 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -68,18 +68,17 @@ struct code */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); const vector unsignedCartesian = bit_cast, vector >(cartesian); U val = U(0); [[unroll]] - for (U dim = 0; dim < U(D); dim++) + for (U coord = 0; coord < U(D); coord++) { [[unroll]] - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer - for (U valBit = dim, coordBit = U(1), shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= 1, shift += U(D) - 1) + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer + for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) { - val |= (unsignedCartesian[dim] & coordBit) << shift; + val |= (unsignedCartesian[coord] & coordBit) << shift; } } @@ -112,6 +111,68 @@ struct code #endif + // --------------------------------------------------------- AUX METHODS ------------------------------------------------------------------- + + /** + * @brief Extracts a single coordinate + * + * @param [in] coord The coordinate to extract + */ + NBL_CONSTEXPR_INLINE_FUNC I getCoordinate(uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits + // that actually indicated sign. 
Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift + // at the end to preserve sign. + // To this end, we first notice that the coordinate of index `coord` gets + // `bits(coord) = ceil((BitWidth - coord)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not + // divide `BitWidth perfectly`). + // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones + // by shifting everything `BitWidth - bits(coord)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. + + const U bitsCoord = BitWidth / U(D) + ((coord < BitWidth % D) ? U(1) : U(0)); // <- this computes the ceil + U coordVal = U(0); + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer + [[unroll]] + for (U valBitIdx = U(coord), coordBit = U(1) << U(coord), shift = U(coord); valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= U(D), shift += U(D) - 1) + { + coordVal |= (value & coordBit) << (BitWidth - bitsCoord - shift); + } + return bit_cast(coordVal) >> (BitWidth - bitsCoord); + } + + /** + * @brief Returns an element of type U with the highest bit of the number encoded in `coord` set to its right value, and all other bits set to 0 + * + * @param [in] coord The coordinate whose highest bit we want to get + */ + /* + NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? 
U(0) : U(1)); + // This is the index of that bit as an index in the encoded value + const U shift = coordHighestBitIdx * U(D) + U(coord); + return value & (U(1) << shift); + } + */ + + /** + * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord + * (for each value) has its highest bit set to 1. + * + * @param [in] coord The coordinate whose highest bit we want to get + */ + NBL_CONSTEXPR_INLINE_FUNC U logicalOrHighestBits(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); + // This is the index of that bit as an index in the encoded value + const U shift = coordHighestBitIdx * U(D) + U(coord); + return (value | rhs.value) & (U(1) << shift); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -174,9 +235,153 @@ struct code // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + this_t retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + { + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` 
but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. `static_cast>(glm::ivec3(1,0,0))` + retVal.value |= ((value | (~Masks[coord])) + (rhs.value & Masks[coord])) & Masks[coord]; + } + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + this_t retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + { + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + retVal.value |= ((value & Masks[coord]) - (rhs.value & Masks[coord])) & Masks[coord]; + } + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC bool operator!() NBL_CONST_MEMBER_FUNC + { + return value.operator!(); + } - //operator+, operator-, operator>>, operator<<, and other bitwise ops + NBL_CONSTEXPR_INLINE_FUNC bool coordEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + return (value & Masks[coord]) == (rhs.value & Masks[coord]); + } + + NBL_CONSTEXPR_INLINE_FUNC vector operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + vector retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + retVal[coord] = coordEquals(rhs, coord); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC bool allEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordNotEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return !coordEquals(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC vector operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + vector retVal; + [[unroll]] + 
for (uint16_t coord = 0; coord < D; coord++) + retVal[coord] = coordNotEquals(rhs, coord); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return ! allEqual(rhs); + } + + + + template + NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + Comparison comparison; + OppositeComparison oppositeComparison; + + // When unsigned, bit representation is the same but with 0s inbetween bits. In particular, we can still use unsigned comparison + #ifndef __HLSL_VERSION + if constexpr (is_unsigned_v) + #else + if (is_unsigned_v) + #endif + { + return comparison(value & Masks[coord], rhs.value & Masks[coord]); + } + // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit + else + { + // I will give an example for the case of `Comparison` being `functional::less`, but other cases are similar + // If both are negative (both bits set to 1) then `x < y` iff `z > w` when `z,w` are the bit representations of `x,y` as unsigned + // If this is nonnegative and rhs is negative, it should return false. Since in this case `highestBit = 0` and `rhsHighestBit = 1` this + // is the same as doing `z > w` again + // If this is negative and rhs is nonnegative, it should return true. But in this case we have `highestBit = 1` and `rhsHighestBit = 0` + // so again we can just return `z > w`. + // All three cases end up in the same expression. + if (logicalOrHighestBits(rhs, coord)) + return oppositeComparison(value & Masks[coord], rhs.value & Masks[coord]); + // If neither of them have their highest bit set, both are nonnegative. 
Therefore, we can return the unsigned comparison + else + return comparison(value & Masks[coord], rhs.value & Masks[coord]); + } + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, greater >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, greater_equal >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, less >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, less_equal >(rhs, coord); + } + + #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ + { \ + vector retVal; \ + [[unroll]] \ + for (uint16_t coord = 0; coord < D; coord++) \ + retVal[coord] = COMPARISON (rhs, coord); \ + return retVal; \ + } + + DEFINE_OPERATOR(< , coordLessThan); + DEFINE_OPERATOR(<= , coordLessThanEquals); + DEFINE_OPERATOR(> , coordGreaterThan); + DEFINE_OPERATOR(>= , coordGreaterThanEquals); U value; }; @@ -186,6 +391,7 @@ struct code } //namespace morton +// Still in nbl::hlsl we can go to nbl::hlsl::impl and specialize the `static_cast_helper` namespace impl { @@ -194,28 +400,11 @@ struct static_cast_helper, morton::code > { NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) { - using U = typename morton::code::U; - NBL_CONSTEXPR_STATIC U BitWidth = morton::code::BitWidth; - // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits - // that actually indicated sign. 
Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift - // at the end to preserve sign. - // To this end, we first notice that the coordinate/dimension of index `dim` gets - // `bits(dim) = ceil((BitWidth - dim)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not - // divide `BitWidth perfectly`). - // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones - // by shifting everything `BitWidth - bits(dim)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. - vector cartesian; - for (U dim = 0; dim < U(D); dim++) + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) { - const U bitsDim = (BitWidth - dim + U(D) - 1) / U(D); // <- this computes the ceil - U coordVal = U(0); - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer - for (U valBit = dim, coordBit = U(1) << dim, shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= U(D), shift += U(D) - 1) - { - coordVal |= (val.value & coordBit) << (BitWidth - bitsDim - shift); - } - cartesian[dim] = (bit_cast(coordVal) >> (BitWidth - bitsDim)); + cartesian[coord] = val.getCoordinate(coord); } return cartesian; } From 1c7f7911e416c8ec42ba3055b9da9a9da900d23f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:48:35 -0300 Subject: [PATCH 010/157] Remove some leftover commented code --- include/nbl/builtin/hlsl/math/morton.hlsl | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index dfe53c3446..153ec08bf0 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -140,23 +140,6 @@ struct code return bit_cast(coordVal) >> (BitWidth - bitsCoord); } - /** - * @brief Returns an element of type U 
with the highest bit of the number encoded in `coord` set to its right value, and all other bits set to 0 - * - * @param [in] coord The coordinate whose highest bit we want to get - */ - /* - NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); - // This is the index of that bit as an index in the encoded value - const U shift = coordHighestBitIdx * U(D) + U(coord); - return value & (U(1) << shift); - } - */ - /** * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord * (for each value) has its highest bit set to 1. From 508879948064ff01c05a9e1f2166d2261c17697f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:56:57 -0300 Subject: [PATCH 011/157] Remove leaking macro --- include/nbl/builtin/hlsl/math/morton.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 153ec08bf0..4dc05738b6 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -366,6 +366,8 @@ struct code DEFINE_OPERATOR(> , coordGreaterThan); DEFINE_OPERATOR(>= , coordGreaterThanEquals); + #undef DEFINE_OPERATOR + U value; }; From e25a35cce8f0554baf98173f9cc1d1dd93629042 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 28 Mar 2025 20:16:00 -0300 Subject: [PATCH 012/157] Bugfixes with arithmetic --- include/nbl/builtin/hlsl/math/morton.hlsl | 108 +++++++++++++--------- 1 file changed, 63 insertions(+), 45 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 4dc05738b6..89d1a99749 100644 --- 
a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -57,7 +57,7 @@ struct code code() = default; // To immediately get compound operators and functional structs in CPP side - code(const I _value) : value(bit_cast(_value)){} + code(const U _value) : value(_value) {} #endif @@ -69,7 +69,7 @@ struct code NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { const vector unsignedCartesian = bit_cast, vector >(cartesian); - U val = U(0); + this_t retVal = { U(0) }; [[unroll]] for (U coord = 0; coord < U(D); coord++) @@ -78,12 +78,10 @@ struct code // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) { - val |= (unsignedCartesian[coord] & coordBit) << shift; + retVal.value |= (unsignedCartesian[coord] & coordBit) << shift; } } - this_t retVal; - retVal.value = val; return retVal; } @@ -141,48 +139,43 @@ struct code } /** - * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord - * (for each value) has its highest bit set to 1. + * @brief Returns an element of type U by extracting only the highest bit (of the bits used to encode `coord`) * - * @param [in] coord The coordinate whose highest bit we want to get + * @param [in] coord The coordinate whose highest bit we want to extract. 
*/ - NBL_CONSTEXPR_INLINE_FUNC U logicalOrHighestBits(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC { // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these // bits is `bits(coord) - 1` const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); // This is the index of that bit as an index in the encoded value const U shift = coordHighestBitIdx * U(D) + U(coord); - return (value | rhs.value) & (U(1) << shift); + return value & (U(1) << shift); } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value & rhs.value; + this_t retVal = { value & rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value | rhs.value; + this_t retVal = { value | rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value ^ rhs.value; + this_t retVal = { value ^ rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = ~value; + this_t retVal = { ~value }; return retVal; } @@ -191,15 +184,13 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value << bits; + this_t retVal = { value << U(bits) }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value >> bits; + this_t retVal = { value >> U(bits) }; 
return retVal; } @@ -209,19 +200,20 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC { - this_t allOnes; // allOnes encodes a cartesian coordinate with all values set to 1 - allOnes.value = (U(1) << D) - U(1); + const static this_t allOnes = { (U(1) << D) - U(1) }; // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 return operator~() + allOnes; } // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + // CHANGED FOR DEBUG: REMEMBER TO CHANGE BACK + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal; + this_t retVal = { U(0) }; [[unroll]] for (uint16_t coord = 0; coord < D; coord++) { @@ -240,7 +232,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal; + this_t retVal = { U(0) }; [[unroll]] for (uint16_t coord = 0; coord < D; coord++) { @@ -293,17 +285,15 @@ struct code NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return ! allEqual(rhs); + return !allEqual(rhs); } - - - template + template NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); Comparison comparison; - OppositeComparison oppositeComparison; + OnSignMismatch onSignMismatch; // When unsigned, bit representation is the same but with 0s inbetween bits. 
In particular, we can still use unsigned comparison #ifndef __HLSL_VERSION @@ -317,39 +307,67 @@ struct code // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit else { - // I will give an example for the case of `Comparison` being `functional::less`, but other cases are similar - // If both are negative (both bits set to 1) then `x < y` iff `z > w` when `z,w` are the bit representations of `x,y` as unsigned - // If this is nonnegative and rhs is negative, it should return false. Since in this case `highestBit = 0` and `rhsHighestBit = 1` this - // is the same as doing `z > w` again - // If this is negative and rhs is nonnegative, it should return true. But in this case we have `highestBit = 1` and `rhsHighestBit = 0` - // so again we can just return `z > w`. - // All three cases end up in the same expression. - if (logicalOrHighestBits(rhs, coord)) - return oppositeComparison(value & Masks[coord], rhs.value & Masks[coord]); - // If neither of them have their highest bit set, both are nonnegative. Therefore, we can return the unsigned comparison + // I will give an example for `operator<` but the same reasoning holds for all others. Some abuse of notation but hopefully it's clear. + + // If `this[coord] >= 0` and `rhs[coord] < 0` then `this[coord] < rhs[coord]` returns false. Notice that in this case, the highest bit of + // `value` (of the bits representing the number encoded in `coord`) is `0`, while the highest bit for rhs is `1`. + // Similarly, if `this[coord] < 0` and `rhs[coord] >= 0` then `this[coord] < rhs[coord]` returns true, and the highest bit situation is inverted. + // This means that if the signs of `this[coord]` and `rhs[coord]` are not equal, the result depends on the sign of `this[coord]`. + // What that result should be is controlled by `OnSignMismatch`. 
+ // Finally, notice that if only one of those bits is set to 1, then the `xor` of that highest bit yields 1 as well + const U highestBit = extractHighestBit(coord); + const U rhsHighestBit = rhs.extractHighestBit(coord); + if (highestBit ^ rhsHighestBit) + return onSignMismatch(highestBit); + // If both are nonnegative, then we can just use the comparison as it comes. + // If both are negative, it just so happens that applying the same operator to their unsigned bitcasted representations yields the same result. + // For `operator<`, for example, consider two negative numbers. Starting from the MSB (we know it's `1` for both in this case) and moving to the right, + // consider what happens when we encounter the first bit where they mismatch: the one with a `0` at position `k` (by position I mean counted from the + // left, starting at 0) is adding at most `2^k - 1` in the lowest bits, while the one with a `1` is adding exactly `2^k`. This means that the one + // with a 0 is "more negative". 
else return comparison(value & Masks[coord], rhs.value & Masks[coord]); } } + + struct OnSignMismatchLessThan + { + // On a sign mismatch, `thisrhs` is true if this is non-negative (`highestBit` set to `0`) and false otherwise + // Therefore since it takes a number with only the highest bit set we only have to return the opposite of whether there is in fact a bit set + bool operator()(U highestBit) + { + return !bool(highestBit); + } + }; NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, greater >(rhs, coord); + return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, greater_equal >(rhs, coord); + return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, less >(rhs, coord); + return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, less_equal >(rhs, coord); + return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); } #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ From 0d9dd4afa6190dd029cf0e8e311ec132a818ec4a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 1 Apr 2025 15:25:38 -0300 Subject: [PATCH 013/157] Checkpoint, have to check why vector compat isn't working --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 6 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 34 ++ .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 13 + .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 153 +++++++ 
include/nbl/builtin/hlsl/functional.hlsl | 34 +- include/nbl/builtin/hlsl/math/morton.hlsl | 423 ------------------ include/nbl/builtin/hlsl/morton.hlsl | 72 +++ .../builtin/hlsl/spirv_intrinsics/core.hlsl | 3 +- src/nbl/builtin/CMakeLists.txt | 4 +- 9 files changed, 311 insertions(+), 431 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/uint64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl create mode 100644 include/nbl/builtin/hlsl/morton.hlsl diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 41e920e41e..77d9d887bd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -17,6 +17,7 @@ #define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl { @@ -49,7 +50,8 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC +#define NBL_IF_CONSTEXPR(...) 
if (__VA_ARGS__) namespace nbl { @@ -100,7 +102,7 @@ struct static_cast_helper template NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) { -return impl::static_cast_helper::cast(v); + return impl::static_cast_helper::cast(v); } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 1d43d9b14a..7b8726566f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -103,6 +103,10 @@ template struct nMax_helper; template struct nClamp_helper; +template +struct addCarry_helper; +template +struct subBorrow_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -162,6 +166,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -599,6 +606,33 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + 
retVal.carry = retVal.result < operand1 ? T(1) : T(0); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = static_cast(operand1 - operand2); + retVal.borrow = operand1 >= operand2 ? T(0) : T(1); + return retVal; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index b695c4b82b..1f1957dbbd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -217,6 +217,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_INLINE_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_INLINE_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl new file mode 100644 index 0000000000..3178159794 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -0,0 +1,153 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_uint64_t +{ + using storage_t = vector; + using this_t = 
emulated_uint64_t; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_uint64_t() = default; + + // To immediately get compound operators and functional structs in CPP side + explicit emulated_uint64_t(const storage_t _data) : data(_data) {} + + #endif + + /** + * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + this_t retVal; + retVal.data = _data; + return retVal; + } + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data & rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data | rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data ^ rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(~data); + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, 
rhs.data.y); + const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const this_t retVal = create(addResult); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); + const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const this_t retVal = create(subResult); + return retVal; + } + +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + const uint32_t higherBitsMask = ~uint32_t(0) << shift; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> _bits, ((operand.data.x & lowerBitsMask) << shift) | (operand.data.y >> _bits) 
}; + return emulated_uint64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +} //namespace nbl +} //namespace hlsl + + + +#endif diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..3cf24193a4 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -165,7 +165,7 @@ COMPOUND_ASSIGN(divides) // ----------------- End of compound assignment ops ---------------- -// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD +// Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs struct minimum @@ -200,13 +200,39 @@ struct ternary_operator { using type_t = T; - T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { return condition ? 
lhs : rhs; } }; -} -} +template +struct left_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } +}; + +template +struct arithmetic_right_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } +}; + +// Declare template, but left unimplemented by default +template +struct logical_right_shift_operator; + +} //namespace nbl +} //namespace hlsl #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl deleted file mode 100644 index 89d1a99749..0000000000 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ /dev/null @@ -1,423 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ - -#include "nbl/builtin/hlsl/concepts/core.hlsl" -#include "nbl/builtin/hlsl/bit.hlsl" -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/functional.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace morton -{ - -namespace impl -{ - -// Valid dimension for a morton code -template -NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; - -template -struct decode_mask; - -template -struct decode_mask : integral_constant {}; - -template -struct decode_mask : integral_constant::value << Dim) | T(1)> {}; - -template -NBL_CONSTEXPR T decode_mask_v = decode_mask::value; - -} //namespace impl - -// Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ - impl::decode_mask_v< U , D > << U (1),\ - impl::decode_mask_v< U , D > << U (2),\ - impl::decode_mask_v< U , D > << U (3)\ - )) - -// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 -// In particular, `Masks` should be a `const static` 
member field instead of appearing in every method using it -template && impl::MortonDimension) -struct code -{ - using this_t = code; - using U = make_unsigned_t; - NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - #ifndef __HLSL_VERSION - - code() = default; - - // To immediately get compound operators and functional structs in CPP side - code(const U _value) : value(_value) {} - - #endif - - /** - * @brief Creates a Morton code from a set of cartesian coordinates - * - * @param [in] cartesian Coordinates to encode - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) - { - const vector unsignedCartesian = bit_cast, vector >(cartesian); - this_t retVal = { U(0) }; - - [[unroll]] - for (U coord = 0; coord < U(D); coord++) - { - [[unroll]] - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer - for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) - { - retVal.value |= (unsignedCartesian[coord] & coordBit) << shift; - } - } - - return retVal; - } - - // CPP can also have a constructor - #ifndef __HLSL_VERSION - - /** - * @brief Creates a Morton code from a set of cartesian coordinates - * - * @param [in] cartesian Coordinates to encode - */ - code(NBL_CONST_REF_ARG(vector) cartesian) - { - *this = create(cartesian); - } - - /** - * @brief Decodes this Morton code back to a set of cartesian coordinates - */ - explicit operator vector() const noexcept - { - // Definition below, we override `impl::static_cast_helper` to have this conversion in both CPP/HLSL - return _static_cast, this_t>(*this); - } - - #endif - - // --------------------------------------------------------- AUX METHODS 
------------------------------------------------------------------- - - /** - * @brief Extracts a single coordinate - * - * @param [in] coord The coordinate to extract - */ - NBL_CONSTEXPR_INLINE_FUNC I getCoordinate(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits - // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift - // at the end to preserve sign. - // To this end, we first notice that the coordinate of index `coord` gets - // `bits(coord) = ceil((BitWidth - coord)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not - // divide `BitWidth perfectly`). - // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones - // by shifting everything `BitWidth - bits(coord)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. - - const U bitsCoord = BitWidth / U(D) + ((coord < BitWidth % D) ? U(1) : U(0)); // <- this computes the ceil - U coordVal = U(0); - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer - [[unroll]] - for (U valBitIdx = U(coord), coordBit = U(1) << U(coord), shift = U(coord); valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= U(D), shift += U(D) - 1) - { - coordVal |= (value & coordBit) << (BitWidth - bitsCoord - shift); - } - return bit_cast(coordVal) >> (BitWidth - bitsCoord); - } - - /** - * @brief Returns an element of type U by extracting only the highest bit (of the bits used to encode `coord`) - * - * @param [in] coord The coordinate whose highest bit we want to extract. 
- */ - NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); - // This is the index of that bit as an index in the encoded value - const U shift = coordHighestBitIdx * U(D) + U(coord); - return value & (U(1) << shift); - } - - // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value & rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value | rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value ^ rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = { ~value }; - return retVal; - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value << U(bits) }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value >> U(bits) }; - return retVal; - } - - #endif - - // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC - { - // allOnes encodes a cartesian coordinate with all values set to 1 - const static this_t allOnes = { (U(1) << D) - U(1) }; - // 
Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 - return operator~() + allOnes; - } - - // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- - - // CHANGED FOR DEBUG: REMEMBER TO CHANGE BACK - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal = { U(0) }; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - // put 1 bits everywhere in the bits the current axis is not using - // then extract just the axis bits for the right hand coordinate - // carry-1 will propagate the bits across the already set bits - // then clear out the bits not belonging to current axis - // Note: Its possible to clear on `this` and fill on `rhs` but that will - // disable optimizations, we expect the compiler to optimize a lot if the - // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` - retVal.value |= ((value | (~Masks[coord])) + (rhs.value & Masks[coord])) & Masks[coord]; - } - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal = { U(0) }; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate - retVal.value |= ((value & Masks[coord]) - (rhs.value & Masks[coord])) & Masks[coord]; - } - return retVal; - } - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC bool operator!() NBL_CONST_MEMBER_FUNC - { - return value.operator!(); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - return (value & Masks[coord]) == (rhs.value & Masks[coord]); - } - - NBL_CONSTEXPR_INLINE_FUNC vector operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - vector retVal; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - retVal[coord] = coordEquals(rhs, coord); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC bool allEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return value == rhs.value; - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordNotEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return !coordEquals(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC vector operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - vector retVal; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - retVal[coord] = coordNotEquals(rhs, coord); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) 
NBL_CONST_MEMBER_FUNC - { - return !allEqual(rhs); - } - - template - NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - Comparison comparison; - OnSignMismatch onSignMismatch; - - // When unsigned, bit representation is the same but with 0s inbetween bits. In particular, we can still use unsigned comparison - #ifndef __HLSL_VERSION - if constexpr (is_unsigned_v) - #else - if (is_unsigned_v) - #endif - { - return comparison(value & Masks[coord], rhs.value & Masks[coord]); - } - // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit - else - { - // I will give an example for `operator<` but the same reasoning holds for all others. Some abuse of notation but hopefully it's clear. - - // If `this[coord] >= 0` and `rhs[coord] < 0` then `this[coord] < rhs[coord]` returns false. Notice that in this case, the highest bit of - // `value` (of the bits representing the number encoded in `coord`) is `0`, while the highest bit for rhs is `1`. - // Similarly, if `this[coord] < 0` and `rhs[coord] >= 0` then `this[coord] < rhs[coord]` returns true, and the highest bit situation is inverted. - // This means that if the signs of `this[coord]` and `rhs[coord]` are not equal, the result depends on the sign of `this[coord]`. - // What that result should be is controlled by `OnSignMismatch`. - // Finally, notice that if only one of those bits is set to 1, then the `xor` of that highest bit yields 1 as well - const U highestBit = extractHighestBit(coord); - const U rhsHighestBit = rhs.extractHighestBit(coord); - if (highestBit ^ rhsHighestBit) - return onSignMismatch(highestBit); - // If both are nonnegative, then we can just use the comparison as it comes. - // If both are negative, it just so happens that applying the same operator to their unsigned bitcasted representations yields the same result. 
- // For `operator<`, for example, consider two negative numbers. Starting from the MSB (we know it's `1` for both in this case) and moving to the right, - // consider what happens when we encounter the first bit where they mismatch: the one with a `0` at position `k` (by position I mean counted from the - // left, starting at 0) is adding at most `2^k - 1` in the lowest bits, while the one with a `1` is adding exactly `2^k`. This means that the one - // with a 0 is "more negative". - else - return comparison(value & Masks[coord], rhs.value & Masks[coord]); - } - } - - struct OnSignMismatchLessThan - { - // On a sign mismatch, `thisrhs` is true if this is non-negative (`highestBit` set to `0`) and false otherwise - // Therefore since it takes a number with only the highest bit set we only have to return the opposite of whether there is in fact a bit set - bool operator()(U highestBit) - { - return !bool(highestBit); - } - }; - - NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); - } - - #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ - { \ - vector retVal; \ - [[unroll]] \ - for (uint16_t coord = 0; coord < D; coord++) \ - retVal[coord] = COMPARISON (rhs, 
coord); \ - return retVal; \ - } - - DEFINE_OPERATOR(< , coordLessThan); - DEFINE_OPERATOR(<= , coordLessThanEquals); - DEFINE_OPERATOR(> , coordGreaterThan); - DEFINE_OPERATOR(>= , coordGreaterThanEquals); - - #undef DEFINE_OPERATOR - - U value; -}; - -// Don't forget to delete this macro after usage -#undef NBL_HLSL_MORTON_MASKS - -} //namespace morton - -// Still in nbl::hlsl we can go to nbl::hlsl::impl and specialize the `static_cast_helper` -namespace impl -{ - -template -struct static_cast_helper, morton::code > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) - { - vector cartesian; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - cartesian[coord] = val.getCoordinate(coord); - } - return cartesian; - } -}; - -} // namespace impl - -} //namespace hlsl -} //namespace nbl - - - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl new file mode 100644 index 0000000000..89eddf8675 --- /dev/null +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -0,0 +1,72 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +namespace impl +{ + +// Valid dimension for a morton code +template +NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; + +// Masks + +template +struct decode_mask; + +template +struct decode_mask : integral_constant {}; + +template +struct decode_mask : integral_constant::value << Dim) | T(1)> {}; + +template +NBL_CONSTEXPR T decode_mask_v = decode_mask::value; + +// Decode masks are different for each dimension + +template +struct MortonDecoder; + +} //namespace impl + +// Up to D = 4 supported +#define 
NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ + impl::decode_mask_v< U , D > << U (1),\ + impl::decode_mask_v< U , D > << U (2),\ + impl::decode_mask_v< U , D > << U (3)\ + )) + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth>16), conditional_t<(TotalBitWidth>32), _uint64_t, uint32_t>, uint16_t> ; + + + storage_t value; +}; + +// Don't forget to delete this macro after usage +#undef NBL_HLSL_MORTON_MASKS + +} //namespace morton +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d351cab07d..d8d90de726 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -4,13 +4,14 @@ #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#include + #ifdef __HLSL_VERSION // TODO: AnastZIuk fix public search paths so we don't choke #include "spirv/unified1/spirv.hpp" #include #include #include -#include namespace nbl { diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index fa548e210a..a11a26d69a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -214,6 +214,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/emulated/uint64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable @@ -291,7 +292,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl" LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") @@ -368,5 +368,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") +#morton codes +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 89d2bf2a5d9fab347850babe31fdc8f0a95c64f6 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 2 Apr 2025 16:19:20 -0300 Subject: [PATCH 014/157] Refactor morton class, get new conversion running --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 8 +- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 11 ++ include/nbl/builtin/hlsl/morton.hlsl | 175 +++++++++++++++++- 3 files changed, 186 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 7b8726566f..92fc9e929b 100644 --- 
a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -611,11 +611,11 @@ template struct addCarry_helper { using return_t = spirv::AddCarryOutput; - NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + constexpr static inline return_t __call(const T operand1, const T operand2) { return_t retVal; retVal.result = operand1 + operand2; - retVal.carry = retVal.result < operand1 ? T(1) : T(0); + retVal.carry = T(retVal.result < operand1); return retVal; } }; @@ -624,11 +624,11 @@ template struct subBorrow_helper { using return_t = spirv::SubBorrowOutput; - NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + constexpr static inline return_t __call(const T operand1, const T operand2) { return_t retVal; retVal.result = static_cast(operand1 - operand2); - retVal.borrow = operand1 >= operand2 ? T(0) : T(1); + retVal.borrow = T(operand1 < operand2); return retVal; } }; diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index 3178159794..c4f1f1ef1b 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -40,6 +40,17 @@ struct emulated_uint64_t return retVal; } + /** + * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `uint64` being emulated + * @param [in] lo Lowest 32 bits of the `uint64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return create(storage_t(hi, lo)); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC diff --git a/include/nbl/builtin/hlsl/morton.hlsl 
b/include/nbl/builtin/hlsl/morton.hlsl index 89eddf8675..d4ada29d70 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -1,11 +1,12 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" namespace nbl { @@ -35,11 +36,177 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- + // Decode masks are different for each dimension +// Decoder works with unsigned, cast to sign depends on the Morton class +// Bit width checks happen in Morton class as well -template +template struct MortonDecoder; +// Specializations for lack of uint64_t + +template +struct MortonDecoder<2, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), // Groups bits by 1 on, 1 off + emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), // Groups bits by 2 on, 2 off + emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), // Groups bits by 4 on, 4 off + emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), // Groups bits by 8 on, 8 off + emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), // 
Groups bits by 16 on, 16 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) };// Groups bits by 32 on, 32 off + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<3, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), + conditional_value<(Bits <= 6), uint16_t, uint16_t(2), + conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), // Groups bits by 1 on, 2 off (also only considers 21 bits) + emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), // Groups bits by 3 on, 6 off + emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), // Groups bits by 6 on, 12 off + emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), // Groups bits by 12 on, 24 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + // First iteration is special + decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & DecodeMasks[1]; + [[unroll]] + for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 2]; + } + return _static_cast(decoded.data.y); + } +}; + +template 
+struct MortonDecoder<4, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), // Groups bits by 1 on, 3 off + emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), // Groups bits by 2 on, 6 off + emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), // Groups bits by 4 on, 12 off + emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), // Groups bits by 8 on, 24 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) };// Groups bits by 16 on, 48 off + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[6] = { _static_cast(0x5555555555555555), // Groups bits by 1 on, 1 off + _static_cast(0x3333333333333333), // Groups bits by 2 on, 2 off + _static_cast(0x0F0F0F0F0F0F0F0F), // Groups bits by 4 on, 4 off + _static_cast(0x00FF00FF00FF00FF), // Groups bits by 8 on, 8 off + _static_cast(0x0000FFFF0000FFFF), // Groups bits by 16 on, 16 off + _static_cast(0x00000000FFFFFFFF) };// Groups bits by 32 on, 32 off + + encode_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 1; i < 
MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), + conditional_value<(Bits <= 6), uint16_t, uint16_t(2), + conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1249249249249249), // Groups bits by 1 on, 2 off (also only considers 21 bits) + _static_cast(0x01C0E070381C0E07), // Groups bits by 3 on, 6 off + _static_cast(0x0FC003F000FC003F), // Groups bits by 6 on, 12 off + _static_cast(0x0000FFF000000FFF), // Groups bits by 12 on, 24 off + _static_cast(0x0000000000FFFFFF) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + + encode_t decoded = encodedValue & DecodeMasks[0]; + // First iteration is special + decoded = (decoded | (decoded >> 2) | (decoded >> 4)) & DecodeMasks[1]; + [[unroll]] + for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 2]; + } + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1111111111111111), // Groups bits by 1 on, 3 off + _static_cast(0x0303030303030303), // Groups bits by 2 on, 6 off + _static_cast(0x000F000F000F000F), // Groups bits by 4 on, 12 off + _static_cast(0x000000FF000000FF), // Groups bits by 8 on, 24 off + _static_cast(0x000000000000FFFF) };// 
Groups bits by 16 on, 48 off + + encode_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded); + } +}; + } //namespace impl // Up to D = 4 supported @@ -56,7 +223,7 @@ struct code { using this_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; - using storage_t = conditional_t<(TotalBitWidth>16), conditional_t<(TotalBitWidth>32), _uint64_t, uint32_t>, uint16_t> ; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; storage_t value; From de4d0fb2f266da125d94801c5c38bd81a9260acd Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 2 Apr 2025 23:45:53 -0300 Subject: [PATCH 015/157] Add new classes for encoding/decoding of mortn codes --- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 57 ++++ include/nbl/builtin/hlsl/morton.hlsl | 287 ++++++++++++++++-- 2 files changed, 312 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index c4f1f1ef1b..3794031c8e 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -3,6 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" namespace nbl { @@ -156,6 +157,62 @@ constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) #endif +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return _static_cast(u.data.y); + } +}; + 
+template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + const To highBits = _static_cast(u.data.x) << To(32); + return highBits | _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(_static_cast(u >> 32), _static_cast(u)); + } +}; + +} //namespace impl + } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d4ada29d70..e2e1596587 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -22,7 +22,7 @@ namespace impl template NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; -// Masks +// Basic decode masks template struct decode_mask; @@ -36,17 +36,240 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS 
--------------------------------------------------- +// Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y + +#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +{\ + NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ +}; + +#ifndef __HLSL_VERSION + +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +{\ + NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE));\ +}; + +#else + +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +{\ + NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ +};\ +NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE)); +#endif + +#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template struct morton_mask_##DIM##_##MASK ;\ + NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE)\ + NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, 0x##HEX_HIGH_VALUE##HEX_LOW_VALUE)\ + template\ + NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; + +NBL_MORTON_DECODE_MASK(2, 0, 55555555, 55555555) // Groups bits by 1 on, 1 off +NBL_MORTON_DECODE_MASK(2, 1, 33333333, 33333333) // Groups bits by 2 on, 2 off +NBL_MORTON_DECODE_MASK(2, 2, 0F0F0F0F, 0F0F0F0F) // Groups bits by 4 on, 4 off +NBL_MORTON_DECODE_MASK(2, 3, 00FF00FF, 00FF00FF) // Groups bits by 8 on, 8 off +NBL_MORTON_DECODE_MASK(2, 4, 0000FFFF, 0000FFFF) // Groups bits by 16 on, 16 off +NBL_MORTON_DECODE_MASK(2, 5, 00000000, FFFFFFFF) // Groups bits by 32 on, 32 off + +NBL_MORTON_DECODE_MASK(3, 0, 12492492, 49249249) // Groups bits by 1 
on, 2 off - also limits each dimension to 21 bits +NBL_MORTON_DECODE_MASK(3, 1, 01C0E070, 381C0E07) // Groups bits by 3 on, 6 off +NBL_MORTON_DECODE_MASK(3, 2, 0FC003F0, 00FC003F) // Groups bits by 6 on, 12 off +NBL_MORTON_DECODE_MASK(3, 3, 0000FFF0, 00000FFF) // Groups bits by 12 on, 24 off +NBL_MORTON_DECODE_MASK(3, 4, 00000000, 00FFFFFF) // Groups bits by 24 on, 48 off + +NBL_MORTON_DECODE_MASK(4, 0, 11111111, 11111111) // Groups bits by 1 on, 3 off +NBL_MORTON_DECODE_MASK(4, 1, 03030303, 03030303) // Groups bits by 2 on, 6 off +NBL_MORTON_DECODE_MASK(4, 2, 000F000F, 000F000F) // Groups bits by 4 on, 12 off +NBL_MORTON_DECODE_MASK(4, 3, 000000FF, 000000FF) // Groups bits by 8 on, 24 off +NBL_MORTON_DECODE_MASK(4, 4, 00000000, 0000FFFF) // Groups bits by 16 on, 48 off + +#undef NBL_MORTON_DECODE_MASK +#undef NBL_MORTON_EMULATED_DECODE_MASK +#undef NBL_MORTON_GENERIC_DECODE_MASK + +// ----------------------------------------------------------------- MORTON ENCODERS --------------------------------------------------- + +template +struct MortonEncoder; + +template +struct MortonEncoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 16) + { + encoded = (encoded | leftShift(encoded, 16)) & morton_mask_2_4_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + encoded = (encoded | leftShift(encoded, 8)) & morton_mask_2_3_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + encoded = (encoded | leftShift(encoded, 4)) & morton_mask_2_2_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + encoded = (encoded | leftShift(encoded, 2)) & morton_mask_2_1_v; + } + encoded = (encoded | leftShift(encoded, 1)) & morton_mask_2_0_v; + return encoded; + } +}; + +template +struct MortonEncoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + 
left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 12) + { + encoded = (encoded | leftShift(encoded, 24)) & morton_mask_3_3_v; + } + NBL_IF_CONSTEXPR(Bits > 6) + { + encoded = (encoded | leftShift(encoded, 12)) & morton_mask_3_2_v; + } + NBL_IF_CONSTEXPR(Bits > 3) + { + encoded = (encoded | leftShift(encoded, 6)) & morton_mask_3_1_v; + } + encoded = (encoded | leftShift(encoded, 2) | leftShift(encoded, 4)) & morton_mask_3_0_v; + return encoded; + } +}; -// Decode masks are different for each dimension -// Decoder works with unsigned, cast to sign depends on the Morton class -// Bit width checks happen in Morton class as well +template +struct MortonEncoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 8) + { + encoded = (encoded | leftShift(encoded, 24)) & morton_mask_4_3_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + encoded = (encoded | leftShift(encoded, 12)) & morton_mask_4_2_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + encoded = (encoded | leftShift(encoded, 6)) & morton_mask_4_1_v; + } + encoded = (encoded | leftShift(encoded, 3)) & morton_mask_4_0_v; + return encoded; + } +}; + +// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- template struct MortonDecoder; -// Specializations for lack of uint64_t +template +struct MortonDecoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_2_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 1)) & morton_mask_2_1_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + decoded = (decoded | rightShift(decoded, 2)) 
& morton_mask_2_2_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + decoded = (decoded | rightShift(decoded, 4)) & morton_mask_2_3_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + decoded = (decoded | rightShift(decoded, 8)) & morton_mask_2_4_v; + } + NBL_IF_CONSTEXPR(Bits > 16) + { + decoded = (decoded | rightShift(decoded, 16)) & morton_mask_2_5_v; + } + + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_3_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & morton_mask_3_1_v; + } + NBL_IF_CONSTEXPR(Bits > 3) + { + decoded = (decoded | rightShift(decoded, 6)) & morton_mask_3_2_v; + } + NBL_IF_CONSTEXPR(Bits > 6) + { + decoded = (decoded | rightShift(decoded, 12)) & morton_mask_3_3_v; + } + NBL_IF_CONSTEXPR(Bits > 12) + { + decoded = (decoded | rightShift(decoded, 24)) & morton_mask_3_4_v; + } + + return _static_cast(decoded); + } +}; +template +struct MortonDecoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_4_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 3)) & morton_mask_4_1_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + decoded = (decoded | rightShift(decoded, 6)) & morton_mask_4_2_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + decoded = (decoded | rightShift(decoded, 12)) & morton_mask_4_3_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + decoded = (decoded | rightShift(decoded, 24)) & morton_mask_4_4_v; + } + + return _static_cast(decoded); + } +}; + +/* template struct MortonDecoder<2, Bits, emulated_uint64_t> { @@ -55,12 +278,12 @@ struct MortonDecoder<2, Bits, 
emulated_uint64_t> { NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), // Groups bits by 1 on, 1 off - emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), // Groups bits by 2 on, 2 off - emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), // Groups bits by 4 on, 4 off - emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), // Groups bits by 8 on, 8 off - emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), // Groups bits by 16 on, 16 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) };// Groups bits by 32 on, 32 off + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), + emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), + emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), + emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), + emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) }; arithmetic_right_shift_operator rightShift; @@ -84,11 +307,11 @@ struct MortonDecoder<3, Bits, emulated_uint64_t> conditional_value<(Bits <= 6), uint16_t, uint16_t(2), conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), // Groups bits by 1 on, 2 off (also only considers 21 bits) - emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), // Groups bits by 3 on, 6 off - emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), // Groups bits by 6 on, 12 off - 
emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), // Groups bits by 12 on, 24 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), (also only considers 21 bits) + emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), + emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), + emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) }; (40 off if you're feeling pedantic) arithmetic_right_shift_operator rightShift; @@ -112,11 +335,11 @@ struct MortonDecoder<4, Bits, emulated_uint64_t> { NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), // Groups bits by 1 on, 3 off - emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), // Groups bits by 2 on, 6 off - emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), // Groups bits by 4 on, 12 off - emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), // Groups bits by 8 on, 24 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) };// Groups bits by 16 on, 48 off + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), + emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), + emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), + emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) }; arithmetic_right_shift_operator rightShift; @@ 
-207,14 +430,9 @@ struct MortonDecoder<4, Bits, encode_t> } }; -} //namespace impl +*/ -// Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ - impl::decode_mask_v< U , D > << U (1),\ - impl::decode_mask_v< U , D > << U (2),\ - impl::decode_mask_v< U , D > << U (3)\ - )) +} //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it @@ -227,10 +445,15 @@ struct code storage_t value; -}; -// Don't forget to delete this macro after usage -#undef NBL_HLSL_MORTON_MASKS + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif +}; } //namespace morton } //namespace hlsl From 799420e9dfa1f8bd8039fd724edea4ecf3133a87 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 4 Apr 2025 16:20:54 -0300 Subject: [PATCH 016/157] Fix conversion operators --- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 33 ++- include/nbl/builtin/hlsl/morton.hlsl | 279 ++++++------------ 2 files changed, 116 insertions(+), 196 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index 3794031c8e..ab08e1ff38 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -24,9 +24,6 @@ struct emulated_uint64_t emulated_uint64_t() = default; - // To immediately get compound operators and functional structs in CPP side - explicit emulated_uint64_t(const storage_t _data) : data(_data) {} - #endif /** @@ -52,6 +49,16 @@ struct emulated_uint64_t return create(storage_t(hi, lo)); } + /** + * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
+ * + * @param [in] _data `uint64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) + { + return create(_static_cast(u >> 32), _static_cast(u)); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -115,9 +122,11 @@ struct left_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { + if (!bits) + return operand; const uint32_t _bits = uint32_t(bits); const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t higherBitsMask = ~uint32_t(0) << shift; + const uint32_t higherBitsMask = (~uint32_t(0)) << shift; // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; return emulated_uint64_t::create(retValData); @@ -132,6 +141,8 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { + if (!bits) + return operand; const uint32_t _bits = uint32_t(bits); const uint32_t shift = ComponentBitWidth - _bits; const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; @@ -173,10 +184,10 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +template<> +struct static_cast_helper { - using To = Unsigned; + using To = uint64_t; using From = emulated_uint64_t; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) @@ -199,15 +210,15 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > 
sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +template<> +struct static_cast_helper { using To = emulated_uint64_t; - using From = Unsigned; + using From = uint64_t; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) { - return emulated_uint64_t::create(_static_cast(u >> 32), _static_cast(u)); + return emulated_uint64_t::create(u); } }; diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e2e1596587..07aa21b821 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -46,44 +46,41 @@ NBL_CONSTEXPR T decode_mask_v = decode_mask::value; #ifndef __HLSL_VERSION -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ -{\ - NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE));\ -}; +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) #else -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ {\ NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ };\ -NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE)); +NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE); #endif -#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template struct morton_mask_##DIM##_##MASK ;\ - NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE)\ - NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, 0x##HEX_HIGH_VALUE##HEX_LOW_VALUE)\ +#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct 
morton_mask_##DIM##_##MASK ;\ + NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE)\ + NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE)\ template\ NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; -NBL_MORTON_DECODE_MASK(2, 0, 55555555, 55555555) // Groups bits by 1 on, 1 off -NBL_MORTON_DECODE_MASK(2, 1, 33333333, 33333333) // Groups bits by 2 on, 2 off -NBL_MORTON_DECODE_MASK(2, 2, 0F0F0F0F, 0F0F0F0F) // Groups bits by 4 on, 4 off -NBL_MORTON_DECODE_MASK(2, 3, 00FF00FF, 00FF00FF) // Groups bits by 8 on, 8 off -NBL_MORTON_DECODE_MASK(2, 4, 0000FFFF, 0000FFFF) // Groups bits by 16 on, 16 off -NBL_MORTON_DECODE_MASK(2, 5, 00000000, FFFFFFFF) // Groups bits by 32 on, 32 off - -NBL_MORTON_DECODE_MASK(3, 0, 12492492, 49249249) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits -NBL_MORTON_DECODE_MASK(3, 1, 01C0E070, 381C0E07) // Groups bits by 3 on, 6 off -NBL_MORTON_DECODE_MASK(3, 2, 0FC003F0, 00FC003F) // Groups bits by 6 on, 12 off -NBL_MORTON_DECODE_MASK(3, 3, 0000FFF0, 00000FFF) // Groups bits by 12 on, 24 off -NBL_MORTON_DECODE_MASK(3, 4, 00000000, 00FFFFFF) // Groups bits by 24 on, 48 off - -NBL_MORTON_DECODE_MASK(4, 0, 11111111, 11111111) // Groups bits by 1 on, 3 off -NBL_MORTON_DECODE_MASK(4, 1, 03030303, 03030303) // Groups bits by 2 on, 6 off -NBL_MORTON_DECODE_MASK(4, 2, 000F000F, 000F000F) // Groups bits by 4 on, 12 off -NBL_MORTON_DECODE_MASK(4, 3, 000000FF, 000000FF) // Groups bits by 8 on, 24 off -NBL_MORTON_DECODE_MASK(4, 4, 00000000, 0000FFFF) // Groups bits by 16 on, 48 off +NBL_MORTON_DECODE_MASK(2, 0, uint64_t(0x5555555555555555)) // Groups bits by 1 on, 1 off +NBL_MORTON_DECODE_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off +NBL_MORTON_DECODE_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off +NBL_MORTON_DECODE_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off +NBL_MORTON_DECODE_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // 
Groups bits by 16 on, 16 off +NBL_MORTON_DECODE_MASK(2, 5, uint64_t(0x00000000FFFFFFFF)) // Groups bits by 32 on, 32 off + +NBL_MORTON_DECODE_MASK(3, 0, uint64_t(0x1249249249249249)) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits +NBL_MORTON_DECODE_MASK(3, 1, uint64_t(0x01C0E070381C0E07)) // Groups bits by 3 on, 6 off +NBL_MORTON_DECODE_MASK(3, 2, uint64_t(0x0FC003F000FC003F)) // Groups bits by 6 on, 12 off +NBL_MORTON_DECODE_MASK(3, 3, uint64_t(0x0000FFF000000FFF)) // Groups bits by 12 on, 24 off +NBL_MORTON_DECODE_MASK(3, 4, uint64_t(0x0000000000FFFFFF)) // Groups bits by 24 on, 48 off + +NBL_MORTON_DECODE_MASK(4, 0, uint64_t(0x1111111111111111)) // Groups bits by 1 on, 3 off +NBL_MORTON_DECODE_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off +NBL_MORTON_DECODE_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off +NBL_MORTON_DECODE_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off +NBL_MORTON_DECODE_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off #undef NBL_MORTON_DECODE_MASK #undef NBL_MORTON_EMULATED_DECODE_MASK @@ -269,193 +266,105 @@ struct MortonDecoder<4, Bits, encode_t> } }; -/* -template -struct MortonDecoder<2, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); +} //namespace impl - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), - emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), - emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), - emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), - emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), - emulated_uint64_t::create(uint32_t(0x00000000), 
uint32_t(0xFFFFFFFF)) }; +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - arithmetic_right_shift_operator rightShift; + + storage_t value; - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded.data.y); - } -}; + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- -template -struct MortonDecoder<3, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), - conditional_value<(Bits <= 6), uint16_t, uint16_t(2), - conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), (also only considers 21 bits) - emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), - emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), - emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) }; (40 off if you're feeling pedantic) + code() = default; - arithmetic_right_shift_operator rightShift; + 
#endif - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - // First iteration is special - decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & DecodeMasks[1]; + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. Signedness MUST match the signedness of this Morton code class + */ + template + NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + create(NBL_CONST_REF_ARG(vector) cartesian) + { + using U = make_unsigned_t; + left_shift_operator leftShift; + storage_t encodedCartesian = _static_cast(uint64_t(0)); [[unroll]] - for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + for (uint16_t i = 0; i < D; i++) { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 2]; + encodedCartesian = encodedCartesian | leftShift(impl::MortonEncoder::encode(_static_cast(cartesian[i])), i); } - return _static_cast(decoded.data.y); + this_t retVal; + retVal.value = encodedCartesian; + return retVal; } -}; -template -struct MortonDecoder<4, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), - emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), - emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), - emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) }; + // CPP can also have an actual constructor + #ifndef __HLSL_VERSION - arithmetic_right_shift_operator rightShift; + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * 
@param [in] cartesian Coordinates to encode + */ - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded.data.y); - } -}; - -template -struct MortonDecoder<2, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + template + explicit code(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[6] = { _static_cast(0x5555555555555555), // Groups bits by 1 on, 1 off - _static_cast(0x3333333333333333), // Groups bits by 2 on, 2 off - _static_cast(0x0F0F0F0F0F0F0F0F), // Groups bits by 4 on, 4 off - _static_cast(0x00FF00FF00FF00FF), // Groups bits by 8 on, 8 off - _static_cast(0x0000FFFF0000FFFF), // Groups bits by 16 on, 16 off - _static_cast(0x00000000FFFFFFFF) };// Groups bits by 32 on, 32 off - - encode_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded); + *this = create(cartesian); } -}; -template -struct MortonDecoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + // This one is defined later since it requires `static_cast_helper` specialization + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + + template + explicit operator vector() const noexcept { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), - conditional_value<(Bits <= 6), uint16_t, uint16_t(2), - conditional_value<(Bits <= 12), uint16_t, uint16_t(3), 
uint16_t(4)>::value>::value>::value; - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1249249249249249), // Groups bits by 1 on, 2 off (also only considers 21 bits) - _static_cast(0x01C0E070381C0E07), // Groups bits by 3 on, 6 off - _static_cast(0x0FC003F000FC003F), // Groups bits by 6 on, 12 off - _static_cast(0x0000FFF000000FFF), // Groups bits by 12 on, 24 off - _static_cast(0x0000000000FFFFFF) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) - - encode_t decoded = encodedValue & DecodeMasks[0]; - // First iteration is special - decoded = (decoded | (decoded >> 2) | (decoded >> 4)) & DecodeMasks[1]; - [[unroll]] - for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) - { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 2]; - } - return _static_cast(decoded); + return _static_cast, morton::code, Bits, D>>(*this); } + + #endif }; -template -struct MortonDecoder<4, Bits, encode_t> +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl { - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1111111111111111), // Groups bits by 1 on, 3 off - _static_cast(0x0303030303030303), // Groups bits by 2 on, 6 off - _static_cast(0x000F000F000F000F), // Groups bits by 4 on, 12 off - _static_cast(0x000000FF000000FF), // Groups bits by 8 on, 24 off - 
_static_cast(0x000000000000FFFF) };// Groups bits by 16 on, 48 off - - encode_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + using U = make_unsigned_t; + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + arithmetic_right_shift_operator rightShift; + vector cartesian; + for (uint16_t i = 0; i < D; i++) { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); } - return _static_cast(decoded); + return cartesian; } }; -*/ - -} //namespace impl +} // namespace impl -// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 -// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && D * Bits <= 64) -struct code -{ - using this_t = code; - NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; - using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - - - storage_t value; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - #ifndef __HLSL_VERSION - - code() = default; - - #endif -}; - -} //namespace morton } //namespace hlsl } //namespace nbl From 52323bc1f67e58b547c65be11ae9ac9d08e8e4ed Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 4 Apr 2025 23:45:39 -0300 Subject: [PATCH 017/157] Finish the rest of comparison ops and we're done! 
--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 2 + include/nbl/builtin/hlsl/functional.hlsl | 23 +- include/nbl/builtin/hlsl/morton.hlsl | 231 +++++++++++++++++- .../nbl/builtin/hlsl/portable/uint64_t.hlsl | 30 +++ .../nbl/builtin/hlsl/portable/vector_t.hlsl | 18 ++ src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 294 insertions(+), 11 deletions(-) create mode 100644 include/nbl/builtin/hlsl/portable/uint64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..a106cec440 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -329,6 +330,7 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 3cf24193a4..e5486e2727 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -195,7 +195,7 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +template struct ternary_operator { using type_t = T; @@ -206,7 +206,7 @@ struct ternary_operator } }; -template +template struct left_shift_operator { using type_t = T; @@ -217,7 +217,7 @@ struct left_shift_operator } }; -template +template struct arithmetic_right_shift_operator { using type_t = T; @@ -228,9 +228,20 @@ struct arithmetic_right_shift_operator } }; -// Declare template, but left unimplemented by default -template -struct logical_right_shift_operator; +template 
+struct logical_right_shift_operator +{ + using type_t = T; + using unsigned_type_t = make_unsigned_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + arithmetic_right_shift_operator arithmeticRightShift; + return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); + } +}; + + } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 07aa21b821..499deb1db4 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -266,6 +266,47 @@ struct MortonDecoder<4, Bits, encode_t> } }; +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Here because no partial specialization of methods + +template +struct Equals; + +template +struct Equals +{ + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + retVal[i] = (_value & rhs[i]) == rhs[i]; + } + return retVal; + } +}; + +template +struct Equals +{ + template + NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > + operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + { + using U = make_unsigned_t; + vector interleaved; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); + } + Equals equals; + return equals(_value, interleaved); + } +}; + } //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 @@ -274,10 +315,10 @@ template; + using this_signed_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; using storage_t = 
conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - storage_t value; // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- @@ -325,26 +366,205 @@ struct code *this = create(cartesian); } - // This one is defined later since it requires `static_cast_helper` specialization - /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template - explicit operator vector() const noexcept + constexpr inline explicit operator vector() const noexcept { return _static_cast, morton::code, Bits, D>>(*this); } #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + left_shift_operator leftShift; + // allOnes encodes a cartesian coordinate with all values set to 1 + this_t allOnes; + 
allOnes.value = leftShift(_static_cast(1), D) - _static_cast(1); + // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 + this_signed_t retVal; + retVal.value = (operator~() + allOnes).value; + return retVal; + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + this_t retVal; + retVal.value = _static_cast(uint64_t(0)); + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` + retVal.value |= ((value | (~leftShift(Mask, i))) + (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); + } + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + this_t retVal; + retVal.value = _static_cast(uint64_t(0)); + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + retVal.value |= ((value & leftShift(Mask, i)) - (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); + } + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator==(NBL_CONST_REF_ARG(vector) rhs) + { + impl::Equals equals; + return equals(value, rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value != rhs.value; + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator==(rhs); + } }; } //namespace morton +template +struct left_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator valueLeftShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueLeftShift(operand.value, bits * D); + return retVal; + } +}; + +template +struct arithmetic_right_shift_operator > +{ + using type_t = 
morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + arithmetic_right_shift_operator valueArithmeticRightShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueArithmeticRightShift(operand.value, bits * D); + return retVal; + } +}; + +// This one's uglier - have to unpack to get the expected behaviour +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + vector cartesian = _static_cast >(operand); + cartesian >> scalar_t(bits); + return type_t::create(cartesian); + } +}; + +#ifndef __HLSL_VERSION + +template&& D* Bits <= 64) +constexpr inline morton::code morton::code::operator<<(uint16_t bits) const +{ + left_shift_operator> leftShift; + return leftShift(*this, bits); +} + +template&& D* Bits <= 64) +constexpr inline morton::code morton::code::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator> rightShift; + return rightShift(*this, bits); +} + +#endif + // Specialize the `static_cast_helper` namespace impl { + // I must be of same signedness as the morton code, and be wide enough to hold each component template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > @@ -355,6 +575,7 @@ struct static_cast_helper, morton::code, Bits, D, _u using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; arithmetic_right_shift_operator rightShift; vector cartesian; + [[unroll]] for (uint16_t i = 0; i < D; i++) { cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); diff --git 
a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl new file mode 100644 index 0000000000..ac081234ac --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl @@ -0,0 +1,30 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_UINT_64 to force using emulated uint64 + +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_UINT_64 +using portable_uint64_t = emulated_uint64_t; +#else +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +#endif + +#else +using portable_uint64_t = uint64_t; +#endif + +//static_assert(sizeof(portable_uint64_t) == sizeof(uint64_t)); + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..dcaea97739 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -36,19 +36,37 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = portable_vector_t4 >; #else +// Float template using portable_float64_t2 = portable_vector_t2; template using portable_float64_t3 = portable_vector_t3; template using portable_float64_t4 = portable_vector_t4; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2; +template +using portable_uint64_t3 = portable_vector_t3; +template +using portable_uint64_t4 = portable_vector_t4; #endif } diff --git 
a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index a11a26d69a..d7005a1ed6 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -219,6 +219,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/uint64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 From b6b70030434018a9e70ea4c52c86d48c135cc94e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 7 Apr 2025 19:41:08 -0300 Subject: [PATCH 018/157] Final Mortons --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 488 ++++++++++++++++++ .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 232 --------- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 3 +- include/nbl/builtin/hlsl/morton.hlsl | 107 +++- .../nbl/builtin/hlsl/portable/int64_t.hlsl | 31 ++ .../nbl/builtin/hlsl/portable/uint64_t.hlsl | 30 -- .../nbl/builtin/hlsl/portable/vector_t.hlsl | 17 + src/nbl/builtin/CMakeLists.txt | 4 +- 8 files changed, 641 insertions(+), 271 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/int64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/emulated/uint64_t.hlsl create mode 100644 include/nbl/builtin/hlsl/portable/int64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/portable/uint64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..f3269cc6ba --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,488 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include 
"nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_uint64_t +{ + using storage_t = vector; + using this_t = emulated_uint64_t; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_uint64_t() = default; + + #endif + + /** + * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + this_t retVal; + retVal.data = _data; + return retVal; + } + + /** + * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `uint64` being emulated + * @param [in] lo Lowest 32 bits of the `uint64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return create(storage_t(hi, lo)); + } + + /** + * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
+ * + * @param [in] _data `uint64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) + { + return create(_static_cast(u >> 32), _static_cast(u)); + } + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data & rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data | rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data ^ rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(~data); + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); + const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const this_t retVal = create(addResult); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); + const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const this_t retVal = create(subResult); + return retVal; + } + + 
// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return data.x == rhs.data.x && data.y == rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return data.x != rhs.data.x || data.y != rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return data.x < rhs.data.x; + else + return data.y < rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return data.x > rhs.data.x; + else + return data.y > rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator>(rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator<(rhs); + } +}; + +struct emulated_int64_t : emulated_uint64_t +{ + using base_t = emulated_uint64_t; + using base_t::storage_t; + using this_t = emulated_int64_t; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_int64_t() = default; + + #endif + + /** + * @brief Creates an `emulated_int64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `int64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + return _static_cast(base_t::create(_data)); + } + + /** + * @brief Creates an `emulated_int64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `int64` being emulated + * @param [in] 
lo Lowest 32 bits of the `int64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return _static_cast(base_t::create(hi, lo)); + } + + /** + * @brief Creates an `emulated_int64_t` from a `int64_t`. Useful for compile-time encoding. + * + * @param [in] _data `int64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(int64_t) i) + { + return _static_cast(base_t::create(_static_cast(i))); + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + // Only this one needs to be redefined since it's arithmetic + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + // Same as unsigned but the topmost bits are compared as signed + NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return _static_cast(data.x) < _static_cast(rhs.data.x); + else + return data.y < rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return _static_cast(data.x) > _static_cast(rhs.data.x); + else + return data.y > rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator>(rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator<(rhs); + } +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t 
shift = ComponentBitWidth - _bits; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_int64_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator leftShift; + return _static_cast(leftShift(_static_cast(operand), bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + // Also the right shift *only* in the top bits happens as a signed arithmetic 
right shift + const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; + return emulated_int64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +namespace impl +{ + +template<> +struct static_cast_helper +{ + using To = emulated_uint64_t; + using From = emulated_int64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + To retVal; + retVal.data = i.data; + return retVal; + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_int64_t; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + To retVal; + retVal.data = u.data; + return retVal; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return _static_cast(u.data.y); + } +}; + +template<> +struct static_cast_helper +{ + using To = uint64_t; + using From = emulated_uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + const To highBits = _static_cast(u.data.x) << To(32); + return highBits | _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && 
(sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_uint64_t; + using From = uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(u); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +{ + using To = Signed; + using From = emulated_int64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return _static_cast(i.data.y); + } +}; + +template<> +struct static_cast_helper +{ + using To = int64_t; + using From = emulated_int64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + const To highBits = _static_cast(i.data.x) << To(32); + return highBits | _static_cast(i.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +{ + using To = emulated_int64_t; + using From = Signed; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return emulated_int64_t::create(uint32_t(0), _static_cast(i)); + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_int64_t; + using From = int64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return emulated_int64_t::create(i); + } +}; + +} //namespace impl + +} //namespace nbl +} //namespace hlsl + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> 
+struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl deleted file mode 100644 index ab08e1ff38..0000000000 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ /dev/null @@ -1,232 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/functional.hlsl" -#include "nbl/builtin/hlsl/concepts/core.hlsl" - -namespace nbl -{ -namespace hlsl -{ - -struct emulated_uint64_t -{ - using storage_t = vector; - using this_t = emulated_uint64_t; - - storage_t data; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - - #ifndef __HLSL_VERSION - - emulated_uint64_t() = default; - - #endif - - /** - * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - this_t retVal; - retVal.data = _data; - return retVal; - } - - /** - * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern - * - * @param [in] hi Highest 32 bits of the `uint64` being emulated - * @param [in] lo Lowest 32 bits of the `uint64` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) - { - return create(storage_t(hi, lo)); - } - - /** - * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
- * - * @param [in] _data `uint64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) - { - return create(_static_cast(u >> 32), _static_cast(u)); - } - - // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data & rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data | rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data ^ rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(~data); - return retVal; - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - constexpr inline this_t operator<<(uint16_t bits) const; - - constexpr inline this_t operator>>(uint16_t bits) const; - - #endif - - // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); - const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; - const this_t retVal = create(addResult); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); - const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; - const this_t retVal = create(subResult); - return retVal; - } - -}; 
- -template<> -struct left_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t higherBitsMask = (~uint32_t(0)) << shift; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; - return emulated_uint64_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> _bits, ((operand.data.x & lowerBitsMask) << shift) | (operand.data.y >> _bits) }; - return emulated_uint64_t::create(retValData); - } -}; - -#ifndef __HLSL_VERSION - -constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const -{ - left_shift_operator leftShift; - return leftShift(*this, bits); -} - -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -#endif - -namespace impl -{ - -template 
NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > -{ - using To = Unsigned; - using From = emulated_uint64_t; - - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return _static_cast(u.data.y); - } -}; - -template<> -struct static_cast_helper -{ - using To = uint64_t; - using From = emulated_uint64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - const To highBits = _static_cast(u.data.x) << To(32); - return highBits | _static_cast(u.data.y); - } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > -{ - using To = emulated_uint64_t; - using From = Unsigned; - - // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); - } -}; - -template<> -struct static_cast_helper -{ - using To = emulated_uint64_t; - using From = uint64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return emulated_uint64_t::create(u); - } -}; - -} //namespace impl - -} //namespace nbl -} //namespace hlsl - - - -#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index a106cec440..65a97bbe68 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,7 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include -#include +#include #include #include #include @@ -331,6 +331,7 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) 
DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 499deb1db4..9c834424a8 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -5,7 +5,7 @@ #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" -#include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" +#include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" namespace nbl @@ -275,14 +275,15 @@ struct Equals; template struct Equals { - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; vector retVal; [[unroll]] for (uint16_t i = 0; i < D; i++) { - retVal[i] = (_value & rhs[i]) == rhs[i]; + retVal[i] = (value & leftShift(Mask, i)) == leftShift(rhs[i], i); } return retVal; } @@ -293,7 +294,7 @@ struct Equals { template NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using U = make_unsigned_t; vector interleaved; @@ -303,10 +304,77 @@ struct Equals interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); } Equals equals; - return equals(_value, interleaved); + return equals(value, interleaved); + } +}; + +template +struct BaseComparison; + +// Aux method for extracting highest bit, used by the comparison below +template +NBL_CONSTEXPR_INLINE_FUNC storage_t extractHighestBit(storage_t value, uint16_t coord) +{ + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for 
representation, then the highest index of these + // bits is `bits(coord) - 1` + const uint16_t coordHighestBitIdx = Bits / D - ((coord < Bits % D) ? uint16_t(0) : uint16_t(1)); + // This is the index of that bit as an index in the encoded value + const uint16_t shift = coordHighestBitIdx * D + coord; + left_shift_operator leftShift; + return value & leftShift(_static_cast(uint16_t(1)), shift); +} + +template +struct BaseComparison +{ + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + vector retVal; + ComparisonOp comparison; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + storage_t thisCoord = value & leftShift(Mask, i); + storage_t rhsCoord = leftShift(rhs[i], i); + // If coordinate is negative, we add 1s in every bit not corresponding to coord + if (extractHighestBit(thisCoord) != _static_cast(uint64_t(0))) + thisCoord = thisCoord | ~leftShift(Mask, i); + if (extractHighestBit(rhsCoord) != _static_cast(uint64_t(0))) + rhsCoord = rhsCoord | ~leftShift(Mask, i); + retVal[i] = comparison(thisCoord, rhsCoord); + } + return retVal; + } +}; + +template +struct BaseComparison +{ + template + NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > + operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using U = make_unsigned_t; + vector interleaved; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); + } + BaseComparison baseComparison; + return baseComparison(value, interleaved); } }; +template +struct LessThan : BaseComparison > {}; + +template +struct LessEquals : BaseComparison > {}; + + } //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 @@ -490,8 +558,35 @@ struct code template 
enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) { - return !operator==(rhs); + return !operator== (rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<(NBL_CONST_REF_ARG(vector) rhs) + { + impl::LessThan lessThan; + return lessThan(value, rhs); } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<=(NBL_CONST_REF_ARG(vector) rhs) + { + impl::LessEquals lessEquals; + return lessEquals(value, rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator<= (rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>=(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator< (rhs); + } + }; } //namespace morton diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..6929e160fa --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,31 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +using portable_uint64_t = emulated_uint64_t; +using portable_int64_t = emulated_int64_t; +#else +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +using portable_uint64_t = uint64_t; +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git 
a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl deleted file mode 100644 index ac081234ac..0000000000 --- a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ -#define _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ - -#include -#include - -// define NBL_FORCE_EMULATED_UINT_64 to force using emulated uint64 - -namespace nbl -{ -namespace hlsl -{ -template -#ifdef __HLSL_VERSION -#ifdef NBL_FORCE_EMULATED_UINT_64 -using portable_uint64_t = emulated_uint64_t; -#else -using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; -#endif - -#else -using portable_uint64_t = uint64_t; -#endif - -//static_assert(sizeof(portable_uint64_t) == sizeof(uint64_t)); - -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index dcaea97739..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -51,6 +52,14 @@ template using portable_uint64_t3 = portable_vector_t3 >; template using portable_uint64_t4 = portable_vector_t4 >; + +//Int +template +using portable_int64_t2 = portable_vector_t2 >; +template +using portable_int64_t3 = portable_vector_t3 >; +template +using portable_int64_t4 = portable_vector_t4 >; #else // Float template @@ -67,6 +76,14 @@ template using portable_uint64_t3 = portable_vector_t3; template using portable_uint64_t4 = portable_vector_t4; + +// Int +template +using portable_int64_t2 = portable_vector_t2; +template +using portable_int64_t3 = portable_vector_t3; +template +using portable_int64_t4 = portable_vector_t4; #endif } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index d7005a1ed6..f03d8ae22c 100644 --- 
a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -214,12 +214,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/uint64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/uint64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 From 60ff99a4dadfdecc5abf59e4fb2d95e62d6ed929 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 7 Apr 2025 23:20:42 -0300 Subject: [PATCH 019/157] Clean up the emulated int code, fix some constant creation in the morton code --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 317 ++++++++---------- include/nbl/builtin/hlsl/morton.hlsl | 15 +- 2 files changed, 161 insertions(+), 171 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index f3269cc6ba..cad10242f2 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -5,31 +5,35 @@ #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" +// Didn't bother with operator*, operator/, implement if you need them. 
Multiplication is pretty straightforward, division requires switching on signs +// and whether the topmost bits of the divisor are equal to 0 +// - Francisco + namespace nbl { namespace hlsl { -struct emulated_uint64_t +template +struct emulated_int64_base { - using storage_t = vector; - using this_t = emulated_uint64_t; + using storage_t = vector; + using this_t = emulated_int64_base; - storage_t data; + storage_t data; // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - #ifndef __HLSL_VERSION - emulated_uint64_t() = default; + emulated_int64_base() = default; #endif /** - * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { @@ -39,10 +43,10 @@ struct emulated_uint64_t } /** - * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern * - * @param [in] hi Highest 32 bits of the `uint64` being emulated - * @param [in] lo Lowest 32 bits of the `uint64` being emulated + * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated + * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) { @@ -50,9 +54,9 @@ struct emulated_uint64_t } /** - * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. + * @brief Creates an `emulated_int64_base` from a `uint64_t` with its bitpattern. 
Useful for compile-time encoding. * - * @param [in] _data `uint64_t` to be unpacked into high and low bits + * @param [in] u `uint64_t` to be unpacked into high and low bits */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) { @@ -126,7 +130,15 @@ struct emulated_uint64_t NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { if (data.x != rhs.data.x) - return data.x < rhs.data.x; + { + // If signed, compare topmost bits as signed + NBL_IF_CONSTEXPR(Signed) + return _static_cast(data.x) < _static_cast(rhs.data.x); + // If unsigned, compare them as-is + else + return data.x < rhs.data.x; + } + // Lower bits are positive in both signed and unsigned else return data.y < rhs.data.y; } @@ -134,7 +146,14 @@ struct emulated_uint64_t NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { if (data.x != rhs.data.x) - return data.x > rhs.data.x; + { + // If signed, compare topmost bits as signed + NBL_IF_CONSTEXPR(Signed) + return _static_cast(data.x) > _static_cast(rhs.data.x); + // If unsigned, compare them as-is + else + return data.x > rhs.data.x; + } else return data.y > rhs.data.y; } @@ -150,94 +169,15 @@ struct emulated_uint64_t } }; -struct emulated_int64_t : emulated_uint64_t -{ - using base_t = emulated_uint64_t; - using base_t::storage_t; - using this_t = emulated_int64_t; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - +using emulated_uint64_t = emulated_int64_base; +using emulated_int64_t = emulated_int64_base; - #ifndef __HLSL_VERSION - - emulated_int64_t() = default; - - #endif +// ---------------------- Functional operatos ------------------------ - /** - * @brief Creates an `emulated_int64_t` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `int64_t` being emulated - */ - 
NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - return _static_cast(base_t::create(_data)); - } - - /** - * @brief Creates an `emulated_int64_t` from two `uint32_t`s representing its bitpattern - * - * @param [in] hi Highest 32 bits of the `int64` being emulated - * @param [in] lo Lowest 32 bits of the `int64` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) - { - return _static_cast(base_t::create(hi, lo)); - } - - /** - * @brief Creates an `emulated_int64_t` from a `int64_t`. Useful for compile-time encoding. - * - * @param [in] _data `int64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(int64_t) i) - { - return _static_cast(base_t::create(_static_cast(i))); - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - // Only this one needs to be redefined since it's arithmetic - constexpr inline this_t operator>>(uint16_t bits) const; - - #endif - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - - // Same as unsigned but the topmost bits are compared as signed - NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - if (data.x != rhs.data.x) - return _static_cast(data.x) < _static_cast(rhs.data.x); - else - return data.y < rhs.data.y; - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - if (data.x != rhs.data.x) - return _static_cast(data.x) > _static_cast(rhs.data.x); - else - return data.y > rhs.data.y; - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator>(rhs); - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator<(rhs); - } -}; - -template<> -struct left_shift_operator 
+template +struct left_shift_operator > { - using type_t = emulated_uint64_t; + using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) @@ -248,7 +188,7 @@ struct left_shift_operator const uint32_t shift = ComponentBitWidth - _bits; // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; - return emulated_uint64_t::create(retValData); + return type_t::create(retValData); } }; @@ -270,18 +210,6 @@ struct arithmetic_right_shift_operator } }; -template<> -struct left_shift_operator -{ - using type_t = emulated_int64_t; - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - left_shift_operator leftShift; - return _static_cast(leftShift(_static_cast(operand), bits)); - } -}; - template<> struct arithmetic_right_shift_operator { @@ -303,7 +231,8 @@ struct arithmetic_right_shift_operator #ifndef __HLSL_VERSION -constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +template +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint16_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); @@ -356,113 +285,163 @@ struct static_cast_helper } }; -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { - using To = Unsigned; - using From = emulated_uint64_t; + 
using To = I; + using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - return _static_cast(u.data.y); + return _static_cast(val.data.y); } }; -template<> -struct static_cast_helper +template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) +struct static_cast_helper NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > { - using To = uint64_t; - using From = emulated_uint64_t; + using To = I; + using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - const To highBits = _static_cast(u.data.x) << To(32); - return highBits | _static_cast(u.data.y); + const To highBits = _static_cast(val.data.x) << To(32); + return highBits | _static_cast(val.data.y); } }; -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) > { - using To = emulated_uint64_t; - using From = Unsigned; + using To = emulated_int64_base; + using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + return To::create(uint32_t(0), _static_cast(i)); } }; -template<> -struct static_cast_helper +template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > { - using To = emulated_uint64_t; - using From = uint64_t; + using 
To = emulated_int64_base; + using From = I; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return emulated_uint64_t::create(u); + return To::create(_static_cast(i)); } }; -template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +} //namespace impl + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template +struct plus > { - using To = Signed; - using From = emulated_int64_t; + using type_t = emulated_int64_base; - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return _static_cast(i.data.y); + return lhs + rhs; } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template<> -struct static_cast_helper +template +struct minus > { - using To = int64_t; - using From = emulated_int64_t; + using type_t = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - const To highBits = _static_cast(i.data.x) << To(32); - return highBits | _static_cast(i.data.y); + return lhs - rhs; } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > -{ - using To = emulated_int64_t; - using From = 
Signed; +#ifdef __HLSL_VERSION +template<> +NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +#endif - // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) +// --------------------------------- Compound assignment operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template +struct plus_assign > +{ + using type_t = emulated_int64_base; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return emulated_int64_t::create(uint32_t(0), _static_cast(i)); + lhs = baseOp(lhs, rhs); } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template<> -struct static_cast_helper +template +struct minus_assign > { - using To = emulated_int64_t; - using From = int64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + using type_t = emulated_int64_base; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return emulated_int64_t::create(i); + lhs = baseOp(lhs, rhs); } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -} //namespace impl +#ifdef __HLSL_VERSION +template<> +NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR emulated_int64_t 
minus_assign::identity = minus::identity; +#endif } //namespace nbl } //namespace hlsl +// Declare them as signed/unsigned versions of each other + #ifndef __HLSL_VERSION #define NBL_ADD_STD std:: #else diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9c834424a8..e2ae3d8b0a 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -39,17 +39,28 @@ NBL_CONSTEXPR T decode_mask_v = decode_mask::value; // --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- // Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y +#ifndef __HLSL_VERSION + #define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ {\ NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ }; -#ifndef __HLSL_VERSION - #define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) #else +#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +{\ + NBL_CONSTEXPR_STATIC_INLINE T value;\ +};\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint16_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint32_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ + #define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ {\ NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ From 55601628733ca20218f0c13d481e0c1df29bed1a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 8 Apr 2025 19:44:15 -0300 Subject: [PATCH 020/157] Addressing latest PR review. 
Generic overloads for of different functional structs blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7325 --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 218 +++++++++--------- include/nbl/builtin/hlsl/functional.hlsl | 102 ++++++++ include/nbl/builtin/hlsl/morton.hlsl | 2 + .../nbl/builtin/hlsl/portable/int64_t.hlsl | 7 +- 4 files changed, 218 insertions(+), 111 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index cad10242f2..45cb82ed78 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -4,6 +4,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" // Didn't bother with operator*, operator/, implement if you need them. Multiplication is pretty straightforward, division requires switching on signs // and whether the topmost bits of the divisor are equal to 0 @@ -35,7 +36,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -48,19 +49,9 @@ struct emulated_int64_base * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { - return create(storage_t(hi, lo)); - } - - /** - * @brief Creates an `emulated_int64_base` from a `uint64_t` with its bitpattern. Useful for compile-time encoding. 
- * - * @param [in] u `uint64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) - { - return create(_static_cast(u >> 32), _static_cast(u)); + return create(storage_t(lo, hi)); } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- @@ -92,9 +83,9 @@ struct emulated_int64_base // Only valid in CPP #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint16_t bits) const; + constexpr inline this_t operator<<(this_t bits) const; - constexpr inline this_t operator>>(uint16_t bits) const; + constexpr inline this_t operator>>(this_t bits) const; #endif @@ -102,16 +93,16 @@ struct emulated_int64_base NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); - const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const spirv::AddCarryOutput lowerAddResult = addCarry(data.x, rhs.data.x); + const storage_t addResult = { lowerAddResult.result, data.y + rhs.data.y + lowerAddResult.carry }; const this_t retVal = create(addResult); return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); - const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.x, rhs.data.x); + const storage_t subResult = { lowerSubResult.result, data.y - rhs.data.y - lowerSubResult.borrow }; const this_t retVal = create(subResult); return retVal; } @@ -172,86 +163,6 @@ struct emulated_int64_base using emulated_uint64_t = emulated_int64_base; using emulated_int64_t = emulated_int64_base; -// ---------------------- Functional operatos ------------------------ - 
-template -struct left_shift_operator > -{ - using type_t = emulated_int64_base; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; - return type_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; - return emulated_uint64_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_int64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - // Also the right 
shift *only* in the top bits happens as a signed arithmetic right shift - const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; - return emulated_int64_t::create(retValData); - } -}; - -#ifndef __HLSL_VERSION - -template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint16_t bits) const -{ - left_shift_operator leftShift; - return leftShift(*this, bits); -} - -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -#endif - namespace impl { @@ -285,7 +196,7 @@ struct static_cast_helper } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { using To = I; @@ -294,25 +205,24 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con // Return only the lowest bits NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - return _static_cast(val.data.y); + return _static_cast(val.data.x); } }; -template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) -struct static_cast_helper NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > { using To = I; using From = emulated_int64_base; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - const To highBits = _static_cast(val.data.x) << To(32); - return highBits | _static_cast(val.data.y); + return 
bit_cast(val.data); } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { using To = emulated_int64_base; using From = I; @@ -324,20 +234,108 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v ) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > { using To = emulated_int64_base; using From = I; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return To::create(_static_cast(i)); + To retVal; + retVal.data = bit_cast(i); + return retVal; } }; } //namespace impl +// ---------------------- Functional operators ------------------------ + +template +struct left_shift_operator > +{ + using type_t = emulated_int64_base; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << bits) | (operand.data.y >> shift), 
operand.data.y << bits }; + return type_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + // Also the right shift *only* in the top bits happens as a signed arithmetic right shift + const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; + return emulated_int64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +template +constexpr inline emulated_int64_base 
emulated_int64_base::operator<<(this_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(this_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(this_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + // ---------------------- STD arithmetic operators ------------------------ // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index e5486e2727..cc95633f44 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl @@ -217,6 +218,56 @@ struct left_shift_operator } }; +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct left_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand << bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(! 
(concepts::IntVector) && concepts::Vectorial) +struct left_shift_operator) && concepts::Vectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, leftShift(operand.getComponent(i), bits.getComponent(i))); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, leftShift(operand.getComponent(i), bits)); + } + return shifted; + } +}; + template struct arithmetic_right_shift_operator { @@ -228,6 +279,57 @@ struct arithmetic_right_shift_operator } }; +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand >> bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, 
rightShift(operand.getComponent(i), bits.getComponent(i))); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, rightShift(operand.getComponent(i), bits)); + } + return shifted; + } +}; + +// Left unimplemented for vectorial types by default template struct logical_right_shift_operator { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e2ae3d8b0a..ea583fddfa 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,6 +8,8 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" +// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign + namespace nbl { namespace hlsl diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl index 6929e160fa..2dffa40a2d 100644 --- a/include/nbl/builtin/hlsl/portable/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -10,18 +10,23 @@ namespace nbl { namespace hlsl { -template #ifdef __HLSL_VERSION #ifdef NBL_FORCE_EMULATED_INT_64 +template using portable_uint64_t = emulated_uint64_t; +template using portable_int64_t = emulated_int64_t; #else +template using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; #endif #else +template using portable_uint64_t = uint64_t; +template using portable_int64_t = int64_t; #endif From e50c56b52e873da965804153eba64b3cb133c4a3 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 9 Apr 2025 00:23:55 -0300 Subject: [PATCH 021/157] Bunch of emulated int64 fixes 
regarding creation, comparison operators and left/right shifts --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 95 +++++++++---------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 45cb82ed78..98fcf2835b 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -34,7 +34,7 @@ struct emulated_int64_base /** * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) */ NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { @@ -54,6 +54,18 @@ struct emulated_int64_base return create(storage_t(lo, hi)); } + // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + { + return data.x; + } + + NBL_CONSTEXPR_INLINE_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC + { + return data.y; + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -93,60 +105,42 @@ struct emulated_int64_base NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.x, rhs.data.x); - const storage_t addResult = { lowerAddResult.result, data.y + rhs.data.y + lowerAddResult.carry }; - const this_t retVal = create(addResult); + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); 
+ const this_t retVal = create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.x, rhs.data.x); - const storage_t subResult = { lowerSubResult.result, data.y - rhs.data.y - lowerSubResult.borrow }; - const this_t retVal = create(subResult); + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + const this_t retVal = create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); return retVal; } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return data.x == rhs.data.x && data.y == rhs.data.y; + return all(data == rhs.data); } NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return data.x != rhs.data.x || data.y != rhs.data.y; + return any(data != rhs.data); } NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - if (data.x != rhs.data.x) - { - // If signed, compare topmost bits as signed - NBL_IF_CONSTEXPR(Signed) - return _static_cast(data.x) < _static_cast(rhs.data.x); - // If unsigned, compare them as-is - else - return data.x < rhs.data.x; - } - // Lower bits are positive in both signed and unsigned - else - return data.y < rhs.data.y; + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSB = Signed ? 
(_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() < rhs.__getLSB()))); } NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - if (data.x != rhs.data.x) - { - // If signed, compare topmost bits as signed - NBL_IF_CONSTEXPR(Signed) - return _static_cast(data.x) > _static_cast(rhs.data.x); - // If unsigned, compare them as-is - else - return data.x > rhs.data.x; - } - else - return data.y > rhs.data.y; + // Same reasoning as above + const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() > rhs.__getLSB()))); } NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -260,15 +254,15 @@ struct left_shift_operator > // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0` in LSB and the result of `uint32_t(1) << 32` in your architecture in MSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << bits) | (operand.data.y >> shift), operand.data.y << bits }; - return type_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? 
vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + return bits ? shifted : operand; } }; @@ -280,15 +274,15 @@ struct arithmetic_right_shift_operator // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; - return emulated_uint64_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + return bits ? 
shifted : operand; } }; @@ -300,16 +294,15 @@ struct arithmetic_right_shift_operator // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0xFFFFFFFF` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - // Also the right shift *only* in the top bits happens as a signed arithmetic right shift - const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; - return emulated_int64_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(uint32_t(int32_t(operand.__getMSB()) >> bits), ~uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + return bits ? 
shifted : operand; } }; From b1de9c37b2e2572ea13163f241e9fab0a044bb8e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 9 Apr 2025 16:24:21 -0300 Subject: [PATCH 022/157] Fix automatic specialize macro in cpp compat intrinsics, add intrinsic and generic ternary operator that should work for all compatible types, address PR review comments --- include/nbl/builtin/hlsl/complex.hlsl | 16 -------- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 38 +++++++++++++++++-- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 6 +++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 36 +++++++++++------- include/nbl/builtin/hlsl/functional.hlsl | 21 +++++++++- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 6 +++ 6 files changed, 89 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index 6728a9bf3d..a3a9f387d0 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -427,22 +427,6 @@ complex_t rotateRight(NBL_CONST_REF_ARG(complex_t) value) return retVal; } -template -struct ternary_operator< complex_t > -{ - using type_t = complex_t; - - complex_t operator()(bool condition, NBL_CONST_REF_ARG(complex_t) lhs, NBL_CONST_REF_ARG(complex_t) rhs) - { - const vector lhsVector = vector(lhs.real(), lhs.imag()); - const vector rhsVector = vector(rhs.real(), rhs.imag()); - const vector resultVector = condition ? 
lhsVector : rhsVector; - const complex_t result = { resultVector.x, resultVector.y }; - return result; - } -}; - - } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 92fc9e929b..e1ba823b9b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -75,6 +75,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -121,8 +123,8 @@ struct subBorrow_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -143,8 +145,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, 
select, (B)(T), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -633,6 +636,35 @@ struct subBorrow_helper } }; +template +NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) +struct select_helper) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + return condition ? object1 : object2; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 1f1957dbbd..284ba564d7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -150,6 +150,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_INLINE_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). 
* diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 98fcf2835b..53881423e9 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -132,15 +132,19 @@ struct emulated_int64_base { // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); const bool MSB = Signed ? (_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); - return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() < rhs.__getLSB()))); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; } NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); - return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() > rhs.__getLSB()))); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; } NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -259,10 +263,12 @@ struct left_shift_operator > NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(0, operand.__getLSB() << shift) - : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - return bits ? 
shifted : operand; + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; @@ -279,10 +285,12 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(operand.__getMSB() >> shift, 0) - : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - return bits ? shifted : operand; + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; @@ -299,10 +307,12 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? 
vector(uint32_t(int32_t(operand.__getMSB()) >> bits), ~uint32_t(0)) + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - return bits ? shifted : operand; + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index cc95633f44..51ee4f4829 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -201,9 +201,26 @@ struct ternary_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { - return condition ? 
lhs : rhs; + return select(condition, lhs, rhs); + } +}; + +template +struct ternary_operator > +{ + using type_t = T; + using traits = hlsl::vector_traits; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + { + return select(condition, lhs, rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(vector) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + { + return select, T>(condition, lhs, rhs); } }; diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d8d90de726..8add7a9ed3 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -12,6 +12,7 @@ #include #include #include +#include namespace nbl { @@ -335,6 +336,11 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, BooleanVector> any(BooleanVector vec); +// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) +template && (! 
concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) +[[vk::ext_instruction(spv::OpSelect)]] +ResultType select(Condition condition, ResultType object1, ResultType object2); + template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); From ea8cd43756146225058dcfbc1ddf4d254b0fd579 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 11 Apr 2025 12:39:16 -0300 Subject: [PATCH 023/157] Checkpoint: adding a bunch of operators to emulated vector types --- include/nbl/builtin/hlsl/concepts/core.hlsl | 10 + include/nbl/builtin/hlsl/concepts/vector.hlsl | 2 + include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 + .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 108 ++++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 4 +- include/nbl/builtin/hlsl/functional.hlsl | 101 ++++-- include/nbl/builtin/hlsl/morton.hlsl | 290 +++++------------- include/nbl/builtin/hlsl/type_traits.hlsl | 2 + 8 files changed, 236 insertions(+), 283 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index dcbafae8a5..4a8b848cb8 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -74,12 +74,22 @@ struct is_emulating_floating_point_scalar { NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; }; + +template +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar; +}; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +//! 
Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) +template +NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar::value; + } } } diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 468838730a..3ea3199951 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -40,6 +40,8 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial && concepts template NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial && (is_integral_v::scalar_type>); template +NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial && concepts::IntegralLikeScalar::scalar_type>; +template NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::SignedIntegralScalar::scalar_type>; } diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 77d9d887bd..81bdf32c19 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -11,6 +11,7 @@ #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static +#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline @@ -45,6 +46,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_STATIC_FUNC static #define NBL_CONSTEXPR_INLINE_FUNC inline diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 
53881423e9..ca51b0060a 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -94,10 +94,8 @@ struct emulated_int64_base // Only valid in CPP #ifndef __HLSL_VERSION - - constexpr inline this_t operator<<(this_t bits) const; - - constexpr inline this_t operator>>(this_t bits) const; + constexpr inline this_t operator<<(uint32_t bits) const; + constexpr inline this_t operator>>(uint32_t bits) const; #endif @@ -256,13 +254,12 @@ struct left_shift_operator > using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0` in LSB and the result of `uint32_t(1) << 32` in your architecture in MSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(0, operand.__getLSB() << shift) @@ -270,6 +267,12 @@ struct left_shift_operator > ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; template<> @@ -278,13 +281,12 @@ struct arithmetic_right_shift_operator using type_t = emulated_uint64_t; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(operand.__getMSB() >> shift, 0) @@ -292,6 +294,12 @@ struct arithmetic_right_shift_operator ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; template<> @@ -300,13 +308,12 @@ struct arithmetic_right_shift_operator using type_t = emulated_int64_t; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0xFFFFFFFF` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) @@ -314,24 +321,30 @@ struct arithmetic_right_shift_operator ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; #ifndef __HLSL_VERSION template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(this_t bits) const +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); } -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(this_t bits) const +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } -constexpr inline emulated_int64_t emulated_int64_t::operator>>(this_t bits) const +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); @@ -353,11 +366,7 @@ struct plus > return lhs + rhs; } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; template @@ -370,23 +379,17 @@ struct minus > return lhs - rhs; } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; -#ifdef __HLSL_VERSION template<> -NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); 
+NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); -#endif +NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -402,11 +405,7 @@ struct plus_assign > lhs = baseOp(lhs, rhs); } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; template @@ -420,23 +419,30 @@ struct minus_assign > lhs = baseOp(lhs, rhs); } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; -#ifdef __HLSL_VERSION template<> -NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; -#endif +NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ +namespace impl +{ 
+template +struct is_emulating_integral_scalar > +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} +} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 65a97bbe68..4d7c3839d9 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -330,8 +330,8 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) - DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) - DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 51ee4f4829..93687bdb6a 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -207,23 +207,6 @@ struct ternary_operator } }; -template -struct ternary_operator > -{ - using type_t = T; - using traits = hlsl::vector_traits; - - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select(condition, lhs, rhs); - } - - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(vector) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select, T>(condition, lhs, rhs); - } -}; - template struct left_shift_operator { @@ -252,34 +235,68 @@ struct left_shift_operator) > } }; -template NBL_PARTIAL_REQ_TOP(! 
(concepts::IntVector) && concepts::Vectorial) -struct left_shift_operator) && concepts::Vectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::Vector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, leftShift(operand.getComponent(i), bits.getComponent(i))); + setter(shifted, i, leftShift(getter(operand, i), getter(bits, i))); } return shifted; } NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, leftShift(operand.getComponent(i), bits)); + 
setter(shifted, i, leftShift(getter(operand, i), bits)); } return shifted; } @@ -313,34 +330,68 @@ struct arithmetic_right_shift_operator NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct arithmetic_right_shift_operator) > +template NBL_PARTIAL_REQ_TOP(!concepts::Vector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, rightShift(operand.getComponent(i), bits.getComponent(i))); + setter(shifted, i, rightShift(getter(operand, i), getter(bits, i))); } return shifted; } NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = 
uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, rightShift(operand.getComponent(i), bits)); + setter(shifted, i, rightShift(getter(operand, i), bits)); } return shifted; } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index ea583fddfa..9e62e40c2a 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" +#include "nbl/builtin/hlsl/portable/vector_t.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -22,90 +23,67 @@ namespace impl // Valid dimension for a morton code template -NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; +NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; -// Basic decode masks - -template -struct decode_mask; +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- -template -struct decode_mask : integral_constant {}; +NBL_CONSTEXPR uint16_t CodingStages = 5; -template -struct decode_mask : integral_constant::value << Dim) | T(1)> {}; +template +struct coding_mask; -template -NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +template +NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; -// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- -// Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y +// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage +mask also considers the total number of bits we're considering for a code (all 
other masks operate on a bit-agnostic basis). +#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\ +{\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ? (uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\ +}; -#ifndef __HLSL_VERSION +#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\ +{\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\ +}; -#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +// Final stage mask also counts exact number of bits, although maybe it's not necessary +#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\ {\ - NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ }; -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off -#else +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, uint64_t(0x30C30C30C30C30C3)) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, uint64_t(0xF00F00F00F00F00F)) // Groups bits by 4 on, 8 off 
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, uint64_t(0x00FF0000FF0000FF)) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, uint64_t(0xFFFF00000000FFFF)) // Groups bits by 16 on, 32 off -#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ -{\ - NBL_CONSTEXPR_STATIC_INLINE T value;\ -};\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint16_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint32_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ - -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ -{\ - NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ -};\ -NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE); -#endif +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) -#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK ;\ - NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE)\ - NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE)\ - template\ - NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; - -NBL_MORTON_DECODE_MASK(2, 0, uint64_t(0x5555555555555555)) // Groups 
bits by 1 on, 1 off -NBL_MORTON_DECODE_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off -NBL_MORTON_DECODE_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off -NBL_MORTON_DECODE_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off -NBL_MORTON_DECODE_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off -NBL_MORTON_DECODE_MASK(2, 5, uint64_t(0x00000000FFFFFFFF)) // Groups bits by 32 on, 32 off - -NBL_MORTON_DECODE_MASK(3, 0, uint64_t(0x1249249249249249)) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits -NBL_MORTON_DECODE_MASK(3, 1, uint64_t(0x01C0E070381C0E07)) // Groups bits by 3 on, 6 off -NBL_MORTON_DECODE_MASK(3, 2, uint64_t(0x0FC003F000FC003F)) // Groups bits by 6 on, 12 off -NBL_MORTON_DECODE_MASK(3, 3, uint64_t(0x0000FFF000000FFF)) // Groups bits by 12 on, 24 off -NBL_MORTON_DECODE_MASK(3, 4, uint64_t(0x0000000000FFFFFF)) // Groups bits by 24 on, 48 off - -NBL_MORTON_DECODE_MASK(4, 0, uint64_t(0x1111111111111111)) // Groups bits by 1 on, 3 off -NBL_MORTON_DECODE_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off -NBL_MORTON_DECODE_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off -NBL_MORTON_DECODE_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off -NBL_MORTON_DECODE_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off - -#undef NBL_MORTON_DECODE_MASK -#undef NBL_MORTON_EMULATED_DECODE_MASK -#undef NBL_MORTON_GENERIC_DECODE_MASK - -// ----------------------------------------------------------------- MORTON ENCODERS --------------------------------------------------- - -template -struct MortonEncoder; - -template -struct MortonEncoder<2, Bits, encode_t> +NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS + +#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK + +// 
----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- + +template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +struct MortonEncoder { template NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) @@ -114,168 +92,70 @@ struct MortonEncoder<2, Bits, encode_t> encode_t encoded = _static_cast(decodedValue); NBL_IF_CONSTEXPR(Bits > 16) { - encoded = (encoded | leftShift(encoded, 16)) & morton_mask_2_4_v; + encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 8) { - encoded = (encoded | leftShift(encoded, 8)) & morton_mask_2_3_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 8 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 4) { - encoded = (encoded | leftShift(encoded, 4)) & morton_mask_2_2_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 4 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 2) { - encoded = (encoded | leftShift(encoded, 2)) & morton_mask_2_1_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 2 * (Dim - 1)); } - encoded = (encoded | leftShift(encoded, 1)) & morton_mask_2_0_v; - return encoded; - } -}; - -template -struct MortonEncoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) - { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); - NBL_IF_CONSTEXPR(Bits > 12) - { - encoded = (encoded | leftShift(encoded, 24)) & morton_mask_3_3_v; - } - NBL_IF_CONSTEXPR(Bits > 6) - { - encoded = (encoded | leftShift(encoded, 12)) & morton_mask_3_2_v; - } - NBL_IF_CONSTEXPR(Bits > 3) - { - encoded = (encoded | leftShift(encoded, 6)) & morton_mask_3_1_v; - } - encoded = (encoded | leftShift(encoded, 2) | leftShift(encoded, 4)) & morton_mask_3_0_v; - return encoded; - } -}; - -template 
-struct MortonEncoder<4, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) - { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); - NBL_IF_CONSTEXPR(Bits > 8) - { - encoded = (encoded | leftShift(encoded, 24)) & morton_mask_4_3_v; - } - NBL_IF_CONSTEXPR(Bits > 4) - { - encoded = (encoded | leftShift(encoded, 12)) & morton_mask_4_2_v; - } - NBL_IF_CONSTEXPR(Bits > 2) + NBL_IF_CONSTEXPR(Bits > 1) { - encoded = (encoded | leftShift(encoded, 6)) & morton_mask_4_1_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); } - encoded = (encoded | leftShift(encoded, 3)) & morton_mask_4_0_v; - return encoded; + return encoded & _static_cast(coding_mask_v); } }; -// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- +// ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template -struct MortonDecoder; - -template -struct MortonDecoder<2, Bits, encode_t> +template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +struct MortonDecoder { - template + template 16), uint32_t, uint16_t> + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_2_0_v; + arithmetic_right_shift_operator > rightShift; + portable_vector_t decoded; NBL_IF_CONSTEXPR(Bits > 1) { - decoded = (decoded | rightShift(decoded, 1)) & morton_mask_2_1_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 1 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 2) { - decoded = (decoded | rightShift(decoded, 2)) & morton_mask_2_2_v; + decoded 
= decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 2 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 4) { - decoded = (decoded | rightShift(decoded, 4)) & morton_mask_2_3_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 4 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 8) { - decoded = (decoded | rightShift(decoded, 8)) & morton_mask_2_4_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 8 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 16) { - decoded = (decoded | rightShift(decoded, 16)) & morton_mask_2_5_v; - } - - return _static_cast(decoded); - } -}; - -template -struct MortonDecoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) - { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_3_0_v; - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & morton_mask_3_1_v; - } - NBL_IF_CONSTEXPR(Bits > 3) - { - decoded = (decoded | rightShift(decoded, 6)) & morton_mask_3_2_v; - } - NBL_IF_CONSTEXPR(Bits > 6) - { - decoded = (decoded | rightShift(decoded, 12)) & morton_mask_3_3_v; - } - NBL_IF_CONSTEXPR(Bits > 12) - { - decoded = (decoded | rightShift(decoded, 24)) & morton_mask_3_4_v; - } - - return _static_cast(decoded); - } -}; - -template -struct MortonDecoder<4, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) - { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_4_0_v; - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = (decoded | rightShift(decoded, 3)) & morton_mask_4_1_v; - } - NBL_IF_CONSTEXPR(Bits > 2) - { - decoded = (decoded | rightShift(decoded, 6)) & morton_mask_4_2_v; - } - NBL_IF_CONSTEXPR(Bits > 4) - { - decoded = (decoded | rightShift(decoded, 12)) & morton_mask_4_3_v; - } - 
NBL_IF_CONSTEXPR(Bits > 8) - { - decoded = (decoded | rightShift(decoded, 24)) & morton_mask_4_4_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 16 * (Dim - 1)); } - return _static_cast(decoded); + return _static_cast(decoded & _static_cast(coding_mask_v)); } }; @@ -290,7 +170,7 @@ struct Equals { NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; vector retVal; [[unroll]] @@ -342,7 +222,7 @@ struct BaseComparison { NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; vector retVal; ComparisonOp comparison; @@ -392,7 +272,7 @@ struct LessEquals : BaseComparison && D * Bits <= 64) +template && D * Bits <= 64) struct code { using this_t = code; @@ -515,7 +395,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; this_t retVal; retVal.value = _static_cast(uint64_t(0)); @@ -536,7 +416,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; this_t retVal; retVal.value = _static_cast(uint64_t(0)); @@ -653,14 +533,14 @@ struct arithmetic_right_shift_operator > #ifndef __HLSL_VERSION -template&& D* Bits <= 64) +template&& D* Bits <= 
64) constexpr inline morton::code morton::code::operator<<(uint16_t bits) const { left_shift_operator> leftShift; return leftShift(*this, bits); } -template&& D* Bits <= 64) +template&& D* Bits <= 64) constexpr inline morton::code morton::code::operator>>(uint16_t bits) const { arithmetic_right_shift_operator> rightShift; diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 5bfc7ca89b..bc160de788 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -664,6 +664,8 @@ using conditional_t = typename conditional::type; // Template Variables +template +NBL_CONSTEXPR T integral_constant_v = integral_constant::value; template NBL_CONSTEXPR bool is_same_v = is_same::value; template From 53a5f6a8cd4c19718694ff701c3723bbfffcf0f5 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 11 Apr 2025 17:04:15 -0300 Subject: [PATCH 024/157] Vectorized encode/decode for better pipelining --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 152 ++++++++++-------- include/nbl/builtin/hlsl/morton.hlsl | 29 +++- 2 files changed, 106 insertions(+), 75 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 4d7c3839d9..c4938fc9c2 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -147,93 +147,107 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); - - return output; + #define NBL_EMULATED_VECTOR_DEFINE_OPERATOR(OP)\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC 
this_t operator##OP (this_t other)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (vector other)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(&) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(|) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(^) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(+) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(-) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(*) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(/) - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); + #undef NBL_EMULATED_VECTOR_DEFINE_OPERATOR - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - this_t output; - + component_t sum = 0; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); + sum = sum + CRTP::getComponent(i); - return output; + return sum; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +template +struct emulated_vector : CRTP +{ + using component_t = ComponentType; + using this_t = emulated_vector; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) + NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t 
other) { this_t output; - + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other[i]); + output.setComponent(i, other.getComponent(i)); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) + template + NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) { this_t output; - + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); + output.setComponent(i, ComponentType::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); - return output; + #define NBL_EMULATED_VECTOR_OPERATOR(OP, ENABLE_CONDITION) NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (component_t val)\ + {\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + val);\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ + {\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i));\ + return output;\ } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); - return output; - } + NBL_EMULATED_VECTOR_OPERATOR(&, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(|, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(^, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(+, true) + NBL_EMULATED_VECTOR_OPERATOR(-, true) + NBL_EMULATED_VECTOR_OPERATOR(*, true) + NBL_EMULATED_VECTOR_OPERATOR(/, true) + + #undef NBL_EMULATED_VECTOR_OPERATOR - 
NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() { - component_t sum = 0; + ComponentType sum = ComponentType::create(0); + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -241,6 +255,7 @@ struct emulated_vector : CRTP } }; + #define DEFINE_OPERATORS_FOR_TYPE(...)\ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ {\ @@ -270,12 +285,13 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- + +template +struct emulated_vector, CRTP, false> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) { @@ -293,7 +309,7 @@ struct emulated_vector : CRTP this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } @@ -330,8 +346,6 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) - //DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) - //DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) @@ -341,9 +355,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - ComponentType sum = ComponentType::create(0); + component_t sum 
= component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9e62e40c2a..e8cb2b73bf 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -85,11 +85,12 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonEncoder { - template + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); + left_shift_operator > leftShift; + portable_vector_t encoded = _static_cast >(decodedValue); NBL_IF_CONSTEXPR(Bits > 16) { encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); @@ -114,7 +115,16 @@ struct MortonEncoder encoded = encoded & _static_cast(coding_mask_v); encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); } - return encoded & _static_cast(coding_mask_v); + encoded = encoded & _static_cast(coding_mask_v); + encoded = leftShift(encoded, _static_cast >(vector(0, 1, 2, 3))); + // The `encoded` above is vectorized for each coord, here we collapse all coords into a single element + encode_t actualEncoded = _static_cast(uint64_t(0)); + array_get, encode_t> getter; + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + actualEncoded = actualEncoded | getter(encoded, i); + + return actualEncoded; } }; @@ -123,12 +133,19 @@ struct MortonEncoder template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonDecoder { - template 16), uint32_t, uint16_t> - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(vector_traits::scalar_type) * 8 >= Bits) + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename 
vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { arithmetic_right_shift_operator > rightShift; portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedValue); + decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); + NBL_IF_CONSTEXPR(Bits > 1) { decoded = decoded & _static_cast(coding_mask_v); From cf52d9cbf2d99e3ceb16495ef9049511cbde2096 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 14 Apr 2025 16:02:17 -0300 Subject: [PATCH 025/157] Adress the last of PR review changes: vectorize more operators, add a bunch of operators and functional structs for vectorial types --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 20 +- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 8 + .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 113 ++++++- include/nbl/builtin/hlsl/functional.hlsl | 28 +- include/nbl/builtin/hlsl/morton.hlsl | 319 ++++++++---------- include/nbl/builtin/hlsl/mpl.hlsl | 28 +- 6 files changed, 310 insertions(+), 206 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 81bdf32c19..f01d2d78ec 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -90,7 +90,7 @@ namespace impl template struct static_cast_helper { - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(NBL_CONST_REF_ARG(From) u) { #ifndef __HLSL_VERSION return static_cast(u); @@ -99,10 +99,26 @@ struct static_cast_helper #endif } }; + +// CPP-side, this can invoke the copy constructor if the copy is non-trivial in generic code +// HLSL-side, this enables generic conversion code between types, contemplating the case where no conversion is needed +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC Same 
cast(NBL_CONST_REF_ARG(Same) s) + { +#ifndef __HLSL_VERSION + return static_cast(s); +#else + return s; +#endif + } +}; + } template -NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) { return impl::static_cast_helper::cast(v); } diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ca51b0060a..4f354c900e 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -431,6 +431,14 @@ NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity template<> NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + // --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- namespace concepts { diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index c4938fc9c2..fd5f5e3c34 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -147,7 +147,7 @@ struct emulated_vector : CRTP return output; } - #define NBL_EMULATED_VECTOR_DEFINE_OPERATOR(OP)\ + #define NBL_EMULATED_VECTOR_OPERATOR(OP)\ NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ {\ this_t output;\ @@ -170,15 +170,33 @@ struct emulated_vector : CRTP return output;\ } - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(&) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(|) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(^) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(+) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(-) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(*) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(/) + 
NBL_EMULATED_VECTOR_OPERATOR(&) + NBL_EMULATED_VECTOR_OPERATOR(|) + NBL_EMULATED_VECTOR_OPERATOR(^) + NBL_EMULATED_VECTOR_OPERATOR(+) + NBL_EMULATED_VECTOR_OPERATOR(-) + NBL_EMULATED_VECTOR_OPERATOR(*) + NBL_EMULATED_VECTOR_OPERATOR(/) - #undef NBL_EMULATED_VECTOR_DEFINE_OPERATOR + #undef NBL_EMULATED_VECTOR_OPERATOR + + #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ + {\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ + } + + NBL_EMULATED_VECTOR_COMPARISON(==) + NBL_EMULATED_VECTOR_COMPARISON(!=) + NBL_EMULATED_VECTOR_COMPARISON(<) + NBL_EMULATED_VECTOR_COMPARISON(<=) + NBL_EMULATED_VECTOR_COMPARISON(>) + NBL_EMULATED_VECTOR_COMPARISON(>=) + + #undef NBL_EMULATED_VECTOR_COMPARISON NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { @@ -222,7 +240,7 @@ struct emulated_vector : CRTP this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + val);\ + output.setComponent(i, CRTP::getComponent(i) OP val);\ return output;\ }\ NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ @@ -230,7 +248,7 @@ struct emulated_vector : CRTP this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i));\ + output.setComponent(i, CRTP::getComponent(i) OP other.getComponent(i));\ return output;\ } @@ -244,6 +262,24 @@ struct emulated_vector : CRTP #undef NBL_EMULATED_VECTOR_OPERATOR + #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ + {\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ + } + + NBL_EMULATED_VECTOR_COMPARISON(==) + 
NBL_EMULATED_VECTOR_COMPARISON(!=) + NBL_EMULATED_VECTOR_COMPARISON(<) + NBL_EMULATED_VECTOR_COMPARISON(<=) + NBL_EMULATED_VECTOR_COMPARISON(>) + NBL_EMULATED_VECTOR_COMPARISON(>=) + + #undef NBL_EMULATED_VECTOR_COMPARISON + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() { ComponentType sum = ComponentType::create(0); @@ -442,7 +478,7 @@ namespace impl template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t2 cast(vector vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -455,7 +491,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t3 cast(vector vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -469,7 +505,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t4 cast(vector vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -487,7 +523,7 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec) { array_get getter; array_set setter; @@ -500,6 +536,53 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) 
+NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ +struct static_cast_helper, emulated_vector_t##M , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##M ;\ + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + for (int i = 0; i < N; ++i)\ + setter(output, i, getter(vec, i));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TRUNCATION(2, 2) +NBL_EMULATED_VEC_TRUNCATION(2, 3) +NBL_EMULATED_VEC_TRUNCATION(2, 4) +NBL_EMULATED_VEC_TRUNCATION(3, 3) +NBL_EMULATED_VEC_TRUNCATION(3, 4) +NBL_EMULATED_VEC_TRUNCATION(4, 4) + +#undef NBL_EMULATED_VEC_TRUNCATION + } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 93687bdb6a..45198cbe7a 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -80,7 +80,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. -#define ALIAS_STD(NAME,OP) template struct NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME { \ using type_t = T; \ \ T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \ @@ -92,7 +92,7 @@ struct reference_wrapper : enable_if_t< #else // CPP -#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; #endif @@ -136,13 +136,35 @@ ALIAS_STD(divides,/) }; +ALIAS_STD(equal_to,==) }; +ALIAS_STD(not_equal_to,!=) }; ALIAS_STD(greater,>) }; ALIAS_STD(less,<) }; ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal,<=) }; +ALIAS_STD(less_equal, <= ) }; #undef ALIAS_STD +// The above comparison operators return bool on STD. 
Here's a specialization so that they return `vector` for vectorial types +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return lhs OP rhs;\ + }\ +}; + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=) + +#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION + // ------------------------ Compound assignment operators ---------------------- #define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e8cb2b73bf..d2fca1165f 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,6 +8,7 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/portable/vector_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -82,61 +83,65 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS // ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- -template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) struct MortonEncoder { template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + 
NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Interleaves each coordinate with `Dim - 1` zeros inbetween each bit, and left-shifts each by their coordinate index + * + * @param [in] decodedValue Cartesian coordinates to interleave and shift + */ + NBL_CONSTEXPR_STATIC_INLINE_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { + NBL_CONSTEXPR_STATIC encode_t EncodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; left_shift_operator > leftShift; - portable_vector_t encoded = _static_cast >(decodedValue); - NBL_IF_CONSTEXPR(Bits > 16) - { - encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 8) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 8 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 4) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 4 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 2) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 2 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 1) + portable_vector_t interleaved = _static_cast >(decodedValue)& EncodeMasks[CodingStages]; + + NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; + [[unroll]] + for (uint16_t i = Stages; i > 0; i--) { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); + interleaved = interleaved | leftShift(interleaved, (uint32_t(1) << (i - 1)) * (Dim - 1)); + interleaved = interleaved & EncodeMasks[i - 1]; } - encoded = encoded & _static_cast(coding_mask_v); - encoded = leftShift(encoded, _static_cast >(vector(0, 1, 2, 3))); - // The `encoded` above is vectorized for each coord, here we collapse all coords into a 
single element - encode_t actualEncoded = _static_cast(uint64_t(0)); + + // After interleaving, shift each coordinate left by their index + return leftShift(interleaved, _static_cast >(vector(0, 1, 2, 3))); + } + + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Encodes a vector of cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + encode_t encoded = _static_cast(uint64_t(0)); array_get, encode_t> getter; [[unroll]] for (uint16_t i = 0; i < Dim; i++) - actualEncoded = actualEncoded | getter(encoded, i); - - return actualEncoded; + encoded = encoded | getter(interleaveShifted, i); + + return encoded; } }; // ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) struct MortonDecoder { template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; arithmetic_right_shift_operator > rightShift; portable_vector_t decoded; array_set, encode_t> setter; @@ -146,38 +151,28 @@ struct MortonDecoder setter(decoded, i, 
encodedValue); decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 1 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 2) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 2 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 4) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 4 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 8) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 8 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 16) + NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; + [[unroll]] + for (uint16_t i = 0; i < Stages; i++) { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 16 * (Dim - 1)); + decoded = decoded & DecodeMasks[i]; + decoded = decoded | rightShift(decoded, (uint32_t(1) << i) * (Dim - 1)); } - return _static_cast(decoded & _static_cast(coding_mask_v)); + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated + NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) + return _static_cast(decoded); + else + return _static_cast(decoded & DecodeMasks[CodingStages]); } }; // ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- // Here because no partial specialization of methods +// `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted + +template +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::round_up_to_pot_v)); template struct Equals; @@ -185,105 +180,76 @@ struct Equals; template struct Equals 
{ - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - retVal[i] = (value & leftShift(Mask, i)) == leftShift(rhs[i], i); - } - return retVal; + NBL_CONSTEXPR portable_vector_t zeros = _static_cast >(_static_cast >(vector(0,0,0,0))); + + portable_vector_t rhsCasted = _static_cast >(rhs); + portable_vector_t xored = rhsCasted ^ value; + return xored == zeros; } }; template struct Equals { - template - NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - using U = make_unsigned_t; - vector interleaved; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); - } - Equals equals; - return equals(value, interleaved); + const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs); + return Equals::__call(value, interleaved); } }; template struct BaseComparison; -// Aux method for extracting highest bit, used by the comparison below -template -NBL_CONSTEXPR_INLINE_FUNC storage_t extractHighestBit(storage_t value, uint16_t coord) -{ - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const uint16_t coordHighestBitIdx = Bits / D - ((coord < Bits % D) ? 
uint16_t(0) : uint16_t(1)); - // This is the index of that bit as an index in the encoded value - const uint16_t shift = coordHighestBitIdx * D + coord; - left_shift_operator leftShift; - return value & leftShift(_static_cast(uint16_t(1)), shift); -} +// Aux variable that has only the sign bit for the first of D dimensions +template +NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1)); template struct BaseComparison { - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; - vector retVal; + NBL_CONSTEXPR_STATIC portable_vector_t InterleaveMasks = _static_cast >(_static_cast >(vector(coding_mask_v, coding_mask_v << 1, coding_mask_v << 2, coding_mask_v << 3))); + NBL_CONSTEXPR_STATIC portable_vector_t SignMasks = _static_cast >(_static_cast >(vector(SignMask, SignMask << 1, SignMask << 2, SignMask << 3))); ComparisonOp comparison; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - storage_t thisCoord = value & leftShift(Mask, i); - storage_t rhsCoord = leftShift(rhs[i], i); - // If coordinate is negative, we add 1s in every bit not corresponding to coord - if (extractHighestBit(thisCoord) != _static_cast(uint64_t(0))) - thisCoord = thisCoord | ~leftShift(Mask, i); - if (extractHighestBit(rhsCoord) != _static_cast(uint64_t(0))) - rhsCoord = rhsCoord | ~leftShift(Mask, i); - retVal[i] = comparison(thisCoord, rhsCoord); - } - return retVal; + // Obtain a vector of deinterleaved coordinates and flip their sign bits + const portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return 
comparison(thisCoord, rhsCoord); } }; template struct BaseComparison { - template - NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - using U = make_unsigned_t; - vector interleaved; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); - } + const vector interleaved = MortonEncoder::interleaveShift(rhs); BaseComparison baseComparison; return baseComparison(value, interleaved); } }; template -struct LessThan : BaseComparison > {}; +struct LessThan : BaseComparison > > {}; template -struct LessEquals : BaseComparison > {}; +struct LessEquals : BaseComparison > > {}; +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEquals : BaseComparison > > {}; } //namespace impl @@ -313,19 +279,11 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { - using U = make_unsigned_t; - left_shift_operator leftShift; - storage_t encodedCartesian = _static_cast(uint64_t(0)); - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - encodedCartesian = encodedCartesian | leftShift(impl::MortonEncoder::encode(_static_cast(cartesian[i])), i); - } this_t retVal; - retVal.value = encodedCartesian; + retVal.value = impl::MortonEncoder::encode(cartesian); return retVal; } @@ -337,8 +295,7 @@ struct code * * @param [in] cartesian Coordinates to encode */ - - template + template= Bits) explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); @@ -347,11 +304,8 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template - constexpr inline explicit operator vector() const noexcept - { - return _static_cast, morton::code, Bits, D>>(*this); - } + template= Bits) + constexpr inline explicit operator vector() const noexcept; #endif @@ -398,14 +352,13 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC { - left_shift_operator leftShift; - // allOnes encodes a cartesian coordinate with all values set to 1 - this_t allOnes; - allOnes.value = leftShift(_static_cast(1), D) - _static_cast(1); - // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 - this_signed_t retVal; - retVal.value = (operator~() + allOnes).value; - return retVal; + this_t zero; + zero.value = _static_cast(0); + #ifndef __HLSL_VERSION + return zero - *this; + #else + return zero - this; + #endif } // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS 
------------------------------------------------- @@ -453,48 +406,51 @@ struct code return value == rhs.value; } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator==(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector equals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::Equals equals; - return equals(value, rhs); - } + return impl::Equals::__call(value, rhs); + } NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value != rhs.value; } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector notEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator== (rhs); + return !equals(rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector less(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::LessThan lessThan; - return lessThan(value, rhs); + return impl::LessThan::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector lessEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::LessEquals lessEquals; - return lessEquals(value, rhs); + return impl::LessEquals::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector greater(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator<= (rhs); + return impl::GreaterThan::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && 
BitsAlreadySpread), vector > operator>=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector greaterEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator< (rhs); + return impl::GreaterEquals::__call(value, rhs); } }; @@ -571,26 +527,29 @@ namespace impl { // I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > { NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { - using U = make_unsigned_t; using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; - arithmetic_right_shift_operator rightShift; - vector cartesian; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); - } - return cartesian; + return morton::impl::MortonDecoder::decode(val.value); } }; } // namespace impl +#ifndef __HLSL_VERSION + +template && D* Bits <= 64) +template = Bits) +constexpr inline morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code, Bits, D>>(*this); +} + +#endif + } //namespace hlsl } //namespace nbl diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 2015b05b3d..67f6445324 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -43,13 +43,23 @@ struct countl_zero : impl::countl_zero template NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +template +struct is_pot : bool_constant< (N > 0 && !(N & 
(N - 1))) > {}; +template +NBL_CONSTEXPR bool is_pot_v = is_pot::value; + template struct log2 { NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint64_t log2_v = log2::value; +NBL_CONSTEXPR uint16_t log2_v = log2::value; + +template +struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; +template +NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -79,11 +89,6 @@ struct align_up template NBL_CONSTEXPR uint64_t align_up_v = align_up::value; -template -struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; -template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; - template struct max { @@ -99,6 +104,17 @@ struct min }; template NBL_CONSTEXPR T min_v = min::value; + +template +struct round_up_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; + +template +struct round_down_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value; + } } } From f954522001947a4f7f4c74696b71571924a5c590 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 24 Apr 2025 15:57:18 -0300 Subject: [PATCH 026/157] Removed `NBL_CONSTEXPR_INLINE_FUNC` macro, replaced all usages with `NBL_CONSTEXPR_FUNC` Adds `OpUndef` to spirv `intrinsics.hlsl` and `cpp_compat.hlsl` Adds an explicit `truncate` function for vectors and emulated vectors Adds a bunch of specializations for vectorial types in `functional.hlsl` Bugfixes and changes to Morton codes, very close to them working properly with emulated ints --- include/nbl/builtin/hlsl/algorithm.hlsl | 18 +- include/nbl/builtin/hlsl/cpp_compat.hlsl | 3 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 27 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 17 +- .../hlsl/cpp_compat/impl/vector_impl.hlsl | 35 -- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 12 +- .../nbl/builtin/hlsl/cpp_compat/promote.hlsl | 12 +- 
.../nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 76 ++++ .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 16 +- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 16 +- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 160 ++++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 423 ++++++++++-------- include/nbl/builtin/hlsl/functional.hlsl | 144 ++++-- include/nbl/builtin/hlsl/ieee754.hlsl | 16 +- include/nbl/builtin/hlsl/ieee754/impl.hlsl | 16 +- include/nbl/builtin/hlsl/morton.hlsl | 358 ++++++++------- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 7 +- include/nbl/builtin/hlsl/type_traits.hlsl | 2 + src/nbl/builtin/CMakeLists.txt | 2 +- 19 files changed, 798 insertions(+), 562 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl create mode 100644 include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl index 3a7c4963c2..0178673f4e 100644 --- a/include/nbl/builtin/hlsl/algorithm.hlsl +++ b/include/nbl/builtin/hlsl/algorithm.hlsl @@ -18,7 +18,7 @@ namespace impl // TODO: use structs template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { T tmp = lhs; lhs = rhs; @@ -26,7 +26,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -34,7 +34,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -42,7 +42,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, 
NBL_REF_ARG(uint64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -50,7 +50,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -58,7 +58,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -66,7 +66,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -74,7 +74,7 @@ namespace impl } #else template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { std::swap(lhs, rhs); } @@ -82,7 +82,7 @@ namespace impl } template -NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) +NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { impl::swap(lhs, rhs); } diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index cb06447aa1..03d47864fb 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -5,8 +5,9 @@ // it includes vector and matrix #include #include +#include // Had to push some stuff here to avoid circular dependencies -#include +#include #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index f01d2d78ec..0985af6eb3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -14,8 +14,6 @@ #define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE constexpr 
static inline #define NBL_CONSTEXPR_STATIC_FUNC constexpr static -#define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) @@ -44,13 +42,11 @@ namespace nbl::hlsl #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_FUNC inline #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static -#define NBL_CONSTEXPR_STATIC_FUNC static -#define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline +#define NBL_CONSTEXPR_STATIC_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) @@ -90,7 +86,7 @@ namespace impl template struct static_cast_helper { - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(NBL_CONST_REF_ARG(From) u) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) u) { #ifndef __HLSL_VERSION return static_cast(u); @@ -100,25 +96,10 @@ struct static_cast_helper } }; -// CPP-side, this can invoke the copy constructor if the copy is non-trivial in generic code -// HLSL-side, this enables generic conversion code between types, contemplating the case where no conversion is needed -template -struct static_cast_helper -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC Same cast(NBL_CONST_REF_ARG(Same) s) - { -#ifndef __HLSL_VERSION - return static_cast(s); -#else - return s; -#endif - } -}; - } template -NBL_CONSTEXPR_INLINE_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) +NBL_CONSTEXPR_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) { return impl::static_cast_helper::cast(v); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl 
b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index e1ba823b9b..4f7c7370bc 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -109,6 +109,8 @@ template struct addCarry_helper; template struct subBorrow_helper; +template +struct undef_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -172,6 +174,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, // Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -640,7 +643,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) struct select_helper) > { - NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { return condition ? 
object1 : object2; } @@ -650,7 +653,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > { - NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { using traits = hlsl::vector_traits; array_get conditionGetter; @@ -665,6 +668,16 @@ struct select_helper&& concepts::V } }; +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC_FUNC T __call() + { + T t; + return t; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl deleted file mode 100644 index 524d1fa45e..0000000000 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ - -#include -#include -#include - -// To prevent implicit truncation warnings -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ - -template NBL_PARTIAL_REQ_TOP(N <= M) -struct static_cast_helper, vector NBL_PARTIAL_REQ_BOT(N <= M) > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) - { - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < N; i++) - { - retVal[i] = val[i]; - } - return retVal; - } -}; - -} -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 284ba564d7..c511042c27 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ 
namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -151,7 +157,7 @@ inline bool any(Vector vec) } template -NBL_CONSTEXPR_INLINE_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) { return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); } @@ -224,13 +230,13 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A } template -NBL_CONSTEXPR_INLINE_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) { return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); } template -NBL_CONSTEXPR_INLINE_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) { return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..0afe214de7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > 
operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -68,7 +68,7 @@ struct Promote, U> } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) { impl::Promote _promote; return _promote(v); diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..a95df183be --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,76 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace impl +{ + +template +struct Truncate +{ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +struct 
Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1]}; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2], v[3] }; + return truncated; + } +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index a0cde90df9..2dfc52c957 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -412,25 +412,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC 
__VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -577,10 +577,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 
shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & ieee754::traits::exponentMask; return biasBits ? value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git 
a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 4f354c900e..8a3fd42faf 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -20,6 +20,7 @@ struct emulated_int64_base { using storage_t = vector; using this_t = emulated_int64_base; + using this_signed_t = emulated_int64_base; storage_t data; @@ -29,6 +30,12 @@ struct emulated_int64_base emulated_int64_base() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_base(const I& toEmulate); + + constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + #endif /** @@ -36,7 +43,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -46,47 +53,57 @@ struct emulated_int64_base /** * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern * - * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated + * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { return create(storage_t(lo, hi)); } + // ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- + // GLM requires these for vector casts + + #ifndef __HLSL_VERSION + + 
template + constexpr explicit operator I() const noexcept; + + #endif + // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC { return data.x; } - NBL_CONSTEXPR_INLINE_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC { return data.y; } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data & rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data | rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data ^ rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { this_t retVal = create(~data); return retVal; @@ -101,56 +118,62 @@ struct emulated_int64_base // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + vector negated = -data; + return this_signed_t::create(_static_cast(negated)); + } + + 
NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); - const this_t retVal = create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); - return retVal; + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); - const this_t retVal = create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); - return retVal; + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return all(data == rhs.data); + equal_to equals; + return all(equals(data, rhs.data)); } - NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return any(data != rhs.data); + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); } - NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less // (lower bits are always positive in both unsigned and 2's complement so comparison 
can happen as-is) const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); const bool LSB = __getLSB() < rhs.__getLSB(); return MSBEqual ? LSB : MSB; } - NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Same reasoning as above const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); const bool LSB = __getLSB() > rhs.__getLSB(); return MSBEqual ? LSB : MSB; } - NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return !operator>(rhs); } - NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return !operator<(rhs); } @@ -162,32 +185,16 @@ using emulated_int64_t = emulated_int64_base; namespace impl { -template<> -struct static_cast_helper -{ - using To = emulated_uint64_t; - using From = emulated_int64_t; - - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) - { - To retVal; - retVal.data = i.data; - return retVal; - } -}; - -template<> -struct static_cast_helper +template +struct static_cast_helper, emulated_int64_base > { - using To = emulated_int64_t; - using From = emulated_uint64_t; + using To = emulated_int64_base; + using From = emulated_int64_base; - // Return only the lowest bits - 
NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other) { To retVal; - retVal.data = u.data; + retVal.data = other.data; return retVal; } }; @@ -199,19 +206,19 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) { return _static_cast(val.data.x); } }; -template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > { using To = I; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } @@ -224,28 +231,53 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) { - return To::create(uint32_t(0), _static_cast(i)); + return To::create(_static_cast(i), uint32_t(0)); } }; -template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v ) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > { using To = emulated_int64_base; using From = I; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) { + // `bit_cast` blocked by GLM vectors using a union + #ifndef __HLSL_VERSION + return To::create(_static_cast(i), _static_cast(i >> 32)); + 
#else To retVal; - retVal.data = bit_cast(i); + retVal.data = bit_cast >(i); return retVal; + #endif } }; } //namespace impl +// Define constructor and conversion operators + +#ifndef __HLSL_VERSION + +template +template +constexpr emulated_int64_base::emulated_int64_base(const I& toEmulate) +{ + *this = _static_cast>(toEmulate); +} + +template +template +constexpr emulated_int64_base::operator I() const noexcept +{ + return _static_cast(*this); +} + +#endif + // ---------------------- Functional operators ------------------------ template @@ -258,7 +290,7 @@ struct left_shift_operator > //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; @@ -269,7 +301,7 @@ struct left_shift_operator > } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -285,7 +317,7 @@ struct arithmetic_right_shift_operator //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? 
bits - ComponentBitWidth : ComponentBitWidth - bits; @@ -296,7 +328,7 @@ struct arithmetic_right_shift_operator } // If `_bits > 63` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -312,18 +344,18 @@ struct arithmetic_right_shift_operator //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? 
~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index fd5f5e3c34..3780ce001b 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -24,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -32,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -40,9 +40,10 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -56,7 +57,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void 
setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -66,7 +67,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -76,9 +77,10 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -92,7 +94,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -104,7 +106,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -116,184 +118,210 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); 
+ return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; +template +struct emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; +// Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - #define NBL_EMULATED_VECTOR_OPERATOR(OP)\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP val);\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (this_t other)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (vector other)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP other[i]);\ - return output;\ - } +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator##OP());\ + return output;\ +} - NBL_EMULATED_VECTOR_OPERATOR(&) - NBL_EMULATED_VECTOR_OPERATOR(|) - NBL_EMULATED_VECTOR_OPERATOR(^) - NBL_EMULATED_VECTOR_OPERATOR(+) - NBL_EMULATED_VECTOR_OPERATOR(-) - NBL_EMULATED_VECTOR_OPERATOR(*) - NBL_EMULATED_VECTOR_OPERATOR(/) +#define 
NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - #undef NBL_EMULATED_VECTOR_OPERATOR +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ - {\ - vector output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ - return output;\ - } +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ +} - NBL_EMULATED_VECTOR_COMPARISON(==) - NBL_EMULATED_VECTOR_COMPARISON(!=) - NBL_EMULATED_VECTOR_COMPARISON(<) - NBL_EMULATED_VECTOR_COMPARISON(<=) - NBL_EMULATED_VECTOR_COMPARISON(>) - NBL_EMULATED_VECTOR_COMPARISON(>=) +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for 
(uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other[i];\ + return output;\ +} - #undef NBL_EMULATED_VECTOR_COMPARISON +#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +using this_t = emulated_vector;\ +using component_t = ComponentType;\ +NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\ +{\ + CRTP output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other.getComponent(i));\ +}\ +NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ +{\ + component_t sum = CRTP::getComponent(0);\ + [[unroll]]\ + for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\ + sum = sum + CRTP::getComponent(i);\ + return sum;\ +} - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() - { - component_t sum = 0; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other[i]);\ + return output;\ +} - return sum; - } +// Fundamental, integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) 
+ NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) }; -template -struct emulated_vector : CRTP +// Fundamental, not integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - this_t output; - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - - return output; - } - - template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); - - return output; - } - - #define NBL_EMULATED_VECTOR_OPERATOR(OP, ENABLE_CONDITION) NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (component_t val)\ - {\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) OP val);\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ - {\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) OP other.getComponent(i));\ - return output;\ - } - - NBL_EMULATED_VECTOR_OPERATOR(&, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(|, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(^, 
concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(+, true) - NBL_EMULATED_VECTOR_OPERATOR(-, true) - NBL_EMULATED_VECTOR_OPERATOR(*, true) - NBL_EMULATED_VECTOR_OPERATOR(/, true) - - #undef NBL_EMULATED_VECTOR_OPERATOR - - #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ - {\ - vector output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ - return output;\ - } - - NBL_EMULATED_VECTOR_COMPARISON(==) - NBL_EMULATED_VECTOR_COMPARISON(!=) - NBL_EMULATED_VECTOR_COMPARISON(<) - NBL_EMULATED_VECTOR_COMPARISON(<=) - NBL_EMULATED_VECTOR_COMPARISON(>) - NBL_EMULATED_VECTOR_COMPARISON(>=) - - #undef NBL_EMULATED_VECTOR_COMPARISON + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() - { - ComponentType sum = ComponentType::create(0); - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + 
NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return sum; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) }; +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR + +// 
----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -302,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -311,7 +339,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -321,15 +349,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- template -struct emulated_vector, CRTP, false> : CRTP +struct emulated_vector, CRTP> : CRTP { using component_t = emulated_float64_t; - using this_t = emulated_vector; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) { this_t output; @@ -340,7 +367,7 @@ struct emulated_vector, CRTP, fa } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) { this_t output; @@ -350,7 +377,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { 
this_t output; @@ -359,7 +386,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -368,7 +395,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -391,7 +418,7 @@ struct emulated_vector, CRTP, fa DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) @@ -478,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -491,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -505,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -523,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) 
vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -541,11 +569,12 @@ struct static_cast_helper, emulated_vecto {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ OutputVecType output;\ + [[unroll]]\ for (int i = 0; i < N; ++i)\ setter(output, i, _static_cast(getter(vec, i)));\ return output;\ @@ -558,16 +587,38 @@ NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) #undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST +#define NBL_EMULATED_VEC_PROMOTION(N) template\ +struct Promote, ComponentType>\ +{\ + using VecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ + {\ + array_set setter;\ + VecType promoted;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(promoted, i, v);\ + return promoted;\ + }\ +}; + +NBL_EMULATED_VEC_PROMOTION(2) +NBL_EMULATED_VEC_PROMOTION(3) +NBL_EMULATED_VEC_PROMOTION(4) + +#undef NBL_EMULATED_VEC_PROMOTION + #define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ -struct static_cast_helper, emulated_vector_t##M , void>\ +struct Truncate, emulated_vector_t##M >\ {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##M ;\ - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ OutputVecType output;\ + [[unroll]]\ for (int i = 0; i < N; ++i)\ setter(output, i, getter(vec, i));\ return output;\ @@ -583,7 +634,7 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) #undef NBL_EMULATED_VEC_TRUNCATION -} +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl 
b/include/nbl/builtin/hlsl/functional.hlsl index 45198cbe7a..76b527f6bd 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -91,7 +91,6 @@ struct reference_wrapper : enable_if_t< #else // CPP - #define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; @@ -135,18 +134,69 @@ ALIAS_STD(divides,/) NBL_CONSTEXPR_STATIC_INLINE T identity = T(1); }; +#ifndef __HLSL_VERSION + +template +struct bit_not : std::bit_not +{ + using type_t = T; +}; + +#else + +template +struct bit_not +{ + using type_t = T; + + T operator()(NBL_CONST_REF_ARG(T) operand) + { + return ~operand; + } +}; + +// The default version above only works for fundamental scalars, vectors and matrices. This is because you can't call `~x` unless `x` is one of the former. +// Similarly, calling `x.operator~()` is not valid for the aforementioned, and only for types overriding this operator. So, we need a specialization. +template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix)) +struct bit_not || concepts::Vector || concepts::Matrix)) > +{ + using type_t = T; + + T operator()(NBL_CONST_REF_ARG(T) operand) + { + return operand.operator~(); + } +}; + +#endif -ALIAS_STD(equal_to,==) }; -ALIAS_STD(not_equal_to,!=) }; -ALIAS_STD(greater,>) }; -ALIAS_STD(less,<) }; -ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal, <= ) }; +ALIAS_STD(equal_to, ==) }; +ALIAS_STD(not_equal_to, !=) }; +ALIAS_STD(greater, >) }; +ALIAS_STD(less, <) }; +ALIAS_STD(greater_equal, >=) }; +ALIAS_STD(less_equal, <=) }; #undef ALIAS_STD -// The above comparison operators return bool on STD. 
Here's a specialization so that they return `vector` for vectorial types -#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +// The above comparison operators return bool on STD, but in HLSL they're supposed to yield bool vectors, so here's a specialization so that they return `vector` for vectorial types + +// GLM doesn't have operators on vectors +#ifndef __HLSL_VERSION + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return glm::GLM_OP (lhs, rhs);\ + }\ +}; + +#else + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ struct NAME ) >\ {\ using type_t = T;\ @@ -156,16 +206,18 @@ struct NAME ) >\ }\ }; -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=) +#endif + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) #undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION -// ------------------------ Compound assignment operators ---------------------- +// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- #define 
COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ @@ -186,7 +238,7 @@ COMPOUND_ASSIGN(divides) #undef COMPOUND_ASSIGN -// ----------------- End of compound assignment ops ---------------- +// ---------------------------------------------------------------- MIN, MAX, TERNARY ------------------------------------------------------------------------- // Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs(condition, lhs, rhs); } }; +// ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- + template struct left_shift_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand << bits; } @@ -246,28 +300,28 @@ struct left_shift_operator) > using type_t = T; using scalar_t = scalar_type_t; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand << bits; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { return operand << bits; } }; -template NBL_PARTIAL_REQ_TOP(!concepts::Vector && concepts::IntegralLikeVectorial) -struct left_shift_operator && concepts::IntegralLikeVectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + 
NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -278,11 +332,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -293,11 +347,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -308,11 +362,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -329,7 +383,7 @@ struct arithmetic_right_shift_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, 
NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand >> bits; } @@ -341,28 +395,28 @@ struct arithmetic_right_shift_operator; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand >> bits; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { return operand >> bits; } }; -template NBL_PARTIAL_REQ_TOP(!concepts::Vector&& concepts::IntegralLikeVectorial) -struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -373,11 +427,11 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -388,11 +442,11 @@ struct arithmetic_right_shift_operator::Dimension>) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) { array_get getter; array_set setter; - 
NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -403,11 +457,11 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -426,7 +480,7 @@ struct logical_right_shift_operator using type_t = T; using unsigned_type_t = make_unsigned_t; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { arithmetic_right_shift_operator arithmeticRightShift; return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 8d9c78a9f0..e81ff08c7b 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -89,7 +89,7 @@ inline int extractExponent(T x) } template -NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { using AsFloat = typename float_of_size::type; return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); @@ -97,20 +97,20 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename 
unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -118,21 +118,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -143,7 +143,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) +NBL_CONSTEXPR_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) { using AsFloat = typename float_of_size::type; using AsUint = typename unsigned_integer_of_size::type; diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl 
b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) +NBL_CONSTEXPR_FUNC T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } -template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } } } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d2fca1165f..650d9ce6ba 
100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,7 +8,6 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/portable/vector_t.hlsl" -#include "nbl/builtin/hlsl/mpl.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -33,8 +32,30 @@ NBL_CONSTEXPR uint16_t CodingStages = 5; template struct coding_mask; -template -NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; +template +NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); + +template +NBL_CONSTEXPR portable_vector_t InterleaveMasks = _static_cast >( + truncate >( + vector(coding_mask_v, + coding_mask_v << 1, + coding_mask_v << 2, + coding_mask_v << 3))); + +template +struct sign_mask : integral_constant {}; + +template +NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); + +template +NBL_CONSTEXPR portable_vector_t SignMasks = _static_cast >( + truncate >( + vector(sign_mask_v, + sign_mask_v << 1, + sign_mask_v << 2, + sign_mask_v << 3))); // 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage // mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). 
@@ -57,23 +78,23 @@ NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ }; -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555) // Groups bits by 1 on, 1 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249) // Groups bits by 1 on, 2 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, uint64_t(0x30C30C30C30C30C3)) // Groups bits by 2 on, 4 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, uint64_t(0xF00F00F00F00F00F)) // Groups bits by 4 on, 8 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, uint64_t(0x00FF0000FF0000FF)) // Groups bits by 8 on, 16 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, uint64_t(0xFFFF00000000FFFF)) // Groups bits by 16 on, 32 off +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 
on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111) // Groups bits by 1 on, 3 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS @@ -81,10 +102,9 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS #undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK #undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK -// ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- - -template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) -struct MortonEncoder +// ----------------------------------------------------------------- MORTON TRANSCODER --------------------------------------------------- +template && Dim * 
Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) +struct Transcoder { template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) @@ -93,22 +113,26 @@ struct MortonEncoder * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { - NBL_CONSTEXPR_STATIC encode_t EncodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; left_shift_operator > leftShift; - portable_vector_t interleaved = _static_cast >(decodedValue)& EncodeMasks[CodingStages]; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; - NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; - [[unroll]] - for (uint16_t i = Stages; i > 0; i--) - { - interleaved = interleaved | leftShift(interleaved, (uint32_t(1) << (i - 1)) * (Dim - 1)); - interleaved = interleaved & EncodeMasks[i - 1]; + #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ + interleaved = interleaved & coding_mask_v;\ } + ENCODE_LOOP_ITERATION(4) + ENCODE_LOOP_ITERATION(3) + ENCODE_LOOP_ITERATION(2) + ENCODE_LOOP_ITERATION(1) + ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION // After interleaving, shift each coordinate left by their index - return leftShift(interleaved, _static_cast >(vector(0, 1, 2, 3))); + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } template 16), vector, vector > @@ -118,52 +142,58 @@ struct MortonEncoder * * @param [in] decodedValue Cartesian coordinates to encode */ - 
NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { - portable_vector_t interleaveShifted = interleaveShift(decodedValue); + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); - encode_t encoded = _static_cast(uint64_t(0)); array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + [[unroll]] - for (uint16_t i = 0; i < Dim; i++) + for (uint16_t i = 1; i < Dim; i++) encoded = encoded | getter(interleaveShifted, i); return encoded; } -}; - -// ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) -struct MortonDecoder -{ template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; - arithmetic_right_shift_operator > rightShift; + arithmetic_right_shift_operator encodedRightShift; portable_vector_t decoded; array_set, encode_t> setter; // Write initial values into decoded [[unroll]] for (uint16_t i = 0; i < Dim; i++) - setter(decoded, i, encodedValue); - decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); + setter(decoded, i, 
encodedRightShift(encodedValue, i)); - NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; - [[unroll]] - for (uint16_t i = 0; i < Stages; i++) - { - decoded = decoded & DecodeMasks[i]; - decoded = decoded | rightShift(decoded, (uint32_t(1) << i) * (Dim - 1)); + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ } + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) return _static_cast(decoded); else - return _static_cast(decoded & DecodeMasks[CodingStages]); + return _static_cast(decoded & coding_mask_v); } }; @@ -172,7 +202,7 @@ struct MortonDecoder // `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted template -NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::round_up_to_pot_v)); +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); template struct Equals; @@ -181,13 +211,14 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR portable_vector_t zeros = _static_cast 
>(_static_cast >(vector(0,0,0,0))); + const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); - portable_vector_t rhsCasted = _static_cast >(rhs); - portable_vector_t xored = rhsCasted ^ value; - return xored == zeros; + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ value; + equal_to > equal; + return equal(xored, zeros); } }; @@ -195,10 +226,11 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs); - return Equals::__call(value, interleaved); + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return Equals::template __call(value, interleaved); } }; @@ -213,17 +245,28 @@ template { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC portable_vector_t InterleaveMasks = _static_cast >(_static_cast >(vector(coding_mask_v, coding_mask_v << 1, coding_mask_v << 2, coding_mask_v << 3))); - NBL_CONSTEXPR_STATIC portable_vector_t SignMasks = _static_cast >(_static_cast >(vector(SignMask, SignMask << 1, SignMask << 2, SignMask << 3))); ComparisonOp comparison; - // Obtain a vector of deinterleaved coordinates and flip their sign bits - const portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; - // rhs already deinterleaved, just have to cast type and flip sign - const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + NBL_IF_CONSTEXPR(Signed) + { + // 
Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); - return comparison(thisCoord, rhsCoord); + return comparison(thisCoord, rhsCoord); + } + } }; @@ -231,11 +274,11 @@ template { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - const vector interleaved = MortonEncoder::interleaveShift(rhs); - BaseComparison baseComparison; - return baseComparison(value, interleaved); + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return BaseComparison::template __call(value, interleaved); } }; @@ -283,7 +326,7 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - retVal.value = impl::MortonEncoder::encode(cartesian); + retVal.value = impl::Transcoder::encode(cartesian); return retVal; } @@ -296,7 +339,7 @@ struct code * @param [in] cartesian Coordinates to encode */ template= Bits) - explicit code(NBL_CONST_REF_ARG(vector) cartesian) + inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); } @@ -304,35 +347,35 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template= Bits) - constexpr inline explicit operator vector() const noexcept; + template= Bits && is_signed_v == Signed) + 
constexpr explicit operator vector() const noexcept; #endif // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value & rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value | rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value ^ rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = ~value; @@ -342,15 +385,15 @@ struct code // Only valid in CPP #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint16_t bits) const; + constexpr this_t operator<<(uint16_t bits) const; - constexpr inline this_t operator>>(uint16_t bits) const; + constexpr this_t operator>>(uint16_t bits) const; #endif // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC { this_t zero; zero.value = _static_cast(0); @@ -363,107 +406,135 @@ struct code // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t 
operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. `static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(impl::InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & impl::InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; this_t retVal; - retVal.value = _static_cast(uint64_t(0)); + retVal.value = getter(interleaveShiftedResult, 0); [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - // put 1 bits everywhere in the bits the current axis is not using - // then extract just the axis bits for the right hand coordinate - // carry-1 will propagate the bits across the already set bits - // then clear out the bits not belonging to current axis - // Note: Its possible to clear on `this` and fill on `rhs` but that will - // disable optimizations, we expect the compiler to optimize a lot if the - // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` - retVal.value |= ((value | (~leftShift(Mask, i))) + (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); - } + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = impl::InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & impl::InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; this_t retVal; - retVal.value = _static_cast(uint64_t(0)); + retVal.value = getter(interleaveShiftedResult, 0); [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate - retVal.value |= ((value & leftShift(Mask, i)) - (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); - } + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) 
NBL_CONST_MEMBER_FUNC { return value == rhs.value; } template) - NBL_CONSTEXPR_INLINE_FUNC vector equals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::Equals::__call(value, rhs); + return impl::Equals::template __call(value, rhs); } - NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value != rhs.value; } template) - NBL_CONSTEXPR_INLINE_FUNC vector notEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !equals(rhs); + return !equal(rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector less(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessThan::__call(value, rhs); + return impl::LessThan::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector lessEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessEquals::__call(value, rhs); + return impl::LessEquals::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector greater(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterThan::__call(value, rhs); + return impl::GreaterThan::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector greaterEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterEquals::__call(value, rhs); + return 
impl::GreaterEquals::template __call(value, rhs); } }; } //namespace morton +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +{ + NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + template struct left_shift_operator > { using type_t = morton::code; using storage_t = typename type_t::storage_t; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { left_shift_operator valueLeftShift; type_t retVal; @@ -479,7 +550,7 @@ struct arithmetic_right_shift_operator > using type_t = morton::code; using storage_t = typename type_t::storage_t; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { arithmetic_right_shift_operator valueArithmeticRightShift; type_t retVal; @@ -496,10 +567,10 @@ struct arithmetic_right_shift_operator > using type_t = morton::code; using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { vector cartesian = _static_cast >(operand); - cartesian >> scalar_t(bits); + cartesian >>= scalar_t(bits); return type_t::create(cartesian); } }; @@ -507,45 +578,24 @@ struct 
arithmetic_right_shift_operator > #ifndef __HLSL_VERSION template&& D* Bits <= 64) -constexpr inline morton::code morton::code::operator<<(uint16_t bits) const +constexpr morton::code morton::code::operator<<(uint16_t bits) const { left_shift_operator> leftShift; return leftShift(*this, bits); } template&& D* Bits <= 64) -constexpr inline morton::code morton::code::operator>>(uint16_t bits) const +constexpr morton::code morton::code::operator>>(uint16_t bits) const { arithmetic_right_shift_operator> rightShift; return rightShift(*this, bits); } -#endif - -// Specialize the `static_cast_helper` -namespace impl -{ - -// I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) - { - using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; - return morton::impl::MortonDecoder::decode(val.value); - } -}; - -} // namespace impl - -#ifndef __HLSL_VERSION - template && D* Bits <= 64) -template = Bits) -constexpr inline morton::code::operator vector() const noexcept +template = Bits && is_signed_v == Signed) +constexpr morton::code::operator vector() const noexcept { - return _static_cast, morton::code, Bits, D>>(*this); + return _static_cast, morton::code>(*this); } #endif diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 8add7a9ed3..901a8e419a 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -114,7 +114,12 @@ NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer::value; //! General Operations - + +//! 
Miscellaneous Instructions +template +[[vk::ext_instruction(spv::OpUndef)]] +T undef(); + // template [[vk::ext_instruction(spv::OpAccessChain)]] diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index bc160de788..a6b3db6708 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -684,6 +684,8 @@ template NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; template NBL_CONSTEXPR uint64_t extent_v = extent::value; +template +NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index f03d8ae22c..c57eec4e61 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -248,9 +248,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/basic.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/intrinsics.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/truncate.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/vector_impl.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 2d0ffbadf914f84e4f7d5bfc8fec3b860121f655 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 28 Apr 2025 15:16:08 -0300 Subject: [PATCH 027/157] Fix the last of the operators --- include/nbl/builtin/hlsl/morton.hlsl | 70 +++++++++++++++++----------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 650d9ce6ba..d570e249c8 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -35,13 +35,16 @@ struct coding_mask; template NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); -template -NBL_CONSTEXPR portable_vector_t InterleaveMasks = _static_cast >( - truncate >( - vector(coding_mask_v, - coding_mask_v << 1, - coding_mask_v << 2, - coding_mask_v << 3))); +// It's a complete cointoss whether template variables work or not, since it's a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work, +// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used + +#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ + truncate >(\ + vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3))) + template struct sign_mask : integral_constant {}; @@ -49,13 +52,12 @@ struct sign_mask : integral_constant NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); -template -NBL_CONSTEXPR portable_vector_t SignMasks = _static_cast >( - truncate >( - vector(sign_mask_v, - sign_mask_v << 1, - sign_mask_v << 2, - sign_mask_v << 3))); +#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\ + truncate >(\ + vector(sign_mask_v< DIM, BITS >,\ + sign_mask_v< DIM, BITS > << 1,\ + sign_mask_v< DIM, BITS > << 2,\ + sign_mask_v< DIM, BITS > << 3))) // 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage // mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). 
@@ -213,10 +215,11 @@ struct Equals template) NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); const portable_vector_t rhsCasted = _static_cast >(rhs); - const portable_vector_t xored = rhsCasted ^ value; + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); equal_to > equal; return equal(xored, zeros); } @@ -247,20 +250,22 @@ struct BaseComparison template) NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); ComparisonOp comparison; NBL_IF_CONSTEXPR(Signed) { // Obtain a vector of deinterleaved coordinates and flip their sign bits - portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; // rhs already deinterleaved, just have to cast type and flip sign - const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; return comparison(thisCoord, rhsCoord); } else { // Obtain a vector of deinterleaved coordinates - portable_vector_t thisCoord = InterleaveMasks & value; + portable_vector_t thisCoord = InterleaveMasks & value; // rhs already deinterleaved, just have to cast type const portable_vector_t rhsCoord = _static_cast >(rhs); @@ -415,13 +420,14 @@ struct code // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); bit_not > bitnot; // For each coordinate, leave its bits intact and turn every other bit ON - const portable_vector_t counterMaskedValue = bitnot(impl::InterleaveMasks) | value; + const portable_vector_t counterMaskedValue = bitnot(InterleaveMasks) | value; // For each coordinate in rhs, leave its bits intact and turn every other bit OFF - const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF - const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & impl::InterleaveMasks; + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks; // Re-encode the result array_get, storage_t> getter; this_t retVal; @@ -429,19 +435,19 @@ struct code [[unroll]] for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); - return retVal; } // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); // For each coordinate, leave its bits intact and turn every other bit OFF - const portable_vector_t maskedValue = impl::InterleaveMasks & value; + const portable_vector_t maskedValue = InterleaveMasks & value; // Do the same for each coordinate in rhs - const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; // Subtract these coordinate-wise, 
then turn all bits not belonging to the current coordinate OFF - const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & impl::InterleaveMasks; + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks; // Re-encode the result array_get, storage_t> getter; this_t retVal; @@ -540,6 +546,10 @@ struct left_shift_operator > type_t retVal; // Shift every coordinate by `bits` retVal.value = valueLeftShift(operand.value, bits * D); + // Previous shift might move bits to positions that storage has available but the morton code does not use + // Un-decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0 so we mask them off + const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0); + retVal.value = retVal.value & _static_cast(UsedBitsMask); return retVal; } }; @@ -570,7 +580,12 @@ struct arithmetic_right_shift_operator > NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { vector cartesian = _static_cast >(operand); - cartesian >>= scalar_t(bits); + // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate), + // then right-shift again to get correct sign on each coordinate + // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits + const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits); + cartesian <<= ShiftFactor; + cartesian >>= ShiftFactor + scalar_t(bits); return type_t::create(cartesian); } }; @@ -600,6 +615,9 @@ constexpr morton::code::operator vector() cons #endif +#undef NBL_MORTON_INTERLEAVE_MASKS +#undef NBL_MORTON_SIGN_MASKS + } //namespace hlsl } //namespace nbl From 68edc322f2ba9c19ab0bd8068da2bae2390d7182 Mon Sep 17 00:00:00 2001 
From: Fletterio Date: Mon, 28 Apr 2025 15:19:48 -0300 Subject: [PATCH 028/157] Change examples test submodule for master merge --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f2ea51d0b3..f4cc4cd22e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f2ea51d0b3e3388c0f9bae03602ec3b1f658c124 +Subproject commit f4cc4cd22ee4bd5506d794e63caafddf974ed7a4 From e5da351b3bff6aebce6e872594def0472901cafe Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 22 Oct 2025 14:58:21 +0200 Subject: [PATCH 029/157] Resolved conflicts, cherry picked 9381491 --- cmake/common.cmake | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 645837aaaa..6c7722a41b 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1277,17 +1277,22 @@ namespace @IMPL_NAMESPACE@ { foreach(INDEX RANGE ${LAST_INDEX}) string(JSON INPUT GET "${IMPL_INPUTS}" ${INDEX} INPUT) string(JSON BASE_KEY GET "${IMPL_INPUTS}" ${INDEX} KEY) - string(JSON COMPILE_OPTIONS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) - + set(COMPILE_OPTIONS "") - math(EXPR LAST_CO "${COMPILE_OPTIONS_LENGTH} - 1") - foreach(COMP_IDX RANGE 0 ${LAST_CO}) - string(JSON COMP_ITEM GET "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS ${COMP_IDX}) - list(APPEND COMPILE_OPTIONS "${COMP_ITEM}") - endforeach() + string(JSON HAS_COMPILE_OPTIONS ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) + if(HAS_COMPILE_OPTIONS STREQUAL "ARRAY") + string(JSON COMPILE_OPTIONS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) + if(NOT COMPILE_OPTIONS_LENGTH EQUAL 0) + math(EXPR LAST_CO "${COMPILE_OPTIONS_LENGTH} - 1") + foreach(COMP_IDX RANGE 0 ${LAST_CO}) + string(JSON COMP_ITEM GET "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS ${COMP_IDX}) + list(APPEND COMPILE_OPTIONS "${COMP_ITEM}") + endforeach() + endif() + endif() set(DEPENDS_ON "") 
- string(JSON HAS_DEPENDS TYPE "${IMPL_INPUTS}" ${INDEX} DEPENDS) + string(JSON HAS_DEPENDS ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} DEPENDS) if(HAS_DEPENDS STREQUAL "ARRAY") string(JSON DEPENDS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} DEPENDS) if(NOT DEPENDS_LENGTH EQUAL 0) @@ -1305,7 +1310,7 @@ namespace @IMPL_NAMESPACE@ { set(HAS_CAPS FALSE) set(CAPS_LENGTH 0) - string(JSON CAPS_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS) + string(JSON CAPS_TYPE ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} CAPS) if(CAPS_TYPE STREQUAL "ARRAY") string(JSON CAPS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS) if(NOT CAPS_LENGTH EQUAL 0) From d7da234c506658675beac2e9049415f909201c26 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 24 Oct 2025 13:34:58 +0200 Subject: [PATCH 030/157] Updated examples --- examples_tests | 2 +- include/nbl/builtin/hlsl/ieee754.hlsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 2b4db21239..f85ae8045c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2b4db2123918f380cc0a35f6889315a02f84ea73 +Subproject commit f85ae8045c13380ace4c124d8a07349b4fd5fb62 diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 6bdfcf2514..65db917883 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -159,7 +159,7 @@ struct flipSign_helper(asUint ^ spirv::select(AsUint(0ull), ieee754::traits::signMask, flip)); + return bit_cast(asUint ^ spirv::select(flip, ieee754::traits::signMask, AsUint(0ull))); #else return bit_cast(asUint ^ (flip ? 
ieee754::traits::signMask : AsUint(0ull))); #endif From 9e4c75ee1d7afcc3164aa8c680efe08058508249 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 09:33:03 +0200 Subject: [PATCH 031/157] cap kind args --- cmake/common.cmake | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 6c7722a41b..4bcbee1c4e 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1210,7 +1210,7 @@ struct DeviceConfigCaps get_target_property(HEADER_RULE_GENERATED ${IMPL_TARGET} NBL_HEADER_GENERATED_RULE) if(NOT HEADER_RULE_GENERATED) - set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include") + set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include") set(INCLUDE_FILE "${INCLUDE_DIR}/$") set(INCLUDE_CONTENT $) @@ -1328,12 +1328,27 @@ namespace @IMPL_NAMESPACE@ { set(CAP_NAMES "") set(CAP_TYPES "") + set(CAP_KINDS "") if(HAS_CAPS) math(EXPR LAST_CAP "${CAPS_LENGTH} - 1") foreach(CAP_IDX RANGE 0 ${LAST_CAP}) + string(JSON CAP_KIND ERROR_VARIABLE CAP_TYPE_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} name) string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} type) + # -> TODO: improve validation, input should be string + if(CAP_TYPE_ERROR) + set(CAP_KIND limits) # I assume its limit by default (or when invalid value present, currently) + else() + if(NOT CAP_KIND MATCHES "^(limits|features)$") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP kind \"${CAP_KIND}\" for ${CAP_NAME}\n" + "Allowed kinds are: limits, features" + ) + endif() + endif() + # <- + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") ERROR_WHILE_PARSING_ITEM( "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" @@ -1371,6 +1386,7 @@ namespace @IMPL_NAMESPACE@ { set(CAP_VALUES_${CAP_IDX} "${VALUES}") list(APPEND CAP_NAMES "${CAP_NAME}") list(APPEND CAP_TYPES "${CAP_TYPE}") + list(APPEND CAP_KINDS "${CAP_KIND}") endforeach() 
endif() @@ -1410,12 +1426,16 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) - foreach(CAP ${CAP_NAMES}) + list(LENGTH CAP_NAMES CAP_COUNT) + math(EXPR CAP_COUNT "${CAP_COUNT} - 1") + foreach(i RANGE ${CAP_COUNT}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) string(CONFIGURE [=[ - retval += ".@CAP@_" + std::to_string(limits.@CAP@); + retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); ]=] RETVALUE_VIEW @ONLY) string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach(CAP) + endforeach() string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") @@ -1466,6 +1486,7 @@ namespace @IMPL_NAMESPACE@ { list(GET CAP_NAMES ${CAP_INDEX} CURRENT_CAP) list(GET CAP_TYPES ${CAP_INDEX} CURRENT_TYPE) + list(GET CAP_KINDS ${CAP_INDEX} CURRENT_KIND) set(VAR_NAME "CAP_VALUES_${CAP_INDEX}") set(VALUES "${${VAR_NAME}}") From 639e2d4d73ef8691591434f1388c2e07447cda8f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 10:23:00 +0200 Subject: [PATCH 032/157] fix a bug after my NSC rules update --- cmake/common.cmake | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 4bcbee1c4e..ab215a59e3 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1427,15 +1427,18 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) - math(EXPR CAP_COUNT "${CAP_COUNT} - 1") - foreach(i RANGE ${CAP_COUNT}) - list(GET CAP_NAMES ${i} CAP) - list(GET CAP_KINDS ${i} KIND) - string(CONFIGURE [=[ + if(CAP_COUNT GREATER 0) + math(EXPR LAST_CAP "${CAP_COUNT} - 1") + foreach(i RANGE ${LAST_CAP}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) + string(CONFIGURE [=[ retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach() +]=] RETVALUE_VIEW @ONLY) + 
string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + endforeach() + endif() + string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") From baa6b4af6a3c37cf8937357625fb02fdabf10955 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 09:33:03 +0200 Subject: [PATCH 033/157] Resolved conflicts, cherry-picked e3129939c8253ec04525bdb726578cfe61b754ac --- cmake/common.cmake | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index ab215a59e3..4bcbee1c4e 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1427,18 +1427,15 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) - if(CAP_COUNT GREATER 0) - math(EXPR LAST_CAP "${CAP_COUNT} - 1") - foreach(i RANGE ${LAST_CAP}) - list(GET CAP_NAMES ${i} CAP) - list(GET CAP_KINDS ${i} KIND) - string(CONFIGURE [=[ + math(EXPR CAP_COUNT "${CAP_COUNT} - 1") + foreach(i RANGE ${CAP_COUNT}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) + string(CONFIGURE [=[ retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach() - endif() - +]=] RETVALUE_VIEW @ONLY) + string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + endforeach() string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") From 0bd968e543e879c737bf264e446cbdd91976af38 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 10:23:00 +0200 Subject: [PATCH 034/157] fix a bug after my NSC rules update --- cmake/common.cmake | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 4bcbee1c4e..ab215a59e3 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake 
@@ -1427,15 +1427,18 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) - math(EXPR CAP_COUNT "${CAP_COUNT} - 1") - foreach(i RANGE ${CAP_COUNT}) - list(GET CAP_NAMES ${i} CAP) - list(GET CAP_KINDS ${i} KIND) - string(CONFIGURE [=[ + if(CAP_COUNT GREATER 0) + math(EXPR LAST_CAP "${CAP_COUNT} - 1") + foreach(i RANGE ${LAST_CAP}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) + string(CONFIGURE [=[ retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach() +]=] RETVALUE_VIEW @ONLY) + string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + endforeach() + endif() + string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") From 0c57c4e2e6c3cbfa5d41c0079cacc82a191b2437 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 22 Oct 2025 14:58:21 +0200 Subject: [PATCH 035/157] Resolved conflicts, cherry-picked 93814913e84eb86e4c625d9e32d83221c62965c9 From 15a91b4a584892a1115fb583402c5155a97ddafa Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 27 Oct 2025 15:52:36 +0100 Subject: [PATCH 036/157] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f85ae8045c..22f2a17401 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f85ae8045c13380ace4c124d8a07349b4fd5fb62 +Subproject commit 22f2a17401e8e70dddff477e11db12ebd1dea2bd From 33e742842f6b9fe9d23f101af63b647d00f4b6b2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 29 Oct 2025 17:12:05 +0100 Subject: [PATCH 037/157] Update examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 22f2a17401..eb1e29f4d0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 
22f2a17401e8e70dddff477e11db12ebd1dea2bd +Subproject commit eb1e29f4d071956d8397108680cb0256ec012b5b From b4ce8f6999c92d38a5fd0f4446c850fe239eebc2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 30 Oct 2025 18:37:37 +0100 Subject: [PATCH 038/157] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index eb1e29f4d0..e1e8dd6fb0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit eb1e29f4d071956d8397108680cb0256ec012b5b +Subproject commit e1e8dd6fb0c46612defeea46c960a6b85f4b4155 From 977c7dddb9300e830432df96a77d58121063775c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:44:51 +0700 Subject: [PATCH 039/157] Add constexpr to _static_cast --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 57 +++++++++++---------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 87baa1f0d6..f871e2a23d 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -3,34 +3,6 @@ #include -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} #ifndef __HLSL_VERSION #include @@ -102,4 +74,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ + return impl::static_cast_helper::cast(v); +} + +} +} + #endif From 3294d0451c367aaa5963eebf3ce3ec7f850f852c Mon Sep 17 00:00:00 
2001 From: kevyuu Date: Thu, 20 Nov 2025 15:55:22 +0700 Subject: [PATCH 040/157] Change NBL_CONSTEXPR_STATIC_FUNC to NBL_CONSTEXPR_STATIC --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 6 +++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 18 +++++++++--------- include/nbl/builtin/hlsl/morton.hlsl | 18 +++++++++--------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 4566e2097b..242e30dfbe 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -673,7 +673,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) struct select_helper) > { - NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { return condition ? 
object1 : object2; } @@ -683,7 +683,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > { - NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { using traits = hlsl::vector_traits; array_get conditionGetter; @@ -701,7 +701,7 @@ struct select_helper&& concepts::V template struct undef_helper { - NBL_CONSTEXPR_STATIC_FUNC T __call() + NBL_CONSTEXPR_STATIC T __call() { T t; return t; diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 3780ce001b..47eb573359 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -192,7 +192,7 @@ NBL_CONSTEXPR_FUNC vector operator##OP (vector;\ using component_t = ComponentType;\ -NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\ +NBL_CONSTEXPR_STATIC this_t create(this_t other)\ {\ CRTP output;\ [[unroll]]\ @@ -209,7 +209,7 @@ NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ -NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\ +NBL_CONSTEXPR_STATIC this_t create(vector other)\ {\ this_t output;\ [[unroll]]\ @@ -356,7 +356,7 @@ struct emulated_vector, CRTP> : using component_t = emulated_float64_t; using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) + NBL_CONSTEXPR_STATIC this_t create(this_t other) { this_t output; @@ -367,7 +367,7 @@ struct emulated_vector, CRTP> : } template - NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) + NBL_CONSTEXPR_STATIC this_t create(vector 
other) { this_t output; @@ -505,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -518,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -532,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -550,7 +550,7 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; @@ -569,7 +569,7 @@ struct static_cast_helper, emulated_vecto {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d570e249c8..35ce511359 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -115,7 +115,7 @@ struct Transcoder * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ 
- NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; @@ -144,7 +144,7 @@ struct Transcoder * * @param [in] decodedValue Cartesian coordinates to encode */ - NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { const portable_vector_t interleaveShifted = interleaveShift(decodedValue); @@ -165,7 +165,7 @@ struct Transcoder * * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) */ - NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + NBL_CONSTEXPR_STATIC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { arithmetic_right_shift_operator encodedRightShift; portable_vector_t decoded; @@ -213,7 +213,7 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); @@ -229,7 +229,7 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); @@ -248,7 +248,7 @@ template { template) - 
NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); @@ -279,7 +279,7 @@ template { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); @@ -327,7 +327,7 @@ struct code * @param [in] cartesian Coordinates to encode. Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; @@ -525,7 +525,7 @@ namespace impl template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > { - NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; return morton::impl::Transcoder::decode(val.value); From e2401c6ff03dd39c58751a4e10d0a5d65065c23d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:55:51 +0700 Subject: [PATCH 041/157] Add template<> to signify 
specialization --- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 8a3fd42faf..7f52638c61 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -370,12 +370,14 @@ constexpr inline emulated_int64_base emulated_int64_base::operat return leftShift(*this, bits); } +template<> constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } +template<> constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; From 07f7a4acf59637f0641ed5ac485a55c28befd07b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:56:13 +0700 Subject: [PATCH 042/157] Remove duplicate partial specialization. --- include/nbl/builtin/hlsl/type_traits.hlsl | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index fb05e11fe2..bf2a35ede9 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -855,9 +855,6 @@ struct float_of_size<8> template using float_of_size_t = typename float_of_size::type; -template -struct extent, 0> : integral_constant {}; - template struct extent, 0> : integral_constant {}; From 42baa6c7eb7d78234d78ae12aba5d82ccdc32447 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:56:54 +0700 Subject: [PATCH 043/157] Change NBL_CONSTEXPR_STATIC_FUNC to NBL_CONSTEXPR_STATIC --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 7f52638c61..89c9e2e733 100644 --- 
a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -43,7 +43,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -56,7 +56,7 @@ struct emulated_int64_base * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) + NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { return create(storage_t(lo, hi)); } @@ -114,6 +114,24 @@ struct emulated_int64_base constexpr inline this_t operator<<(uint32_t bits) const; constexpr inline this_t operator>>(uint32_t bits) const; + constexpr inline this_t& operator&=(const this_t& val) + { + data &= val.data; + return *this; + } + + constexpr inline this_t& operator|=(const this_t& val) + { + data |= val.data; + return *this; + } + + constexpr inline this_t& operator^=(const this_t& val) + { + data ^= val.data; + return *this; + } + #endif // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- @@ -191,7 +209,7 @@ struct static_cast_helper, emulated_int64_base; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) { To retVal; retVal.data = other.data; @@ -206,7 +224,7 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_FUNC To 
cast(NBL_CONST_REF_ARG(From) val) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return _static_cast(val.data.x); } @@ -218,7 +236,7 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using To = I; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } @@ -231,7 +249,7 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { return To::create(_static_cast(i), uint32_t(0)); } @@ -243,7 +261,7 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using To = emulated_int64_base; using From = I; - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { // `bit_cast` blocked by GLM vectors using a union #ifndef __HLSL_VERSION @@ -417,13 +435,13 @@ struct minus > }; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -457,13 +475,13 @@ struct minus_assign > }; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t 
plus_assign::identity = plus::identity; +NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; // ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- From 22e78eb4bbd9ae1675ec3812fe6773dc34ecc508 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:57:53 +0700 Subject: [PATCH 044/157] Fix concatenation of 'operator' and OP with '##' since operatorOP is not a single token --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 47eb573359..cdeddeb105 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -133,17 +133,17 @@ struct emulated_vector; // Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral #define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP() NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i).operator##OP());\ + output.setComponent(i, this_t::getComponent(i).operator OP());\ return output;\ } #define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC 
this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -151,7 +151,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ output.setComponent(i, this_t::getComponent(i) OP val);\ return output;\ }\ -NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -161,7 +161,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP(vector other) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -170,7 +170,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP(vector othe return output;\ } -#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ {\ vector output;\ [[unroll]]\ @@ -180,7 +180,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP(vector othe } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC vector operator OP (vector other) NBL_CONST_MEMBER_FUNC \ {\ vector output;\ [[unroll]]\ From 8daf855e0a88f6d5b81b6b94b1b3426a4da211ea Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:31:27 +0700 Subject: [PATCH 045/157] 'equals' to 'equal' --- include/nbl/builtin/hlsl/morton.hlsl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 35ce511359..696124ae0c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -207,10 +207,10 @@ template && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); template -struct Equals; +struct Equal; template -struct Equals +struct Equal { template) NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) @@ -226,14 +226,14 @@ struct Equals }; template -struct Equals +struct Equal { template) NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); - return Equals::template __call(value, interleaved); + return Equal::template __call(value, interleaved); } }; @@ -291,13 +291,13 @@ template > > {}; template -struct LessEquals : BaseComparison > > {}; +struct LessEqual : BaseComparison > > {}; template struct GreaterThan : BaseComparison > > {}; template -struct GreaterEquals : BaseComparison > > {}; +struct GreaterEqual : BaseComparison > > {}; } //namespace impl @@ -470,7 +470,7 @@ struct code NBL_FUNC_REQUIRES(impl::Comparable) NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::Equals::template __call(value, rhs); + return impl::Equal::template __call(value, rhs); } NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -494,9 +494,9 @@ struct code template) - NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessEquals::template __call(value, rhs); + return 
impl::LessEqual::template __call(value, rhs); } template) - NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterEquals::template __call(value, rhs); + return impl::GreaterEqual::template __call(value, rhs); } }; From 831244f2f407763511f47d9332560e5d2bf9d308 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:31:41 +0700 Subject: [PATCH 046/157] Pass vec by value not ref --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 0afe214de7..27461d5949 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC T operator()(const U v) { return T(v); } From a560180f8ef3962921e3060423250fbb37ecb0ea Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:41:08 +0700 Subject: [PATCH 047/157] Use truncate to truncate --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 696124ae0c..4512774b14 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -118,7 +118,7 @@ struct Transcoder NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; - portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; + portable_vector_t interleaved = truncate >(decodedValue) & coding_mask_v; #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ From 
e320ed8f8f620d1c133b14edf8ec7b96bfb39956 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 22 Nov 2025 12:30:57 +0700 Subject: [PATCH 048/157] Make morton compile --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 4512774b14..696124ae0c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -118,7 +118,7 @@ struct Transcoder NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; - portable_vector_t interleaved = truncate >(decodedValue) & coding_mask_v; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ From 83d27c9b1050be68bddd5c5b4abed0f2c3fca94c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 27 Nov 2025 20:55:09 +0700 Subject: [PATCH 049/157] NBL_CONSTEXPR_INLINE_VAR for template constexpr variable --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 16 ++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index f871e2a23d..84d7b9d8b0 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -13,6 +13,7 @@ #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline +#define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_IF_CONSTEXPR(...) 
if constexpr (__VA_ARGS__) @@ -44,6 +45,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_INLINE_VAR inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 89c9e2e733..2214835df9 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -435,13 +435,13 @@ struct minus > }; template<> -NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -475,13 +475,13 @@ struct minus_assign > }; template<> -NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; 
+NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; // ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- From c49691656ac8f54e5fbcaa298c2f75050235f682 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:07:48 +0700 Subject: [PATCH 050/157] Promote and Truncate take vector and scalar by value and the rest by reference --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 14 ++++++++------ include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 9 +++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 27461d5949..e267895ed5 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - NBL_CONSTEXPR_FUNC T operator()(const U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, 
vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -67,13 +67,15 @@ struct Promote, U> } +// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? template -NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { impl::Promote _promote; return _promote(v); } + } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index a95df183be..1e6b5b0f94 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -24,7 +24,7 @@ struct Truncate template NBL_PARTIAL_REQ_TOP(concepts::Scalar) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0] }; return truncated; @@ -34,7 +34,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1]}; return truncated; @@ -44,7 +44,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept template 
NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1], v[2] }; return truncated; @@ -54,7 +54,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1], v[2], v[3] }; return truncated; @@ -63,6 +63,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } //namespace impl +// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? template NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) { From d7bd053e5c129bdad2ced9a399d182a51ba239e6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:10:57 +0700 Subject: [PATCH 051/157] Remove promote and truncate comment about specialization --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 1 - include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 1 - 2 files changed, 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index e267895ed5..6e75a55b1b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -67,7 +67,6 @@ struct Promote, U> } -// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? 
template NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 1e6b5b0f94..63e0ab7b93 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -63,7 +63,6 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } //namespace impl -// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? template NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) { From 3f3a23e13548f0d140c1dfa86a4e877b0d29214f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:11:12 +0700 Subject: [PATCH 052/157] Add comment to rename log2 --- include/nbl/builtin/hlsl/mpl.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 4594662969..7de4983c8e 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -110,6 +110,7 @@ struct round_up_to_pot : integral_constant NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; +// TODO: should rename log2 to log2_floor template struct round_down_to_pot : integral_constant > {}; template From 8dcdfdd930a99487134e65de707bb2d675cf5446 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:17:54 +0700 Subject: [PATCH 053/157] Change dimension type from uint16_t to int32_t --- include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 63e0ab7b93..38467942f9 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -21,7 +21,7 @@ struct Truncate } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) struct 
Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -31,7 +31,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -41,7 +41,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -51,7 +51,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) From 92cd9e775f49d87c46a8c24228dcfbfa909e0866 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 18:21:10 +0700 Subject: [PATCH 054/157] Redefine some macro --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 84d7b9d8b0..bbb2a73ee7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -9,6 +9,7 @@ #define ARROW -> #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR +#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline @@ -16,6 +17,9 @@ #define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC 
NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr +#define NBL_CONSTEXPR_OOL_MEMBER constexpr +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl @@ -41,13 +45,17 @@ namespace nbl::hlsl #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_INLINE_VAR inline +#define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR const +#define NBL_CONSTEXPR_OOL_MEMBER const +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) namespace nbl From 7f6d8b82bdd3825e2ad99b9c60695ef7d1ed0c6f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 18:42:41 +0700 Subject: [PATCH 055/157] use const instead of static const for local variable in hlsl --- include/nbl/builtin/hlsl/functional.hlsl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 76b527f6bd..fd23ad388c 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -321,7 +321,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -336,7 +336,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR 
uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -351,7 +351,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -366,7 +366,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -416,7 +416,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -431,7 +431,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -446,7 +446,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -461,7 +461,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] From 1d9ce208cf873228ff7de438608b7b59af2506ca Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 20:26:21 +0700 Subject: [PATCH 056/157] Rename NBL_CONSTEXPR_INLINE to NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR --- 
include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl | 2 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 4 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 2 +- include/nbl/builtin/hlsl/math/functions.hlsl | 4 +- .../math/quadrature/gauss_legendre/impl.hlsl | 56 +++++++++---------- include/nbl/builtin/hlsl/mpl.hlsl | 24 ++++---- include/nbl/builtin/hlsl/numbers.hlsl | 28 +++++----- include/nbl/builtin/hlsl/type_traits.hlsl | 26 ++++----- .../hlsl/workgroup2/arithmetic_config.hlsl | 2 +- 9 files changed, 74 insertions(+), 74 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl index b27c892abe..40f64d9cf8 100644 --- a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl @@ -406,7 +406,7 @@ template struct is_ggx : impl::is_ggx {}; template -NBL_CONSTEXPR bool is_ggx_v = is_ggx::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_ggx_v = is_ggx::value; } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index bbb2a73ee7..89c10d14fd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -9,7 +9,6 @@ #define ARROW -> #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline @@ -17,6 +16,7 @@ #define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline #define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline @@ -45,7 +45,6 @@ namespace nbl::hlsl #define ARROW .arrow(). 
#define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static @@ -53,6 +52,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static #define NBL_CONSTEXPR_FUNC_SCOPE_VAR const #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 242e30dfbe..7850fd7cf3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -27,7 +27,7 @@ template::type; - NBL_CONSTEXPR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; + NBL_CONSTEXPR_FUNC_SCOPE_VAR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; UnsignedInteger absVal = val & Mask; return absVal > (ieee754::traits::specialValueExp << ieee754::traits::mantissaBitCnt); } diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 20442c467b..21f0e6ef2b 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -123,9 +123,9 @@ void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) { #ifdef __HLSL_VERSION - NBL_CONSTEXPR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); #else - NBL_CONSTEXPR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); #endif 
const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl index 3bcfbb2388..cd402d0cd4 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl @@ -14,25 +14,25 @@ namespace float_t_namespace { -NBL_CONSTEXPR float_t xi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_2[2] = { TYPED_NUMBER(-0.5773502691896257), TYPED_NUMBER(0.5773502691896257) }; -NBL_CONSTEXPR float_t xi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_3[3] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.7745966692414833), TYPED_NUMBER(0.7745966692414833) }; -NBL_CONSTEXPR float_t xi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_4[4] = { TYPED_NUMBER(-0.3399810435848562), TYPED_NUMBER(0.3399810435848562), TYPED_NUMBER(-0.8611363115940525), TYPED_NUMBER(0.8611363115940525) }; -NBL_CONSTEXPR float_t xi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_5[5] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.5384693101056830), TYPED_NUMBER(0.5384693101056830), @@ -40,7 +40,7 @@ NBL_CONSTEXPR float_t xi_5[5] = { TYPED_NUMBER(0.9061798459386639) }; -NBL_CONSTEXPR float_t xi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_6[6] = { TYPED_NUMBER(0.6612093864662645), TYPED_NUMBER(-0.6612093864662645), TYPED_NUMBER(-0.2386191860831969), @@ -49,7 +49,7 @@ NBL_CONSTEXPR float_t xi_6[6] = { TYPED_NUMBER(0.9324695142031520) }; -NBL_CONSTEXPR float_t xi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_7[7] = { TYPED_NUMBER(0.0), TYPED_NUMBER(0.4058451513773971), TYPED_NUMBER(-0.4058451513773971), @@ -59,7 +59,7 @@ NBL_CONSTEXPR float_t xi_7[7] = { TYPED_NUMBER(0.9491079123427585) }; -NBL_CONSTEXPR float_t xi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_8[8] = { TYPED_NUMBER(-0.1834346424956498), TYPED_NUMBER(0.1834346424956498), 
TYPED_NUMBER(-0.5255324099163289), @@ -70,7 +70,7 @@ NBL_CONSTEXPR float_t xi_8[8] = { TYPED_NUMBER(0.9602898564975362) }; -NBL_CONSTEXPR float_t xi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_9[9] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.8360311073266357), TYPED_NUMBER(0.8360311073266357), @@ -82,7 +82,7 @@ NBL_CONSTEXPR float_t xi_9[9] = { TYPED_NUMBER(0.6133714327005903) }; -NBL_CONSTEXPR float_t xi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_10[10] = { TYPED_NUMBER(-0.1488743389816312), TYPED_NUMBER(0.1488743389816312), TYPED_NUMBER(-0.4333953941292471), @@ -95,7 +95,7 @@ NBL_CONSTEXPR float_t xi_10[10] = { TYPED_NUMBER(0.9739065285171717) }; -NBL_CONSTEXPR float_t xi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_11[11] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2695431559523449), TYPED_NUMBER(0.2695431559523449), @@ -109,7 +109,7 @@ NBL_CONSTEXPR float_t xi_11[11] = { TYPED_NUMBER(0.9782286581460569) }; -NBL_CONSTEXPR float_t xi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_12[12] = { TYPED_NUMBER(-0.1252334085114689), TYPED_NUMBER(0.1252334085114689), TYPED_NUMBER(-0.3678314989981801), @@ -124,7 +124,7 @@ NBL_CONSTEXPR float_t xi_12[12] = { TYPED_NUMBER(0.9815606342467192) }; -NBL_CONSTEXPR float_t xi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_13[13] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2304583159551347), TYPED_NUMBER(0.2304583159551347), @@ -140,7 +140,7 @@ NBL_CONSTEXPR float_t xi_13[13] = { TYPED_NUMBER(0.9841830547185881) }; -NBL_CONSTEXPR float_t xi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_14[14] = { TYPED_NUMBER(-0.1080549487073436), TYPED_NUMBER(0.1080549487073436), TYPED_NUMBER(-0.3191123689278897), @@ -157,7 +157,7 @@ NBL_CONSTEXPR float_t xi_14[14] = { TYPED_NUMBER(0.9862838086968123) }; -NBL_CONSTEXPR float_t xi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_15[15] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2011940939974345), 
TYPED_NUMBER(0.2011940939974345), @@ -175,25 +175,25 @@ NBL_CONSTEXPR float_t xi_15[15] = { TYPED_NUMBER(0.9879925180204854) }; -NBL_CONSTEXPR float_t wi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_2[2] = { TYPED_NUMBER(1.0000000000000000), TYPED_NUMBER(1.0000000000000000) }; -NBL_CONSTEXPR float_t wi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_3[3] = { TYPED_NUMBER(0.8888888888888888), TYPED_NUMBER(0.5555555555555555), TYPED_NUMBER(0.5555555555555555) }; -NBL_CONSTEXPR float_t wi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_4[4] = { TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.3478548451374538), TYPED_NUMBER(0.3478548451374538) }; -NBL_CONSTEXPR float_t wi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_5[5] = { TYPED_NUMBER(0.5688888888888888), TYPED_NUMBER(0.4786286704993664), TYPED_NUMBER(0.4786286704993664), @@ -201,7 +201,7 @@ NBL_CONSTEXPR float_t wi_5[5] = { TYPED_NUMBER(0.2369268850561890) }; -NBL_CONSTEXPR float_t wi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_6[6] = { TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.4679139345726910), @@ -210,7 +210,7 @@ NBL_CONSTEXPR float_t wi_6[6] = { TYPED_NUMBER(0.1713244923791703) }; -NBL_CONSTEXPR float_t wi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_7[7] = { TYPED_NUMBER(0.4179591836734693), TYPED_NUMBER(0.3818300505051189), TYPED_NUMBER(0.3818300505051189), @@ -220,7 +220,7 @@ NBL_CONSTEXPR float_t wi_7[7] = { TYPED_NUMBER(0.1294849661688696) }; -NBL_CONSTEXPR float_t wi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_8[8] = { TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3137066458778872), @@ -231,7 +231,7 @@ NBL_CONSTEXPR float_t wi_8[8] = { TYPED_NUMBER(0.1012285362903762) }; -NBL_CONSTEXPR float_t wi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_9[9] = { TYPED_NUMBER(0.3302393550012597), 
TYPED_NUMBER(0.1806481606948574), TYPED_NUMBER(0.1806481606948574), @@ -243,7 +243,7 @@ NBL_CONSTEXPR float_t wi_9[9] = { TYPED_NUMBER(0.2606106964029354) }; -NBL_CONSTEXPR float_t wi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_10[10] = { TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2692667193099963), @@ -256,7 +256,7 @@ NBL_CONSTEXPR float_t wi_10[10] = { TYPED_NUMBER(0.0666713443086881) }; -NBL_CONSTEXPR float_t wi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_11[11] = { TYPED_NUMBER(0.2729250867779006), TYPED_NUMBER(0.2628045445102466), TYPED_NUMBER(0.2628045445102466), @@ -270,7 +270,7 @@ NBL_CONSTEXPR float_t wi_11[11] = { TYPED_NUMBER(0.0556685671161736) }; -NBL_CONSTEXPR float_t wi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_12[12] = { TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2334925365383548), @@ -285,7 +285,7 @@ NBL_CONSTEXPR float_t wi_12[12] = { TYPED_NUMBER(0.0471753363865118) }; -NBL_CONSTEXPR float_t wi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_13[13] = { TYPED_NUMBER(0.2325515532308739), TYPED_NUMBER(0.2262831802628972), TYPED_NUMBER(0.2262831802628972), @@ -301,7 +301,7 @@ NBL_CONSTEXPR float_t wi_13[13] = { TYPED_NUMBER(0.0404840047653158) }; -NBL_CONSTEXPR float_t wi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_14[14] = { TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2051984637212956), @@ -318,7 +318,7 @@ NBL_CONSTEXPR float_t wi_14[14] = { TYPED_NUMBER(0.0351194603317518) }; -NBL_CONSTEXPR float_t wi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_15[15] = { TYPED_NUMBER(0.2025782419255612), TYPED_NUMBER(0.1984314853271115), TYPED_NUMBER(0.1984314853271115), diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 7de4983c8e..7734dea15f 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ 
b/include/nbl/builtin/hlsl/mpl.hlsl @@ -41,12 +41,12 @@ struct countl_zero : impl::countl_zero static_assert(is_integral::value, "countl_zero type parameter must be an integral type"); }; template -NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T countl_zero_v = countl_zero::value; template struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_pot_v = is_pot::value; template struct log2 @@ -54,12 +54,12 @@ struct log2 NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint16_t log2_v = log2::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_v = log2::value; template struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; template -NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -69,7 +69,7 @@ struct rotl NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X << r) | (X >> (N - r))) : (X >> (-r)) | (X << (N - (-r))); }; template -NBL_CONSTEXPR T rotl_v = rotl::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotl_v = rotl::value; template struct rotr @@ -79,7 +79,7 @@ struct rotr NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X >> r) | (X << (N - r))) : (X << (-r)) | (X >> (N - (-r))); }; template -NBL_CONSTEXPR T rotr_v = rotr::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotr_v = rotr::value; template struct align_up @@ -87,7 +87,7 @@ struct align_up NBL_CONSTEXPR_STATIC_INLINE uint64_t value = X ? 
(((X-1)/M+1)*M):0; }; template -NBL_CONSTEXPR uint64_t align_up_v = align_up::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t align_up_v = align_up::value; template struct max @@ -95,7 +95,7 @@ struct max NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T max_v = max::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T max_v = max::value; template struct min @@ -103,18 +103,18 @@ struct min NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T min_v = min::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T min_v = min::value; template struct round_up_to_pot : integral_constant > {}; template -NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_up_to_pot_v = round_up_to_pot::value; // TODO: should rename log2 to log2_floor template struct round_down_to_pot : integral_constant > {}; template -NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_down_to_pot_v = round_down_to_pot::value; template struct find_lsb @@ -122,7 +122,7 @@ struct find_lsb NBL_CONSTEXPR_STATIC_INLINE uint16_t value = log2::value; }; template -NBL_CONSTEXPR uint64_t find_lsb_v = find_lsb::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t find_lsb_v = find_lsb::value; } } } diff --git a/include/nbl/builtin/hlsl/numbers.hlsl b/include/nbl/builtin/hlsl/numbers.hlsl index 6671a44756..4594596590 100644 --- a/include/nbl/builtin/hlsl/numbers.hlsl +++ b/include/nbl/builtin/hlsl/numbers.hlsl @@ -11,33 +11,33 @@ namespace numbers { template -NBL_CONSTEXPR float_t e = float_t(2.718281828459045); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t e = float_t(2.718281828459045); template -NBL_CONSTEXPR float_t log2e = float_t(1.4426950408889634); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log2e = float_t(1.4426950408889634); template -NBL_CONSTEXPR float_t log10e = float_t(0.4342944819032518); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log10e = 
float_t(0.4342944819032518); template -NBL_CONSTEXPR float_t pi = float_t(3.141592653589793); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t pi = float_t(3.141592653589793); template -NBL_CONSTEXPR float_t inv_pi = float_t(0.3183098861837907); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_pi = float_t(0.3183098861837907); template -NBL_CONSTEXPR float_t inv_sqrtpi = float_t(0.5641895835477563); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrtpi = float_t(0.5641895835477563); template -NBL_CONSTEXPR float_t ln2 = float_t(0.6931471805599453); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln2 = float_t(0.6931471805599453); template -NBL_CONSTEXPR float_t inv_ln2 = float_t(1.44269504088896); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_ln2 = float_t(1.44269504088896); template -NBL_CONSTEXPR float_t ln10 = float_t(2.302585092994046); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln10 = float_t(2.302585092994046); template -NBL_CONSTEXPR float_t sqrt2 = float_t(1.4142135623730951); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt2 = float_t(1.4142135623730951); template -NBL_CONSTEXPR float_t sqrt3 = float_t(1.7320508075688772); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt3 = float_t(1.7320508075688772); template -NBL_CONSTEXPR float_t inv_sqrt3 = float_t(0.5773502691896257); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrt3 = float_t(0.5773502691896257); template -NBL_CONSTEXPR float_t egamma = float_t(0.5772156649015329); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t egamma = float_t(0.5772156649015329); template -NBL_CONSTEXPR float_t phi = float_t(1.618033988749895); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t phi = float_t(1.618033988749895); } } diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index bf2a35ede9..b682b8da8b 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -638,25 +638,25 @@ using conditional_t = typename conditional::type; // 
Template Variables template -NBL_CONSTEXPR T integral_constant_v = integral_constant::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T integral_constant_v = integral_constant::value; template -NBL_CONSTEXPR bool is_same_v = is_same::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_same_v = is_same::value; template -NBL_CONSTEXPR bool is_unsigned_v = is_unsigned::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_unsigned_v = is_unsigned::value; template -NBL_CONSTEXPR bool is_integral_v = is_integral::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_integral_v = is_integral::value; template -NBL_CONSTEXPR bool is_floating_point_v = is_floating_point::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_floating_point_v = is_floating_point::value; template -NBL_CONSTEXPR bool is_signed_v = is_signed::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_signed_v = is_signed::value; template -NBL_CONSTEXPR bool is_scalar_v = is_scalar::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_scalar_v = is_scalar::value; template -NBL_CONSTEXPR uint64_t size_of_v = size_of::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t size_of_v = size_of::value; template -NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t alignment_of_v = alignment_of::value; template -NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions @@ -685,7 +685,7 @@ template struct is_vector > : bool_constant {}; template -NBL_CONSTEXPR bool is_vector_v = is_vector::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_vector_v = is_vector::value; #ifndef __HLSL_VERSION template @@ -696,7 +696,7 @@ template struct is_matrix > : bool_constant {}; template -NBL_CONSTEXPR bool is_matrix_v = is_matrix::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_matrix_v = is_matrix::value; template @@ -741,7 +741,7 @@ struct extent, I> : 
integral_constant: // Template Variables template -NBL_CONSTEXPR uint64_t extent_v = extent::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t extent_v = extent::value; template::value> diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl index 03ccd64d4e..22c93ce193 100644 --- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl +++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl @@ -225,7 +225,7 @@ template struct is_configuration > : bool_constant {}; template -NBL_CONSTEXPR bool is_configuration_v = is_configuration::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_configuration_v = is_configuration::value; } } From 1eded124d9a0f26251e6e7ad22843ac57e0f288b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 14:59:38 +0700 Subject: [PATCH 057/157] Refactor emulated_integral_64 --- .../emulated/int64_common_member_inc.hlsl | 155 ++++++++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 332 ++++++------------ include/nbl/builtin/hlsl/functional.hlsl | 2 +- 3 files changed, 261 insertions(+), 228 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl new file mode 100644 index 0000000000..2dd7bafa41 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -0,0 +1,155 @@ + +storage_t data; + +/** +* @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern +* +* @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (first component are the lower 32 bits) +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) +{ + this_t retVal; + retVal.data = _data; + return retVal; +} + +/** +* @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern +* +* @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated +* @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) +{ + return create(storage_t(lo, hi)); +} + +// ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- +// GLM requires these for vector casts + +#ifndef __HLSL_VERSION + +template +constexpr explicit operator I() const noexcept; + +#endif + +// ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC +{ + return data.x; +} + +NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC +{ + return data.y; +} + +// ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data & rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data | rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data ^ rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(~data); + return retVal; +} + +// Only valid in CPP +#ifndef __HLSL_VERSION +constexpr inline this_t operator>>(uint32_t 
bits) const; + +constexpr inline this_t operator<<(uint32_t bits) const; + +constexpr inline this_t& operator&=(const this_t& val) +{ + data &= val.data; + return *this; +} + +constexpr inline this_t& operator|=(const this_t& val) +{ + data |= val.data; + return *this; +} + +constexpr inline this_t& operator^=(const this_t& val) +{ + data ^= val.data; + return *this; +} + +#endif + +// ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); +} + +NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); +} + +// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- +NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + equal_to equals; + return all(equals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? 
(bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator>(rhs); +} + +NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator<(rhs); +} diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 2214835df9..ce98d5268f 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -15,199 +15,92 @@ namespace nbl namespace hlsl { -template -struct emulated_int64_base +struct emulated_int64_t; + +struct emulated_uint64_t { using storage_t = vector; - using this_t = emulated_int64_base; - using this_signed_t = emulated_int64_base; - - storage_t data; + using this_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + #include "int64_common_member_inc.hlsl" #ifndef __HLSL_VERSION - - emulated_int64_base() = default; - + emulated_uint64_t() = default; // GLM requires these to cast vectors because it uses a native `static_cast` template - constexpr explicit emulated_int64_base(const I& toEmulate); - - constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + constexpr explicit emulated_uint64_t(const I& toEmulate); + constexpr explicit 
emulated_uint64_t(const emulated_int64_t& other); #endif +}; - /** - * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) - */ - NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - this_t retVal; - retVal.data = _data; - return retVal; - } - - /** - * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern - * - * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated - * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated - */ - NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) - { - return create(storage_t(lo, hi)); - } - - // ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- - // GLM requires these for vector casts +struct emulated_int64_t +{ + using storage_t = vector; + using this_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; + + #include "int64_common_member_inc.hlsl" + #ifndef __HLSL_VERSION - + emulated_int64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` template - constexpr explicit operator I() const noexcept; + constexpr explicit emulated_int64_t(const I& toEmulate); + constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC { - return data.x; - } - - NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC - { - return data.y; - } - - // 
------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data & rhs.data); - return retVal; - } - - NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data | rhs.data); - return retVal; + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); } +}; - NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data ^ rhs.data); - return retVal; - } +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- - NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(~data); - return retVal; - } +template<> +struct is_signed : bool_constant {}; - // Only valid in CPP - #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint32_t bits) const; - constexpr inline this_t operator>>(uint32_t bits) const; +template<> +struct is_unsigned : bool_constant {}; - constexpr inline this_t& operator&=(const this_t& val) - { - data &= val.data; - return *this; - } +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ - constexpr inline this_t& operator|=(const this_t& val) - { - data |= val.data; - return *this; - } +template +NBL_BOOL_CONCEPT ImitationIntegral64Scalar = same_as || same_as; - constexpr inline this_t& operator^=(const this_t& val) - { - data ^= val.data; - return *this; - } - - #endif - - // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC - { - vector negated = -data; - return 
this_signed_t::create(_static_cast(negated)); - } - - NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); - return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); - } - - NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); - return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); - } - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - equal_to equals; - return all(equals(data, rhs.data)); - } - - NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - not_equal_to notEquals; - return any(notEquals(data, rhs.data)); - } +namespace impl +{ - NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less - // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) - const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); - const bool LSB = __getLSB() < rhs.__getLSB(); - return MSBEqual ? LSB : MSB; - } +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; - NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - // Same reasoning as above - const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? 
(bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); - const bool LSB = __getLSB() > rhs.__getLSB(); - return MSBEqual ? LSB : MSB; - } +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} - NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator>(rhs); - } - NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator<(rhs); - } -}; +} -using emulated_uint64_t = emulated_int64_base; -using emulated_int64_t = emulated_int64_base; namespace impl { -template -struct static_cast_helper, emulated_int64_base > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar && concepts::ImitationIntegral64Scalar && !concepts::same_as) +struct static_cast_helper && concepts::ImitationIntegral64Scalar && !concepts::same_as) > { - using To = emulated_int64_base; - using From = emulated_int64_base; NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) { @@ -217,12 +110,9 @@ struct static_cast_helper, emulated_int64_base NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = I; - using From = emulated_int64_base; - // Return only the lowest bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { @@ -230,24 +120,18 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > +template 
NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = I; - using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = emulated_int64_base; - using From = I; - // Set only lower bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { @@ -255,12 +139,9 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = emulated_int64_base; - using From = I; - NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { // `bit_cast` blocked by GLM vectors using a union @@ -280,16 +161,30 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con #ifndef __HLSL_VERSION -template +constexpr emulated_int64_t::emulated_int64_t(const emulated_uint64_t& other) : data(other.data) {} + +constexpr emulated_uint64_t::emulated_uint64_t(const emulated_int64_t& other) : data(other.data) {} + +template +constexpr 
emulated_int64_t::emulated_int64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + template -constexpr emulated_int64_base::emulated_int64_base(const I& toEmulate) +constexpr emulated_uint64_t::emulated_uint64_t(const I& toEmulate) { - *this = _static_cast>(toEmulate); + *this = _static_cast(toEmulate); } -template template -constexpr emulated_int64_base::operator I() const noexcept +constexpr emulated_int64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +template +constexpr emulated_uint64_t::operator I() const noexcept { return _static_cast(*this); } @@ -298,28 +193,27 @@ constexpr emulated_int64_base::operator I() const noexcept // ---------------------- Functional operators ------------------------ -template -struct left_shift_operator > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct left_shift_operator) > { - using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + const T shifted = T::create(bigShift ? 
vector(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; + ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, T bits) { return operator()(operand, _static_cast(bits)); } @@ -381,21 +275,24 @@ struct arithmetic_right_shift_operator #ifndef __HLSL_VERSION -template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const +constexpr inline emulated_int64_t emulated_int64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint32_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); } -template<> constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } -template<> constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; @@ -404,14 +301,15 @@ constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) co #endif + // ---------------------- STD arithmetic operators ------------------------ // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition -template -struct plus > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct plus) > { - using type_t = emulated_int64_base; + using type_t = T; type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { @@ -421,10 +319,10 @@ struct plus > const static type_t 
identity; }; -template -struct minus > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct minus) > { - using type_t = emulated_int64_base; + using type_t = T; type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { @@ -446,10 +344,10 @@ NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _s // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl -template -struct plus_assign > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct plus_assign) > { - using type_t = emulated_int64_base; + using type_t = T; using base_t = plus; base_t baseOp; void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) @@ -460,10 +358,10 @@ struct plus_assign > const static type_t identity; }; -template -struct minus_assign > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct minus_assign) > { - using type_t = emulated_int64_base; + using type_t = T; using base_t = minus; base_t baseOp; void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) @@ -483,26 +381,6 @@ NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::iden template<> NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; -// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- - -template<> -struct is_signed : bool_constant {}; - -template<> -struct is_unsigned : bool_constant {}; - -// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- -namespace concepts -{ -namespace impl -{ -template -struct is_emulating_integral_scalar > -{ - NBL_CONSTEXPR_STATIC_INLINE bool value = true; -}; -} -} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl 
b/include/nbl/builtin/hlsl/functional.hlsl index fd23ad388c..98858bae80 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -219,7 +219,7 @@ NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) // ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- -#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ +#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ using base_t = NAME ; \ base_t baseOp; \ From aa9e24daf8bb7bae9ff743f1db899234819ac17f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 18:42:11 +0700 Subject: [PATCH 058/157] Add unary_minus_operator class --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 36 ++++++++++++++----- include/nbl/builtin/hlsl/functional.hlsl | 10 ++++++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 89c10d14fd..b51860a399 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -8,6 +8,7 @@ #include #define ARROW -> +#define NBL_DEREF_THIS (*this) #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static @@ -44,6 +45,7 @@ namespace nbl::hlsl #else #define ARROW .arrow(). 
+#define NBL_DEREF_THIS this #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ce98d5268f..ba4facad01 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -53,11 +53,8 @@ struct emulated_int64_t constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - NBL_CONSTEXPR_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC - { - storage_t inverted = ~data; - return create(_static_cast(inverted)) + _static_cast(1); - } + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC; + }; // ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- @@ -196,24 +193,25 @@ constexpr emulated_uint64_t::operator I() const noexcept template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) struct left_shift_operator) > { + using type_t = T; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const T shifted = T::create(bigShift ? vector(0, operand.__getLSB() << shift) + const type_t shifted = type_t::create(bigShift ? 
vector(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; + ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, T bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -381,6 +379,26 @@ NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::iden template<> NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; +// --------------------------------- Unary operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl +template<> +struct unary_minus_operator +{ + using type_t = emulated_int64_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand) + { + using storage_t = type_t::storage_t; + storage_t inverted = ~operand.data; + return type_t::create(_static_cast(inverted)) + _static_cast(1); + } +}; + +NBL_CONSTEXPR_INLINE_FUNC emulated_int64_t emulated_int64_t::operator-() NBL_CONST_MEMBER_FUNC +{ + unary_minus_operator unaryMinus; + return unaryMinus(NBL_DEREF_THIS); +} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 98858bae80..f0730a12d2 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -487,7 +487,17 @@ struct logical_right_shift_operator } }; +// ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- +template +struct unary_minus_operator +{ + using type_t = T; + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) + { + return -operand; + } +}; } //namespace nbl } 
//namespace hlsl From 6683cd5a0f7965caa8484ea40b3847bab23b54a0 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 18:42:29 +0700 Subject: [PATCH 059/157] Remove commented code on emulated/vector_t.hlsl --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index cdeddeb105..f153fb1062 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -40,9 +40,6 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - //using TAsUint = typename unsigned_integer_of_size::type; - //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - //return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } @@ -77,9 +74,6 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - //using TAsUint = typename unsigned_integer_of_size::type; - //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - //return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } @@ -118,9 +112,6 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - //using TAsUint = typename unsigned_integer_of_size::type; - //uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - //return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } From 21a576573db68a59f026c3a16fb5042ab3be3126 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 1 Dec 2025 14:50:38 +0100 Subject: [PATCH 060/157] update dxc to https://github.com/Devsh-Graphics-Programming/DirectXShaderCompiler/commit/1e5414bcc21b002d795f97075dff63e387fc668f --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index dafad1d9a3..1e5414bcc2 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit dafad1d9a370d17ac9ce69928ef518f842cb5191 +Subproject commit 1e5414bcc21b002d795f97075dff63e387fc668f From cdb6ad7d3865af1e3390127af5da008e44ead6ce Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 21:00:47 +0700 Subject: [PATCH 061/157] Unify all Truncate specializaton for vector type --- .../nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 50 ++++++------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 38467942f9..ffe3d12641 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -9,6 +9,12 @@ namespace nbl namespace hlsl { +namespace concepts +{ + template + NBL_BOOL_CONCEPT can_truncate_vector = concepts::Vectorial && concepts::Vectorial && concepts::same_as::scalar_type, typename vector_traits::scalar_type > && vector_traits::Dimension <= vector_traits::Dimension; +} + namespace impl { @@ -21,44 +27,20 @@ struct Truncate } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::can_truncate_vector) +struct Truncate) > { - NBL_CONSTEXPR_FUNC vector operator()(const vector v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector truncated = { v[0] }; - return truncated; + array_get::scalar_type> getter; + array_set::scalar_type> setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, getter(v, i)); + return output; } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1]}; - return 
truncated; - } -}; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1], v[2] }; - return truncated; - } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1], v[2], v[3] }; - return truncated; - } }; } //namespace impl From a9e107835c0c0735cb0a83c70908c8375c7e544a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 13:57:18 +0100 Subject: [PATCH 062/157] Resolved conflicts cherry-picked from the `unified_testing_interface` branch --- examples_tests | 2 +- include/nbl/system/to_string.h | 84 ++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 include/nbl/system/to_string.h diff --git a/examples_tests b/examples_tests index 829ea34183..e5d5ae2ca9 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 829ea34183a0a62a3bd68ded4dd9e451b97126d4 +Subproject commit e5d5ae2ca9137a6966d00aa039f3e6dae7c23fb9 diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h new file mode 100644 index 0000000000..70ecfba211 --- /dev/null +++ b/include/nbl/system/to_string.h @@ -0,0 +1,84 @@ +#ifndef _NBL_SYSTEM_TO_STRING_INCLUDED_ +#define _NBL_SYSTEM_TO_STRING_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace system +{ +namespace impl +{ + +template +struct to_string_helper +{ + static std::string __call(const T& value) + { + return std::to_string(value); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const 
hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template +struct to_string_helper> +{ + static std::string __call(const hlsl::vector& value) + { + std::stringstream output; + output << "{ "; + for (int i = 0; i < N; ++i) + { + output << to_string_helper::__call(value[i]); + + if (i < N - 1) + output << ", "; + } + output << " }"; + + return output.str(); + } +}; + +template +struct to_string_helper> +{ + using value_t = hlsl::morton::code; + static std::string __call(value_t value) + { + TestValueToTextConverter mortonCodeDataToTextConverter; + return mortonCodeDataToTextConverter(value.value); + } +}; + + +} + +template +std::string to_string(T value) +{ + return impl::to_string_helper::__call(value); +} +} +} + +#endif \ No newline at end of file From 1e7ea64e23be96f204d87dc98b5913cb5db44664 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 15:37:21 +0100 Subject: [PATCH 063/157] Removed from the `to_string` function specialization of types not present yet in the master branch --- include/nbl/system/to_string.h | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 70ecfba211..92888704c0 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -2,8 +2,6 @@ #define _NBL_SYSTEM_TO_STRING_INCLUDED_ #include -#include -#include namespace nbl { @@ -21,24 +19,6 @@ struct to_string_helper } }; -template<> -struct to_string_helper -{ - static std::string __call(const hlsl::emulated_uint64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - -template<> -struct to_string_helper -{ - static std::string __call(const hlsl::emulated_int64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - template struct to_string_helper> { @@ -59,18 +39,6 @@ struct to_string_helper> } }; -template -struct to_string_helper> -{ - using value_t = hlsl::morton::code; - static 
std::string __call(value_t value) - { - TestValueToTextConverter mortonCodeDataToTextConverter; - return mortonCodeDataToTextConverter(value.value); - } -}; - - } template From c365240ed060b45d535e3a9293c91da2d9f01e61 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 23:04:34 +0700 Subject: [PATCH 064/157] Fix promote.hlsl and reduce the amount of specialization for Promote --- .../nbl/builtin/hlsl/cpp_compat/promote.hlsl | 49 ++++--------------- include/nbl/builtin/hlsl/morton.hlsl | 5 +- 2 files changed, 12 insertions(+), 42 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 6e75a55b1b..cd4ac3193c 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -21,50 +21,21 @@ struct Promote } }; -#ifdef __HLSL_VERSION - -template -struct Promote, U> +// TODO(kevinyu): Should we enable truncation from uint64_t to emulated_vector? +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && is_scalar_v && is_same_v::scalar_type, From>) +struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector promoted = {Scalar(v)}; - return promoted; + array_set setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, v); + return output; } }; -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && 
is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -#endif - } template diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 696124ae0c..6968d414fc 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -137,8 +137,7 @@ struct Transcoder return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + template 16), vector, vector > > /** * @brief Encodes a vector of cartesian coordinates as a Morton code * @@ -216,7 +215,7 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); - const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); + const portable_vector_t zeros = promote>(_static_cast(0)); const portable_vector_t rhsCasted = _static_cast >(rhs); const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); From e674772baba13c08445c29835cedc8276909a0f9 Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Mon, 1 Dec 2025 19:16:39 +0100 Subject: [PATCH 065/157] add `nbl::system::to_string` utility function all credit goes to @Przemog1 --- include/nbl/system/to_string.h | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 include/nbl/system/to_string.h diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h new file mode 100644 index 0000000000..3169503a06 --- /dev/null +++ b/include/nbl/system/to_string.h @@ -0,0 +1,83 @@ +#ifndef _NBL_SYSTEM_TO_STRING_INCLUDED_ +#define _NBL_SYSTEM_TO_STRING_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace system +{ +namespace impl +{ + 
+template +struct to_string_helper +{ + static std::string __call(const T& value) + { + return std::to_string(value); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template +struct to_string_helper> +{ + static std::string __call(const hlsl::vector& value) + { + std::stringstream output; + output << "{ "; + for (int i = 0; i < N; ++i) + { + output << to_string_helper::__call(value[i]); + + if (i < N - 1) + output << ", "; + } + output << " }"; + + return output.str(); + } +}; + +template +struct to_string_helper> +{ + using value_t = hlsl::morton::code; + static std::string __call(value_t value) + { + return to_string_helper::__call(value.value); + } +}; + + +} + +template +std::string to_string(T value) +{ + return impl::to_string_helper::__call(value); +} +} +} + +#endif \ No newline at end of file From ec1d6745fb34451f9fda2f0a68a6047866630e14 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:09:26 +0700 Subject: [PATCH 066/157] Make promote constrainable --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index cd4ac3193c..9f2b58047f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -12,7 +12,7 @@ namespace impl { // partial specialize this for `T=matrix|vector` and `U=matrix|vector|scalar_t` -template +template struct Promote { NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) From 51e35cf27e59311b5abd586b424d63a6502fdeb3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:09:34 +0700 Subject: [PATCH 067/157] 
equal to _equal --- include/nbl/builtin/hlsl/morton.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 6968d414fc..d03a02a09c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -215,12 +215,12 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); - const portable_vector_t zeros = promote>(_static_cast(0)); + const portable_vector_t zeros = promote >(_static_cast(0)); const portable_vector_t rhsCasted = _static_cast >(rhs); const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); - equal_to > equal; - return equal(xored, zeros); + equal_to > _equal; + return _equal(xored, zeros); } }; From 062ce7b632b7fc90c1b3fdaec0fdce2ddb52c1b5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:18:19 +0700 Subject: [PATCH 068/157] Remove some constraint in morton::code::create --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d03a02a09c..869425b856 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -326,7 +326,7 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; From 6c824283c493c050aa85bb7710fa3d22768b4341 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 12:40:28 +0700 Subject: [PATCH 069/157] Remove NBL_CONSTEXPR_STATIC_INLINE_VAR macro --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 -- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 16 ++++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index b51860a399..a5715efa15 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -14,7 +14,6 @@ #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline @@ -51,7 +50,6 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ba4facad01..0b890fb2b2 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -331,13 +331,13 @@ struct minus) > }; template<> 
-NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -371,13 +371,13 @@ struct minus_assign }; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; // --------------------------------- Unary operators ------------------------------------------ // Specializations of the structs found in functional.hlsl From ca2ac6f5151b35f4570f9232356e40fc85cdaf64 Mon Sep 17 00:00:00 2001 From: kevyuu Date: 
Tue, 2 Dec 2025 12:41:02 +0700 Subject: [PATCH 070/157] Remove Bit count constraint on some of Transcoder method due to redundancy. --- include/nbl/builtin/hlsl/morton.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 869425b856..e6deaf5be4 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -342,7 +342,7 @@ struct code * * @param [in] cartesian Coordinates to encode */ - template= Bits) + template inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); @@ -351,7 +351,7 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template= Bits && is_signed_v == Signed) + template == Signed) constexpr explicit operator vector() const noexcept; #endif @@ -521,8 +521,8 @@ namespace impl { // I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar) > { NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { @@ -606,7 +606,7 @@ constexpr morton::code morton::code&& D* Bits <= 64) -template = Bits && is_signed_v == Signed) +template == Signed) constexpr morton::code::operator vector() const noexcept { return _static_cast, morton::code>(*this); From 4c9635d5bf6ead8d39b2775a6c12c75930732aa3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 12:46:48 +0700 Subject: [PATCH 071/157] Use cpp syntax instead of portable macro wherever possible --- include/nbl/builtin/hlsl/functional.hlsl | 10 +++++----- 1 
file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index f0730a12d2..dc718e5928 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -91,7 +91,7 @@ struct reference_wrapper : enable_if_t< #else // CPP -#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; #endif @@ -136,7 +136,7 @@ ALIAS_STD(divides,/) #ifndef __HLSL_VERSION -template +template struct bit_not : std::bit_not { using type_t = T; @@ -184,11 +184,11 @@ ALIAS_STD(less_equal, <=) }; // GLM doesn't have operators on vectors #ifndef __HLSL_VERSION -#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ -struct NAME ) >\ +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template requires (concepts::Vectorial)\ +struct NAME \ {\ using type_t = T;\ - vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + vector::Dimension> operator()(const T& lhs, const T& rhs)\ {\ return glm::GLM_OP (lhs, rhs);\ }\ From fbfde73761bc84b4a78bed5a8b2ad45aff573cad Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 2 Dec 2025 17:04:02 +0700 Subject: [PATCH 072/157] change quaternion struct name to match what it will be --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 4 ++-- include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index aca8d1ff3c..8d50202f4e 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -15,9 +15,9 @@ namespace math { template -struct quaternion_t +struct quaternion { - using this_t = quaternion_t; + using this_t = quaternion; using 
scalar_type = T; using data_type = vector; using vector3_type = vector; diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 0c86b69793..c31e194788 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -51,7 +51,7 @@ struct SphericalTriangle { const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) - C_s += math::quaternion_t::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); + C_s += math::quaternion::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); } vector3_type retval = tri.vertex1; @@ -61,7 +61,7 @@ struct SphericalTriangle { const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) - retval += math::quaternion_t::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); + retval += math::quaternion::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); } return retval; } From 23292bd47b9ef9b5d1d4bae7f97be0fa19b68b2b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 13:58:02 +0700 Subject: [PATCH 073/157] Fix morton code constraint --- include/nbl/builtin/hlsl/morton.hlsl | 30 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e6deaf5be4..41461a0841 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -108,18 +108,21 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) struct Transcoder { - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + 
using decode_t = conditional_t < (Bits > 16), vector, vector >; + + template ) /** * @brief Interleaves each coordinate with `Dim - 1` zeros inbetween each bit, and left-shifts each by their coordinate index * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ - NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(T) decodedValue) { left_shift_operator > leftShift; portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; + // Read this to understand how interleaving and spreading bits works https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ @@ -137,15 +140,15 @@ struct Transcoder return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } - template 16), vector, vector > > + template /** * @brief Encodes a vector of cartesian coordinates as a Morton code * * @param [in] decodedValue Cartesian coordinates to encode */ - NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(T) decodedValue) { - const portable_vector_t interleaveShifted = interleaveShift(decodedValue); + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); array_get, encode_t> getter; encode_t encoded = getter(interleaveShifted, 0); @@ -157,8 +160,6 @@ struct Transcoder return encoded; } - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) /** * @brief Decodes a Morton code back to a vector of cartesian coordinates * @@ -231,7 +232,8 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = 
conditional_t, make_unsigned_t >; - const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); return Equal::template __call(value, interleaved); } }; @@ -281,7 +283,8 @@ struct BaseComparison NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; - const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); return BaseComparison::template __call(value, interleaved); } }; @@ -309,6 +312,8 @@ struct code using this_signed_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; + + using transcoder_t = impl::Transcoder; storage_t value; @@ -326,11 +331,12 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed && sizeof(I) == sizeof(vector_traits::scalar_type)), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - retVal.value = impl::Transcoder::encode(cartesian); + using decode_t = typename transcoder_t::decode_t; + retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; } From 33a324721ab1282432b2a0e854a579358092417b Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Wed, 3 Dec 2025 15:11:21 +0100 Subject: [PATCH 074/157] Make sure NBL_VALID_EXPRESSION works outside the `nbl::hlsl` namespace --- include/nbl/builtin/hlsl/concepts.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index 6e0f380d01..3c40b3e6c6 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -128,9 +128,9 @@ NBL_CONSTEXPR bool NBL_CONCEPT_NAME = BOOST_PP_SEQ_FOR_EACH_I(NBL_IMPL_CONCEPT_E namespace impl\ {\ template\ -struct CONCEPT_NAME : false_type {};\ +struct CONCEPT_NAME : ::nbl::hlsl::false_type {};\ template\ -struct CONCEPT_NAME > : true_type {};\ +struct CONCEPT_NAME > : ::nbl::hlsl::true_type {};\ }\ template\ NBL_BOOL_CONCEPT CONCEPT_NAME = impl::CONCEPT_NAME::value\ @@ -139,4 +139,4 @@ NBL_BOOL_CONCEPT CONCEPT_NAME = impl::CONCEPT_NAME Date: Wed, 3 Dec 2025 16:14:03 +0100 Subject: [PATCH 075/157] "Fix" `SelectIsCallable` its really HLSL's fault as a language --- include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index a7614469dd..b71558c49d 100644 --- 
a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -382,7 +382,8 @@ template && (!conc [[vk::ext_instruction(spv::OpSelect)]] T select(U a, T x, T y); -NBL_VALID_EXPRESSION(SelectIsCallable, (T)(U), select(experimental::declval(),experimental::declval(),experimental::declval())); +// need to use `spirv::` even in the namespace because it matches the HLSL intrinsic which is not namespaced at all, and will happily match anything +NBL_VALID_EXPRESSION(SelectIsCallable, (T)(U), spirv::select(experimental::declval(),experimental::declval(),experimental::declval())); } From 5da522e319acdf93c9f0bfd581791e2c25826354 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 23:17:45 +0700 Subject: [PATCH 076/157] Add assert in morton code creation --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ include/nbl/builtin/hlsl/morton.hlsl | 35 +++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index a5715efa15..3ca499c567 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -21,6 +21,7 @@ #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) +#define NBL_ASSERT(...) assert(__VA_ARGS__) namespace nbl::hlsl { @@ -57,6 +58,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) +#define NBL_ASSERT(...) 
namespace nbl { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 41461a0841..9ee59b7e78 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -25,7 +25,31 @@ namespace impl template NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; -// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) +{ + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = (~((T(1) << Bits) - 1)); + const bool allZero = ((val & mask) == 0); + NBL_IF_CONSTEXPR(is_signed_v) + { + const bool allOne = ((val & mask) == mask); + return allZero || allOne; + } + return allZero; +} + +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegralVec(vector vec) +{ + array_get, T> getter; + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + if (!verifyAnyBitIntegral(getter(vec, i))) return false; + return true; +} + + +// --------------------------------------------------------- MORTON ENCOE/DECODE MASKS --------------------------------------------------- NBL_CONSTEXPR uint16_t CodingStages = 5; @@ -108,7 +132,8 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) struct Transcoder { - using decode_t = conditional_t < (Bits > 16), vector, vector >; + using decode_component_t = conditional_t<(Bits > 16), uint32_t, uint16_t>; + using decode_t = vector; template ) @@ -314,6 +339,9 @@ struct code using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; using transcoder_t = impl::Transcoder; + using decode_component_t = conditional_t, + typename transcoder_t::decode_component_t>; storage_t value; @@ -331,10 +359,11 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed && sizeof(I) == sizeof(vector_traits::scalar_type)), this_t> + NBL_CONSTEXPR_STATIC enable_if_t , this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; + NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian) == true)); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; From 812ae7b580ef4e283dc8b8c45e331a5ac85f08d9 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 23:30:37 +0700 Subject: [PATCH 077/157] Fix is_emulating concepts --- include/nbl/builtin/hlsl/concepts/core.hlsl | 8 ++++---- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index 4a8b848cb8..e3ff3f611f 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -72,23 +72,23 @@ namespace impl template struct is_emulating_floating_point_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; template struct is_emulating_integral_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template -NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +NBL_BOOL_CONCEPT FloatingPointLikeScalar = FloatingPointScalar || impl::is_emulating_floating_point_scalar::value; //! 
Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) template -NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar::value; +NBL_BOOL_CONCEPT IntegralLikeScalar = IntegralScalar || impl::is_emulating_integral_scalar::value; } } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9ee59b7e78..67e83f6169 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -28,7 +28,7 @@ NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; template && concepts::Scalar) NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) { - NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = (~((T(1) << Bits) - 1)); + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((T(1) << Bits) - 1); const bool allZero = ((val & mask) == 0); NBL_IF_CONSTEXPR(is_signed_v) { From 341d6cd033969efcd214a2bb495d1612a591eb14 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:10:17 +0700 Subject: [PATCH 078/157] Move storage_t to common_inc --- include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl | 2 +- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl index 2dd7bafa41..3818814a49 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -1,4 +1,4 @@ - +using storage_t = vector; storage_t data; /** diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 0b890fb2b2..b44709bc01 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -19,7 +19,6 @@ struct emulated_int64_t; struct emulated_uint64_t { - using storage_t = vector; using this_t = emulated_uint64_t; 
NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; @@ -38,7 +37,6 @@ struct emulated_uint64_t struct emulated_int64_t { - using storage_t = vector; using this_t = emulated_int64_t; NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; From 2fd2cbaaedea4d20233d0869d8f2e8125398b46f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:10:49 +0700 Subject: [PATCH 079/157] Rename ImitationIntegral64Scalar to EmulatedIntegral64Scalar --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index b44709bc01..4c950859e6 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -68,7 +68,7 @@ namespace concepts { template -NBL_BOOL_CONCEPT ImitationIntegral64Scalar = same_as || same_as; +NBL_BOOL_CONCEPT EmulatedIntegralScalar64 = same_as || same_as; namespace impl { @@ -93,8 +93,8 @@ struct is_emulating_integral_scalar namespace impl { -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar && concepts::ImitationIntegral64Scalar && !concepts::same_as) -struct static_cast_helper && concepts::ImitationIntegral64Scalar && !concepts::same_as) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64 && concepts::EmulatedIntegralScalar64 && !concepts::same_as) +struct static_cast_helper && concepts::EmulatedIntegralScalar64 && !concepts::same_as) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) @@ -105,8 +105,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper 
&& (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { // Return only the lowest bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) @@ -115,8 +115,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { @@ -124,8 +124,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { // Set only lower bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) @@ -134,8 +134,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { @@ -188,8 +188,8 @@ constexpr emulated_uint64_t::operator I() const noexcept // ---------------------- 
Functional operators ------------------------ -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct left_shift_operator) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct left_shift_operator) > { using type_t = T; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); @@ -302,8 +302,8 @@ constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) co // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct plus) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus) > { using type_t = T; @@ -315,8 +315,8 @@ struct plus) > const static type_t identity; }; -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct minus) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus) > { using type_t = T; @@ -340,8 +340,8 @@ NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::id // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct plus_assign) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus_assign) > { using type_t = T; using base_t = plus; @@ -354,8 +354,8 @@ struct plus_assign) const static type_t identity; }; -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct minus_assign) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus_assign) > { using type_t = T; using base_t = minus; From 1255d1c30e9f2f1b36c3f71bdad1ff26b5488038 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:21:36 +0700 Subject: [PATCH 080/157] Fix extent and remove 
duplicated extent specialization --- include/nbl/builtin/hlsl/type_traits.hlsl | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index b682b8da8b..257a753129 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -732,11 +732,11 @@ struct extent : integral_constant::value> {}; template struct extent : integral_constant::value> {}; -template -struct extent, 0> : integral_constant {}; +template +struct extent, I> : extent {}; template -struct extent, I> : integral_constant::value> {}; +struct extent, I> : extent {}; // Template Variables @@ -855,12 +855,6 @@ struct float_of_size<8> template using float_of_size_t = typename float_of_size::type; -template -struct extent, 0> : integral_constant {}; - -template -struct extent, 1> : integral_constant {}; - } } From 527129fa79399008977dfe730cf2d2ed11873fd1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:40:55 +0700 Subject: [PATCH 081/157] Remove redundant extent --- include/nbl/builtin/hlsl/concepts/vector.hlsl | 4 ---- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 3ea3199951..f132531cb9 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -46,10 +46,6 @@ NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::Signed } -template -NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct extent) > : integral_constant::Dimension> {}; - } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index f153fb1062..82a1360b49 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ 
b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -627,6 +627,8 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) } //namespace impl +template +struct extent, I> : extent {}; } } #endif \ No newline at end of file From ed696efd6ea9e715591c2b2d7e98e7f0f1a1eada Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 01:24:34 +0700 Subject: [PATCH 082/157] Fix unary_minus_operator --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 27 ++++--------------- include/nbl/builtin/hlsl/functional.hlsl | 12 ++++++++- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 4c950859e6..30c23d8693 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -51,7 +51,11 @@ struct emulated_int64_t constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC; + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC + { + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); + } }; @@ -377,27 +381,6 @@ NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; -// --------------------------------- Unary operators ------------------------------------------ -// Specializations of the structs found in functional.hlsl -template<> -struct unary_minus_operator -{ - using type_t = emulated_int64_t; - - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand) - { - using storage_t = type_t::storage_t; - storage_t inverted = ~operand.data; - return type_t::create(_static_cast(inverted)) + _static_cast(1); - } -}; - -NBL_CONSTEXPR_INLINE_FUNC emulated_int64_t emulated_int64_t::operator-() NBL_CONST_MEMBER_FUNC -{ - unary_minus_operator unaryMinus; - return unaryMinus(NBL_DEREF_THIS); -} - } 
//namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index dc718e5928..4d5889fe05 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -495,7 +495,17 @@ struct unary_minus_operator NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) { - return -operand; + return operand.operator-(); + } +}; + +template NBL_PARTIAL_REQ_TOP(is_fundamental_v) +struct unary_minus_operator) > +{ + using type_t = T; + NBL_CONSTEXPR_FUNC T operator()(const T operand) + { + return -operand; } }; From 4da1fb8ea99a06b39b4bb2c85c534bf538c3e78b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 01:24:58 +0700 Subject: [PATCH 083/157] Fix redundant extent specialization --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 82a1360b49..4eb8b7bf06 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -491,6 +491,15 @@ DEFINE_SCALAR_OF_SPECIALIZATION(3) DEFINE_SCALAR_OF_SPECIALIZATION(4) #undef DEFINE_SCALAR_OF_SPECIALIZATION +#define DEFINE_EXTENT_SPECIALIZATION(DIMENSION)\ +template\ +struct extent, I> : extent {}; + +DEFINE_EXTENT_SPECIALIZATION(2) +DEFINE_EXTENT_SPECIALIZATION(3) +DEFINE_EXTENT_SPECIALIZATION(4) +#undef DEFINE_EXTENT_SPECIALIZATION + namespace impl { template @@ -627,8 +636,6 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) } //namespace impl -template -struct extent, I> : extent {}; } } #endif \ No newline at end of file From 14636d30b1f08c0f87a25dea12e2c900ad726981 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 3 Dec 2025 23:29:35 +0300 Subject: [PATCH 084/157] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests 
b/examples_tests index 1508702f27..f18160276e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1508702f27dbd4c7fa9642e26b1047b0cd8889c9 +Subproject commit f18160276e78f860f64c45111c874e3351b44ffb From 402b8231a3c53090cfa5db751ed63fd2e328473f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 07:40:54 +0700 Subject: [PATCH 085/157] Replace [[unroll]] with NBL_UNROLL --- include/nbl/builtin/hlsl/morton.hlsl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 67e83f6169..d7a781fad9 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -42,7 +42,7 @@ template vec) { array_get, T> getter; - [[unroll]] + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) if (!verifyAnyBitIntegral(getter(vec, i))) return false; return true; @@ -178,7 +178,7 @@ struct Transcoder array_get, encode_t> getter; encode_t encoded = getter(interleaveShifted, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < Dim; i++) encoded = encoded | getter(interleaveShifted, i); @@ -196,7 +196,7 @@ struct Transcoder portable_vector_t decoded; array_set, encode_t> setter; // Write initial values into decoded - [[unroll]] + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) setter(decoded, i, encodedRightShift(encodedValue, i)); @@ -363,7 +363,7 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian) == true)); + NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; @@ -466,7 +466,7 @@ struct code array_get, storage_t> getter; this_t retVal; retVal.value = getter(interleaveShiftedResult, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); return retVal; 
@@ -486,7 +486,7 @@ struct code array_get, storage_t> getter; this_t retVal; retVal.value = getter(interleaveShiftedResult, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); From 99b25ffd97c9b5ace8213d5c5ca334361ea3ff75 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 08:18:24 +0700 Subject: [PATCH 086/157] Allow promote to work between emulated type and non emulated type --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 9f2b58047f..6a8476e644 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -21,8 +21,7 @@ struct Promote } }; -// TODO(kevinyu): Should we enable truncation from uint64_t to emulated_vector? -template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && is_scalar_v && is_same_v::scalar_type, From>) +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > { NBL_CONSTEXPR_FUNC To operator()(const From v) From 37d5c5a776b6667d08262374e3ee849e649942e6 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 4 Dec 2025 11:17:02 +0700 Subject: [PATCH 087/157] removed temp fix for mix_helper require --- include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 0c595bb0e2..cd89ce45d1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -256,8 +256,8 @@ struct mix_helper) > }; template 
-NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable && concepts::Boolean) -struct mix_helper && concepts::Boolean) > +NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable) +struct mix_helper) > { using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; // for a component of a that is false, the corresponding component of x is returned From 70a88fa975b91bad0d141e30b9b5ee9476c59f29 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 13:54:11 +0700 Subject: [PATCH 088/157] Refactor unary operator in hlsl functionals --- include/nbl/builtin/hlsl/functional.hlsl | 82 +++++++++--------------- 1 file changed, 29 insertions(+), 53 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 4d5889fe05..da416a538f 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -134,41 +134,6 @@ ALIAS_STD(divides,/) NBL_CONSTEXPR_STATIC_INLINE T identity = T(1); }; -#ifndef __HLSL_VERSION - -template -struct bit_not : std::bit_not -{ - using type_t = T; -}; - -#else - -template -struct bit_not -{ - using type_t = T; - - T operator()(NBL_CONST_REF_ARG(T) operand) - { - return ~operand; - } -}; - -// The default version above only works for fundamental scalars, vectors and matrices. This is because you can't call `~x` unless `x` is one of the former. -// Similarly, calling `x.operator~()` is not valid for the aforementioned, and only for types overriding this operator. So, we need a specialization. 
-template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix)) -struct bit_not || concepts::Vector || concepts::Matrix)) > -{ - using type_t = T; - - T operator()(NBL_CONST_REF_ARG(T) operand) - { - return operand.operator~(); - } -}; - -#endif ALIAS_STD(equal_to, ==) }; ALIAS_STD(not_equal_to, !=) }; @@ -488,27 +453,38 @@ struct logical_right_shift_operator }; // ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- -template -struct unary_minus_operator -{ - using type_t = T; - - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) - { - return operand.operator-(); - } +#ifndef __HLSL_VERSION +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME : std::NAME { \ + using type_t = T; \ }; +#else +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) \ + { \ + return operand.operator OP(); \ + } \ +}; \ +template NBL_PARTIAL_REQ_TOP(concepts::Scalar || concepts::Vector || concepts::Matrix ) \ +struct NAME || concepts::Vector || concepts::Matrix ) > \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(const T operand) \ + { \ + return (OP operand); \ + } \ +}; +#endif + +NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) +NBL_UNARY_OP_SPECIALIZATION(negate, -) -template NBL_PARTIAL_REQ_TOP(is_fundamental_v) -struct unary_minus_operator) > -{ - using type_t = T; - NBL_CONSTEXPR_FUNC T operator()(const T operand) - { - return -operand; - } -}; + +#endif } //namespace nbl } //namespace hlsl From ded5d8fcd8aa348b9934f02a786b740d68c5b7a7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 15:19:21 +0700 Subject: [PATCH 089/157] Fix misplaced #endif in functional.hlsl --- include/nbl/builtin/hlsl/functional.hlsl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index da416a538f..757ad7294d 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -482,10 +482,7 @@ struct NAME || concepts::Vector || NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) NBL_UNARY_OP_SPECIALIZATION(negate, -) - - -#endif } //namespace nbl } //namespace hlsl -#endif \ No newline at end of file +#endif From 17d07177d87f38e7beff6cf8881e548ee670aa29 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 17:50:22 +0700 Subject: [PATCH 090/157] Fix ternary_operation --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 10 +++--- include/nbl/builtin/hlsl/functional.hlsl | 33 +++++++++++++++---- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 30c23d8693..1324998d1a 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -208,8 +208,7 @@ struct left_shift_operator(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined @@ -235,8 +234,8 @@ struct arithmetic_right_shift_operator const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + + return select(bool(bits), shifted, operand); } // If `_bits > 63` the result is undefined @@ -262,8 +261,7 @@ struct arithmetic_right_shift_operator const uint32_t shift = bigShift ? 
bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? ~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 757ad7294d..7531c5cdb9 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -235,16 +235,35 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +#ifndef __HLSL_VERSION +template requires(is_same_v, std::invoke_result_t()> ) struct ternary_operator { - using type_t = T; - - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select(condition, lhs, rhs); - } + using type_t = std::invoke_result_t; + + constexpr inline type_t operator()(const bool condition, const F1& lhs, const F2& rhs) + { + if (condition) + return std::invoke(lhs); + else + return std::invoke(rhs); + } }; +#else +template()()),decltype(experimental::declval()())> ) +struct ternary_operator +{ + using type_t = decltype(experimental::declval().operator()); + + NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_CONST_REF_ARG(F1) lhs, NBL_CONST_REF_ARG(F2) rhs) + { + if (condition) + return lhs(); + else + return rhs(); + } +}; +#endif // ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- From 791b2b917d785f51616a13ea24510a5ae30b602e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 
Dec 2025 17:59:08 +0700 Subject: [PATCH 091/157] Improve some comment --- include/nbl/builtin/hlsl/morton.hlsl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d7a781fad9..e72ec9a76b 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -59,9 +59,7 @@ struct coding_mask; template NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); -// It's a complete cointoss whether template variables work or not, since it's a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work, -// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used - +// constexpr vector is not supported since it is not a fundamental type, which means it cannot be stored or leaked outside of constexpr context, it can only exist transiently. So the only way to return vector is to make the function consteval. Thus, we use macro to inline where it is used. 
#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ truncate >(\ vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ From 8f548f6ba32baba77c76996124ad13a1680d78e3 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 4 Dec 2025 13:23:42 +0100 Subject: [PATCH 092/157] Updated DXC --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index 1e5414bcc2..ecd3f93521 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit 1e5414bcc21b002d795f97075dff63e387fc668f +Subproject commit ecd3f93521f1aceabff64b14857f47f9a32c9958 From ac2070e0998a977d87ac524412f63efa6f560ea4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 19:38:54 +0700 Subject: [PATCH 093/157] Remove NBL_ASSERT --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 -- include/nbl/builtin/hlsl/morton.hlsl | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 3ca499c567..a5715efa15 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -21,7 +21,6 @@ #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) -#define NBL_ASSERT(...) assert(__VA_ARGS__) namespace nbl::hlsl { @@ -58,7 +57,6 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) -#define NBL_ASSERT(...) 
namespace nbl { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e72ec9a76b..08b2b1ccfb 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -362,6 +362,7 @@ struct code { this_t retVal; NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); + assert((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; From a4dabdf9f267c93da1a340aa51600ed9443004e2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 20:45:21 +0700 Subject: [PATCH 094/157] Simplify mix helper by using select_helper in some specialization --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 50 ++----------------- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 7850fd7cf3..67a9f67d8f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -268,20 +268,6 @@ struct mix_helper) > } }; -template -NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable) -struct mix_helper) > -{ - using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; - // for a component of a that is false, the corresponding component of x is returned - // for a component of a that is true, the corresponding component of y is returned - // so we make sure this is correct when calling the operation - static inline return_t __call(const T x, const T y, const U a) - { - return spirv::select(a, y, x); - } -}; - template NBL_PARTIAL_REQ_TOP(matrix_traits::Square) struct determinant_helper::Square) > { @@ -980,43 +966,13 @@ struct mix_helper -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::BooleanScalar) -struct mix_helper && 
concepts::BooleanScalar) > +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && concepts::BooleanScalar) +struct mix_helper && concepts::BooleanScalar) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - using traitsT = hlsl::vector_traits; - array_get getterT; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), a)); - - return output; - } -}; - -template -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) -struct mix_helper && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) > -{ - using return_t = T; - static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) - { - using traitsT = hlsl::vector_traits; - using traitsU = hlsl::vector_traits; - array_get getterT; - array_get getterU; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), getterU(a, i))); - - return output; + return select_helper(a, y, x); } }; From 697190629696ff85ead679e38ee293c922380eb0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 14:04:25 +0700 Subject: [PATCH 095/157] fixes more nan problems + a few bugs in iridescent fresnel --- examples_tests | 2 +- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 71 +++++++++++++--------- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/examples_tests b/examples_tests index dd7de7a89c..c0eda4b4ab 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit dd7de7a89cfa5a59970dde4d4744ecf746d77a4a +Subproject commit c0eda4b4ab50f8a7ad56bb32c98088d59c711b46 diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl 
b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index f7655e9978..0f2b3486ab 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -508,25 +508,26 @@ struct iridescent_helper using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - // returns reflectance R = (rp, rs), phi is the phase shift for each plane of polarization (p,s) - static void phase_shift(const vector_type orientedEta, const vector_type orientedEtak, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) + // returns phi, the phase shift for each plane of polarization (p,s) + static void phase_shift(const vector_type ior1, const vector_type ior2, const vector_type iork2, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) { - vector_type cosTheta_2 = cosTheta * cosTheta; - vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta_2; - const vector_type eta2 = orientedEta*orientedEta; - const vector_type etak2 = orientedEtak*orientedEtak; + const vector_type cosTheta2 = cosTheta * cosTheta; + const vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta2; + const vector_type ior1_2 = ior1*ior1; + const vector_type ior2_2 = ior2*ior2; + const vector_type iork2_2 = iork2*iork2; - vector_type z = eta2 - etak2 - sinTheta2; - vector_type w = hlsl::sqrt(z * z + scalar_type(4.0) * eta2 * eta2 * etak2); - vector_type a2 = (z + w) * hlsl::promote(0.5); - vector_type b2 = (w - z) * hlsl::promote(0.5); - vector_type b = hlsl::sqrt(b2); + const vector_type z = ior2_2 * (hlsl::promote(1.0) - iork2_2) - ior1_2 * sinTheta2; + const vector_type w = hlsl::sqrt(z*z + scalar_type(4.0) * ior2_2 * ior2_2 * iork2_2); + const vector_type a2 = hlsl::max(z + w, hlsl::promote(0.0)) * hlsl::promote(0.5); + const vector_type b2 = hlsl::max(w - z, hlsl::promote(0.0)) * hlsl::promote(0.5); + const vector_type a = hlsl::sqrt(a2); + const vector_type b = hlsl::sqrt(b2); - const vector_type t0 
= eta2 + etak2; - const vector_type t1 = t0 * cosTheta_2; - - phiS = hlsl::atan2(hlsl::promote(2.0) * b * cosTheta, a2 + b2 - cosTheta_2); - phiP = hlsl::atan2(hlsl::promote(2.0) * eta2 * cosTheta * (hlsl::promote(2.0) * orientedEtak * hlsl::sqrt(a2) - etak2 * b), t1 - a2 + b2); + phiS = hlsl::atan2(scalar_type(2.0) * ior1 * b * cosTheta, a2 + b2 - ior1_2*cosTheta2); + const vector_type k2_plus_one = hlsl::promote(1.0) + iork2_2; + phiP = hlsl::atan2(scalar_type(2.0) * ior1 * ior2_2 * cosTheta * (scalar_type(2.0) * iork2 * a - (hlsl::promote(1.0) - iork2_2) * b), + ior2_2 * cosTheta2 * k2_plus_one * k2_plus_one - ior1_2*(a2+b2)); } // Evaluation XYZ sensitivity curves in Fourier space @@ -544,7 +545,8 @@ struct iridescent_helper } template - static T __call(const vector_type _D, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) + static T __call(const vector_type _D, const vector_type ior1, const vector_type ior2, const vector_type ior3, const vector_type iork3, + const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) { const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B); @@ -593,8 +595,8 @@ struct iridescent_helper vector_type I = hlsl::promote(0.0); // Evaluate the phase shift - phase_shift(eta12, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21p, phi21s); - phase_shift(eta23, etak23, cosTheta_2, phi23p, phi23s); + phase_shift(ior1, ior2, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21s, phi21p); + phase_shift(ior2, ior3, iork3, cosTheta_2, phi23s, phi23p); phi21p = hlsl::promote(numbers::pi) - phi21p; phi21s = hlsl::promote(numbers::pi) - phi21s; @@ -633,7 +635,7 @@ struct iridescent_helper I += Cm*Sm; } - return hlsl::max(colorspace::scRGB::FromXYZ(I), hlsl::promote(0.0)) * hlsl::promote(0.5); + return hlsl::max(colorspace::scRGB::FromXYZ(I) * hlsl::promote(0.5), 
hlsl::promote(0.0)); } }; @@ -643,11 +645,11 @@ struct iridescent_base using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - vector_type getD() NBL_CONST_MEMBER_FUNC { return D; } - vector_type getEta12() NBL_CONST_MEMBER_FUNC { return eta12; } - vector_type getEta23() NBL_CONST_MEMBER_FUNC { return eta23; } - vector_type D; + vector_type ior1; + vector_type ior2; + vector_type ior3; + vector_type iork3; vector_type eta12; // outside (usually air 1.0) -> thin-film IOR vector_type eta23; // thin-film -> base material IOR }; @@ -679,6 +681,10 @@ struct Iridescent(2.0 * params.Dinc) * params.ior2; + retval.ior1 = params.ior1; + retval.ior2 = params.ior2; + retval.ior3 = params.ior3; + retval.iork3 = params.iork3; retval.eta12 = params.ior2/params.ior1; retval.eta23 = params.ior3/params.ior2; retval.etak23 = params.iork3/params.ior2; @@ -687,7 +693,8 @@ struct Iridescent::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, base_type::iork3, + base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); } OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC @@ -731,6 +738,10 @@ struct Iridescent(2.0 * params.Dinc) * params.ior2; + retval.ior1 = params.ior1; + retval.ior2 = params.ior2; + retval.ior3 = params.ior3; + retval.iork3 = params.iork3; retval.eta12 = params.ior2/params.ior1; retval.eta23 = params.ior3/params.ior2; return retval; @@ -738,7 +749,8 @@ struct Iridescent::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, getEtak23(), + base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); } scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return 
base_type::eta23[0]; } @@ -755,8 +767,11 @@ struct Iridescent(1.0)/base_type::eta12, flip); - orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta23, flip); + orientedFresnel.ior1 = base_type::ior3; + orientedFresnel.ior2 = base_type::ior2; + orientedFresnel.ior3 = base_type::ior1; + orientedFresnel.eta12 = hlsl::mix(base_type::eta12, hlsl::promote(1.0)/base_type::eta23, flip); + orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); return orientedFresnel; } From cb689283e3b3ff3ddf12e4ec16961b3b3293ca9f Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 16:13:32 +0700 Subject: [PATCH 096/157] fixes iridescent fresnel under transmission --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 0f2b3486ab..ad83da5cf7 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -563,7 +563,7 @@ struct iridescent_helper if (hlsl::any(notTIR)) { - Dielectric::__polarized(eta12, hlsl::promote(cosTheta_1), R12p, R12s); + Dielectric::__polarized(eta12 * eta12, hlsl::promote(cosTheta_1), R12p, R12s); // Reflected part by the base // if kappa==0, base material is dielectric @@ -741,7 +741,6 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / base_type::eta23[0]; + rcpEta.value = base_type::ior1[0] / base_type::ior3[0]; rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } @@ -767,9 +766,9 @@ struct Iridescent(1.0)/base_type::eta23, flip); orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); return orientedFresnel; From 76ed66ca21dadddb6c1cd3576dd6a2cf423dca7d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 16:34:58 +0700 Subject: [PATCH 
097/157] fix wrong get refraction eta in iridescent transmission --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index ad83da5cf7..0c498efb79 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -752,7 +752,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; From 265100c26324b88cbcac5727862c0b14cac84847 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 5 Dec 2025 13:02:04 +0100 Subject: [PATCH 098/157] Extended NBL_CREATE_NSC_COMPILE_RULES function, now it creates precompiled shaders per build configuration --- cmake/common.cmake | 113 +++++++++++++++++++++++++++++---------------- examples_tests | 2 +- 2 files changed, 75 insertions(+), 40 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index ab215a59e3..010c7409dc 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1144,6 +1144,12 @@ define_property(TARGET PROPERTY NBL_MOUNT_POINT_DEFINES BRIEF_DOCS "List of preprocessor defines with mount points" ) +option(NSC_DEBUG_EDIF_FILE_BIT "Add \"-fspv-debug=file\" to NSC Debug CLI" ON) +option(NSC_DEBUG_EDIF_SOURCE_BIT "Add \"-fspv-debug=source\" to NSC Debug CLI" OFF) +option(NSC_DEBUG_EDIF_LINE_BIT "Add \"-fspv-debug=line\" to NSC Debug CLI" OFF) +option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" ON) +option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF) + function(NBL_CREATE_NSC_COMPILE_RULES) set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility") set(DEVICE_CONFIG_VIEW @@ -1178,9 +1184,34 @@ struct DeviceConfigCaps -enable-16bit-types -Zpr -spirv - -fspv-target-env=vulkan1.3 + -fspv-target-env=vulkan1.3 + -WShadow + -WConversion + $<$:-O0> + $<$:-O3> + $<$:-O3> ) + 
if(NSC_DEBUG_EDIF_FILE_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=file>) + endif() + + if(NSC_DEBUG_EDIF_SOURCE_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=source>) + endif() + + if(NSC_DEBUG_EDIF_LINE_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=line>) + endif() + + if(NSC_DEBUG_EDIF_TOOL_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=tool>) + endif() + + if(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=vulkan-with-source>) + endif() + if(NOT NBL_EMBED_BUILTIN_RESOURCES) list(APPEND REQUIRED_OPTIONS -I "${NBL_ROOT_PATH}/include" @@ -1210,12 +1241,12 @@ struct DeviceConfigCaps get_target_property(HEADER_RULE_GENERATED ${IMPL_TARGET} NBL_HEADER_GENERATED_RULE) if(NOT HEADER_RULE_GENERATED) - set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include") + set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include/$") set(INCLUDE_FILE "${INCLUDE_DIR}/$") set(INCLUDE_CONTENT $) file(GENERATE OUTPUT ${INCLUDE_FILE} - CONTENT ${INCLUDE_CONTENT} + CONTENT $ TARGET ${IMPL_TARGET} ) @@ -1420,7 +1451,7 @@ namespace @IMPL_NAMESPACE@ { nbl::core::string retval = "@BASE_KEY@"; @RETVAL_EVAL@ retval += ".spv"; - return retval; + return "$/" + retval; } } @@ -1444,46 +1475,50 @@ namespace @IMPL_NAMESPACE@ { function(GENERATE_KEYS PREFIX CAP_INDEX CAPS_EVAL_PART) if(NUM_CAPS EQUAL 0 OR CAP_INDEX EQUAL ${NUM_CAPS}) + # generate .config file set(FINAL_KEY "${BASE_KEY}${PREFIX}.spv") # always add ext even if its already there to make sure asset loader always is able to load as IShader - - set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") - set(CONFIG_FILE "${TARGET_OUTPUT}.config") + set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") + set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.config") set(CAPS_EVAL "${CAPS_EVAL_PART}") - string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") - set(NBL_NSC_COMPILE_COMMAND - "$" - -Fc "${TARGET_OUTPUT}" - ${COMPILE_OPTIONS} 
${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} - "${CONFIG_FILE}" - ) - - add_custom_command(OUTPUT "${TARGET_OUTPUT}" - COMMAND ${NBL_NSC_COMPILE_COMMAND} - DEPENDS ${DEPENDS_ON} - COMMENT "Creating \"${TARGET_OUTPUT}\"" - VERBATIM - COMMAND_EXPAND_LISTS - ) - - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") - target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) - - set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - - set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES - NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" - NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" - NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" - NBL_SPIRV_ACCESS_KEY "${FINAL_KEY}" - ) - - set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") + # generate keys and commands for compiling shaders + foreach(BUILD_CONFIGURATION ${CMAKE_CONFIGURATION_TYPES}) + set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${BUILD_CONFIGURATION}/${FINAL_KEY}") + + set(NBL_NSC_COMPILE_COMMAND + "$" + -Fc "${TARGET_OUTPUT}" + ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} + "${CONFIG_FILE}" + ) + + add_custom_command(OUTPUT "${TARGET_OUTPUT}" + COMMAND ${NBL_NSC_COMPILE_COMMAND} + DEPENDS ${DEPENDS_ON} + COMMENT "Creating \"${TARGET_OUTPUT}\"" + VERBATIM + COMMAND_EXPAND_LISTS + ) + + set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + ) + + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES + NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" + NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" + NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY}" + ) + + set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") + endforeach() return() endif() 
diff --git a/examples_tests b/examples_tests index e1e8dd6fb0..eb7d4fe788 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit e1e8dd6fb0c46612defeea46c960a6b85f4b4155 +Subproject commit eb7d4fe788fb5e88b8b475c979586e050e202b00 From 6887419b3d5a6b95851235fdbb2a1bae9c1f335f Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sat, 6 Dec 2025 21:03:27 +0300 Subject: [PATCH 099/157] updated examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f18160276e..93861bd59f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f18160276e78f860f64c45111c874e3351b44ffb +Subproject commit 93861bd59f85721993472e3de67f23bec6170363 From f32ddd2c45088bd715fe411b9d0ee3f5e93654fe Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 7 Dec 2025 00:53:04 +0300 Subject: [PATCH 100/157] Update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 93861bd59f..008e2ee154 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 93861bd59f85721993472e3de67f23bec6170363 +Subproject commit 008e2ee154b6cf5ba725752a3f1b4dac5d37ff42 From 4a8f4dcf0f7defba037565da99a7999c5e757c4d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 8 Dec 2025 16:04:44 +0700 Subject: [PATCH 101/157] quantized sequence packing data --- .../hlsl/sampling/quantized_sequence.hlsl | 166 ++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 167 insertions(+) create mode 100644 include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl new file mode 100644 index 0000000000..788a38d499 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -0,0 +1,166 @@ +// Copyright (C) 2018-2025 - DevSH Graphics 
Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/vector.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct QuantizedSequence; + +// byteslog2 = 1,2; dim = 1 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 0 && BytesLog2 < 3) +struct QuantizedSequence 0 && BytesLog2 < 3) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << BytesLog2; + using base_store_type = typename unsigned_integer_of_size::type; + + base_store_type getX() { return data; } + void setX(const base_store_type value) { data = value; } + + base_store_type data; +}; + +// byteslog2 = 3,4; dim = 1 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 2 && BytesLog2 < 5) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = uint16_t(2u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; + using base_store_type = typename unsigned_integer_of_size::type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t num_components = uint16_t(1u) << (BytesLog2 - base_bytes_log2); + using store_type = vector; + + store_type getX() { return data; } + void setX(const store_type value) { data = value; } + + store_type data; +}; + +// byteslog2 = 2,3; dim = 2 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; + using base_store_type = typename unsigned_integer_of_size::type; + using store_type = vector; + + base_store_type getX() { return 
data[0]; } + base_store_type getY() { return data[1]; } + void setX(const base_store_type value) { data[0] = value; } + void setY(const base_store_type value) { data[1] = value; } + + store_type data; +}; + +// byteslog2 = 1; dim = 2,3,4 +template NBL_PARTIAL_REQ_TOP(Dim > 1 && Dim < 5) +struct QuantizedSequence<1, Dim NBL_PARTIAL_REQ_BOT(Dim > 1 && Dim < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; + NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); + using base_store_type = uint16_t; + + base_store_type getX() { return data & MASK; } + base_store_type getY() { return (data >> bits_per_component) & MASK; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + base_store_type getZ() { return (data >> (bits_per_component * uint16_t(2u))) & MASK; } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + base_store_type getW() { return (data >> (bits_per_component * uint16_t(3u))) & MASK; } + + void setX(const base_store_type value) + { + data &= ~MASK; + data |= value & MASK; + } + void setY(const base_store_type value) + { + const uint16_t mask = MASK << bits_per_component; + data &= ~mask; + data |= (value & MASK) << bits_per_component; + } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + void setZ(const base_store_type value) + { + const uint16_t bits = (bits_per_component * uint16_t(2u)); + const uint16_t mask = MASK << bits; + data &= ~mask; + data |= (value & MASK) << bits; + } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + void setW(const base_store_type value) + { + const uint16_t bits = (bits_per_component * uint16_t(3u)); + const uint16_t mask = MASK << bits; + data &= ~mask; + data |= (value & MASK) << bits; + } + + base_store_type data; +}; + +// byteslog2 = 2,3; dim = 3 +template 
NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; + NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / uint16_t(3u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); + using base_store_type = typename unsigned_integer_of_size::type; + using store_type = vector; + + base_store_type getX() { return data[0] & MASK; } + base_store_type getY() + { + base_store_type y = data[0] >> bits_per_component; + y |= (data[1] >> bits_per_component) << (store_bits-bits_per_component); + return y; + } + base_store_type getZ() { return data[1] & MASK; } + + void setX(base_store_type x) + { + data[0] &= ~MASK; + data[0] |= x & MASK; + } + void setY(base_store_type y) + { + const uint16_t ybits = store_bits-bits_per_component; + const uint16_t ymask = uint16_t(1u) << ybits; + data[0] &= MASK; + data[1] &= MASK; + data[0] |= (y & ymask) << bits_per_component; + data[1] |= (y >> (ybits) & ymask) << bits_per_component; + } + void setZ(base_store_type z) + { + data[1] &= ~MASK; + data[1] |= z & MASK; + } + + store_type data; +}; + +// not complete because we're changing the template params next commit + +} + +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 736148fb21..30c8cdd8df 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -256,6 +256,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/basic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/bilinear.hlsl") 
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quantized_sequence.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_triangle.hlsl") From 1a32ed257d717b57ba4a51b1a7f529a21c3cec1c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 8 Dec 2025 20:58:54 +0700 Subject: [PATCH 102/157] Remove NBL_ASSERT usage --- include/nbl/builtin/hlsl/morton.hlsl | 1 - 1 file changed, 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 08b2b1ccfb..4e90fd4c91 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -361,7 +361,6 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); assert((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); From 183205914eaed37bacb146ee7c0d987ac09265c1 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:21:01 +0300 Subject: [PATCH 103/157] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 008e2ee154..91ae8657de 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 008e2ee154b6cf5ba725752a3f1b4dac5d37ff42 +Subproject commit 91ae8657dee9b4de82c81b97b23b83d3824a6011 From b79bf8f7f44b913766a4fedaf2b887912d766e7a Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:30:59 +0300 Subject: [PATCH 104/157] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 91ae8657de..0124cc9c0a 160000 --- a/examples_tests +++ b/examples_tests 
@@ -1 +1 @@ -Subproject commit 91ae8657dee9b4de82c81b97b23b83d3824a6011 +Subproject commit 0124cc9c0ad83d4a38f1e8ac3ddcdf56125740ac From 49a017afca6718faac8b4bc08e55fe2d473f2d43 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:45:05 +0300 Subject: [PATCH 105/157] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 0124cc9c0a..a35eddd1bd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0124cc9c0ad83d4a38f1e8ac3ddcdf56125740ac +Subproject commit a35eddd1bd83fbf636e820b59c6eef939ed09668 From b8688bef70d2316a982a5caafa6ab065e7430cd6 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 9 Dec 2025 15:08:09 +0700 Subject: [PATCH 106/157] templated quantized sequence --- .../hlsl/sampling/quantized_sequence.hlsl | 295 ++++++++++++------ 1 file changed, 199 insertions(+), 96 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 788a38d499..5738dfec8c 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -15,31 +15,16 @@ namespace hlsl namespace sampling { -template +template struct QuantizedSequence; -// byteslog2 = 1,2; dim = 1 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 0 && BytesLog2 < 3) -struct QuantizedSequence 0 && BytesLog2 < 3) > -{ - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << BytesLog2; - using base_store_type = typename unsigned_integer_of_size::type; - - base_store_type getX() { return data; } - void setX(const base_store_type value) { data = value; } - - base_store_type data; -}; +#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 -// byteslog2 = 3,4; dim = 1 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 2 && BytesLog2 < 5) -struct QuantizedSequence 2 && 
BytesLog2 < 5) > +// all Dim=1 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) +struct QuantizedSequence { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = uint16_t(2u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; - using base_store_type = typename unsigned_integer_of_size::type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t num_components = uint16_t(1u) << (BytesLog2 - base_bytes_log2); - using store_type = vector; + using store_type = T; store_type getX() { return data; } void setX(const store_type value) { data = value; } @@ -47,116 +32,234 @@ struct QuantizedSequence 2 && Bytes store_type data; }; -// byteslog2 = 2,3; dim = 2 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) -struct QuantizedSequence 2 && BytesLog2 < 5) > +// uint16_t, uint32_t; Dim=2,3,4 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) +struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; - using base_store_type = typename unsigned_integer_of_size::type; - using store_type = vector; + using store_type = T; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - base_store_type getX() { return data[0]; } - base_store_type getY() { return data[1]; } - void setX(const base_store_type value) { data[0] = value; } - void setY(const base_store_type value) { data[1] = value; } + store_type getX() { return data & Mask; } + store_type getY() { return (data >> (BitsPerComponent * uint16_t(1u))) & Mask; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + store_type getZ() { return (data >> 
(BitsPerComponent * uint16_t(2u))) & Mask; } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + store_type getW() { return (data >> (BitsPerComponent * uint16_t(3u))) & Mask; } + + void setX(const store_type value) + { + data &= ~Mask; + data |= value & Mask; + } + void setY(const store_type value) + { + data &= ~(Mask << BitsPerComponent); + data |= (value & Mask) << BitsPerComponent; + } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + void setZ(const store_type value) + { + const uint16_t bits = (BitsPerComponent * uint16_t(2u)); + data &= ~(Mask << bits); + data |= (value & Mask) << bits; + } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + void setW(const store_type value) + { + const uint16_t bits = (BitsPerComponent * uint16_t(3u)); + data &= ~(Mask << bits); + data |= (value & Mask) << bits; + } store_type data; }; -// byteslog2 = 1; dim = 2,3,4 -template NBL_PARTIAL_REQ_TOP(Dim > 1 && Dim < 5) -struct QuantizedSequence<1, Dim NBL_PARTIAL_REQ_BOT(Dim > 1 && Dim < 5) > +// Dim 2,3,4 matches vector dim +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) +struct QuantizedSequence::Dimension == Dim && Dim > 1 && Dim < 5) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; - NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); - using base_store_type = uint16_t; - - base_store_type getX() { return data & MASK; } - base_store_type getY() { return (data >> bits_per_component) & MASK; } + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + + scalar_type getX() { return data[0]; } + scalar_type getY() { return data[1]; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + scalar_type getZ() { return data[2]; } + 
template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + scalar_type getW() { return data[3]; } + + void setX(const scalar_type value) { data[0] = value; } + void setY(const scalar_type value) { data[1] = value; } template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - base_store_type getZ() { return (data >> (bits_per_component * uint16_t(2u))) & MASK; } + void setZ(const scalar_type value) { data[2] = value; } template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - base_store_type getW() { return (data >> (bits_per_component * uint16_t(3u))) & MASK; } + void setW(const scalar_type value) { data[3] = value; } + + store_type data; +}; - void setX(const base_store_type value) +// uint16_t2, uint32_t2; Dim=3 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 3) +struct QuantizedSequence::Dimension == 2 && Dim == 3) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + + scalar_type getX() { return data[0] & Mask; } + scalar_type getY() { - data &= ~MASK; - data |= value & MASK; + scalar_type y = data[0] >> BitsPerComponent; + y |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + return y; } - void setY(const base_store_type value) + scalar_type getZ() { return data[1] & Mask; } + + void setX(const scalar_type value) { - const uint16_t mask = MASK << bits_per_component; - data &= ~mask; - data |= (value & MASK) << bits_per_component; + data[0] &= ~Mask; + data[0] |= value & Mask; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const base_store_type value) + void setY(const scalar_type value) { - const uint16_t bits = (bits_per_component * uint16_t(2u)); - const uint16_t mask = MASK << bits; - data &= ~mask; - data |= (value & MASK) << 
bits; + const uint16_t ybits = StoreBits-BitsPerComponent; + const uint16_t ymask = uint16_t(1u) << ybits; + data[0] &= Mask; + data[1] &= Mask; + data[0] |= (value & ymask) << BitsPerComponent; + data[1] |= (value >> (ybits) & ymask) << BitsPerComponent; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const base_store_type value) + void setZ(const scalar_type value) { - const uint16_t bits = (bits_per_component * uint16_t(3u)); - const uint16_t mask = MASK << bits; - data &= ~mask; - data |= (value & MASK) << bits; + data[1] &= ~Mask; + data[1] |= value & Mask; } - base_store_type data; + store_type data; +}; + +// uint16_t2, uint32_t2; Dim=4 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 4) +struct QuantizedSequence::Dimension == 2 && Dim == 4) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + + scalar_type getX() { return data[0] & Mask; } + scalar_type getY() { return data[0] >> BitsPerComponent; } + scalar_type getZ() { return data[1] & Mask; } + scalar_type getW() { return data[1] >> BitsPerComponent; } + + void setX(const scalar_type value) + { + data[0] &= ~Mask; + data[0] |= value & Mask; + } + void setY(const scalar_type value) + { + data[0] &= Mask; + data[0] |= (value & Mask) << BitsPerComponent; + } + void setZ(const scalar_type value) + { + data[1] &= ~Mask; + data[1] |= value & Mask; + } + void setW(const scalar_type value) + { + data[1] &= Mask; + data[1] |= (value & Mask) << BitsPerComponent; + } + + store_type data; +}; + +// uint16_t4, uint32_t4; Dim=2 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 2) +struct QuantizedSequence::Dimension == 4 && 
Dim == 2) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + using base_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + + base_type getX() { return data.xy; } + base_type getY() { return data.zw; } + + void setX(const base_type value) { data.xy = value; } + void setY(const base_type value) { data.zw = value; } + + store_type data; }; -// byteslog2 = 2,3; dim = 3 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) -struct QuantizedSequence 2 && BytesLog2 < 5) > +// uint16_t4, uint32_t4; Dim=3 +// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y +// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 3) +struct QuantizedSequence::Dimension == 4 && Dim == 3) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; - NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; - NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / uint16_t(3u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); - using base_store_type = typename unsigned_integer_of_size::type; - using store_type = vector; - - base_store_type getX() { return data[0] & MASK; } - base_store_type getY() - { - base_store_type y = data[0] >> bits_per_component; - y |= (data[1] >> bits_per_component) << (store_bits-bits_per_component); + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + using base_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + 
NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + + base_type getX() + { + base_type x; + x[0] = data[0]; + x[1] = data[3] & Mask; + return x; + } + base_type getY() + { + base_type y; + y[0] = data[1]; + y[1] = (data[3] >> LeftoverBitsPerComponent) & Mask; return y; } - base_store_type getZ() { return data[1] & MASK; } + base_type getZ() + { + base_type z; + z[0] = data[1]; + z[1] = (data[3] >> (LeftoverBitsPerComponent * uint16_t(2u))) & Mask; + return z; + } - void setX(base_store_type x) + void setX(const base_type value) { - data[0] &= ~MASK; - data[0] |= x & MASK; + data[0] = value[0]; + data[3] &= ~Mask; + data[3] |= value[1] & Mask; } - void setY(base_store_type y) + void setY(const base_type value) { - const uint16_t ybits = store_bits-bits_per_component; - const uint16_t ymask = uint16_t(1u) << ybits; - data[0] &= MASK; - data[1] &= MASK; - data[0] |= (y & ymask) << bits_per_component; - data[1] |= (y >> (ybits) & ymask) << bits_per_component; + data[1] = value[0]; + data[3] &= ~Mask; + data[3] |= (value[1] & Mask) << LeftoverBitsPerComponent; } - void setZ(base_store_type z) + void setZ(const base_type value) { - data[1] &= ~MASK; - data[1] |= z & MASK; + data[2] = value[0]; + data[3] &= ~Mask; + data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * uint16_t(2u)); } store_type data; }; -// not complete because we're changing the template params next commit +#undef SEQUENCE_SPECIALIZATION_CONCEPT } From 38bbf049215dc1c0d801c901de878bff9185ca19 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 9 Dec 2025 15:29:08 +0700 Subject: [PATCH 107/157] latest example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c0eda4b4ab..c1c71ee83e 160000 --- 
a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c0eda4b4ab50f8a7ad56bb32c98088d59c711b46 +Subproject commit c1c71ee83e9b017d2389022c5a6ecaf305f80bfd From fae7a80c24db5e281fe6a1929f356622ddb527d4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 9 Dec 2025 16:12:58 +0700 Subject: [PATCH 108/157] quantized sequence decode --- examples_tests | 2 +- .../hlsl/sampling/quantized_sequence.hlsl | 83 +++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c1c71ee83e..fb24a25a44 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c1c71ee83e9b017d2389022c5a6ecaf305f80bfd +Subproject commit fb24a25a44b85a9cee830a3cafd86894ca137453 diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 5738dfec8c..fcb2488514 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -18,6 +18,89 @@ namespace sampling template struct QuantizedSequence; + +namespace impl +{ +template +struct decode_helper; + +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const scalar_type scrambleKey) + { + scalar_type seqVal = val.getX(); + seqVal ^= scrambleKey; + return hlsl::promote(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal ^= scrambleKey; + return 
return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal[2] = val.getZ(); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal[2] = val.getZ(); + seqVal[3] = val.getW(); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +} + +template +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +{ + return impl::decode_helper::__call(val, scrambleKey); +} + + #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 // all Dim=1 From e23538cb43f5923c703daa16f346fe14ccfb3d78 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 10 Dec 2025 11:24:14 +0700 Subject: [PATCH 109/157] quantized sequence get/set values by index, simplify decode func specializations --- .../hlsl/sampling/quantized_sequence.hlsl | 272 ++++++------------ 1 file changed, 86 insertions(+), 186 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index fcb2488514..9392a7dab0 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ 
b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -22,72 +22,19 @@ struct QuantizedSequence; namespace impl { template -struct decode_helper; - -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const scalar_type scrambleKey) - { - scalar_type seqVal = val.getX(); - seqVal ^= scrambleKey; - return hlsl::promote(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper +struct decode_helper { using scalar_type = typename vector_traits::scalar_type; using fp_type = typename float_of_size::type; - using uvec_type = vector; - using return_type = vector; + using uvec_type = vector; + using sequence_type = QuantizedSequence; + using return_type = vector; - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; - using return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) - { - uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal[2] = val.getZ(); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; - using return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const 
uvec_type scrambleKey) - { - uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal[2] = val.getZ(); - seqVal[3] = val.getW(); + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + seqVal[i] = val.get(i); seqVal ^= scrambleKey; return return_type(seqVal) * bit_cast(0x2f800004u); } @@ -109,8 +56,8 @@ struct QuantizedSequence 0 && idx < 1); return data; } + void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } store_type data; }; @@ -124,34 +71,16 @@ struct QuantizedSequence> (BitsPerComponent * uint16_t(1u))) & Mask; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - store_type getZ() { return (data >> (BitsPerComponent * uint16_t(2u))) & Mask; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - store_type getW() { return (data >> (BitsPerComponent * uint16_t(3u))) & Mask; } - - void setX(const store_type value) - { - data &= ~Mask; - data |= value & Mask; - } - void setY(const store_type value) - { - data &= ~(Mask << BitsPerComponent); - data |= (value & Mask) << BitsPerComponent; - } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const store_type value) + store_type get(const uint16_t idx) { - const uint16_t bits = (BitsPerComponent * uint16_t(2u)); - data &= ~(Mask << bits); - data |= (value & Mask) << bits; + assert(idx > 0 && idx < Dim); + return (data >> (BitsPerComponent * idx)) & Mask; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const store_type value) + + void set(const uint16_t idx, const store_type value) { - const uint16_t bits = (BitsPerComponent * uint16_t(3u)); + assert(idx > 0 && idx < Dim); + const uint16_t bits = (BitsPerComponent * idx); data &= ~(Mask << bits); data |= (value & Mask) << bits; } @@ -166,19 +95,8 @@ struct QuantizedSequence::scalar_type; - scalar_type getX() { return data[0]; } - scalar_type getY() { return data[1]; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - scalar_type getZ() { return data[2]; } - template 
NBL_FUNC_REQUIRES(C::value && 3 < Dim) - scalar_type getW() { return data[3]; } - - void setX(const scalar_type value) { data[0] = value; } - void setY(const scalar_type value) { data[1] = value; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const scalar_type value) { data[2] = value; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const scalar_type value) { data[3] = value; } + scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } + void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } store_type data; }; @@ -193,33 +111,38 @@ struct QuantizedSequence> BitsPerComponent; - y |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); - return y; - } - scalar_type getZ() { return data[1] & Mask; } - - void setX(const scalar_type value) - { - data[0] &= ~Mask; - data[0] |= value & Mask; - } - void setY(const scalar_type value) - { - const uint16_t ybits = StoreBits-BitsPerComponent; - const uint16_t ymask = uint16_t(1u) << ybits; - data[0] &= Mask; - data[1] &= Mask; - data[0] |= (value & ymask) << BitsPerComponent; - data[1] |= (value >> (ybits) & ymask) << BitsPerComponent; - } - void setZ(const scalar_type value) - { - data[1] &= ~Mask; - data[1] |= value & Mask; + scalar_type get(const uint16_t idx) + { + assert(idx > 0 && idx < 3); + if (idx < 2) + { + return data[idx] & Mask; + } + else + { + scalar_type z = data[0] >> BitsPerComponent; + z |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + return z; + } + } + + void set(const uint16_t idx, const scalar_type value) + { + assert(idx > 0 && idx < 3); + if (idx < 2) + { + data[idx] &= ~Mask; + data[idx] |= value & Mask; + } + else + { + const uint16_t zbits = StoreBits-BitsPerComponent; + const uint16_t zmask = uint16_t(1u) << zbits; + data[0] &= Mask; + data[1] &= Mask; + data[0] |= (value & zmask) << BitsPerComponent; + data[1] |= (value >> (zbits) & zmask) << 
BitsPerComponent; + } } store_type data; @@ -235,30 +158,20 @@ struct QuantizedSequence> BitsPerComponent; } - scalar_type getZ() { return data[1] & Mask; } - scalar_type getW() { return data[1] >> BitsPerComponent; } - - void setX(const scalar_type value) - { - data[0] &= ~Mask; - data[0] |= value & Mask; - } - void setY(const scalar_type value) + scalar_type get(const uint16_t idx) { - data[0] &= Mask; - data[0] |= (value & Mask) << BitsPerComponent; + assert(idx > 0 && idx < 4); + const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); + return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; } - void setZ(const scalar_type value) - { - data[1] &= ~Mask; - data[1] |= value & Mask; - } - void setW(const scalar_type value) + + void set(const uint16_t idx, const scalar_type value) { - data[1] &= Mask; - data[1] |= (value & Mask) << BitsPerComponent; + assert(idx > 0 && idx < 4); + const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); + const uint16_t odd = idx & uint16_t(1u); + data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); + data[i] |= (value & Mask) << (BitsPerComponent * odd); } store_type data; @@ -275,11 +188,22 @@ struct QuantizedSequence 0 && idx < 2); + base_type a; + a[0] = data[uint16_t(2u) * idx]; + a[1] = data[uint16_t(2u) * idx + 1]; + return a; + } - void setX(const base_type value) { data.xy = value; } - void setY(const base_type value) { data.zw = value; } + void set(const uint16_t idx, const base_type value) + { + assert(idx > 0 && idx < 2); + base_type a; + data[uint16_t(2u) * idx] = value[0]; + data[uint16_t(2u) * idx + 1] = value[1]; + } store_type data; }; @@ -298,45 +222,21 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - base_type getX() - { - base_type x; - x[0] = data[0]; - x[1] = data[3] & Mask; - return x; - } - base_type getY() + base_type get(const uint16_t idx) { - base_type y; - y[0] = data[1]; - y[1] = (data[3] >> 
LeftoverBitsPerComponent) & Mask; - return y; - } - base_type getZ() - { - base_type z; - z[0] = data[1]; - z[1] = (data[3] >> (LeftoverBitsPerComponent * uint16_t(2u))) & Mask; - return z; + assert(idx > 0 && idx < 3); + base_type a; + a[0] = data[idx]; + a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; + return a; } - void setX(const base_type value) - { - data[0] = value[0]; - data[3] &= ~Mask; - data[3] |= value[1] & Mask; - } - void setY(const base_type value) - { - data[1] = value[0]; - data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << LeftoverBitsPerComponent; - } - void setZ(const base_type value) + void set(const uint16_t idx, const base_type value) { - data[2] = value[0]; + assert(idx > 0 && idx < 3); + data[idx] = value[0]; data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * uint16_t(2u)); + data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * idx); } store_type data; From 6e9160e7b869a88912652bbbbbf3c672d4736de0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 10 Dec 2025 16:51:29 +0700 Subject: [PATCH 110/157] quantized sequence encode should right shift input, changed scramble to initialize a pcg hash, added some helpful unorm constants --- .../hlsl/sampling/quantized_sequence.hlsl | 64 +++++++++++++------ 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 9392a7dab0..b70bddf54e 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/concepts/vector.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" namespace nbl { @@ -21,6 +22,23 @@ struct QuantizedSequence; namespace impl { +template +struct unorm_constant; +template<> +struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t 
value = 0x3d888889u; }; +template<> +struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; +template<> +struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; +template<> +struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; +template<> +struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; +template<> +struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; +template<> +struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; + template struct decode_helper { @@ -29,25 +47,25 @@ struct decode_helper using uvec_type = vector; using sequence_type = QuantizedSequence; using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) { + random::PCG32 pcg = random::PCG32::construct(scrambleSeed); uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); + seqVal[i] = val.get(i) ^ pcg(); + return return_type(seqVal) * bit_cast(UNormConstant); } }; } template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) { - return impl::decode_helper::__call(val, scrambleKey); + return impl::decode_helper::__call(val, scrambleSeed); } - #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 // all Dim=1 @@ -55,6 +73,7 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) 
struct QuantizedSequence { using store_type = T; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -67,9 +86,10 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_C struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; store_type get(const uint16_t idx) { @@ -82,7 +102,7 @@ struct QuantizedSequence 0 && idx < Dim); const uint16_t bits = (BitsPerComponent * idx); data &= ~(Mask << bits); - data |= (value & Mask) << bits; + data |= ((value >> DiscardBits) & Mask) << bits; } store_type data; @@ -107,9 +127,11 @@ struct QuantizedSequence::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -132,16 +154,17 @@ struct QuantizedSequence> DiscardBits) & Mask; } else { const uint16_t zbits = StoreBits-BitsPerComponent; const uint16_t zmask = uint16_t(1u) << zbits; + const scalar_type trunc_val = value >> DiscardBits; data[0] &= Mask; data[1] &= Mask; - data[0] |= (value & 
zmask) << BitsPerComponent; - data[1] |= (value >> (zbits) & zmask) << BitsPerComponent; + data[0] |= (trunc_val & zmask) << BitsPerComponent; + data[1] |= (trunc_val >> (zbits) & zmask) << BitsPerComponent; } } @@ -154,9 +177,10 @@ struct QuantizedSequence::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; scalar_type get(const uint16_t idx) { @@ -171,7 +195,7 @@ struct QuantizedSequence> uint16_t(1u); const uint16_t odd = idx & uint16_t(1u); data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); - data[i] |= (value & Mask) << (BitsPerComponent * odd); + data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd); } store_type data; @@ -184,9 +208,6 @@ struct QuantizedSequence::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; - NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); base_type get(const uint16_t idx) { @@ -217,10 +238,11 @@ struct QuantizedSequence::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t 
DiscardBits = StoreBits - BitsPerComponent; base_type get(const uint16_t idx) { @@ -236,7 +258,7 @@ struct QuantizedSequence 0 && idx < 3); data[idx] = value[0]; data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * idx); + data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); } store_type data; From 4a399fbd58000439da6216eb6756edf9c9a84cf5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 00:27:55 +0700 Subject: [PATCH 111/157] Fix verifyAnyBitIntegral --- include/nbl/builtin/hlsl/morton.hlsl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 4e90fd4c91..9ba33ffb3d 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -28,14 +28,19 @@ NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; template && concepts::Scalar) NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) { - NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((T(1) << Bits) - 1); - const bool allZero = ((val & mask) == 0); NBL_IF_CONSTEXPR(is_signed_v) { + // include the msb + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((uint64_t(1) << (Bits-1)) - 1); + const bool allZero = ((val & mask) == 0); const bool allOne = ((val & mask) == mask); return allZero || allOne; + } else + { + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((uint64_t(1) << Bits) - 1); + const bool allZero = ((val & mask) == 0); + return allZero; } - return allZero; } template && concepts::Scalar) From 84569361d6dce701e396284012bfb5d9d7ff445e Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 10 Dec 2025 14:35:33 +0100 Subject: [PATCH 112/157] Fixed bug in the `promote_affine` function --- include/nbl/builtin/hlsl/math/linalg/transform.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index 59ff142150..070f1e7af5 100644 --- 
a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -88,7 +88,7 @@ matrix promote_affine(const matrix inMatrix) NBL_UNROLL for (uint32_t row_i = NIn; row_i < NOut; row_i++) { retval[row_i] = promote(0.0); - if (row_i >= MIn && row_i < MOut) + if (row_i < MOut) retval[row_i][row_i] = T(1.0); } return retval; From 6f0d0120f438637f054a43c6c0dfffe24b66a931 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 11 Dec 2025 11:26:37 +0700 Subject: [PATCH 113/157] added decode variant for scramble before decode --- examples_tests | 2 +- .../hlsl/sampling/quantized_sequence.hlsl | 50 +++++++++++++++++-- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/examples_tests b/examples_tests index fb24a25a44..456f9e2fb0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit fb24a25a44b85a9cee830a3cafd86894ca137453 +Subproject commit 456f9e2fb0bffa0d599366bc4a0616730615ac93 diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index b70bddf54e..08f23eb170 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -39,8 +39,11 @@ struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000 template<> struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; +template +struct decode_helper; + template -struct decode_helper +struct decode_helper { using scalar_type = typename vector_traits::scalar_type; using fp_type = typename float_of_size::type; @@ -58,12 +61,46 @@ struct decode_helper return return_type(seqVal) * bit_cast(UNormConstant); } }; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using sequence_type = QuantizedSequence; + using sequence_store_type = 
typename sequence_type::store_type; + using sequence_scalar_type = typename vector_traits::scalar_type; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; + + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + { + random::PCG32 pcg = random::PCG32::construct(scrambleSeed); + + sequence_store_type scrambleKey; + NBL_UNROLL for(uint16_t i = 0; i < vector_traits::Dimension; i++) + scrambleKey[i] = sequence_scalar_type(pcg()); + + sequence_type scramble; + scramble.data = scrambleKey ^ val.data; + + // sequence_type scramble; + // NBL_UNROLL for(uint16_t i = 0; i < D; i++) + // scramble.set(i, pcg()); + // scramble.data ^= val.data; + + uvec_type seqVal; + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + seqVal[i] = scramble.get(i); + return return_type(seqVal) * bit_cast(UNormConstant); + } +}; } -template +template vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) { - return impl::decode_helper::__call(val, scrambleSeed); + return impl::decode_helper::__call(val, scrambleSeed); } #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 @@ -73,7 +110,7 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) struct QuantizedSequence { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -90,6 +127,7 @@ struct QuantizedSequence::value; store_type get(const uint16_t idx) { @@ -114,6 +152,7 @@ struct QuantizedSequence::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = 
impl::unorm_constant<8u*sizeof(scalar_type)>::value; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } @@ -181,6 +220,7 @@ struct QuantizedSequence::value; scalar_type get(const uint16_t idx) { @@ -208,6 +248,7 @@ struct QuantizedSequence::scalar_type; using base_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { @@ -243,6 +284,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { From c5c1dc2e2c520cf795568b4667414bf1c08ad0b1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 14:13:37 +0700 Subject: [PATCH 114/157] Remove const specifier for parameters in ternary operator --- include/nbl/builtin/hlsl/functional.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 7531c5cdb9..5af6c98008 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -241,7 +241,7 @@ struct ternary_operator { using type_t = std::invoke_result_t; - constexpr inline type_t operator()(const bool condition, const F1& lhs, const F2& rhs) + constexpr inline type_t operator()(const bool condition, F1& lhs, F2& rhs) { if (condition) return std::invoke(lhs); @@ -255,7 +255,7 @@ struct ternary_operator { using type_t = decltype(experimental::declval().operator()); - NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_CONST_REF_ARG(F1) lhs, NBL_CONST_REF_ARG(F2) rhs) + 
NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_REF_ARG(F1) lhs, NBL_REF_ARG(F2) rhs) { if (condition) return lhs(); From 9b2780fe5dbb8ce427eba699f929f0fd6d1b86fb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 14:27:18 +0700 Subject: [PATCH 115/157] Improve select implementation to use spirv intrinsics instead of branch --- include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 67a9f67d8f..66ed29f1ad 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -661,7 +661,11 @@ struct select_helper) > { NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { + #ifdef __HLSL_VERSION + return spirv::select(condition, object1, object2); + #else return condition ? 
object1 : object2; + #endif } }; From 887117537e9b59cebdf27eb3cd792e6a43efbaac Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 15:48:01 +0700 Subject: [PATCH 116/157] Refactor select_helper to use intrinsics if possible --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 72 ++++++++++--------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 66ed29f1ad..2856871a02 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -52,6 +52,14 @@ NBL_BOOL_CONCEPT MixCallingBuiltins = #else MixIsCallable; #endif + +template +NBL_BOOL_CONCEPT SelectCallingBuiltins = +#ifdef __HLSL_VERSION +spirv::SelectIsCallable; +#else +false; +#endif } template @@ -476,6 +484,17 @@ struct mix_helper } }; +template +requires(concepts::BooleanScalar) +struct select_helper +{ + using return_t = T; + static inline return_t __call(const B condition, const T& object1, const T& object2) + { + return condition ? object1 : object2; + } +}; + template requires concepts::FloatingPointScalar || concepts::IntegralScalar struct sign_helper @@ -655,38 +674,6 @@ struct subBorrow_helper } }; -template -NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) -struct select_helper) > -{ - NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) - { - #ifdef __HLSL_VERSION - return spirv::select(condition, object1, object2); - #else - return condition ? 
object1 : object2; - #endif - } -}; - -template -NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) -struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > -{ - NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) - { - using traits = hlsl::vector_traits; - array_get conditionGetter; - array_get objectGetter; - array_set setter; - - T selected; - for (uint32_t i = 0; i < traits::Dimension; ++i) - setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); - - return selected; - } -}; template struct undef_helper @@ -980,6 +967,27 @@ struct mix_helper && concepts::B } }; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean && concepts::Vector && concepts::Vector && (extent_v == extent_v) && !impl::SelectCallingBuiltins) +struct select_helper && concepts::Vector && concepts::Vector && (extent_v == extent_v) && !impl::SelectCallingBuiltins) > +{ + using return_t = T; + NBL_CONSTEXPR_STATIC return_t __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? 
objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + template NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) struct fma_helper From bbbeeea7471ed742fdd5ca15769d9f1bbb9d1983 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 15:58:16 +0700 Subject: [PATCH 117/157] Revert previous changes, instead make it clear that the function is implemented only in cpp --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 70 ++++++++----------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 2856871a02..9fe3ddc21b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -52,14 +52,6 @@ NBL_BOOL_CONCEPT MixCallingBuiltins = #else MixIsCallable; #endif - -template -NBL_BOOL_CONCEPT SelectCallingBuiltins = -#ifdef __HLSL_VERSION -spirv::SelectIsCallable; -#else -false; -#endif } template @@ -484,17 +476,6 @@ struct mix_helper } }; -template -requires(concepts::BooleanScalar) -struct select_helper -{ - using return_t = T; - static inline return_t __call(const B condition, const T& object1, const T& object2) - { - return condition ? object1 : object2; - } -}; - template requires concepts::FloatingPointScalar || concepts::IntegralScalar struct sign_helper @@ -674,6 +655,36 @@ struct subBorrow_helper } }; +template +requires (concepts::BooleanScalar) +struct select_helper +{ + using return_t = T; + constexpr static return_t __call(const B& condition, const T& object1, const T& object2) + { + return condition ? 
object1 : object2; + } +}; + +template +requires (concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper +{ + using return_t = T; + constexpr static T __call(const B& condition, const T& object1, const T& object2) + { + using traits = vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; template struct undef_helper @@ -967,27 +978,6 @@ struct mix_helper && concepts::B } }; - -template -NBL_PARTIAL_REQ_TOP(concepts::Boolean && concepts::Vector && concepts::Vector && (extent_v == extent_v) && !impl::SelectCallingBuiltins) -struct select_helper && concepts::Vector && concepts::Vector && (extent_v == extent_v) && !impl::SelectCallingBuiltins) > -{ - using return_t = T; - NBL_CONSTEXPR_STATIC return_t __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) - { - using traits = hlsl::vector_traits; - array_get conditionGetter; - array_get objectGetter; - array_set setter; - - T selected; - for (uint32_t i = 0; i < traits::Dimension; ++i) - setter(selected, i, conditionGetter(condition, i) ? 
objectGetter(object1, i) : objectGetter(object2, i)); - - return selected; - } -}; - template NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) struct fma_helper From 258e491cb3377f1b87a9c8850d1f4f3a69ccfa11 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 16:36:03 +0700 Subject: [PATCH 118/157] Reverse the order of template argument of select and select_helper --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 14 +++++++------- .../nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl | 4 ++-- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 9fe3ddc21b..8a745fc4ef 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -90,7 +90,7 @@ template struct all_helper; template struct any_helper; -template +template struct select_helper; template struct bitReverseAs_helper; @@ -166,7 +166,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSq template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), bool) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), bool) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (B)(T), (B)(T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (T)(B), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -655,9 +655,9 @@ struct subBorrow_helper } }; -template +template requires (concepts::BooleanScalar) -struct select_helper +struct select_helper { using return_t = T; 
constexpr static return_t __call(const B& condition, const T& object1, const T& object2) @@ -666,9 +666,9 @@ struct select_helper } }; -template +template requires (concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) -struct select_helper +struct select_helper { using return_t = T; constexpr static T __call(const B& condition, const T& object1, const T& object2) @@ -974,7 +974,7 @@ struct mix_helper && concepts::B using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - return select_helper(a, y, x); + return select_helper(a, y, x); } }; diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 27518222b3..78367f7924 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -156,10 +156,10 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } -template +template NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) { - return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); } /** diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 1324998d1a..4fa2014607 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -208,7 +208,7 @@ struct left_shift_operator(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - return select(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined @@ -235,7 +235,7 @@ struct arithmetic_right_shift_operator const type_t shifted = 
type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - return select(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` the result is undefined @@ -261,7 +261,7 @@ struct arithmetic_right_shift_operator const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? ~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - return select(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined From c6462d1834d9b8781adcb2a7e67942b2cecdf77c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 12 Dec 2025 17:32:00 +0700 Subject: [PATCH 119/157] some minor fixes to quantized sequence set, decode --- .../hlsl/sampling/quantized_sequence.hlsl | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 08f23eb170..27588dd9e0 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -71,23 +71,16 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) { 
random::PCG32 pcg = random::PCG32::construct(scrambleSeed); - sequence_store_type scrambleKey; - NBL_UNROLL for(uint16_t i = 0; i < vector_traits::Dimension; i++) - scrambleKey[i] = sequence_scalar_type(pcg()); - sequence_type scramble; - scramble.data = scrambleKey ^ val.data; - - // sequence_type scramble; - // NBL_UNROLL for(uint16_t i = 0; i < D; i++) - // scramble.set(i, pcg()); - // scramble.data ^= val.data; + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + scramble.set(i, pcg()); + scramble.data ^= val.data; uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) @@ -197,13 +190,13 @@ struct QuantizedSequence> DiscardBits; + const scalar_type zbits = StoreBits-BitsPerComponent; + const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); + const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; data[1] &= Mask; data[0] |= (trunc_val & zmask) << BitsPerComponent; - data[1] |= (trunc_val >> (zbits) & zmask) << BitsPerComponent; + data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent; } } From ba6e1ec941fd5bb694c4b161ff236ced0f41ffb2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 23:15:27 +0700 Subject: [PATCH 120/157] Fix complex identity macro --- include/nbl/builtin/hlsl/complex.hlsl | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index 7f59d103fa..7e8f6526ec 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -238,28 +238,28 @@ struct divides< complex_t > // Out of line generic initialization of static member data not yet supported so we X-Macro identities for Scalar types we want to support // (left X-Macro here since it's pretty readable) -#define COMPLEX_ARITHMETIC_IDENTITIES(SCALAR) \ +#define COMPLEX_ARITHMETIC_IDENTITIES(SCALAR, COMPONENT) \ template<> \ -const static complex_t< SCALAR > plus< complex_t< SCALAR > >::identity = { 
promote< SCALAR , uint32_t>(0), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > plus< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(0), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > minus< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(0), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > minus< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(0), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > multiplies< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(1), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > multiplies< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(1), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > divides< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(1), promote< SCALAR , uint32_t>(0)}; - -COMPLEX_ARITHMETIC_IDENTITIES(float16_t) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t4) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t4) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t4) +const static complex_t< SCALAR > divides< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(1), promote< SCALAR, COMPONENT>(0)}; + +COMPLEX_ARITHMETIC_IDENTITIES(float16_t, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t2, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t3, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t4, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t2, float32_t) 
+COMPLEX_ARITHMETIC_IDENTITIES(float32_t3, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t4, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t2, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t3, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t4, float64_t) #undef COMPLEX_ARITHMETIC_IDENTITIES From 87e2cff9efd25500101ba58479b1dfdf7a70c318 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 12 Dec 2025 17:26:19 +0100 Subject: [PATCH 121/157] Fixed keys --- cmake/common.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 010c7409dc..c61c13714f 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1485,7 +1485,8 @@ namespace @IMPL_NAMESPACE@ { # generate keys and commands for compiling shaders foreach(BUILD_CONFIGURATION ${CMAKE_CONFIGURATION_TYPES}) - set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${BUILD_CONFIGURATION}/${FINAL_KEY}") + set(FINAL_KEY_REL_PATH "${BUILD_CONFIGURATION}/${FINAL_KEY}") + set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") set(NBL_NSC_COMPILE_COMMAND "$" @@ -1514,7 +1515,7 @@ namespace @IMPL_NAMESPACE@ { NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" - NBL_SPIRV_ACCESS_KEY "${FINAL_KEY}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" ) set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") From 34b1de46a0e75eb15a7b1e178808741fbb3d80bc Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 23:27:51 +0700 Subject: [PATCH 122/157] Fix default blit_blit.comp.hlsl --- include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl index 1407d7fc77..4b97bbc08f 100644 --- a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl +++ 
b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl @@ -59,6 +59,7 @@ using namespace nbl::hlsl::blit; // TODO: push constants [numthreads(ConstevalParameters::WorkGroupSize,1,1)] +[shader("compute")] void main() { InImgAccessor inImgA; From 67e6e5031b2875ca605e532c5dfb31a71cd247ac Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 23:35:25 +0700 Subject: [PATCH 123/157] Fix ternary operator --- include/nbl/builtin/hlsl/functional.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 5af6c98008..3f1043a5e2 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -236,7 +236,7 @@ struct maximum }; #ifndef __HLSL_VERSION -template requires(is_same_v, std::invoke_result_t()> ) +template requires(is_same_v, std::invoke_result_t > ) struct ternary_operator { using type_t = std::invoke_result_t; From 5698cf050362bcfed179d62ec9390d5f08446a6d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 13 Dec 2025 00:16:12 +0700 Subject: [PATCH 124/157] Add missing include --- include/nbl/builtin/hlsl/functional.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 3f1043a5e2..118fe07c63 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -8,6 +8,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" #include "nbl/builtin/hlsl/concepts/vector.hlsl" +#include "nbl/builtin/hlsl/array_accessors.hlsl" namespace nbl From 7d9611d8e40e5f2b0a2e9339a3aa654bf739a08d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 13 Dec 2025 02:23:04 +0700 Subject: [PATCH 125/157] Fix mix partial specialization --- .../nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 8a745fc4ef..a5e48debbf 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -48,7 +48,7 @@ NBL_VALID_EXPRESSION(MixIsCallable, (T)(U), glm::mix(declval(),declval(),d template NBL_BOOL_CONCEPT MixCallingBuiltins = #ifdef __HLSL_VERSION -(spirv::FMixIsCallable && is_same_v) || spirv::SelectIsCallable; +(spirv::FMixIsCallable && is_same_v); #else MixIsCallable; #endif @@ -968,13 +968,13 @@ struct mix_helper NBL_PARTIAL_REQ_TOP(concepts::Vectorial && concepts::BooleanScalar) -struct mix_helper && concepts::BooleanScalar) > +template NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) +struct mix_helper || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - return select_helper(a, y, x); + return select_helper::__call(a, y, x); } }; From e44a8fbc95792a1dcefe2440db1f18e3e525cf79 Mon Sep 17 00:00:00 2001 From: Przemog1 <32484732+Przemog1@users.noreply.github.com> Date: Fri, 12 Dec 2025 21:33:17 +0100 Subject: [PATCH 126/157] Fixed nsc command line arguments --- cmake/common.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index c61c13714f..16ea1aee06 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1185,8 +1185,8 @@ struct DeviceConfigCaps -Zpr -spirv -fspv-target-env=vulkan1.3 - -WShadow - -WConversion + -Wshadow + -Wconversion $<$:-O0> $<$:-O3> $<$:-O3> @@ -1599,4 +1599,4 @@ function(NBL_CREATE_RESOURCE_ARCHIVE) if(IMPL_LINK_TO) LINK_BUILTIN_RESOURCES_TO_TARGET(${IMPL_LINK_TO} ${IMPL_TARGET}) endif() -endfunction() \ No newline at end of file +endfunction() 
From b803f838a329f7c2eff9a9ce3d89e81868e9637b Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Fri, 12 Dec 2025 23:19:24 +0100 Subject: [PATCH 127/157] fix missing built-in --- src/nbl/builtin/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 6562fbb69b..75cb681d36 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -145,6 +145,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/indirect_commands.hlsl") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_common_member_inc.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") @@ -357,4 +358,4 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.h #morton codes LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") -ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") \ No newline at end of file +ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From d61989fb86f790f76805939519f2a467b34beea8 Mon Sep 17 00:00:00 2001 From: devsh Date: Sun, 14 Dec 2025 12:29:44 +0100 Subject: [PATCH 128/157] post merge submodule pointer update also prepare new release --- CMakeLists.txt | 2 +- examples_tests | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/CMakeLists.txt b/CMakeLists.txt index bedb9f1dc2..2235512d1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ include(ExternalProject) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/toolchains/android/build.cmake) project(Nabla - VERSION 0.8.0.1 + VERSION 0.9.0.0 HOMEPAGE_URL "https://www.devsh.eu/nabla" LANGUAGES CXX C ) diff --git a/examples_tests b/examples_tests index eb7d4fe788..4ab1de2235 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit eb7d4fe788fb5e88b8b475c979586e050e202b00 +Subproject commit 4ab1de2235365833db2d089259000bec2bcce3e3 From a30f08314c92245e5c6761012da9e767fef8c912 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 12:20:05 +0700 Subject: [PATCH 129/157] fix quantized sequence mask being too small, assert conditions --- .../hlsl/sampling/quantized_sequence.hlsl | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 27588dd9e0..8ea31cbe71 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -71,7 +71,8 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + // NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<21>::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) { @@ -118,7 +119,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << 
BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; @@ -161,13 +162,13 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx > 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; @@ -182,15 +183,16 @@ struct QuantizedSequence 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { + const scalar_type trunc_val = value >> DiscardBits; data[idx] &= ~Mask; - data[idx] |= (value >> DiscardBits) & Mask; + data[idx] |= trunc_val &Mask; } else { - const scalar_type zbits = StoreBits-BitsPerComponent; + const uint16_t zbits = StoreBits-BitsPerComponent; const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; @@ -211,20 +213,20 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - 
BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx > 0 && idx < 4); + assert(idx >= 0 && idx < 4); const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; } void set(const uint16_t idx, const scalar_type value) { - assert(idx > 0 && idx < 4); + assert(idx >= 0 && idx < 4); const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); const uint16_t odd = idx & uint16_t(1u); data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); @@ -245,7 +247,7 @@ struct QuantizedSequence 0 && idx < 2); + assert(idx >= 0 && idx < 2); base_type a; a[0] = data[uint16_t(2u) * idx]; a[1] = data[uint16_t(2u) * idx + 1]; @@ -254,7 +256,7 @@ struct QuantizedSequence 0 && idx < 2); + assert(idx >= 0 && idx < 2); base_type a; data[uint16_t(2u) * idx] = value[0]; data[uint16_t(2u) * idx + 1] = value[1]; @@ -275,13 +277,13 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { - assert(idx > 0 && idx < 3); + assert(idx >= 0 && idx < 3); base_type a; a[0] = data[idx]; a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; @@ -290,7 +292,7 @@ struct QuantizedSequence 0 && idx < 3); + assert(idx >= 0 && idx < 3); data[idx] = value[0]; data[3] &= ~Mask; data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); From 
7d16cb26736dc326c03a37d7d18c8a696e41f6d0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 14:02:09 +0700 Subject: [PATCH 130/157] fixed problems from merging master --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 4 ++-- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 0c498efb79..b13abc6632 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -617,7 +617,7 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123p; - Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(m)*D, hlsl::promote(m)*(phi23p+phi21p)); + Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m))*(phi23p+phi21p)); I += Cm*Sm; } @@ -631,7 +631,7 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123s; - Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(m)*D, hlsl::promote(m) *(phi23s+phi21s)); + Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m)) *(phi23s+phi21s)); I += Cm*Sm; } diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 6a8476e644..1887f4b51f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -22,7 +22,7 @@ struct Promote }; template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) -struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > +struct Promote && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) > { NBL_CONSTEXPR_FUNC To operator()(const From v) { 
diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 02495e2f2e..9190a4ec73 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -347,11 +347,6 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, bool> any(BooleanVector vec); -// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) -template && (! concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) -[[vk::ext_instruction(spv::OpSelect)]] -ResultType select(Condition condition, ResultType object1, ResultType object2); - template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); From 965e028a96a18e4b89fc4597504281b49093bf42 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 15:54:29 +0700 Subject: [PATCH 131/157] fix decode scramble key, shifting discard bits in quantization --- .../hlsl/sampling/quantized_sequence.hlsl | 38 +++++++++---------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 8ea31cbe71..24ca8eb66d 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -52,12 +52,11 @@ struct decode_helper using return_type = vector; NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { - random::PCG32 pcg = random::PCG32::construct(scrambleSeed); uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i) ^ pcg(); + 
seqVal[i] = val.get(i) ^ scrambleKey[i]; return return_type(seqVal) * bit_cast(UNormConstant); } }; @@ -71,16 +70,13 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - // NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<21>::value; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { - random::PCG32 pcg = random::PCG32::construct(scrambleSeed); - sequence_type scramble; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - scramble.set(i, pcg()); + scramble.set(i, scrambleKey[i]); scramble.data ^= val.data; uvec_type seqVal; @@ -92,9 +88,9 @@ struct decode_helper } template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) { - return impl::decode_helper::__call(val, scrambleSeed); + return impl::decode_helper::__call(val, scrambleKey); } #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 @@ -162,13 +158,13 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << 
BitsPerComponent) - scalar_type(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx >= 0 && idx < 3); + // assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; @@ -176,24 +172,24 @@ struct QuantizedSequence> BitsPerComponent; - z |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + z |= (data[1] >> BitsPerComponent) << DiscardBits; return z; } } void set(const uint16_t idx, const scalar_type value) { - assert(idx >= 0 && idx < 3); + // assert(idx >= 0 && idx < 3); if (idx < 2) { const scalar_type trunc_val = value >> DiscardBits; data[idx] &= ~Mask; - data[idx] |= trunc_val &Mask; + data[idx] |= trunc_val & Mask; } else { - const uint16_t zbits = StoreBits-BitsPerComponent; - const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); + const scalar_type zbits = scalar_type(DiscardBits); + const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; data[1] &= Mask; @@ -214,7 +210,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) @@ -278,7 +274,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = 
(uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) From cfd55006be6e34deb69ce891f8145c78230c7e68 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 17:03:08 +0700 Subject: [PATCH 132/157] fix z component storing too many bits in quantized sequence in vec2 data type for dim 3 --- examples_tests | 2 +- .../builtin/hlsl/sampling/quantized_sequence.hlsl | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples_tests b/examples_tests index 6f9bdc3b18..ea3ec9e728 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 6f9bdc3b182f8bcd2cd699a4c6d092240e5c7f9f +Subproject commit ea3ec9e7282d2911c12f261bcc404255570eb870 diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 24ca8eb66d..8929609c34 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -164,22 +164,24 @@ struct QuantizedSequence= 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; } else { - scalar_type z = data[0] >> BitsPerComponent; - z |= (data[1] >> BitsPerComponent) << DiscardBits; + const scalar_type zbits = scalar_type(DiscardBits); + const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); + scalar_type z = (data[0] >> BitsPerComponent) & zmask; + z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits; return z; } } void set(const uint16_t idx, const scalar_type value) { - // assert(idx >= 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { const scalar_type trunc_val = value >> DiscardBits; @@ -190,7 +192,7 @@ struct QuantizedSequence> (DiscardBits-1u); + const scalar_type trunc_val = value >> DiscardBits; data[0] &= Mask; data[1] &= Mask; data[0] |= (trunc_val & zmask) << 
BitsPerComponent; From 7da0bdab2f7f9c78dcef7b398f10f255b1eefb27 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:15:57 +0100 Subject: [PATCH 133/157] get latest glm --- 3rdparty/glm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/glm b/3rdparty/glm index 2d4c4b4dd3..8f6213d379 160000 --- a/3rdparty/glm +++ b/3rdparty/glm @@ -1 +1 @@ -Subproject commit 2d4c4b4dd31fde06cfffad7915c2b3006402322f +Subproject commit 8f6213d379a904f5ae910e09a114e066e25faf57 From 290478d0d6dcee1ef64804954c58053f94f303e6 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:28:22 +0100 Subject: [PATCH 134/157] update gli now --- 3rdparty/gli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/gli b/3rdparty/gli index c4e6446d3b..2749a197e8 160000 --- a/3rdparty/gli +++ b/3rdparty/gli @@ -1 +1 @@ -Subproject commit c4e6446d3b646538026fd5a95533daed952878d4 +Subproject commit 2749a197e88f94858f4108732824b3790064f6ec From 5b634dd927a0f1606dbfb4218202f4672dc60eeb Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:32:32 +0100 Subject: [PATCH 135/157] fixed example 22 --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 4ab1de2235..4425ec1454 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4ab1de2235365833db2d089259000bec2bcce3e3 +Subproject commit 4425ec1454acd2e7771f290d7b5f08fd9dbcb07b From 53ff444984a4b55da8cbcaa2070e8ddb4c9dc079 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 10:54:43 +0700 Subject: [PATCH 136/157] mix_helper requirements include bool vectors --- include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index a5e48debbf..ad53bad2e8 100644 --- 
a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -968,8 +968,8 @@ struct mix_helper NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) -struct mix_helper || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) > +template NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) +struct mix_helper || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) From 5055713ae307adcb844d24dbc348448776a7d977 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 10:55:28 +0700 Subject: [PATCH 137/157] fix iridescent oriented eta bug --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index b13abc6632..954022e216 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -756,7 +756,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = base_type::ior1[0] / base_type::ior3[0]; + rcpEta.value = hlsl::promote(base_type::ior1[0] / base_type::ior3[0]); rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } From 3f3b5c991675f89d3d22f76ad99c2ade3d0b6c12 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 11:27:07 +0700 Subject: [PATCH 138/157] partitionRandVar stores leftProb, fix minor bugs --- .../builtin/hlsl/bxdf/base/cook_torrance_base.hlsl | 3 ++- .../hlsl/bxdf/transmission/smooth_dielectric.hlsl | 6 ++++-- include/nbl/builtin/hlsl/sampling/basic.hlsl | 12 +++++++----- .../hlsl/sampling/projected_spherical_triangle.hlsl | 2 +- 4 files 
changed, 14 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index 5e5e543791..c3de375678 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -304,7 +304,8 @@ struct SCookTorrance scalar_type rcpChoiceProb; scalar_type z = u.z; sampling::PartitionRandVariable partitionRandVariable; - bool transmitted = partitionRandVariable(reflectance, z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectance; + bool transmitted = partitionRandVariable(z, rcpChoiceProb); const scalar_type LdotH = hlsl::mix(VdotH, ieee754::copySign(hlsl::sqrt(rcpEta.value2[0]*VdotH*VdotH + scalar_type(1.0) - rcpEta.value2[0]), -VdotH), transmitted); bool valid; diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl index 712b614755..6d5744fb49 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl @@ -41,7 +41,8 @@ struct SSmoothDielectric scalar_type rcpChoiceProb; sampling::PartitionRandVariable partitionRandVariable; - bool transmitted = partitionRandVariable(reflectance, u.z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectance; + bool transmitted = partitionRandVariable(u.z, rcpChoiceProb); ray_dir_info_type V = interaction.getV(); Refract r = Refract::create(V.getDirection(), interaction.getN()); @@ -128,7 +129,8 @@ struct SThinSmoothDielectric scalar_type rcpChoiceProb; scalar_type z = u.z; sampling::PartitionRandVariable partitionRandVariable; - const bool transmitted = partitionRandVariable(reflectionProb, z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectionProb; + const bool transmitted = partitionRandVariable(z, rcpChoiceProb); remainderMetadata = hlsl::mix(reflectance, 
hlsl::promote(1.0) - reflectance, transmitted) * rcpChoiceProb; ray_dir_info_type V = interaction.getV(); diff --git a/include/nbl/builtin/hlsl/sampling/basic.hlsl b/include/nbl/builtin/hlsl/sampling/basic.hlsl index d0738dd930..9c575a22ce 100644 --- a/include/nbl/builtin/hlsl/sampling/basic.hlsl +++ b/include/nbl/builtin/hlsl/sampling/basic.hlsl @@ -19,14 +19,14 @@ template) struct PartitionRandVariable { using floating_point_type = T; - using uint_type = typename unsigned_integer_of_size::type; + using uint_type = unsigned_integer_of_size_t; - bool operator()(floating_point_type leftProb, NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) + bool operator()(NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) { - const floating_point_type NEXT_ULP_AFTER_UNITY = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); - const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; + const floating_point_type NextULPAfterUnity = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); + const bool pickRight = xi >= leftProb * NextULPAfterUnity; - // This is all 100% correct taking into account the above NEXT_ULP_AFTER_UNITY + // This is all 100% correct taking into account the above NextULPAfterUnity xi -= pickRight ? leftProb : floating_point_type(0.0); rcpChoiceProb = floating_point_type(1.0) / (pickRight ? 
(floating_point_type(1.0) - leftProb) : leftProb); @@ -34,6 +34,8 @@ struct PartitionRandVariable return pickRight; } + + floating_point_type leftProb; }; diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index f2f29ed12b..0578af5b19 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -49,7 +49,7 @@ struct ProjectedSphericalTriangle // pre-warp according to proj solid angle approximation vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); Bilinear bilinear = Bilinear::create(patch); - u = bilinear.generate(rcpPdf, u); + u = bilinear.generate(rcpPdf, _u); // now warp the points onto a spherical triangle const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); From db454c0a3aef25044294e926504ab057f443a992 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 11:46:51 +0700 Subject: [PATCH 139/157] plain const for vector types --- include/nbl/builtin/hlsl/sampling/bilinear.hlsl | 12 ++++++------ .../builtin/hlsl/sampling/box_muller_transform.hlsl | 2 +- .../builtin/hlsl/sampling/concentric_mapping.hlsl | 2 +- .../builtin/hlsl/sampling/cos_weighted_spheres.hlsl | 10 +++++----- include/nbl/builtin/hlsl/sampling/linear.hlsl | 4 ++-- .../hlsl/sampling/projected_spherical_triangle.hlsl | 12 ++++++------ .../builtin/hlsl/sampling/spherical_rectangle.hlsl | 2 +- .../builtin/hlsl/sampling/spherical_triangle.hlsl | 8 ++++---- .../nbl/builtin/hlsl/sampling/uniform_spheres.hlsl | 4 ++-- .../nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl | 6 +++--- .../nbl/builtin/hlsl/shapes/spherical_triangle.hlsl | 6 +++--- 11 files changed, 34 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 
746713e4c4..a74869990f 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -24,7 +24,7 @@ struct Bilinear using vector3_type = vector; using vector4_type = vector; - static Bilinear create(NBL_CONST_REF_ARG(vector4_type) bilinearCoeffs) + static Bilinear create(const vector4_type bilinearCoeffs) { Bilinear retval; retval.bilinearCoeffs = bilinearCoeffs; @@ -32,22 +32,22 @@ struct Bilinear return retval; } - vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) _u) + vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type _u) { - vector2_type u = _u; + vector2_type u; Linear lineary = Linear::create(twiceAreasUnderXCurve); - u.y = lineary.generate(u.y); + u.y = lineary.generate(_u.y); const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], u.y), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], u.y)); Linear linearx = Linear::create(ySliceEndPoints); - u.x = linearx.generate(u.x); + u.x = linearx.generate(_u.x); rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x)); return u; } - scalar_type pdf(NBL_CONST_REF_ARG(vector2_type) u) + scalar_type pdf(const vector2_type u) { return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); } diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index 93cea06ee0..9474642f4c 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -21,7 +21,7 @@ struct BoxMullerTransform using scalar_type = T; using vector2_type = vector; - vector2_type 
operator()(vector2_type xi) + vector2_type operator()(const vector2_type xi) { scalar_type sinPhi, cosPhi; math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index 1a5c96b6df..841fc9ff2d 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl +++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -17,7 +17,7 @@ namespace sampling { template -vector concentricMapping(vector _u) +vector concentricMapping(const vector _u) { //map [0;1]^2 to [-1;1]^2 vector u = 2.0f * _u - hlsl::promote >(1.0); diff --git a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl index 9f95bf2ee5..ddbb961300 100644 --- a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl @@ -22,26 +22,26 @@ struct ProjectedHemisphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { vector_t2 p = concentricMapping(_sample * T(0.99999) + T(0.000005)); T z = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - p.x * p.x - p.y * p.y)); return vector_t3(p.x, p.y, z); } - static T pdf(T L_z) + static T pdf(const T L_z) { return L_z * numbers::inv_pi; } template > - static sampling::quotient_and_pdf quotient_and_pdf(T L) + static sampling::quotient_and_pdf quotient_and_pdf(const T L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L)); } template > - static sampling::quotient_and_pdf quotient_and_pdf(vector_t3 L) + static sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); } @@ -77,7 +77,7 @@ struct ProjectedSphere } template > - static sampling::quotient_and_pdf quotient_and_pdf(vector_t3 L) + static 
sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); } diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index ddd7bcf8df..6c3cf1fad9 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -21,7 +21,7 @@ struct Linear using scalar_type = T; using vector2_type = vector; - static Linear create(NBL_CONST_REF_ARG(vector2_type) linearCoeffs) // start and end importance values (start, end) + static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end) { Linear retval; retval.linearCoeffStart = linearCoeffs[0]; @@ -32,7 +32,7 @@ struct Linear return retval; } - scalar_type generate(scalar_type u) + scalar_type generate(const scalar_type u) { return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); } diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 0578af5b19..e60fe28423 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -33,17 +33,17 @@ struct ProjectedSphericalTriangle return retval; } - vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF) + vector4_type computeBilinearPatch(const vector3_type receiverNormal, bool isBSDF) { const scalar_type minimumProjSolidAngle = 0.0; matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); - const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), (vector3_type)minimumProjSolidAngle); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), 
hlsl::promote(minimumProjSolidAngle)); return bxdfPdfAtVertex.yyxz; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) _u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool isBSDF, const vector2_type _u) { vector2_type u; // pre-warp according to proj solid angle approximation @@ -58,7 +58,7 @@ struct ProjectedSphericalTriangle return L; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; @@ -66,7 +66,7 @@ struct ProjectedSphericalTriangle return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); } - scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) { scalar_type pdf; const 
vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); @@ -76,7 +76,7 @@ struct ProjectedSphericalTriangle return pdf * bilinear.pdf(u); } - scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) { scalar_type pdf; const vector2_type u = sphtri.generateInverse(pdf, L); diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index f5c19fb864..f9e3d2f7ae 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -32,7 +32,7 @@ struct SphericalRectangle return retval; } - vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S) + vector2_type generate(const vector2_type rectangleExtents, const vector2_type uv, NBL_REF_ARG(scalar_type) S) { const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index c31e194788..5770403cd2 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -33,7 +33,7 @@ struct SphericalTriangle } // WARNING: can and will return NAN if one or three of the triangle edges are near zero length - vector3_type generate(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type 
csc_c, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u) { scalar_type negSinSubSolidAngle,negCosSubSolidAngle; math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); @@ -66,7 +66,7 @@ struct SphericalTriangle return retval; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; @@ -76,7 +76,7 @@ struct SphericalTriangle return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); } - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) L) + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L) { pdf = 1.0 / solidAngle; @@ -102,7 +102,7 @@ struct SphericalTriangle return vector2_type(u,v); } - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(vector3_type) L) + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; diff --git a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl index df4100db9b..5fc3bc7a0b 100644 --- a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl +++ 
b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl @@ -23,7 +23,7 @@ struct UniformHemisphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { T z = _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); @@ -49,7 +49,7 @@ struct UniformSphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { T z = T(1.0) - T(2.0) * _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index daeb3175c3..11442bef7c 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -25,14 +25,14 @@ struct SphericalRectangle using vector4_type = vector; using matrix3x3_type = matrix; - static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis) + static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const matrix3x3_type basis) { SphericalRectangle retval; retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer); return retval; } - static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N) + static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const vector3_type T, vector3_type B, const vector3_type N) { SphericalRectangle retval; matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, N)); @@ -40,7 +40,7 @@ struct SphericalRectangle return retval; } - scalar_type 
solidAngleOfRectangle(NBL_CONST_REF_ARG(vector) rectangleExtents) + scalar_type solidAngleOfRectangle(const vector rectangleExtents) { const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index f0b184d057..7304fa72e9 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -25,7 +25,7 @@ struct SphericalTriangle using scalar_type = T; using vector3_type = vector; - static SphericalTriangle create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, NBL_CONST_REF_ARG(vector3_type) origin) + static SphericalTriangle create(const vector3_type vertex0, const vector3_type vertex1, const vector3_type vertex2, const vector3_type origin) { SphericalTriangle retval; retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); @@ -72,7 +72,7 @@ struct SphericalTriangle return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5); } - scalar_type projectedSolidAngleOfTriangle(NBL_CONST_REF_ARG(vector3_type) receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) + scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) { if (pyramidAngles()) return 0.f; @@ -106,7 +106,7 @@ namespace util { // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
template - vector compInternalAngle(NBL_CONST_REF_ARG(vector) e0, NBL_CONST_REF_ARG(vector) e1, NBL_CONST_REF_ARG(vector) e2) + vector compInternalAngle(const vector e0, vector e1, const vector e2) { // Calculate this triangle's weight for each of its three m_vertices // start by calculating the lengths of its sides From c353ab3247444fc8fd3cc53a6e97b353c868e5a2 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 14:11:16 +0700 Subject: [PATCH 140/157] fixes to iridescent fresnel, moved getOrientedEtaRcp to dielectric fresnels only --- .../hlsl/bxdf/base/cook_torrance_base.hlsl | 2 +- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 51 ++++++++++--------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index c3de375678..d70e8823da 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -280,7 +280,7 @@ struct SCookTorrance const scalar_type NdotV = localV.z; fresnel_type _f = __getOrientedFresnel(fresnel, NdotV); - fresnel::OrientedEtaRcps rcpEta = _f.getOrientedEtaRcps(); + fresnel::OrientedEtaRcps rcpEta = _f.getRefractionOrientedEtaRcps(); const vector3_type upperHemisphereV = ieee754::flipSignIfRHSNegative(localV, hlsl::promote(NdotV)); const vector3_type localH = ndf.generateH(upperHemisphereV, u.xy); diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 954022e216..d32d3de16c 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -313,9 +313,7 @@ NBL_CONCEPT_BEGIN(2) NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) ((NBL_CONCEPT_REQ_TYPE)(T::vector_type)) - ((NBL_CONCEPT_REQ_TYPE)(T::eta_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel(cosTheta)), ::nbl::hlsl::is_same_v, typename T::vector_type)) - 
((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ); #undef cosTheta #undef fresnel @@ -331,7 +329,9 @@ NBL_CONCEPT_BEGIN(2) #define cosTheta NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(Fresnel, T)) + ((NBL_CONCEPT_REQ_TYPE)(T::eta_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEta()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getReorientedFresnel(cosTheta)), ::nbl::hlsl::is_same_v, T)) ); #undef cosTheta @@ -362,7 +362,7 @@ struct Schlick return F0 + (1.0 - F0) * x*x*x*x*x; } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { const eta_type sqrtF0 = hlsl::sqrt(F0); OrientedEtaRcps rcpEta; @@ -424,13 +424,13 @@ struct Conductor return (rs2 + rp2) * hlsl::promote(0.5); } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC - { - OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / eta; - rcpEta.value2 = rcpEta.value * rcpEta.value; - return rcpEta; - } + // OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + // { + // OrientedEtaRcps rcpEta; + // rcpEta.value = hlsl::promote(1.0) / eta; + // rcpEta.value2 = rcpEta.value * rcpEta.value; + // return rcpEta; + // } T eta; T etak2; @@ -484,7 +484,7 @@ struct Dielectric // default to monochrome, but it is possible to have RGB fresnel without dispersion fixing the refraction Eta // to be something else than the etas used to compute RGB reflectance or some sort of interpolation of them scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return orientedEta.value[0]; } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { return orientedEta.getReciprocals(); } + OrientedEtaRcps 
getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { return orientedEta.getReciprocals(); } Dielectric getReorientedFresnel(const scalar_type NdotI) NBL_CONST_MEMBER_FUNC { @@ -548,8 +548,6 @@ struct iridescent_helper static T __call(const vector_type _D, const vector_type ior1, const vector_type ior2, const vector_type ior3, const vector_type iork3, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) { - const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B); - const scalar_type cosTheta_1 = clampedCosTheta; vector_type R12p, R23p, R12s, R23s; vector_type cosTheta_2; @@ -589,7 +587,6 @@ struct iridescent_helper // Optical Path Difference const vector_type D = _D * cosTheta_2; - const vector_type Dphi = hlsl::promote(2.0 * numbers::pi) * D / wavelengths; vector_type phi21p, phi21s, phi23p, phi23s, r123s, r123p, Rs; vector_type I = hlsl::promote(0.0); @@ -635,7 +632,7 @@ struct iridescent_helper I += Cm*Sm; } - return hlsl::max(colorspace::scRGB::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); + return hlsl::max(Colorspace::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); } }; @@ -652,6 +649,7 @@ struct iridescent_base vector_type iork3; vector_type eta12; // outside (usually air 1.0) -> thin-film IOR vector_type eta23; // thin-film -> base material IOR + vector_type eta13; }; } @@ -688,6 +686,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC - { - OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / base_type::eta23; - rcpEta.value2 = rcpEta.value * rcpEta.value; - return rcpEta; - } + // OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + // { + // OrientedEtaRcps rcpEta; + // rcpEta.value = hlsl::promote(1.0) / base_type::eta13; + // rcpEta.value2 = rcpEta.value * rcpEta.value; + // return rcpEta; + // } vector_type getEtak23() NBL_CONST_MEMBER_FUNC { @@ -743,6 +742,7 @@ struct 
Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta13[0]; } + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(base_type::ior1[0] / base_type::ior3[0]); + rcpEta.value = hlsl::promote(1.0) / hlsl::promote(base_type::eta13[0]); rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } @@ -771,6 +771,7 @@ struct Iridescent(1.0)/base_type::eta23, flip); orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); + orientedFresnel.eta13 = hlsl::mix(base_type::eta13, hlsl::promote(1.0)/base_type::eta13, flip); return orientedFresnel; } From 04f1c7637638cdefdfb3bddc3574f4298d8bd7f3 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 15:22:23 +0700 Subject: [PATCH 141/157] adjust mix_helper calling select requirements --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index ad53bad2e8..5a19a1d529 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -968,8 +968,19 @@ struct mix_helper NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) -struct mix_helper || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) > +namespace impl +{ +template +NBL_BOOL_CONCEPT MixCallingSelect = +#ifdef __HLSL_VERSION +spirv::SelectIsCallable; +#else +concepts::Boolean && (concepts::Scalar || (concepts::Vector && vector_traits::Dimension==vector_traits::Dimension)) && !MixCallingBuiltins; +#endif +} + +template NBL_PARTIAL_REQ_TOP(impl::MixCallingSelect) +struct mix_helper) > { using return_t = T; static 
return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) From 94a778fef8127a4e0ad0c75ade54a1f13593b015 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 15:35:50 +0700 Subject: [PATCH 142/157] restore regular triangle stuff, refactor usage --- .../hlsl/shapes/spherical_triangle.hlsl | 23 ---------- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 46 +++++++++++++++++++ .../asset/utils/CSmoothNormalGenerator.cpp | 4 +- src/nbl/builtin/CMakeLists.txt | 1 + 4 files changed, 49 insertions(+), 25 deletions(-) create mode 100644 include/nbl/builtin/hlsl/shapes/triangle.hlsl diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index 7304fa72e9..f574b106ce 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -102,29 +102,6 @@ struct SphericalTriangle vector3_type csc_sides; }; -namespace util -{ - // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
- template - vector compInternalAngle(const vector e0, vector e1, const vector e2) - { - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float_t a = hlsl::dot(e0, e0); - const float_t asqrt = hlsl::sqrt(a); - const float_t b = hlsl::dot(e1, e1); - const float_t bsqrt = hlsl::sqrt(b); - const float_t c = hlsl::dot(e2, e2); - const float_t csqrt = hlsl::sqrt(c); - - const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); - const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); - // use them to find the angle at each vertex - return vector(angle0, angle1, angle2); - } -} - } } } diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl new file mode 100644 index 0000000000..d64fc9d29d --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ + +namespace util +{ + // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
+ template + vector anglesFromTriangleEdge(const vector e0, vector e1, const vector e2) + { + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = hlsl::dot(e0, e0); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e1, e1); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e2, e2); + const float_t csqrt = hlsl::sqrt(c); + + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); + // use them to find the angle at each vertex + return vector(angle0, angle1, angle2); + } +} + +} +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 43413152a8..2ed1d4e19e 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -5,7 +5,7 @@ #include "CSmoothNormalGenerator.h" #include "nbl/core/declarations.h" -#include "nbl/builtin/hlsl/shapes/spherical_triangle.hlsl" +#include "nbl/builtin/hlsl/shapes/triangle.hlsl" #include @@ -58,7 +58,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::compInternalAngle(v2 - v1, v0 - v2, v1 - v2); + const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdge(v2 - v1, v0 - v2, v1 - v2); vertices.add({ i, 0, faceNormal * angleWages.x, v0}); vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 343bfa31a6..cdafa522ab 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -253,6 +253,7 @@ 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/circle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/ellipse.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_rectangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") From 9e4a16c5118d0684cdfa82ea3b374ddb57de25af Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:13:14 +0700 Subject: [PATCH 143/157] minor changes to rwmc usage --- .../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 9 +++++---- .../builtin/hlsl/rwmc/SplattingParameters.hlsl | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 593e267a26..2ab953b469 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -69,12 +69,9 @@ struct CascadeAccumulator // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp void addSample(uint32_t sampleCount, input_sample_type _sample) { - const float32_t2 unpackedParams = hlsl::unpackHalf2x16(splattingParameters.packedLog2); - const cascade_layer_scalar_type log2Start = unpackedParams[0]; - const cascade_layer_scalar_type log2Base = unpackedParams[1]; const cascade_layer_scalar_type luma = getLuma(_sample); const cascade_layer_scalar_type log2Luma = log2(luma); - const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base; + const cascade_layer_scalar_type cascade = log2Luma * splattingParameters.rcpLog2Base - 
splattingParameters.baseRootOfStart; const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1); // c<=0 -> 0, c>=Count-1 -> Count-1 uint32_t lowerCascadeIndex = floor(cascade); @@ -85,7 +82,11 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) + { + const cascade_layer_scalar_type log2Base = cascade_layer_scalar_type(1.0) / splattingParameters.rcpLog2Base; + const cascade_layer_scalar_type log2Start = splattingParameters.baseRootOfStart * log2Base; lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); + } accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index c549d83be6..df39660d95 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" namespace nbl { @@ -12,10 +13,18 @@ namespace rwmc struct SplattingParameters { - // float16_t log2Start; 0 - // float16_t log2Base; 1 - // pack as Half2x16 - int32_t packedLog2; + using scalar_t = float; + + static SplattingParameters create(const scalar_t base, const scalar_t start) + { + SplattingParameters retval; + retval.rcpLog2Base = scalar_t(1.0) / hlsl::log2(base); + retval.baseRootOfStart = hlsl::log2(start) * retval.rcpLog2Base; + return retval; + } + + scalar_t baseRootOfStart; + scalar_t rcpLog2Base; }; } From 62c79b40e00edcda8a3599c149198c2fb14fe88c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:26:12 +0700 Subject: [PATCH 144/157] remove storing texture inside local var of ResolveAccessorAdaptor --- include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 2 -- 1 file changed, 2 
deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index d8f777d277..6c2b8b3230 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -48,8 +48,6 @@ struct ResolveAccessorAdaptor using output_type = vector; NBL_CONSTEXPR int32_t image_dimension = 2; - RWTexture2DArray cascade; - float32_t calcLuma(NBL_REF_ARG(float32_t3) col) { return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); From 5075c6385b93a0ca4f29c22a36a4f3ba026d13c5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:53:31 +0700 Subject: [PATCH 145/157] removed accessor, user should provide accessor that matches concept --- include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 26 ---------------------- 1 file changed, 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index 6c2b8b3230..906cad512b 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -41,32 +41,6 @@ NBL_CONCEPT_END( template NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase && concepts::accessors::LoadableImage; -template -struct ResolveAccessorAdaptor -{ - using output_scalar_type = OutputScalar; - using output_type = vector; - NBL_CONSTEXPR int32_t image_dimension = 2; - - float32_t calcLuma(NBL_REF_ARG(float32_t3) col) - { - return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); - } - - template - output_type get(vector uv, uint16_t layer) - { - uint32_t imgWidth, imgHeight, layers; - cascade.GetDimensions(imgWidth, imgHeight, layers); - int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight); - - if (any(uv < int16_t2(0, 0)) || any(uv > cascadeImageDimension)) - return vector(0, 0, 0, 0); - - return cascade.Load(int32_t3(uv, int32_t(layer))); - } -}; - template && ResolveAccessor) struct Resolver { From 90007e68b7b3778f5532f4df35854283a7316515 Mon Sep 17 00:00:00 2001 From: keptsecret 
Date: Tue, 16 Dec 2025 17:02:49 +0700 Subject: [PATCH 146/157] fix formatting, name --- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 36 +++++++++---------- .../asset/utils/CSmoothNormalGenerator.cpp | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index d64fc9d29d..b2f4170f70 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -18,25 +18,25 @@ namespace shapes namespace util { - // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. - template - vector anglesFromTriangleEdge(const vector e0, vector e1, const vector e2) - { - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float_t a = hlsl::dot(e0, e0); - const float_t asqrt = hlsl::sqrt(a); - const float_t b = hlsl::dot(e1, e1); - const float_t bsqrt = hlsl::sqrt(b); - const float_t c = hlsl::dot(e2, e2); - const float_t csqrt = hlsl::sqrt(c); +// Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
+template +vector anglesFromTriangleEdges(const vector e0, vector e1, const vector e2) +{ + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = hlsl::dot(e0, e0); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e1, e1); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e2, e2); + const float_t csqrt = hlsl::sqrt(c); - const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); - const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); - // use them to find the angle at each vertex - return vector(angle0, angle1, angle2); - } + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); + // use them to find the angle at each vertex + return vector(angle0, angle1, angle2); +} } } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 2ed1d4e19e..f8bc45a317 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -58,7 +58,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdge(v2 - v1, v0 - v2, v1 - v2); + const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdges(v2 - v1, v0 - v2, v1 - v2); vertices.add({ i, 0, faceNormal * angleWages.x, v0}); vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); From 3d36c1392cd9830857500cda363baa7d2df83300 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:09:48 +0700 Subject: [PATCH 147/157] added more things to precompute --- 
include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 6 +----- include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl | 10 +++++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 2ab953b469..1ed8884206 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -82,11 +82,7 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) - { - const cascade_layer_scalar_type log2Base = cascade_layer_scalar_type(1.0) / splattingParameters.rcpLog2Base; - const cascade_layer_scalar_type log2Start = splattingParameters.baseRootOfStart * log2Base; - lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); - } + lowerCascadeWeight = exp2(splattingParameters.log2Start + splattingParameters.log2Base * (CascadeCount - 1) - log2Luma); accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index df39660d95..a6c479a8e2 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -18,11 +18,15 @@ struct SplattingParameters static SplattingParameters create(const scalar_t base, const scalar_t start) { SplattingParameters retval; - retval.rcpLog2Base = scalar_t(1.0) / hlsl::log2(base); - retval.baseRootOfStart = hlsl::log2(start) * retval.rcpLog2Base; + retval.log2Base = hlsl::log2(base); + retval.log2Start = hlsl::log2(start); + retval.rcpLog2Base = scalar_t(1.0) / retval.log2Base; + retval.baseRootOfStart = retval.log2Start * retval.rcpLog2Base; return retval; } - + + scalar_t log2Base; + scalar_t log2Start; scalar_t baseRootOfStart; scalar_t rcpLog2Base; }; From 
62f2c99d01166ea2e077bc73856727d5af016d43 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:27:55 +0700 Subject: [PATCH 148/157] changes to splatting params precompute for the last time --- .../nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 2 +- .../nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 1ed8884206..9413bcee98 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -82,7 +82,7 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) - lowerCascadeWeight = exp2(splattingParameters.log2Start + splattingParameters.log2Base * (CascadeCount - 1) - log2Luma); + lowerCascadeWeight = splattingParameters.lastCascadeLuma / luma; accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index a6c479a8e2..a3a3520415 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -15,18 +15,18 @@ struct SplattingParameters { using scalar_t = float; - static SplattingParameters create(const scalar_t base, const scalar_t start) + static SplattingParameters create(const scalar_t base, const scalar_t start, const uint32_t cascadeCount) { SplattingParameters retval; - retval.log2Base = hlsl::log2(base); - retval.log2Start = hlsl::log2(start); - retval.rcpLog2Base = scalar_t(1.0) / retval.log2Base; - retval.baseRootOfStart = retval.log2Start * retval.rcpLog2Base; + const scalar_t log2Base = hlsl::log2(base); + const scalar_t log2Start = hlsl::log2(start); + retval.lastCascadeLuma = 
hlsl::exp2(log2Start + log2Base * (cascadeCount - 1)); + retval.rcpLog2Base = scalar_t(1.0) / log2Base; + retval.baseRootOfStart = log2Start * retval.rcpLog2Base; return retval; } - scalar_t log2Base; - scalar_t log2Start; + scalar_t lastCascadeLuma; scalar_t baseRootOfStart; scalar_t rcpLog2Base; }; From f3f60c64da31c852aea6118200c6f5c86db3d829 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:41:18 +0700 Subject: [PATCH 149/157] check that eta type of dielectric fresnels should be monochrome --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index d32d3de16c..33faa79efc 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -319,6 +319,12 @@ NBL_CONCEPT_END( #undef fresnel #include +namespace impl +{ +template +NBL_BOOL_CONCEPT VectorIsMonochrome = vector_traits::Dimension == 1; +} + #define NBL_CONCEPT_NAME TwoSidedFresnel #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (T) @@ -333,6 +339,7 @@ NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEta()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getReorientedFresnel(cosTheta)), ::nbl::hlsl::is_same_v, T)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(impl::VectorIsMonochrome, typename T::eta_type)) ); #undef cosTheta #undef fresnel From b4d957d40bf3ad1967b20d65cbe929b2965eb50a Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Tue, 16 Dec 2025 13:02:12 +0100 Subject: [PATCH 150/157] Change `Compile flag error` to `Compile flag warning` so CI logs are easier to Ctrl+F --- src/nbl/asset/utils/CHLSLCompiler.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 306d2f60de..d36ecfa1cb 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -115,11 +115,11 @@ static bool fixup_spirv_target_ver(std::vector& arguments, system: const auto found = AllowedSuffices.find(suffix); if (found!=AllowedSuffices.end()) return true; - logger.log("Compile flag error: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage"); + logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage"); return false; } - logger.log("Compile flag error: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING); + logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING); arguments.push_back(L"-fspv-target-env=vulkan1.3"); return true; } @@ -148,7 +148,7 @@ static void try_upgrade_hlsl_version(std::vector& arguments, syste } else { - logger.log("Compile flag error: Required compile flag not found -HV. Force enabling -HV 202x, as it is required by Nabla.", system::ILogger::ELL_WARNING); + logger.log("Compile flag warning: Required compile flag not found -HV. 
Force enabling -HV 202x, as it is required by Nabla.", system::ILogger::ELL_WARNING); arguments.push_back(L"-HV"); arguments.push_back(L"202x"); } @@ -254,7 +254,7 @@ static void add_required_arguments_if_not_present(std::vector& arg { bool missing = set.find(required[j]) == set.end(); if (missing) { - logger.log("Compile flag error: Required compile flag not found %ls. This flag will be force enabled, as it is required by Nabla.", system::ILogger::ELL_WARNING, required[j]); + logger.log("Compile flag warning: Required compile flag not found %ls. This flag will be force enabled, as it is required by Nabla.", system::ILogger::ELL_WARNING, required[j]); arguments.push_back(required[j]); } } @@ -534,4 +534,4 @@ void CHLSLCompiler::insertIntoStart(std::string& code, std::ostringstream&& ins) code.insert(0u, ins.str()); } -#endif \ No newline at end of file +#endif From e714c2469357633bd17a26b693e9157c94116dd8 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 17 Dec 2025 22:25:41 +0300 Subject: [PATCH 151/157] RandomSampler can give floats now, ranged and [0, 1), also update examples submodule --- examples_tests | 2 +- include/nbl/core/sampling/RandomSampler.h | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/examples_tests b/examples_tests index a35eddd1bd..1c6458d81b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a35eddd1bd83fbf636e820b59c6eef939ed09668 +Subproject commit 1c6458d81b83aea176ac7ebda7450a9b395a85bd diff --git a/include/nbl/core/sampling/RandomSampler.h b/include/nbl/core/sampling/RandomSampler.h index 39832dc8f1..b692ef5e08 100644 --- a/include/nbl/core/sampling/RandomSampler.h +++ b/include/nbl/core/sampling/RandomSampler.h @@ -11,8 +11,8 @@ namespace nbl::core { -class RandomSampler -{ + class RandomSampler + { public: RandomSampler(uint32_t _seed) { @@ -25,9 +25,24 @@ class RandomSampler return mersenneTwister(); } + // Returns a float in [0, 1) + inline float nextFloat() + 
{ + // 1 / 2^32 + constexpr float norm = 1.0f / 4294967296.0f; + return mersenneTwister() * norm; + } + + // Returns a float in [min, max) + inline float nextFloat(float min, float max) + { + constexpr float norm = 1.0f / 4294967296.0f; + return min + (mersenneTwister() * norm) * (max - min); + } + protected: std::mt19937 mersenneTwister; -}; + }; } From 6741c756172abb1e8095e9c153cecc3207622313 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Thu, 18 Dec 2025 01:11:29 +0300 Subject: [PATCH 152/157] update examples submodules --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 1c6458d81b..2e306fc96b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1c6458d81b83aea176ac7ebda7450a9b395a85bd +Subproject commit 2e306fc96bfae85a9669ad552751cece33d1b383 From 92545a557f6231d8a84275e75228f235ea7b4e41 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Thu, 18 Dec 2025 02:25:03 +0300 Subject: [PATCH 153/157] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 2e306fc96b..12486d4670 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2e306fc96bfae85a9669ad552751cece33d1b383 +Subproject commit 12486d4670f0453722351814996d91f198a16749 From 32de44d2d31f0ee80292a255b3df5bd824f218f2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 18 Dec 2025 16:23:48 +0100 Subject: [PATCH 154/157] Create docs for NSC prebuilds --- docs/nsc-prebuilds.md | 386 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 386 insertions(+) create mode 100644 docs/nsc-prebuilds.md diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md new file mode 100644 index 0000000000..4d57d7a8de --- /dev/null +++ b/docs/nsc-prebuilds.md @@ -0,0 +1,386 @@ +# NSC prebuilds (build-time HLSL -> SPIR-V) + +This document explains how to use `NBL_CREATE_NSC_COMPILE_RULES` together 
with `NBL_CREATE_RESOURCE_ARCHIVE` to: + +- Compile HLSL to SPIR-V at **build time** (via the `nsc` tool). +- Optionally generate **device-cap permutations** (limits/features "CAPS"). +- Generate a small C++ header with **type-safe key getters** (`get_spirv_key<...>()`). +- Make the same code work with `NBL_EMBED_BUILTIN_RESOURCES` **ON** (embedded virtual archive) and **OFF** (mounted build directory) when loading your precompiled SPIR-V at runtime. + +Definitions live in `cmake/common.cmake` (`NBL_CREATE_NSC_COMPILE_RULES`, `NBL_CREATE_RESOURCE_ARCHIVE`). + +## Runtime mounting requirement (important) + +All of this assumes your app mounts the directory/archive containing the NSC outputs (i.e. `BINARY_DIR`) into Nabla's virtual filesystem, then loads files via keys that are relative to that mounted root (the examples use `app_resources`). + +The examples "just work" because they inherit from `nbl::examples::BuiltinResourcesApplication`, which mounts: + +- `NBL_EMBED_BUILTIN_RESOURCES=OFF`: `system::CMountDirectoryArchive(NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT, ...)` at `app_resources` +- `NBL_EMBED_BUILTIN_RESOURCES=ON`: the generated embedded archive (e.g. `nbl::this_example::builtin::build::CArchive`) at `app_resources` + +If you're writing your own app/extension and don't use `BuiltinResourcesApplication`, you must mount equivalently yourself (split by `NBL_EMBED_BUILTIN_RESOURCES`). Optionally set `IAssetLoader::SAssetLoadParams::workingDirectory` to whatever virtual root you want to load from. + +The `MOUNT_POINT_DEFINE` argument of `NBL_CREATE_NSC_COMPILE_RULES` defines a C/C++ macro whose value is the absolute path to the NSC output directory (`BINARY_DIR`) that you mount when builtins are off (in examples it's `NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT`). + +See `examples_tests/common/include/nbl/examples/common/BuiltinResourcesApplication.hpp` for the exact mounting logic. + +## Why build-time NSC instead of runtime compilation? 
+ +Build-time compilation is usually preferable because it: + +- Uses your build system's parallelism (Ninja/MSBuild jobs) to compile shaders quickly. +- Writes **only into the build tree** (no source tree pollution, easy clean/reconfigure). +- Lets CI validate "shaders compile" as part of a normal build. +- Enables fast runtime iteration: at runtime you only **pick** the right SPIR-V, you don't compile it. +- Makes shader compilation deterministic and reproducible (toolchain + flags captured by the build). + +Runtime compilation is still useful for prototyping, but (assuming you don't use a runtime shader cache) it can make startup slower and shift failures to runtime instead of CI/build (a cache can hide the repeated cost on subsequent runs; our current one has some rough edges: it writes into the source tree and has issues when compiling many inputs from the same source directory). + +## What `NBL_CREATE_NSC_COMPILE_RULES` produces + +For each registered input it generates: + +- One `.spv` output **per CMake configuration** (`Debug/`, `Release/`, `RelWithDebInfo/`). +- If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each. +- A generated header (you choose the path via `INCLUDE`) containing: + - a primary template `get_spirv_key<KEY>(limits, features)` and `get_spirv_key<KEY>(device)` + - explicit specializations for each registered base `KEY` + - the returned key already includes the build config prefix (compiled into the header). + +Keys are strings that match the output layout: + +``` +<CONFIG>/<KEY>(.<CAP_NAME>_<VALUE>)(.<CAP_NAME>_<VALUE>)....spv +``` + +## The JSON "INPUTS" format + +`INPUTS` is a JSON array of objects. Each object supports: + +- `INPUT` (string, required): path to `.hlsl` (relative to `CMAKE_CURRENT_SOURCE_DIR` or absolute). +- `KEY` (string, required): base key (prefer without `.spv`; it is always appended, so using `foo.spv` will result in `foo.spv.spv`). +- `COMPILE_OPTIONS` (array of strings, optional): per-input extra options (e.g. 
`["-T","cs_6_8"]`). +- `DEPENDS` (array of strings, optional): per-input dependencies (extra files that should trigger rebuild). +- `CAPS` (array, optional): permutation caps (see below). + +You can register many rules in a single call, and you can call the function multiple times to append rules to the same `TARGET`. + +## Compile options (generator expressions, defaults, debug info) + +`NBL_CREATE_NSC_COMPILE_RULES` combines options from multiple sources: + +- Built-in defaults from the helper (see `cmake/common.cmake`): HLSL version, Vulkan SPIR-V target env, scalar layout, warnings, and per-config optimization flags (e.g. `-O0` for Debug, `-O3` for Release) implemented via CMake generator expressions. +- Global extra options via `COMMON_OPTIONS` (CMake list). +- Per-input extra options via JSON `COMPILE_OPTIONS` (array of strings). + +Both `COMMON_OPTIONS` and JSON `COMPILE_OPTIONS` support CMake generator expressions like `$<$:...>` (the helper uses them itself), so you can make flags configuration-dependent when needed. + +### Debug info for RenderDoc + +The helper also exposes CMake options that append NSC debug flags **only for Debug config** (via generator expressions). Enable them if you want RenderDoc to show source/line information instead of just raw disassembly: + +- `NSC_DEBUG_EDIF_FILE_BIT` (default `ON`) -> `-fspv-debug=file` +- `NSC_DEBUG_EDIF_TOOL_BIT` (default `ON`) -> `-fspv-debug=tool` +- `NSC_DEBUG_EDIF_SOURCE_BIT` (default `OFF`) -> `-fspv-debug=source` +- `NSC_DEBUG_EDIF_LINE_BIT` (default `OFF`) -> `-fspv-debug=line` +- `NSC_DEBUG_EDIF_NON_SEMANTIC_BIT` (default `OFF`) -> `-fspv-debug=vulkan-with-source` + +## Source files and rebuild dependencies (important) + +Make sure shader inputs and includes are: + +1. 
Marked as header-only on your target (so the IDE shows them, but the build system doesn't try to compile them with default HLSL rules like `fxc`): + +```cmake +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +``` + +2. Listed as dependencies of the NSC custom commands (so editing any of them triggers a rebuild of the `.spv` outputs). + +This is what the `DEPENDS` argument of `NBL_CREATE_NSC_COMPILE_RULES` (and/or per-input JSON `DEPENDS`) is for. Always include the main `INPUT` file itself and any files it includes; otherwise the build system might not re-run `nsc` when you change them. + +## Minimal usage (no permutations) + +Example pattern (as in `examples_tests/27_MPMCScheduler/CMakeLists.txt`): + +```cmake +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "DEPENDS": [], + "CAPS": [] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) +``` + +Then include the generated header and use the key to load the SPIR-V: + +```cpp +#include "nbl/this_example/builtin/build/spirv/keys.hpp" +// ... 
+auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); +auto bundle = assetMgr->getAsset(key.c_str(), loadParams); +``` + +`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). This list is intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. + +## Permutations via `CAPS` + +`CAPS` lets you prebuild multiple SPIR-V variants parameterized by device limits or features. + +Each `CAPS` entry looks like: + +- `kind` (string, optional): `"limits"` or `"features"` (defaults to `"limits"` if omitted/invalid). +- `name` (string, required): identifier used in both generated HLSL config and C++ key (must be a valid C/C++ identifier). +- `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`. +- `values` (array of numbers, required): the values you want to prebuild. + - for `bool`, values must be `0` or `1`. + +At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the `limits`/`features` you pass in. + +### Example: mixing `limits` and `features` + +This example permutes over one device limit and one device feature (order matters: the suffix order matches the `CAPS` array order): + +```cmake +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "DEPENDS": ["app_resources/common.hlsl"], + "CAPS": [ + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + }, + { + "kind": "features", + "name": "shaderFloat64", + "type": "bool", + "values": [0, 1] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + # ... 
+ OUTPUT_VAR KEYS + INPUTS ${JSON} +) +``` + +This produces `3 * 2 = 6` permutations per build configuration, and `KEYS` contains all of them (for example): + +``` +Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_0.spv +Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_1.spv +... +``` + +Practical tip: for numeric limits you often want to "bucket" real device values into one of the prebuilt values. The CountingSort example does exactly that: + +- CMake definition: `examples_tests/10_CountingSort/CMakeLists.txt` +- Runtime bucketing: `examples_tests/10_CountingSort/main.cpp` + +```cpp +auto limits = m_physicalDevice->getLimits(); +constexpr std::array AllowedMaxComputeSharedMemorySizes = { 16384, 32768, 65536 }; + +auto upperBoundSharedMemSize = std::upper_bound( + AllowedMaxComputeSharedMemorySizes.begin(), AllowedMaxComputeSharedMemorySizes.end(), limits.maxComputeSharedMemorySize +); +// devices which support less than 16KB of max compute shared memory size are not supported +if (upperBoundSharedMemSize == AllowedMaxComputeSharedMemorySizes.begin()) +{ + m_logger->log("maxComputeSharedMemorySize is too low (%u)", ILogger::E_LOG_LEVEL::ELL_ERROR, limits.maxComputeSharedMemorySize); + exit(0); +} + +limits.maxComputeSharedMemorySize = *(upperBoundSharedMemSize - 1); + +auto key = nbl::this_example::builtin::build::get_spirv_key<"prefix_sum_shader">(limits, m_physicalDevice->getFeatures()); +``` + +## Pairing with `NBL_CREATE_RESOURCE_ARCHIVE` (works with builtins ON/OFF) + +The recommended pattern is to always call `NBL_CREATE_RESOURCE_ARCHIVE` right after the NSC rules, using the produced `KEYS` list: + +```cmake +NBL_CREATE_RESOURCE_ARCHIVE( + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + NAMESPACE nbl::this_example::builtin::build +) +``` + +### How `BINARY_DIR`, `MOUNT_POINT_DEFINE`, and `BIND` fit together + +- In `NBL_CREATE_NSC_COMPILE_RULES`, `BINARY_DIR` is the 
output directory where NSC writes the compiled files: + - `${BINARY_DIR}/<CONFIG>/<KEY>....spv` +- In `NBL_CREATE_NSC_COMPILE_RULES`, `MOUNT_POINT_DEFINE` is the *name* of a C/C++ preprocessor define whose value is set to the **absolute path** of `BINARY_DIR`. + - Example: `MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT` results in something like `-DNBL_THIS_EXAMPLE_BUILD_MOUNT_POINT="C:/.../auto-gen"` on the target. + - Keys returned by `get_spirv_key<...>()` are relative to that directory; the full path on disk is: + - `${NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT}/<KEY>` +- In `NBL_CREATE_RESOURCE_ARCHIVE`, `BIND` should point at the same directory as `BINARY_DIR`. + - The `BUILTINS` list entries must be relative to `BIND`. + - This is why pairing it with `OUTPUT_VAR KEYS` works: `KEYS` is exactly the list of relative paths under `BINARY_DIR` that were generated by the NSC rules, so the archive generator knows what to serialize/embed. + +This is designed to work in both modes: + +- `NBL_EMBED_BUILTIN_RESOURCES=OFF`: + - `NBL_CREATE_RESOURCE_ARCHIVE` becomes a no-op (creates a dummy interface target). + - You load SPIR-V from the **build directory** mounted into the virtual filesystem. + - `MOUNT_POINT_DEFINE` provides an absolute path (e.g. `NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT`) for mounting. +- `NBL_EMBED_BUILTIN_RESOURCES=ON`: + - `NBL_CREATE_RESOURCE_ARCHIVE` generates a small library that embeds the listed files into a virtual archive and emits `.../CArchive.h` under the requested `NAMESPACE`. + - You mount the embedded archive instead of a directory; runtime loading code stays the same (keys don't change). + +## Notes / gotchas + +- `INCLUDE` must be a **relative** path (it is emitted under the build tree and added to include dirs automatically). +- Prefer not to include `.spv` in `KEY` (the extension is appended unconditionally); if you do, you'll just get `.spv.spv` in the final filename/key (not an error, just not what you want). 
+- You can mix: + - per-input `COMPILE_OPTIONS` (inside JSON), and + - global `COMMON_OPTIONS` (CMake list after `COMMON_OPTIONS`). + +## Troubleshooting (no logs / silent NSC failures) + +Sometimes an NSC compile rule fails during the build, but the build output doesn't show a useful log. In that case, run the failing command under a debugger: + +1. Open the generated Visual Studio solution and set the `nsc` project/target as the Startup Project. +2. Open the `nsc` project properties and set **Debugging -> Command Arguments**. +3. Copy the exact CLI from the failing "NSC Rules" custom command (the one that calls `nsc.exe`) into the Command Arguments field. +4. Start debugging (`F5`) and reproduce; if needed, put a breakpoint in the HLSL compiler/preprocessor codepath and step until you find the root cause. + +If the error looks like a preprocessing issue, note that we use Boost.Wave as the preprocessor; it can have quirky edge cases (e.g. needing a trailing newline/whitespace at the end of a file for correct parsing). + +## Best practices + +- Prefer compiling to a shader library (`-T lib_6_x`) and using multiple entry points when possible: fewer inputs means fewer compile rules and less build overhead; at runtime you still choose the entry point from the same `.spv`. +- Treat `CAPS` as a build-time cost multiplier (cartesian product). If the permutation count gets too large (thousands+), prebuilding usually stops paying off; an example of such workload is `examples_tests/23_Arithmetic2UnitTest`. + +## Complete example (expand) + +
+NSC rules + archive + runtime key usage + +### CMake (`CMakeLists.txt`) + +```cmake +include(common) + +nbl_create_executable_project("" "" "" "") + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "DEPENDS": [], + "CAPS": [ + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + }, + { + "kind": "features", + "name": "shaderFloat64", + "type": "bool", + "values": [0, 1] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +# Works for both NBL_EMBED_BUILTIN_RESOURCES=ON/OFF +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) +``` + +### Runtime usage (C++) + +```cpp +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +// Load relative to the VFS mount (examples mount it at "app_resources") +asset::IAssetLoader::SAssetLoadParams lp = {}; +lp.workingDirectory = "app_resources"; + +auto limits = device->getPhysicalDevice()->getLimits(); +limits.maxComputeSharedMemorySize = 32768; // one of the prebuilt values; real code should bucket/clamp with std::upper_bound (see the CountingSort snippet above) + +auto key = 
nbl::this_example::builtin::build::get_spirv_key<"shader">(limits, device->getEnabledFeatures());
+auto bundle = assetMgr->getAsset(key.c_str(), lp);
+const auto assets = bundle.getContents();
+auto spvShader = asset::IAsset::castDown<asset::IShader>(assets[0]); // TODO(review): template argument was lost in formatting — confirm the loaded asset type is asset::IShader
+
+// params.shader.shader = spvShader.get();
+
+// If you compiled with `-T lib_6_x`, pick the entry point at pipeline creation time (e.g. `params.shader.entryPoint = "main";`).
+
+
From 25370497736a02f5b175f34b38b91930ad3a7eba Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 19 Dec 2025 18:15:57 +0700 Subject: [PATCH 155/157] Remove duplicate partial specialization for truncate and emulated_vec --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 4eb8b7bf06..25b033c30e 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -587,53 +587,6 @@ NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) #undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST -#define NBL_EMULATED_VEC_PROMOTION(N) template\ -struct Promote, ComponentType>\ -{\ - using VecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ - {\ - array_set setter;\ - VecType promoted;\ - [[unroll]]\ - for (int i = 0; i < N; ++i)\ - setter(promoted, i, v);\ - return promoted;\ - }\ -}; - -NBL_EMULATED_VEC_PROMOTION(2) -NBL_EMULATED_VEC_PROMOTION(3) -NBL_EMULATED_VEC_PROMOTION(4) - -#undef NBL_EMULATED_VEC_PROMOTION - -#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ -struct Truncate, emulated_vector_t##M >\ -{\ - using OutputVecType = emulated_vector_t##N ;\ - using InputVecType = emulated_vector_t##M ;\ - NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ - {\ - array_get getter;\ - array_set setter;\ - OutputVecType output;\ - [[unroll]]\ - for (int i = 0; i < N; ++i)\ - setter(output, i, getter(vec, i));\ - return output;\ - }\ -}; - -NBL_EMULATED_VEC_TRUNCATION(2, 2) -NBL_EMULATED_VEC_TRUNCATION(2, 3) -NBL_EMULATED_VEC_TRUNCATION(2, 4) -NBL_EMULATED_VEC_TRUNCATION(3, 3) -NBL_EMULATED_VEC_TRUNCATION(3, 4) -NBL_EMULATED_VEC_TRUNCATION(4, 4) - -#undef NBL_EMULATED_VEC_TRUNCATION - } //namespace impl } From 993032c01e2890934021af8a0525eda310cd984e Mon Sep 17 00:00:00 2001 From: 
Karim Mohamed Date: Sat, 20 Dec 2025 10:19:17 +0300 Subject: [PATCH 156/157] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 12486d4670..1961a898fd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 12486d4670f0453722351814996d91f198a16749 +Subproject commit 1961a898fd0a91c8e4d5c1a3fcb02df9142e8388 From 6ab99fac0405cda68d5e7a05d9dc8a9a99ca556b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 20 Dec 2025 11:44:51 +0100 Subject: [PATCH 157/157] Updated DXC --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index ecd3f93521..d76c7890b1 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit ecd3f93521f1aceabff64b14857f47f9a32c9958 +Subproject commit d76c7890b19ce0b344ee0ce116dbc1c92220ccea