diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc
index dafad1d9a3..d76c7890b1 160000
--- a/3rdparty/dxc/dxc
+++ b/3rdparty/dxc/dxc
@@ -1 +1 @@
-Subproject commit dafad1d9a370d17ac9ce69928ef518f842cb5191
+Subproject commit d76c7890b19ce0b344ee0ce116dbc1c92220ccea
diff --git a/3rdparty/gli b/3rdparty/gli
index c4e6446d3b..2749a197e8 160000
--- a/3rdparty/gli
+++ b/3rdparty/gli
@@ -1 +1 @@
-Subproject commit c4e6446d3b646538026fd5a95533daed952878d4
+Subproject commit 2749a197e88f94858f4108732824b3790064f6ec
diff --git a/3rdparty/glm b/3rdparty/glm
index 2d4c4b4dd3..8f6213d379 160000
--- a/3rdparty/glm
+++ b/3rdparty/glm
@@ -1 +1 @@
-Subproject commit 2d4c4b4dd31fde06cfffad7915c2b3006402322f
+Subproject commit 8f6213d379a904f5ae910e09a114e066e25faf57
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bedb9f1dc2..2235512d1f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ include(ExternalProject)
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/toolchains/android/build.cmake)
 project(Nabla
-	VERSION 0.8.0.1
+	VERSION 0.9.0.0
	HOMEPAGE_URL "https://www.devsh.eu/nabla"
	LANGUAGES CXX C
 )
diff --git a/cmake/common.cmake b/cmake/common.cmake
index 645837aaaa..16ea1aee06 100755
--- a/cmake/common.cmake
+++ b/cmake/common.cmake
@@ -1144,6 +1144,12 @@ define_property(TARGET PROPERTY NBL_MOUNT_POINT_DEFINES
	BRIEF_DOCS "List of preprocessor defines with mount points"
 )
+option(NSC_DEBUG_EDIF_FILE_BIT "Add \"-fspv-debug=file\" to NSC Debug CLI" ON)
+option(NSC_DEBUG_EDIF_SOURCE_BIT "Add \"-fspv-debug=source\" to NSC Debug CLI" OFF)
+option(NSC_DEBUG_EDIF_LINE_BIT "Add \"-fspv-debug=line\" to NSC Debug CLI" OFF)
+option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" ON)
+option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF)
+
 function(NBL_CREATE_NSC_COMPILE_RULES)
	set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility")
	set(DEVICE_CONFIG_VIEW
@@ -1178,9 +1184,34 @@ struct DeviceConfigCaps
		-enable-16bit-types
		-Zpr
		-spirv
-		-fspv-target-env=vulkan1.3
+		-fspv-target-env=vulkan1.3
+		-Wshadow
+		-Wconversion
+		$<$<CONFIG:Debug>:-O0>
+		$<$<CONFIG:Release>:-O3>
+		$<$<CONFIG:RelWithDebInfo>:-O3>
	)
+
+	if(NSC_DEBUG_EDIF_FILE_BIT)
+		list(APPEND REQUIRED_OPTIONS $<$<CONFIG:Debug>:-fspv-debug=file>)
+	endif()
+
+	if(NSC_DEBUG_EDIF_SOURCE_BIT)
+		list(APPEND REQUIRED_OPTIONS $<$<CONFIG:Debug>:-fspv-debug=source>)
+	endif()
+
+	if(NSC_DEBUG_EDIF_LINE_BIT)
+		list(APPEND REQUIRED_OPTIONS $<$<CONFIG:Debug>:-fspv-debug=line>)
+	endif()
+
+	if(NSC_DEBUG_EDIF_TOOL_BIT)
+		list(APPEND REQUIRED_OPTIONS $<$<CONFIG:Debug>:-fspv-debug=tool>)
+	endif()
+
+	if(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT)
+		list(APPEND REQUIRED_OPTIONS $<$<CONFIG:Debug>:-fspv-debug=vulkan-with-source>)
+	endif()
+
	if(NOT NBL_EMBED_BUILTIN_RESOURCES)
		list(APPEND REQUIRED_OPTIONS
			-I "${NBL_ROOT_PATH}/include"
@@ -1210,12 +1241,12 @@ struct DeviceConfigCaps
	get_target_property(HEADER_RULE_GENERATED ${IMPL_TARGET} NBL_HEADER_GENERATED_RULE)
	if(NOT HEADER_RULE_GENERATED)
-		set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include")
+		set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include/$<CONFIG>")
		set(INCLUDE_FILE "${INCLUDE_DIR}/$")
		set(INCLUDE_CONTENT $)
		file(GENERATE OUTPUT ${INCLUDE_FILE}
-			CONTENT ${INCLUDE_CONTENT}
+			CONTENT $
			TARGET ${IMPL_TARGET}
		)
@@ -1277,17 +1308,22 @@ namespace @IMPL_NAMESPACE@ {
	foreach(INDEX RANGE ${LAST_INDEX})
		string(JSON INPUT GET "${IMPL_INPUTS}" ${INDEX} INPUT)
		string(JSON BASE_KEY GET "${IMPL_INPUTS}" ${INDEX} KEY)
-		string(JSON COMPILE_OPTIONS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS)
-
+		set(COMPILE_OPTIONS "")
-		math(EXPR LAST_CO 
"${COMPILE_OPTIONS_LENGTH} - 1") - foreach(COMP_IDX RANGE 0 ${LAST_CO}) - string(JSON COMP_ITEM GET "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS ${COMP_IDX}) - list(APPEND COMPILE_OPTIONS "${COMP_ITEM}") - endforeach() + string(JSON HAS_COMPILE_OPTIONS ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) + if(HAS_COMPILE_OPTIONS STREQUAL "ARRAY") + string(JSON COMPILE_OPTIONS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) + if(NOT COMPILE_OPTIONS_LENGTH EQUAL 0) + math(EXPR LAST_CO "${COMPILE_OPTIONS_LENGTH} - 1") + foreach(COMP_IDX RANGE 0 ${LAST_CO}) + string(JSON COMP_ITEM GET "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS ${COMP_IDX}) + list(APPEND COMPILE_OPTIONS "${COMP_ITEM}") + endforeach() + endif() + endif() set(DEPENDS_ON "") - string(JSON HAS_DEPENDS TYPE "${IMPL_INPUTS}" ${INDEX} DEPENDS) + string(JSON HAS_DEPENDS ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} DEPENDS) if(HAS_DEPENDS STREQUAL "ARRAY") string(JSON DEPENDS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} DEPENDS) if(NOT DEPENDS_LENGTH EQUAL 0) @@ -1305,7 +1341,7 @@ namespace @IMPL_NAMESPACE@ { set(HAS_CAPS FALSE) set(CAPS_LENGTH 0) - string(JSON CAPS_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS) + string(JSON CAPS_TYPE ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} CAPS) if(CAPS_TYPE STREQUAL "ARRAY") string(JSON CAPS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS) if(NOT CAPS_LENGTH EQUAL 0) @@ -1323,12 +1359,27 @@ namespace @IMPL_NAMESPACE@ { set(CAP_NAMES "") set(CAP_TYPES "") + set(CAP_KINDS "") if(HAS_CAPS) math(EXPR LAST_CAP "${CAPS_LENGTH} - 1") foreach(CAP_IDX RANGE 0 ${LAST_CAP}) + string(JSON CAP_KIND ERROR_VARIABLE CAP_TYPE_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} name) string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} type) + # -> TODO: improve validation, input should be string + if(CAP_TYPE_ERROR) + set(CAP_KIND limits) # I assume its limit by default (or when invalid value present, currently) + else() + if(NOT CAP_KIND MATCHES "^(limits|features)$") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP kind \"${CAP_KIND}\" for ${CAP_NAME}\n" + "Allowed kinds are: limits, features" + ) + endif() + endif() + # <- + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") ERROR_WHILE_PARSING_ITEM( "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" @@ -1366,6 +1417,7 @@ namespace @IMPL_NAMESPACE@ { set(CAP_VALUES_${CAP_IDX} "${VALUES}") list(APPEND CAP_NAMES "${CAP_NAME}") list(APPEND CAP_TYPES "${CAP_TYPE}") + list(APPEND CAP_KINDS "${CAP_KIND}") endforeach() endif() @@ -1399,68 +1451,81 @@ namespace @IMPL_NAMESPACE@ { nbl::core::string retval = "@BASE_KEY@"; @RETVAL_EVAL@ retval += ".spv"; - return retval; + return "$/" + retval; } } ]=]) unset(RETVAL_EVAL) - foreach(CAP ${CAP_NAMES}) - string(CONFIGURE [=[ - retval += ".@CAP@_" + std::to_string(limits.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach(CAP) + list(LENGTH CAP_NAMES CAP_COUNT) + if(CAP_COUNT GREATER 0) + math(EXPR LAST_CAP "${CAP_COUNT} - 1") + foreach(i RANGE ${LAST_CAP}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) + string(CONFIGURE [=[ + retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); +]=] RETVALUE_VIEW @ONLY) + string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + endforeach() + endif() + string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") 
function(GENERATE_KEYS PREFIX CAP_INDEX CAPS_EVAL_PART) if(NUM_CAPS EQUAL 0 OR CAP_INDEX EQUAL ${NUM_CAPS}) + # generate .config file set(FINAL_KEY "${BASE_KEY}${PREFIX}.spv") # always add ext even if its already there to make sure asset loader always is able to load as IShader - - set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") - set(CONFIG_FILE "${TARGET_OUTPUT}.config") + set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") + set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.config") set(CAPS_EVAL "${CAPS_EVAL_PART}") - string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") - set(NBL_NSC_COMPILE_COMMAND - "$" - -Fc "${TARGET_OUTPUT}" - ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} - "${CONFIG_FILE}" - ) - - add_custom_command(OUTPUT "${TARGET_OUTPUT}" - COMMAND ${NBL_NSC_COMPILE_COMMAND} - DEPENDS ${DEPENDS_ON} - COMMENT "Creating \"${TARGET_OUTPUT}\"" - VERBATIM - COMMAND_EXPAND_LISTS - ) - - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") - target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) - - set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - - set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES - NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" - NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" - NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" - NBL_SPIRV_ACCESS_KEY "${FINAL_KEY}" - ) - - set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") + # generate keys and commands for compiling shaders + foreach(BUILD_CONFIGURATION ${CMAKE_CONFIGURATION_TYPES}) + set(FINAL_KEY_REL_PATH "${BUILD_CONFIGURATION}/${FINAL_KEY}") + set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") + + set(NBL_NSC_COMPILE_COMMAND + "$" + -Fc "${TARGET_OUTPUT}" + ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} + "${CONFIG_FILE}" + ) + + add_custom_command(OUTPUT "${TARGET_OUTPUT}" + COMMAND ${NBL_NSC_COMPILE_COMMAND} + DEPENDS ${DEPENDS_ON} + COMMENT "Creating \"${TARGET_OUTPUT}\"" + VERBATIM + COMMAND_EXPAND_LISTS + ) + + set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + ) + + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES + NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" + NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" + NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" + ) + + set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") + endforeach() return() endif() list(GET CAP_NAMES ${CAP_INDEX} CURRENT_CAP) list(GET CAP_TYPES ${CAP_INDEX} CURRENT_TYPE) + list(GET CAP_KINDS ${CAP_INDEX} CURRENT_KIND) set(VAR_NAME "CAP_VALUES_${CAP_INDEX}") set(VALUES "${${VAR_NAME}}") @@ -1534,4 +1599,4 @@ function(NBL_CREATE_RESOURCE_ARCHIVE) if(IMPL_LINK_TO) LINK_BUILTIN_RESOURCES_TO_TARGET(${IMPL_LINK_TO} ${IMPL_TARGET}) endif() -endfunction() \ No newline at end of file +endfunction() diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md new file mode 100644 index 0000000000..4d57d7a8de --- /dev/null +++ b/docs/nsc-prebuilds.md @@ -0,0 +1,386 @@ +# NSC prebuilds (build-time HLSL -> SPIR-V) + +This document explains how to use `NBL_CREATE_NSC_COMPILE_RULES` together with `NBL_CREATE_RESOURCE_ARCHIVE` to: + +- Compile 
HLSL to SPIR-V at **build time** (via the `nsc` tool).
- Optionally generate **device-cap permutations** (limits/features "CAPS").
- Generate a small C++ header with **type-safe key getters** (`get_spirv_key<...>()`).
- Make the same code work with `NBL_EMBED_BUILTIN_RESOURCES` **ON** (embedded virtual archive) and **OFF** (mounted build directory) when loading your precompiled SPIR-V at runtime.

Definitions live in `cmake/common.cmake` (`NBL_CREATE_NSC_COMPILE_RULES`, `NBL_CREATE_RESOURCE_ARCHIVE`).

## Runtime mounting requirement (important)

All of this assumes your app mounts the directory/archive containing the NSC outputs (i.e. `BINARY_DIR`) into Nabla's virtual filesystem, then loads files via keys that are relative to that mounted root (the examples use `app_resources`).

The examples "just work" because they inherit from `nbl::examples::BuiltinResourcesApplication`, which mounts:

- `NBL_EMBED_BUILTIN_RESOURCES=OFF`: `system::CMountDirectoryArchive(NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT, ...)` at `app_resources`
- `NBL_EMBED_BUILTIN_RESOURCES=ON`: the generated embedded archive (e.g. `nbl::this_example::builtin::build::CArchive`) at `app_resources`

If you're writing your own app/extension and don't use `BuiltinResourcesApplication`, you must mount equivalently yourself, with the same split on `NBL_EMBED_BUILTIN_RESOURCES` (a sketch is included at the end of the complete example below). Optionally set `IAssetLoader::SAssetLoadParams::workingDirectory` to whatever virtual root you want to load from.

The `MOUNT_POINT_DEFINE` argument of `NBL_CREATE_NSC_COMPILE_RULES` defines a C/C++ macro whose value is the absolute path to the NSC output directory (`BINARY_DIR`) that you mount when builtins are off (in the examples it's `NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT`).

See `examples_tests/common/include/nbl/examples/common/BuiltinResourcesApplication.hpp` for the exact mounting logic.

## Why build-time NSC instead of runtime compilation?

Build-time compilation is usually preferable because it:

- Uses your build system's parallelism (Ninja/MSBuild jobs) to compile shaders quickly.
- Writes **only into the build tree** (no source tree pollution, easy clean/reconfigure).
- Lets CI validate "shaders compile" as part of a normal build.
- Enables fast runtime iteration: at runtime you only **pick** the right SPIR-V, you don't compile it.
- Makes shader compilation deterministic and reproducible (toolchain + flags captured by the build).

Runtime compilation is still useful for prototyping, but it makes startup slower and shifts failures from the build/CI stage to runtime. A runtime shader cache can hide the repeated cost on subsequent runs, but our current one has some rough edges: it writes into the source tree and has issues when compiling many inputs from the same source directory.

## What `NBL_CREATE_NSC_COMPILE_RULES` produces

For each registered input it generates:

- One `.spv` output **per CMake configuration** (`Debug/`, `Release/`, `RelWithDebInfo/`).
- If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each.
- A generated header (you choose the path via `INCLUDE`) containing:
  - a primary template `get_spirv_key<KEY>(limits, features)` and a convenience overload `get_spirv_key<KEY>(device)`
  - explicit specializations for each registered base `KEY`
  - the returned key already includes the build config prefix (compiled into the header).
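To make that concrete, here is a rough sketch of the generated header's shape, distilled from the `HEADER_ITEM_VIEW` template in `cmake/common.cmake`, for a base `KEY` of `"shader"` with a single `limits` CAP. Treat it as an illustration only: the key-literal template mechanics, includes, and exact type spellings in the real generated file may differ.

```cpp
// illustrative sketch, not the verbatim generated code
namespace nbl::this_example::builtin::build
{
// primary template; an overload taking the logical device also exists for convenience
template<core::StringLiteral Key>
core::string get_spirv_key(const video::SPhysicalDeviceLimits& limits, const video::SPhysicalDeviceFeatures& features);

// explicit specialization emitted for the registered base KEY "shader"
template<>
core::string get_spirv_key<"shader">(const video::SPhysicalDeviceLimits& limits, const video::SPhysicalDeviceFeatures& features)
{
    core::string retval = "shader";
    // one suffix per CAP, in CAPS array order; "limits." vs "features." is picked by the CAP's "kind"
    retval += ".maxComputeSharedMemorySize_" + std::to_string(limits.maxComputeSharedMemorySize);
    retval += ".spv";
    // each per-configuration copy of the header bakes in its own prefix ("Debug/", "Release/", ...)
    return "Debug/" + retval;
}
}
```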
Keys are strings that match the output layout:

```
<CONFIG>/<KEY>(.<CAP_NAME>_<VALUE>)(.<CAP_NAME>_<VALUE>)....spv
```

## The JSON "INPUTS" format

`INPUTS` is a JSON array of objects. Each object supports:

- `INPUT` (string, required): path to `.hlsl` (relative to `CMAKE_CURRENT_SOURCE_DIR` or absolute).
- `KEY` (string, required): base key (prefer without `.spv`; it is always appended, so using `foo.spv` will result in `foo.spv.spv`).
- `COMPILE_OPTIONS` (array of strings, optional): per-input extra options (e.g. `["-T","cs_6_8"]`).
- `DEPENDS` (array of strings, optional): per-input dependencies (extra files that should trigger a rebuild).
- `CAPS` (array, optional): permutation caps (see below).

You can register many rules in a single call, and you can call the function multiple times to append rules to the same `TARGET`.

## Compile options (generator expressions, defaults, debug info)

`NBL_CREATE_NSC_COMPILE_RULES` combines options from multiple sources:

- Built-in defaults from the helper (see `cmake/common.cmake`): HLSL version, Vulkan SPIR-V target env, scalar layout, warnings, and per-config optimization flags (e.g. `-O0` for Debug, `-O3` for Release) implemented via CMake generator expressions.
- Global extra options via `COMMON_OPTIONS` (CMake list).
- Per-input extra options via JSON `COMPILE_OPTIONS` (array of strings).

Both `COMMON_OPTIONS` and JSON `COMPILE_OPTIONS` support CMake generator expressions like `$<$<CONFIG:Debug>:...>` (the helper uses them itself), so you can make flags configuration-dependent when needed.

### Debug info for RenderDoc

The helper also exposes CMake options that append NSC debug flags **only for the Debug config** (via generator expressions). Enable them if you want RenderDoc to show source/line information instead of just raw disassembly:

- `NSC_DEBUG_EDIF_FILE_BIT` (default `ON`) -> `-fspv-debug=file`
- `NSC_DEBUG_EDIF_TOOL_BIT` (default `ON`) -> `-fspv-debug=tool`
- `NSC_DEBUG_EDIF_SOURCE_BIT` (default `OFF`) -> `-fspv-debug=source`
- `NSC_DEBUG_EDIF_LINE_BIT` (default `OFF`) -> `-fspv-debug=line`
- `NSC_DEBUG_EDIF_NON_SEMANTIC_BIT` (default `OFF`) -> `-fspv-debug=vulkan-with-source`

## Source files and rebuild dependencies (important)

Make sure shader inputs and includes are:

1. Marked as header-only on your target (so the IDE shows them, but the build system doesn't try to compile them with default HLSL rules like `fxc`):

```cmake
target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS})
set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON)
```

2. Listed as dependencies of the NSC custom commands (so editing any of them triggers a rebuild of the `.spv` outputs).

This is what the `DEPENDS` argument of `NBL_CREATE_NSC_COMPILE_RULES` (and/or per-input JSON `DEPENDS`) is for. Always include the main `INPUT` file itself and any files it includes; otherwise the build system might not re-run `nsc` when you change them.
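For example, a single-entry `INPUTS` array that lists its include as a rebuild dependency (note it also lists the main input itself, per the advice above) and passes a configuration-dependent option could look like this; the file names and the `-DMY_DEBUG_CHECKS` define are purely illustrative:

```json
[
  {
    "INPUT": "app_resources/shader.comp.hlsl",
    "KEY": "shader",
    "COMPILE_OPTIONS": ["-T", "cs_6_8", "$<$<CONFIG:Debug>:-DMY_DEBUG_CHECKS=1>"],
    "DEPENDS": ["app_resources/shader.comp.hlsl", "app_resources/common.hlsl"]
  }
]
```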
+ +## Minimal usage (no permutations) + +Example pattern (as in `examples_tests/27_MPMCScheduler/CMakeLists.txt`): + +```cmake +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "DEPENDS": [], + "CAPS": [] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) +``` + +Then include the generated header and use the key to load the SPIR-V: + +```cpp +#include "nbl/this_example/builtin/build/spirv/keys.hpp" +// ... +auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); +auto bundle = assetMgr->getAsset(key.c_str(), loadParams); +``` + +`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). This list is intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. + +## Permutations via `CAPS` + +`CAPS` lets you prebuild multiple SPIR-V variants parameterized by device limits or features. + +Each `CAPS` entry looks like: + +- `kind` (string, optional): `"limits"` or `"features"` (defaults to `"limits"` if omitted/invalid). +- `name` (string, required): identifier used in both generated HLSL config and C++ key (must be a valid C/C++ identifier). +- `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`. +- `values` (array of numbers, required): the values you want to prebuild. + - for `bool`, values must be `0` or `1`. + +At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the `limits`/`features` you pass in. + +### Example: mixing `limits` and `features` + +This example permutes over one device limit and one device feature (order matters: the suffix order matches the `CAPS` array order): + +```cmake +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "DEPENDS": ["app_resources/common.hlsl"], + "CAPS": [ + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + }, + { + "kind": "features", + "name": "shaderFloat64", + "type": "bool", + "values": [0, 1] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + # ... + OUTPUT_VAR KEYS + INPUTS ${JSON} +) +``` + +This produces `3 * 2 = 6` permutations per build configuration, and `KEYS` contains all of them (for example): + +``` +Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_0.spv +Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_1.spv +... +``` + +Practical tip: for numeric limits you often want to "bucket" real device values into one of the prebuilt values. 
The CountingSort example does exactly that:

- CMake definition: `examples_tests/10_CountingSort/CMakeLists.txt`
- Runtime bucketing: `examples_tests/10_CountingSort/main.cpp`

```cpp
auto limits = m_physicalDevice->getLimits();
constexpr std::array<uint32_t, 3> AllowedMaxComputeSharedMemorySizes = { 16384, 32768, 65536 };

auto upperBoundSharedMemSize = std::upper_bound(
	AllowedMaxComputeSharedMemorySizes.begin(), AllowedMaxComputeSharedMemorySizes.end(), limits.maxComputeSharedMemorySize
);
// devices which support less than 16KB of max compute shared memory size are not supported
if (upperBoundSharedMemSize == AllowedMaxComputeSharedMemorySizes.begin())
{
	m_logger->log("maxComputeSharedMemorySize is too low (%u)", ILogger::E_LOG_LEVEL::ELL_ERROR, limits.maxComputeSharedMemorySize);
	exit(0);
}

limits.maxComputeSharedMemorySize = *(upperBoundSharedMemSize - 1);

auto key = nbl::this_example::builtin::build::get_spirv_key<"prefix_sum_shader">(limits, m_physicalDevice->getFeatures());
```

## Pairing with `NBL_CREATE_RESOURCE_ARCHIVE` (works with builtins ON/OFF)

The recommended pattern is to always call `NBL_CREATE_RESOURCE_ARCHIVE` right after the NSC rules, using the produced `KEYS` list:

```cmake
NBL_CREATE_RESOURCE_ARCHIVE(
	TARGET ${EXECUTABLE_NAME}_builtinsBuild
	LINK_TO ${EXECUTABLE_NAME}
	BIND ${OUTPUT_DIRECTORY}
	BUILTINS ${KEYS}
	NAMESPACE nbl::this_example::builtin::build
)
```

### How `BINARY_DIR`, `MOUNT_POINT_DEFINE`, and `BIND` fit together

- In `NBL_CREATE_NSC_COMPILE_RULES`, `BINARY_DIR` is the output directory where NSC writes the compiled files:
  - `${BINARY_DIR}/<CONFIG>/<KEY>....spv`
- In `NBL_CREATE_NSC_COMPILE_RULES`, `MOUNT_POINT_DEFINE` is the *name* of a C/C++ preprocessor define whose value is set to the **absolute path** of `BINARY_DIR`.
  - Example: `MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT` results in something like `-DNBL_THIS_EXAMPLE_BUILD_MOUNT_POINT="C:/.../auto-gen"` on the target.
  - Keys returned by `get_spirv_key<...>()` are relative to that directory; the full path on disk is:
    - `${NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT}/<key>`
- In `NBL_CREATE_RESOURCE_ARCHIVE`, `BIND` should point at the same directory as `BINARY_DIR`.
  - The `BUILTINS` list entries must be relative to `BIND`.
  - This is why pairing it with `OUTPUT_VAR KEYS` works: `KEYS` is exactly the list of relative paths under `BINARY_DIR` that were generated by the NSC rules, so the archive generator knows what to serialize/embed.

This is designed to work in both modes:

- `NBL_EMBED_BUILTIN_RESOURCES=OFF`:
  - `NBL_CREATE_RESOURCE_ARCHIVE` becomes a no-op (creates a dummy interface target).
  - You load SPIR-V from the **build directory** mounted into the virtual filesystem.
  - `MOUNT_POINT_DEFINE` provides an absolute path (e.g. `NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT`) for mounting.
- `NBL_EMBED_BUILTIN_RESOURCES=ON`:
  - `NBL_CREATE_RESOURCE_ARCHIVE` generates a small library that embeds the listed files into a virtual archive and emits `.../CArchive.h` under the requested `NAMESPACE`.
  - You mount the embedded archive instead of a directory; runtime loading code stays the same (keys don't change).

## Notes / gotchas

- `INCLUDE` must be a **relative** path (it is emitted under the build tree and added to include dirs automatically).
- Prefer not to include `.spv` in `KEY` (the extension is appended unconditionally); if you do, you'll just get `.spv.spv` in the final filename/key (not an error, just not what you want).
- You can mix:
  - per-input `COMPILE_OPTIONS` (inside the JSON), and
  - global `COMMON_OPTIONS` (the CMake list passed after the `COMMON_OPTIONS` keyword).

## Troubleshooting (no logs / silent NSC failures)

Sometimes an NSC compile rule fails during the build, but the build output doesn't show a useful log. In that case, run the failing command under a debugger:

1. Open the generated Visual Studio solution and set the `nsc` project/target as the Startup Project.
2. Open the `nsc` project properties and set **Debugging -> Command Arguments**.
3. Copy the exact CLI from the failing "NSC Rules" custom command (the one that calls `nsc.exe`) into the Command Arguments field.
4. Start debugging (`F5`) and reproduce; if needed, put a breakpoint in the HLSL compiler/preprocessor codepath and step until you find the root cause.

If the error looks like a preprocessing issue, note that we use Boost.Wave as the preprocessor; it can have quirky edge cases (e.g. needing a trailing newline/whitespace at the end of a file for correct parsing).

## Best practices

- Prefer compiling to a shader library (`-T lib_6_x`) and using multiple entry points when possible: fewer inputs means fewer compile rules and less build overhead; at runtime you still choose the entry point from the same `.spv`.
- Treat `CAPS` as a build-time cost multiplier (cartesian product). If the permutation count gets too large (thousands+), prebuilding usually stops paying off; an example of such a workload is `examples_tests/23_Arithmetic2UnitTest`.

## Complete example
*NSC rules + archive + runtime key usage*

### CMake (`CMakeLists.txt`)

```cmake
include(common)

nbl_create_executable_project("" "" "" "")

set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen")
set(DEPENDS
	app_resources/common.hlsl
	app_resources/shader.hlsl
)
target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS})
set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON)

set(JSON [=[
[
	{
		"INPUT": "app_resources/shader.hlsl",
		"KEY": "shader",
		"COMPILE_OPTIONS": ["-T", "lib_6_8"],
		"DEPENDS": [],
		"CAPS": [
			{
				"kind": "limits",
				"name": "maxComputeSharedMemorySize",
				"type": "uint32_t",
				"values": [16384, 32768, 65536]
			},
			{
				"kind": "features",
				"name": "shaderFloat64",
				"type": "bool",
				"values": [0, 1]
			}
		]
	}
]
]=])

NBL_CREATE_NSC_COMPILE_RULES(
	TARGET ${EXECUTABLE_NAME}SPIRV
	LINK_TO ${EXECUTABLE_NAME}
	DEPENDS ${DEPENDS}
	BINARY_DIR ${OUTPUT_DIRECTORY}
	MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT
	COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR}
	OUTPUT_VAR KEYS
	INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp
	NAMESPACE nbl::this_example::builtin::build
	INPUTS ${JSON}
)

# Works for both NBL_EMBED_BUILTIN_RESOURCES=ON/OFF
NBL_CREATE_RESOURCE_ARCHIVE(
	NAMESPACE nbl::this_example::builtin::build
	TARGET ${EXECUTABLE_NAME}_builtinsBuild
	LINK_TO ${EXECUTABLE_NAME}
	BIND ${OUTPUT_DIRECTORY}
	BUILTINS ${KEYS}
)
```

### Runtime usage (C++)

```cpp
#include "nbl/this_example/builtin/build/spirv/keys.hpp"

// Load relative to the VFS mount (examples mount it at "app_resources")
asset::IAssetLoader::SAssetLoadParams lp = {};
lp.workingDirectory = "app_resources";

auto limits = device->getPhysicalDevice()->getLimits();
limits.maxComputeSharedMemorySize = 32768; // one of the prebuilt values; real code should bucket/clamp with std::upper_bound (see the CountingSort snippet above)

auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(limits, device->getEnabledFeatures());
auto bundle = assetMgr->getAsset(key.c_str(), lp);
const auto assets = bundle.getContents();
auto spvShader = asset::IAsset::castDown<asset::IShader>(assets[0]);

// params.shader.shader = spvShader.get();

// If you compiled with `-T lib_6_x`, pick the entry point at pipeline creation time (e.g. `params.shader.entryPoint = "main";`).
```
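For reference, here is a minimal sketch of the mounting described in "Runtime mounting requirement", mirroring what `BuiltinResourcesApplication` does. The member names (`m_system`, `m_logger`), the visibility of `NBL_EMBED_BUILTIN_RESOURCES` as a compile definition, and the exact constructor signatures are assumptions here; only the two archive types come from the build rules above.

```cpp
// mount the NSC outputs at "app_resources" in both builtin modes (sketch, not verbatim API)
core::smart_refctd_ptr<system::IFileArchive> archive;
#ifdef NBL_EMBED_BUILTIN_RESOURCES
// embedded virtual archive generated by NBL_CREATE_RESOURCE_ARCHIVE
archive = core::make_smart_refctd_ptr<nbl::this_example::builtin::build::CArchive>(core::smart_refctd_ptr(m_logger));
#else
// directory with the NSC outputs, absolute path provided via MOUNT_POINT_DEFINE
archive = core::make_smart_refctd_ptr<system::CMountDirectoryArchive>(system::path(NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT), core::smart_refctd_ptr(m_logger), m_system.get());
#endif
m_system->mount(std::move(archive), "app_resources");
```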
diff --git a/examples_tests b/examples_tests index dd7de7a89c..b5d8abc0e5 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit dd7de7a89cfa5a59970dde4d4744ecf746d77a4a +Subproject commit b5d8abc0e5c4761a3714b2c4a074cb10aaa90573 diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl index 7eca7d51df..66442a11a1 100644 --- a/include/nbl/builtin/hlsl/algorithm.hlsl +++ b/include/nbl/builtin/hlsl/algorithm.hlsl @@ -19,7 +19,7 @@ namespace impl // TODO: use structs template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { T tmp = lhs; lhs = rhs; @@ -27,7 +27,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -35,7 +35,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -43,7 +43,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -51,7 +51,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -59,7 +59,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -67,7 +67,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -75,7 +75,7 @@ namespace impl } #else template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { std::swap(lhs, rhs); } @@ -83,7 +83,7 @@ namespace impl } template -NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) +NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { impl::swap(lhs, rhs); } diff --git a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl index 1407d7fc77..4b97bbc08f 100644 --- a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl +++ b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl @@ -59,6 +59,7 @@ using namespace nbl::hlsl::blit; // TODO: push constants [numthreads(ConstevalParameters::WorkGroupSize,1,1)] +[shader("compute")] void main() { InImgAccessor inImgA; diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index 5e5e543791..d70e8823da 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -280,7 +280,7 @@ struct SCookTorrance const scalar_type NdotV = localV.z; fresnel_type _f = __getOrientedFresnel(fresnel, NdotV); - 
fresnel::OrientedEtaRcps rcpEta = _f.getOrientedEtaRcps(); + fresnel::OrientedEtaRcps rcpEta = _f.getRefractionOrientedEtaRcps(); const vector3_type upperHemisphereV = ieee754::flipSignIfRHSNegative(localV, hlsl::promote(NdotV)); const vector3_type localH = ndf.generateH(upperHemisphereV, u.xy); @@ -304,7 +304,8 @@ struct SCookTorrance scalar_type rcpChoiceProb; scalar_type z = u.z; sampling::PartitionRandVariable partitionRandVariable; - bool transmitted = partitionRandVariable(reflectance, z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectance; + bool transmitted = partitionRandVariable(z, rcpChoiceProb); const scalar_type LdotH = hlsl::mix(VdotH, ieee754::copySign(hlsl::sqrt(rcpEta.value2[0]*VdotH*VdotH + scalar_type(1.0) - rcpEta.value2[0]), -VdotH), transmitted); bool valid; diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index f7655e9978..33faa79efc 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -313,14 +313,18 @@ NBL_CONCEPT_BEGIN(2) NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) ((NBL_CONCEPT_REQ_TYPE)(T::vector_type)) - ((NBL_CONCEPT_REQ_TYPE)(T::eta_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel(cosTheta)), ::nbl::hlsl::is_same_v, typename T::vector_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ); #undef cosTheta #undef fresnel #include +namespace impl +{ +template +NBL_BOOL_CONCEPT VectorIsMonochrome = vector_traits::Dimension == 1; +} + #define NBL_CONCEPT_NAME TwoSidedFresnel #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (T) @@ -331,8 +335,11 @@ NBL_CONCEPT_BEGIN(2) #define cosTheta NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(Fresnel, T)) + ((NBL_CONCEPT_REQ_TYPE)(T::eta_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEta()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getReorientedFresnel(cosTheta)), ::nbl::hlsl::is_same_v, T)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(impl::VectorIsMonochrome, typename T::eta_type)) ); #undef cosTheta #undef fresnel @@ -362,7 +369,7 @@ struct Schlick return F0 + (1.0 - F0) * x*x*x*x*x; } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { const eta_type sqrtF0 = hlsl::sqrt(F0); OrientedEtaRcps rcpEta; @@ -424,13 +431,13 @@ struct Conductor return (rs2 + rp2) * hlsl::promote(0.5); } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC - { - OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / eta; - rcpEta.value2 = rcpEta.value * rcpEta.value; - return rcpEta; - } + // OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + // { + // OrientedEtaRcps rcpEta; + // rcpEta.value = hlsl::promote(1.0) / eta; + // rcpEta.value2 = rcpEta.value * rcpEta.value; + // return rcpEta; + // } T eta; T etak2; @@ -484,7 +491,7 @@ struct Dielectric // default to monochrome, but it is possible to have RGB fresnel without dispersion fixing the refraction Eta // to be something else than the etas used to compute RGB reflectance or some sort of interpolation of them scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return orientedEta.value[0]; } - OrientedEtaRcps 
getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { return orientedEta.getReciprocals(); } + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { return orientedEta.getReciprocals(); } Dielectric getReorientedFresnel(const scalar_type NdotI) NBL_CONST_MEMBER_FUNC { @@ -508,25 +515,26 @@ struct iridescent_helper using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - // returns reflectance R = (rp, rs), phi is the phase shift for each plane of polarization (p,s) - static void phase_shift(const vector_type orientedEta, const vector_type orientedEtak, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) + // returns phi, the phase shift for each plane of polarization (p,s) + static void phase_shift(const vector_type ior1, const vector_type ior2, const vector_type iork2, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) { - vector_type cosTheta_2 = cosTheta * cosTheta; - vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta_2; - const vector_type eta2 = orientedEta*orientedEta; - const vector_type etak2 = orientedEtak*orientedEtak; - - vector_type z = eta2 - etak2 - sinTheta2; - vector_type w = hlsl::sqrt(z * z + scalar_type(4.0) * eta2 * eta2 * etak2); - vector_type a2 = (z + w) * hlsl::promote(0.5); - vector_type b2 = (w - z) * hlsl::promote(0.5); - vector_type b = hlsl::sqrt(b2); + const vector_type cosTheta2 = cosTheta * cosTheta; + const vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta2; + const vector_type ior1_2 = ior1*ior1; + const vector_type ior2_2 = ior2*ior2; + const vector_type iork2_2 = iork2*iork2; - const vector_type t0 = eta2 + etak2; - const vector_type t1 = t0 * cosTheta_2; + const vector_type z = ior2_2 * (hlsl::promote(1.0) - iork2_2) - ior1_2 * sinTheta2; + const vector_type w = hlsl::sqrt(z*z + scalar_type(4.0) * ior2_2 * ior2_2 * iork2_2); + const vector_type a2 = hlsl::max(z + w, hlsl::promote(0.0)) * hlsl::promote(0.5); + const vector_type b2 = hlsl::max(w - z, hlsl::promote(0.0)) * hlsl::promote(0.5); + const vector_type a = hlsl::sqrt(a2); + const vector_type b = hlsl::sqrt(b2); - phiS = hlsl::atan2(hlsl::promote(2.0) * b * cosTheta, a2 + b2 - cosTheta_2); - phiP = hlsl::atan2(hlsl::promote(2.0) * eta2 * cosTheta * (hlsl::promote(2.0) * orientedEtak * hlsl::sqrt(a2) - etak2 * b), t1 - a2 + b2); + phiS = hlsl::atan2(scalar_type(2.0) * ior1 * b * cosTheta, a2 + b2 - ior1_2*cosTheta2); + const vector_type k2_plus_one = hlsl::promote(1.0) + iork2_2; + phiP = hlsl::atan2(scalar_type(2.0) * ior1 * ior2_2 * cosTheta * (scalar_type(2.0) * iork2 * a - (hlsl::promote(1.0) - iork2_2) * b), + ior2_2 * cosTheta2 * k2_plus_one * k2_plus_one - ior1_2*(a2+b2)); } // Evaluation XYZ sensitivity curves in Fourier space @@ -544,10 +552,9 @@ struct iridescent_helper } template - static T __call(const vector_type _D, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) + static T __call(const vector_type _D, const vector_type ior1, const vector_type ior2, const vector_type ior3, const vector_type iork3, + const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) { - const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B); - const scalar_type cosTheta_1 = clampedCosTheta; vector_type R12p, R23p, R12s, R23s; vector_type cosTheta_2; @@ -561,7 +568,7 @@ struct iridescent_helper if (hlsl::any(notTIR)) 
{ - Dielectric::__polarized(eta12, hlsl::promote(cosTheta_1), R12p, R12s); + Dielectric::__polarized(eta12 * eta12, hlsl::promote(cosTheta_1), R12p, R12s); // Reflected part by the base // if kappa==0, base material is dielectric @@ -587,14 +594,13 @@ struct iridescent_helper // Optical Path Difference const vector_type D = _D * cosTheta_2; - const vector_type Dphi = hlsl::promote(2.0 * numbers::pi) * D / wavelengths; vector_type phi21p, phi21s, phi23p, phi23s, r123s, r123p, Rs; vector_type I = hlsl::promote(0.0); // Evaluate the phase shift - phase_shift(eta12, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21p, phi21s); - phase_shift(eta23, etak23, cosTheta_2, phi23p, phi23s); + phase_shift(ior1, ior2, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21s, phi21p); + phase_shift(ior2, ior3, iork3, cosTheta_2, phi23s, phi23p); phi21p = hlsl::promote(numbers::pi) - phi21p; phi21s = hlsl::promote(numbers::pi) - phi21s; @@ -615,7 +621,7 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123p; - Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(m)*D, hlsl::promote(m)*(phi23p+phi21p)); + Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m))*(phi23p+phi21p)); I += Cm*Sm; } @@ -629,11 +635,11 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123s; - Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(m)*D, hlsl::promote(m) *(phi23s+phi21s)); + Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m)) *(phi23s+phi21s)); I += Cm*Sm; } - return hlsl::max(colorspace::scRGB::FromXYZ(I), hlsl::promote(0.0)) * hlsl::promote(0.5); + return hlsl::max(Colorspace::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); } }; @@ -643,13 +649,14 @@ struct iridescent_base using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - vector_type getD() NBL_CONST_MEMBER_FUNC { return D; } - vector_type getEta12() NBL_CONST_MEMBER_FUNC { return eta12; } - vector_type getEta23() NBL_CONST_MEMBER_FUNC { return eta23; } - vector_type D; + vector_type ior1; + vector_type ior2; + vector_type ior3; + vector_type iork3; vector_type eta12; // outside (usually air 1.0) -> thin-film IOR vector_type eta23; // thin-film -> base material IOR + vector_type eta13; }; } @@ -679,24 +686,30 @@ struct Iridescent(2.0 * params.Dinc) * params.ior2; + retval.ior1 = params.ior1; + retval.ior2 = params.ior2; + retval.ior3 = params.ior3; + retval.iork3 = params.iork3; retval.eta12 = params.ior2/params.ior1; retval.eta23 = params.ior3/params.ior2; retval.etak23 = params.iork3/params.ior2; + retval.eta13 = params.ior3/params.ior1; return retval; } T operator()(const scalar_type clampedCosTheta) NBL_CONST_MEMBER_FUNC { - return impl::iridescent_helper::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, base_type::iork3, + base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC - { - OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / base_type::eta23; - rcpEta.value2 = rcpEta.value * rcpEta.value; - return rcpEta; - } + // OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + // { + // OrientedEtaRcps rcpEta; + // rcpEta.value = hlsl::promote(1.0) / base_type::eta13; + // rcpEta.value2 = 
rcpEta.value * rcpEta.value; + // return rcpEta; + // } vector_type getEtak23() NBL_CONST_MEMBER_FUNC { @@ -731,21 +744,26 @@ struct Iridescent(2.0 * params.Dinc) * params.ior2; + retval.ior1 = params.ior1; + retval.ior2 = params.ior2; + retval.ior3 = params.ior3; retval.eta12 = params.ior2/params.ior1; retval.eta23 = params.ior3/params.ior2; + retval.eta13 = params.ior3/params.ior1; return retval; } T operator()(const scalar_type clampedCosTheta) NBL_CONST_MEMBER_FUNC { - return impl::iridescent_helper::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, getEtak23(), + base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); } - scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta23[0]; } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta13[0]; } + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / base_type::eta23[0]; + rcpEta.value = hlsl::promote(1.0) / hlsl::promote(base_type::eta13[0]); rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } @@ -755,8 +773,12 @@ struct Iridescent(1.0)/base_type::eta12, flip); - orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta23, flip); + orientedFresnel.ior1 = hlsl::mix(base_type::ior1, base_type::ior3, flip); + orientedFresnel.ior2 = base_type::ior2; + orientedFresnel.ior3 = hlsl::mix(base_type::ior3, base_type::ior1, flip); + orientedFresnel.eta12 = hlsl::mix(base_type::eta12, hlsl::promote(1.0)/base_type::eta23, flip); + orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); + orientedFresnel.eta13 = hlsl::mix(base_type::eta13, hlsl::promote(1.0)/base_type::eta13, flip); return orientedFresnel; } diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl index 4ad4bb341e..c64f6e3b84 100644 --- a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl @@ -406,7 +406,7 @@ template struct is_ggx : impl::is_ggx {}; template -NBL_CONSTEXPR bool is_ggx_v = is_ggx::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_ggx_v = is_ggx::value; } } diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl index 712b614755..6d5744fb49 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl @@ -41,7 +41,8 @@ struct SSmoothDielectric scalar_type rcpChoiceProb; sampling::PartitionRandVariable partitionRandVariable; - bool transmitted = partitionRandVariable(reflectance, u.z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectance; + bool transmitted = partitionRandVariable(u.z, rcpChoiceProb); ray_dir_info_type V = interaction.getV(); Refract r = Refract::create(V.getDirection(), interaction.getN()); @@ -128,7 +129,8 @@ struct SThinSmoothDielectric scalar_type rcpChoiceProb; scalar_type z = u.z; sampling::PartitionRandVariable partitionRandVariable; - const bool transmitted = partitionRandVariable(reflectionProb, z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectionProb; + const bool transmitted = 
partitionRandVariable(z, rcpChoiceProb); remainderMetadata = hlsl::mix(reflectance, hlsl::promote(1.0) - reflectance, transmitted) * rcpChoiceProb; ray_dir_info_type V = interaction.getV(); diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index da04c49b51..7e8f6526ec 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -238,28 +238,28 @@ struct divides< complex_t > // Out of line generic initialization of static member data not yet supported so we X-Macro identities for Scalar types we want to support // (left X-Macro here since it's pretty readable) -#define COMPLEX_ARITHMETIC_IDENTITIES(SCALAR) \ +#define COMPLEX_ARITHMETIC_IDENTITIES(SCALAR, COMPONENT) \ template<> \ -const static complex_t< SCALAR > plus< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(0), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > plus< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(0), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > minus< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(0), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > minus< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(0), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > multiplies< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(1), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > multiplies< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(1), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > divides< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(1), promote< SCALAR , uint32_t>(0)}; - -COMPLEX_ARITHMETIC_IDENTITIES(float16_t) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t4) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t4) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t4) +const static complex_t< SCALAR > divides< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(1), promote< SCALAR, COMPONENT>(0)}; + +COMPLEX_ARITHMETIC_IDENTITIES(float16_t, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t2, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t3, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t4, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t2, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t3, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t4, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t2, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t3, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t4, float64_t) #undef COMPLEX_ARITHMETIC_IDENTITIES @@ -436,22 +436,6 @@ complex_t rotateRight(NBL_CONST_REF_ARG(complex_t) value) return retVal; } -template -struct ternary_operator< complex_t > -{ - using type_t = complex_t; - - complex_t operator()(bool condition, NBL_CONST_REF_ARG(complex_t) lhs, NBL_CONST_REF_ARG(complex_t) rhs) - { - const vector lhsVector = vector(lhs.real(), lhs.imag()); - const vector rhsVector = 
vector(rhs.real(), rhs.imag()); - const vector resultVector = condition ? lhsVector : rhsVector; - const complex_t result = { resultVector.x, resultVector.y }; - return result; - } -}; - - } } diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index 6e0f380d01..3c40b3e6c6 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -128,9 +128,9 @@ NBL_CONSTEXPR bool NBL_CONCEPT_NAME = BOOST_PP_SEQ_FOR_EACH_I(NBL_IMPL_CONCEPT_E namespace impl\ {\ template\ -struct CONCEPT_NAME : false_type {};\ +struct CONCEPT_NAME : ::nbl::hlsl::false_type {};\ template\ -struct CONCEPT_NAME > : true_type {};\ +struct CONCEPT_NAME > : ::nbl::hlsl::true_type {};\ }\ template\ NBL_BOOL_CONCEPT CONCEPT_NAME = impl::CONCEPT_NAME::value\ @@ -139,4 +139,4 @@ NBL_BOOL_CONCEPT CONCEPT_NAME = impl::CONCEPT_NAME struct is_emulating_floating_point_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; +}; + +template +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template -NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +NBL_BOOL_CONCEPT FloatingPointLikeScalar = FloatingPointScalar || impl::is_emulating_floating_point_scalar::value; + +//! Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) +template +NBL_BOOL_CONCEPT IntegralLikeScalar = IntegralScalar || impl::is_emulating_integral_scalar::value; } } diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 468838730a..f132531cb9 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -40,14 +40,12 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial && concepts template NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial && (is_integral_v::scalar_type>); template +NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial && concepts::IntegralLikeScalar::scalar_type>; +template NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::SignedIntegralScalar::scalar_type>; } -template -NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct extent) > : integral_constant::Dimension> {}; - } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index 175a3e76c1..03d47864fb 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -5,5 +5,9 @@ // it includes vector and matrix #include #include +#include + +// Had to push some stuff here to avoid circular dependencies +#include #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 87baa1f0d6..a5715efa15 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -3,39 +3,12 @@ #include -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return 
impl::static_cast_helper::cast(v); -} - -} -} #ifndef __HLSL_VERSION #include #define ARROW -> +#define NBL_DEREF_THIS (*this) #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static @@ -43,6 +16,10 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr +#define NBL_CONSTEXPR_OOL_MEMBER constexpr +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl @@ -67,6 +44,7 @@ namespace nbl::hlsl #else #define ARROW .arrow(). +#define NBL_DEREF_THIS this #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static @@ -74,6 +52,10 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR const +#define NBL_CONSTEXPR_OOL_MEMBER const +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) namespace nbl @@ -102,4 +84,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ + return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 0c595bb0e2..5a19a1d529 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -27,7 +27,7 @@ template::type; - NBL_CONSTEXPR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; + NBL_CONSTEXPR_FUNC_SCOPE_VAR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; UnsignedInteger absVal = val & Mask; return absVal > (ieee754::traits::specialValueExp << ieee754::traits::mantissaBitCnt); } @@ -48,7 +48,7 @@ NBL_VALID_EXPRESSION(MixIsCallable, (T)(U), glm::mix(declval(),declval(),d template NBL_BOOL_CONCEPT MixCallingBuiltins = #ifdef __HLSL_VERSION -(spirv::FMixIsCallable && is_same_v) || spirv::SelectIsCallable; +(spirv::FMixIsCallable && is_same_v); #else MixIsCallable; #endif @@ -90,6 +90,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -119,6 +121,12 @@ struct nMax_helper; template struct nClamp_helper; template +struct addCarry_helper; +template +struct subBorrow_helper; +template +struct undef_helper; +template struct fma_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -133,8 +141,8 @@ struct fma_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct 
HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -156,8 +164,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), bool) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), bool) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), bool) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (T)(B), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -179,6 +188,10 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fma_helper, fma, (T), (T)(T)(T), T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> @@ -255,20 +268,6 @@ struct mix_helper) > } }; -template -NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable && concepts::Boolean) -struct mix_helper && concepts::Boolean) > -{ - using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; - // for a component of a that is false, the corresponding component of x is returned - // for a component of a that is true, the corresponding component of y is returned - // so we make sure this is correct when calling the operation - static inline return_t __call(const T x, const T y, const U a) - { - return spirv::select(a, y, x); - } -}; - template NBL_PARTIAL_REQ_TOP(matrix_traits::Square) struct determinant_helper::Square) > { @@ -629,6 +628,74 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + retVal.carry = T(retVal.result < operand1); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + 
retVal.result = static_cast(operand1 - operand2); + retVal.borrow = T(operand1 < operand2); + return retVal; + } +}; + +template +requires (concepts::BooleanScalar) +struct select_helper +{ + using return_t = T; + constexpr static return_t __call(const B& condition, const T& object1, const T& object2) + { + return condition ? object1 : object2; + } +}; + +template +requires (concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper +{ + using return_t = T; + constexpr static T __call(const B& condition, const T& object1, const T& object2) + { + using traits = vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC T __call() + { + T t; + return t; + } +}; + template requires concepts::FloatingPointScalar struct fma_helper @@ -901,43 +968,24 @@ struct mix_helper -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::BooleanScalar) -struct mix_helper && concepts::BooleanScalar) > +namespace impl { - using return_t = T; - static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) - { - using traitsT = hlsl::vector_traits; - array_get getterT; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), a)); - - return output; - } -}; - template -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) -struct mix_helper && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) > +NBL_BOOL_CONCEPT MixCallingSelect = +#ifdef __HLSL_VERSION +spirv::SelectIsCallable; +#else +concepts::Boolean && (concepts::Scalar || (concepts::Vector && vector_traits::Dimension==vector_traits::Dimension)) && !MixCallingBuiltins; +#endif +} + +template NBL_PARTIAL_REQ_TOP(impl::MixCallingSelect) +struct mix_helper) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - using traitsT = hlsl::vector_traits; - using traitsU = hlsl::vector_traits; - array_get getterT; - array_get getterU; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), getterU(a, i))); - - return output; + return select_helper::__call(a, y, x); } }; diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 7198bae563..78367f7924 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -150,6 +156,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) 
+{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). * @@ -217,6 +229,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..1887f4b51f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -12,68 +12,39 @@ namespace impl { // partial specialize this for `T=matrix|vector` and `U=matrix|vector|scalar_t` -template +template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } }; -#ifdef __HLSL_VERSION - -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) +struct Promote && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) > { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector promoted = {Scalar(v), Scalar(v)}; - return promoted; + array_set setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, v); + return output; } }; -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -#endif - } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { impl::Promote _promote; return _promote(v); } + } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..ffe3d12641 --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,58 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace concepts +{ + template + NBL_BOOL_CONCEPT can_truncate_vector = concepts::Vectorial && concepts::Vectorial && concepts::same_as::scalar_type, typename vector_traits::scalar_type > && vector_traits::Dimension <= vector_traits::Dimension; +} + +namespace impl +{ + +template +struct Truncate +{ + 
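// generic fallback: defer to T's explicit conversion from U; the partial specialization below shortens vectors componentwise +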
NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::can_truncate_vector) +struct Truncate) > +{ + NBL_CONSTEXPR_FUNC To operator()(const From v) + { + array_get::scalar_type> getter; + array_set::scalar_type> setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, getter(v, i)); + return output; + } + +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 9872675e3a..da32fab7b0 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -471,25 +471,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -636,10 +636,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t 
castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & ieee754::traits::exponentMask; return biasBits ? value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl new file mode 100644 index 0000000000..3818814a49 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -0,0 +1,155 @@ +using storage_t = vector; +storage_t data; + +/** +* @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern +* +* @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (the first component holds the lower 32 bits) +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) +{ + this_t retVal; + retVal.data = _data; + return retVal; +} + +/** +* @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern +* +* @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated +* @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) +{ + return create(storage_t(lo, hi)); +} + +// ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- +// GLM requires these for vector casts + +#ifndef __HLSL_VERSION + +template +constexpr explicit operator I() const noexcept; + +#endif + +// ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC +{ + return data.x; +} + +NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC +{ + return data.y; +} + +// ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data & rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data | rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data ^ rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(~data); + return retVal; +} + +// Only valid in CPP +#ifndef __HLSL_VERSION +constexpr inline this_t operator>>(uint32_t bits) const; + +constexpr inline this_t operator<<(uint32_t bits) const; + +constexpr inline this_t& operator&=(const this_t& val) +{ + data &= val.data; + return *this; +} + +constexpr inline this_t& operator|=(const this_t& val) +{ + data |= val.data; + return *this; +} + +constexpr inline this_t& operator^=(const this_t& val) +{ + data ^= val.data; + return *this; +} + +#endif + +// ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // add the low words first, then fold the resulting carry into the sum of the high words + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); +} + +NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // subtract the low words first, then take the resulting borrow out of the difference of the high words + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); +} + +// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- +NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + equal_to equals; + return all(equals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool
operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator>(rhs); +} + +NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator<(rhs); +} diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..4fa2014607 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,409 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" + +// Didn't bother with operator* or operator/; implement them if you need them.
Multiplication is pretty straightforward; division requires switching on signs +// and whether the topmost bits of the divisor are equal to 0 +// - Francisco + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_int64_t; + +struct emulated_uint64_t +{ + using this_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; + + #include "int64_common_member_inc.hlsl" + + #ifndef __HLSL_VERSION + emulated_uint64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_uint64_t(const I& toEmulate); + + constexpr explicit emulated_uint64_t(const emulated_int64_t& other); + #endif +}; + + +struct emulated_int64_t +{ + using this_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; + + #include "int64_common_member_inc.hlsl" + + #ifndef __HLSL_VERSION + emulated_int64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_t(const I& toEmulate); + + constexpr explicit emulated_int64_t(const emulated_uint64_t& other); + #endif + + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC + { + // two's complement negation: flip all the bits, then add 1 + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); + } + +}; + +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ + +template +NBL_BOOL_CONCEPT EmulatedIntegralScalar64 = same_as || same_as; + +namespace impl +{ + +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; + +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} + + +} + + +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64 && concepts::EmulatedIntegralScalar64 && !concepts::same_as) +struct static_cast_helper && concepts::EmulatedIntegralScalar64 && !concepts::same_as) > +{ + + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) + { + To retVal; + retVal.data = other.data; + return retVal; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + // Return only the lowest bits + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) + { + return _static_cast(val.data.x); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) + { + return bit_cast(val.data); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + // Set only lower bits + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) + { + return To::create(_static_cast(i), uint32_t(0)); + } +}; + +template
NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) + { + // `bit_cast` blocked by GLM vectors using a union + #ifndef __HLSL_VERSION + return To::create(_static_cast(i), _static_cast(i >> 32)); + #else + To retVal; + retVal.data = bit_cast >(i); + return retVal; + #endif + } +}; + +} //namespace impl + +// Define constructor and conversion operators + +#ifndef __HLSL_VERSION + +constexpr emulated_int64_t::emulated_int64_t(const emulated_uint64_t& other) : data(other.data) {} + +constexpr emulated_uint64_t::emulated_uint64_t(const emulated_int64_t& other) : data(other.data) {} + +template +constexpr emulated_int64_t::emulated_int64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + +template +constexpr emulated_uint64_t::emulated_uint64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + +template +constexpr emulated_int64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +template +constexpr emulated_uint64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +#endif + +// ---------------------- Functional operators ------------------------ + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct left_shift_operator) > +{ + using type_t = T; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + return select(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? 
vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + + return select(bool(bits), shifted, operand); + } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? ~uint32_t(0) : uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + return select(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_int64_t emulated_int64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus) > +{ + using type_t = T; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs + rhs; + } + + const static type_t identity; +}; + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus) > +{ + using type_t = T; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs - rhs; + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); + +// --------------------------------- Compound assignment 
operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus_assign) > +{ + using type_t = T; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus_assign) > +{ + using type_t = T; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; + +} //namespace hlsl +} //namespace nbl + +// Declare them as signed/unsigned versions of each other + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..25b033c30e 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -23,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -31,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -39,9 +40,7 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -55,7 +54,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -65,7 +64,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -75,9 +74,7 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename
unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -91,7 +88,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -103,7 +100,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -115,133 +112,207 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); +template +struct emulated_vector; - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) - { - this_t output; +// Generic ComponentType vectors still have to be partially specialized based on whether they're fundamental and/or integral - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator OP());\ + return output;\ } - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) - { - this_t output; +#define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, 
this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other[i]); +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - return output; - } +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ +} - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC vector operator OP (vector other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other[i];\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); +#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +using this_t = emulated_vector;\ +using component_t = ComponentType;\ +NBL_CONSTEXPR_STATIC this_t create(this_t other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other.getComponent(i));\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ +{\ + component_t sum = CRTP::getComponent(0);\ + [[unroll]]\ + for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\ + sum = sum + CRTP::getComponent(i);\ + return sum;\ } - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +NBL_CONSTEXPR_STATIC this_t create(vector other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other[i]);\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); +// Fundamental, integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + 
NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; +// Fundamental, not integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() - { - component_t sum = 0; - for 
(uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR - return sum; - } -}; +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -250,7 +321,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -259,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -269,14 +340,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP + +template +struct emulated_vector, CRTP> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC this_t create(this_t other) { this_t output; @@ -287,17 +358,17 @@ struct emulated_vector : CRTP } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC this_t create(vector other) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -306,7 +377,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -315,7 +386,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -338,9 +409,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { - ComponentType sum = ComponentType::create(0); + component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -420,12 +491,21 @@ DEFINE_SCALAR_OF_SPECIALIZATION(3) DEFINE_SCALAR_OF_SPECIALIZATION(4) #undef DEFINE_SCALAR_OF_SPECIALIZATION 
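+// expose extent for emulated vectors by forwarding to the matching native vector, so generic code can query their dimension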
+#define DEFINE_EXTENT_SPECIALIZATION(DIMENSION)\ +template\ +struct extent, I> : extent {}; + +DEFINE_EXTENT_SPECIALIZATION(2) +DEFINE_EXTENT_SPECIALIZATION(3) +DEFINE_EXTENT_SPECIALIZATION(4) +#undef DEFINE_EXTENT_SPECIALIZATION + namespace impl { template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -438,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -452,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -470,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -483,7 +564,30 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..118fe07c63 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,8 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" +#include "nbl/builtin/hlsl/array_accessors.hlsl" namespace nbl @@ -79,7 +81,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. 
-#define ALIAS_STD(NAME,OP) template struct NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME { \ using type_t = T; \ \ T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \ @@ -90,7 +92,6 @@ struct reference_wrapper : enable_if_t< #else // CPP - #define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; @@ -135,16 +136,56 @@ ALIAS_STD(divides,/) }; -ALIAS_STD(greater,>) }; -ALIAS_STD(less,<) }; -ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal,<=) }; +ALIAS_STD(equal_to, ==) }; +ALIAS_STD(not_equal_to, !=) }; +ALIAS_STD(greater, >) }; +ALIAS_STD(less, <) }; +ALIAS_STD(greater_equal, >=) }; +ALIAS_STD(less_equal, <=) }; #undef ALIAS_STD -// ------------------------ Compound assignment operators ---------------------- +// The comparison operators above return a plain bool in std, but in HLSL comparisons on vectors yield vectors of bool, so the specializations below make them return `vector` for vectorial types + +// GLM doesn't overload comparison operators on vectors, so its named functions are called instead +#ifndef __HLSL_VERSION + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template requires (concepts::Vectorial)\ +struct NAME \ +{\ + using type_t = T;\ + vector::Dimension> operator()(const T& lhs, const T& rhs)\ + {\ + return glm::GLM_OP (lhs, rhs);\ + }\ +}; + +#else + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return lhs OP rhs;\ + }\ +}; + +#endif + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) + +#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION -#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ +// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- + +#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ using base_t = NAME ; \ base_t baseOp; \ @@ -163,9 +204,9 @@ COMPOUND_ASSIGN(divides) #undef COMPOUND_ASSIGN -// ----------------- End of compound assignment ops ---------------- +// ---------------------------------------------------------------- MIN, MAX, TERNARY ------------------------------------------------------------------------- -// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD +// Min, Max, Ternary, and Shift operators don't use ALIAS_STD because they don't exist in std // TODO: implement as mix(rhs struct minimum @@ -195,18 +236,273 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +#ifndef __HLSL_VERSION +template requires(is_same_v, std::invoke_result_t > ) +struct ternary_operator +{ + using type_t = std::invoke_result_t; + + constexpr inline type_t operator()(const bool condition, F1& lhs, F2& rhs) + { + if (condition) + return std::invoke(lhs); + else + return std::invoke(rhs); + } +}; +#else +template()()),decltype(experimental::declval()())> ) struct ternary_operator +{ + using type_t = decltype(experimental::declval().operator()); + + 
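// both branches are passed in as callables, so only the one picked by the condition actually gets invoked +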
NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_REF_ARG(F1) lhs, NBL_REF_ARG(F2) rhs) + { + if (condition) + return lhs(); + else + return rhs(); + } +}; +#endif + +// ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- + +template +struct left_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct left_shift_operator) > { using type_t = T; + using scalar_t = scalar_type_t; - T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { - return condition ? lhs : rhs; + return operand << bits; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand << bits; } }; -} -} +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), getter(bits, i))); + } + return shifted; + } -#endif \ No newline at end of file + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } +}; + +template +struct arithmetic_right_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand >> bits; + 
} +}; + +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), getter(bits, i))); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } +}; + +// Left unimplemented for vectorial types by default +template +struct logical_right_shift_operator +{ + using type_t = T; + using unsigned_type_t = make_unsigned_t; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + arithmetic_right_shift_operator arithmeticRightShift; + return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); + } +}; + +// ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- +#ifndef __HLSL_VERSION +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME : std::NAME { \ + using type_t = T; \ +}; +#else +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) \ + { \ + return operand.operator OP(); \ + } \ +}; \ +template NBL_PARTIAL_REQ_TOP(concepts::Scalar || concepts::Vector || concepts::Matrix ) \ +struct NAME || concepts::Vector || concepts::Matrix ) > \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(const T operand) \ + { \ + return (OP operand); \ + } \ +}; +#endif + +NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) +NBL_UNARY_OP_SPECIALIZATION(negate, -) + +} //namespace hlsl +} //namespace nbl + +#endif diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 307a11101f..29c48a79d1 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -90,7 +90,7 @@ inline int extractExponent(T x) }
template -NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { using AsFloat = typename float_of_size::type; return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); @@ -98,20 +98,20 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -119,21 +119,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -240,13 +240,13 @@ struct flipSignIfRHSNegative_helper -NBL_CONSTEXPR_INLINE_FUNC T flipSign(T val, U flip) +NBL_CONSTEXPR_FUNC T flipSign(T val, U flip) { return impl::flipSign_helper::__call(val, flip); } template -NBL_CONSTEXPR_INLINE_FUNC T flipSignIfRHSNegative(T val, T flip) +NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, T flip) { return impl::flipSignIfRHSNegative_helper::__call(val, flip); } diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> 
NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) +NBL_CONSTEXPR_FUNC T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } -template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } } } diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index 59ff142150..070f1e7af5 100644 --- a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -88,7 +88,7 @@ matrix promote_affine(const matrix inMatrix) NBL_UNROLL for (uint32_t row_i = NIn; row_i < NOut; row_i++) { retval[row_i] = promote(0.0); - if (row_i >= MIn && row_i < MOut) + if (row_i < MOut) retval[row_i][row_i] = T(1.0); } return retval; diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl index 3bcfbb2388..cd402d0cd4 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl @@ -14,25 +14,25 @@ namespace float_t_namespace { -NBL_CONSTEXPR float_t xi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_2[2] = { TYPED_NUMBER(-0.5773502691896257), TYPED_NUMBER(0.5773502691896257) }; -NBL_CONSTEXPR float_t xi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_3[3] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.7745966692414833), TYPED_NUMBER(0.7745966692414833) }; -NBL_CONSTEXPR float_t xi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_4[4] = { TYPED_NUMBER(-0.3399810435848562), TYPED_NUMBER(0.3399810435848562), TYPED_NUMBER(-0.8611363115940525), TYPED_NUMBER(0.8611363115940525) }; -NBL_CONSTEXPR float_t xi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_5[5] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.5384693101056830), TYPED_NUMBER(0.5384693101056830), @@ -40,7 +40,7 @@ NBL_CONSTEXPR float_t xi_5[5] = { TYPED_NUMBER(0.9061798459386639) }; -NBL_CONSTEXPR float_t xi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_6[6] = { TYPED_NUMBER(0.6612093864662645), TYPED_NUMBER(-0.6612093864662645), TYPED_NUMBER(-0.2386191860831969), @@ -49,7 +49,7 @@ NBL_CONSTEXPR float_t xi_6[6] = { TYPED_NUMBER(0.9324695142031520) }; -NBL_CONSTEXPR float_t xi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_7[7] = { TYPED_NUMBER(0.0), TYPED_NUMBER(0.4058451513773971), TYPED_NUMBER(-0.4058451513773971), @@ -59,7 +59,7 @@ NBL_CONSTEXPR float_t xi_7[7] = { TYPED_NUMBER(0.9491079123427585) }; -NBL_CONSTEXPR float_t xi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_8[8] = { 
TYPED_NUMBER(-0.1834346424956498), TYPED_NUMBER(0.1834346424956498), TYPED_NUMBER(-0.5255324099163289), @@ -70,7 +70,7 @@ NBL_CONSTEXPR float_t xi_8[8] = { TYPED_NUMBER(0.9602898564975362) }; -NBL_CONSTEXPR float_t xi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_9[9] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.8360311073266357), TYPED_NUMBER(0.8360311073266357), @@ -82,7 +82,7 @@ NBL_CONSTEXPR float_t xi_9[9] = { TYPED_NUMBER(0.6133714327005903) }; -NBL_CONSTEXPR float_t xi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_10[10] = { TYPED_NUMBER(-0.1488743389816312), TYPED_NUMBER(0.1488743389816312), TYPED_NUMBER(-0.4333953941292471), @@ -95,7 +95,7 @@ NBL_CONSTEXPR float_t xi_10[10] = { TYPED_NUMBER(0.9739065285171717) }; -NBL_CONSTEXPR float_t xi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_11[11] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2695431559523449), TYPED_NUMBER(0.2695431559523449), @@ -109,7 +109,7 @@ NBL_CONSTEXPR float_t xi_11[11] = { TYPED_NUMBER(0.9782286581460569) }; -NBL_CONSTEXPR float_t xi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_12[12] = { TYPED_NUMBER(-0.1252334085114689), TYPED_NUMBER(0.1252334085114689), TYPED_NUMBER(-0.3678314989981801), @@ -124,7 +124,7 @@ NBL_CONSTEXPR float_t xi_12[12] = { TYPED_NUMBER(0.9815606342467192) }; -NBL_CONSTEXPR float_t xi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_13[13] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2304583159551347), TYPED_NUMBER(0.2304583159551347), @@ -140,7 +140,7 @@ NBL_CONSTEXPR float_t xi_13[13] = { TYPED_NUMBER(0.9841830547185881) }; -NBL_CONSTEXPR float_t xi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_14[14] = { TYPED_NUMBER(-0.1080549487073436), TYPED_NUMBER(0.1080549487073436), TYPED_NUMBER(-0.3191123689278897), @@ -157,7 +157,7 @@ NBL_CONSTEXPR float_t xi_14[14] = { TYPED_NUMBER(0.9862838086968123) }; -NBL_CONSTEXPR float_t xi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_15[15] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2011940939974345), TYPED_NUMBER(0.2011940939974345), @@ -175,25 +175,25 @@ NBL_CONSTEXPR float_t xi_15[15] = { TYPED_NUMBER(0.9879925180204854) }; -NBL_CONSTEXPR float_t wi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_2[2] = { TYPED_NUMBER(1.0000000000000000), TYPED_NUMBER(1.0000000000000000) }; -NBL_CONSTEXPR float_t wi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_3[3] = { TYPED_NUMBER(0.8888888888888888), TYPED_NUMBER(0.5555555555555555), TYPED_NUMBER(0.5555555555555555) }; -NBL_CONSTEXPR float_t wi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_4[4] = { TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.3478548451374538), TYPED_NUMBER(0.3478548451374538) }; -NBL_CONSTEXPR float_t wi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_5[5] = { TYPED_NUMBER(0.5688888888888888), TYPED_NUMBER(0.4786286704993664), TYPED_NUMBER(0.4786286704993664), @@ -201,7 +201,7 @@ NBL_CONSTEXPR float_t wi_5[5] = { TYPED_NUMBER(0.2369268850561890) }; -NBL_CONSTEXPR float_t wi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_6[6] = { TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.4679139345726910), @@ -210,7 +210,7 @@ NBL_CONSTEXPR float_t wi_6[6] = { TYPED_NUMBER(0.1713244923791703) }; -NBL_CONSTEXPR float_t wi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_7[7] = { TYPED_NUMBER(0.4179591836734693), TYPED_NUMBER(0.3818300505051189), TYPED_NUMBER(0.3818300505051189), @@ -220,7 +220,7 @@ NBL_CONSTEXPR 
float_t wi_7[7] = { TYPED_NUMBER(0.1294849661688696) }; -NBL_CONSTEXPR float_t wi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_8[8] = { TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3137066458778872), @@ -231,7 +231,7 @@ NBL_CONSTEXPR float_t wi_8[8] = { TYPED_NUMBER(0.1012285362903762) }; -NBL_CONSTEXPR float_t wi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_9[9] = { TYPED_NUMBER(0.3302393550012597), TYPED_NUMBER(0.1806481606948574), TYPED_NUMBER(0.1806481606948574), @@ -243,7 +243,7 @@ NBL_CONSTEXPR float_t wi_9[9] = { TYPED_NUMBER(0.2606106964029354) }; -NBL_CONSTEXPR float_t wi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_10[10] = { TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2692667193099963), @@ -256,7 +256,7 @@ NBL_CONSTEXPR float_t wi_10[10] = { TYPED_NUMBER(0.0666713443086881) }; -NBL_CONSTEXPR float_t wi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_11[11] = { TYPED_NUMBER(0.2729250867779006), TYPED_NUMBER(0.2628045445102466), TYPED_NUMBER(0.2628045445102466), @@ -270,7 +270,7 @@ NBL_CONSTEXPR float_t wi_11[11] = { TYPED_NUMBER(0.0556685671161736) }; -NBL_CONSTEXPR float_t wi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_12[12] = { TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2334925365383548), @@ -285,7 +285,7 @@ NBL_CONSTEXPR float_t wi_12[12] = { TYPED_NUMBER(0.0471753363865118) }; -NBL_CONSTEXPR float_t wi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_13[13] = { TYPED_NUMBER(0.2325515532308739), TYPED_NUMBER(0.2262831802628972), TYPED_NUMBER(0.2262831802628972), @@ -301,7 +301,7 @@ NBL_CONSTEXPR float_t wi_13[13] = { TYPED_NUMBER(0.0404840047653158) }; -NBL_CONSTEXPR float_t wi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_14[14] = { TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2051984637212956), @@ -318,7 +318,7 @@ NBL_CONSTEXPR float_t wi_14[14] = { TYPED_NUMBER(0.0351194603317518) }; -NBL_CONSTEXPR float_t wi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_15[15] = { TYPED_NUMBER(0.2025782419255612), TYPED_NUMBER(0.1984314853271115), TYPED_NUMBER(0.1984314853271115), diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index aca8d1ff3c..8d50202f4e 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -15,9 +15,9 @@ namespace math { template -struct quaternion_t +struct quaternion { - using this_t = quaternion_t; + using this_t = quaternion; using scalar_type = T; using data_type = vector; using vector3_type = vector; diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl new file mode 100644 index 0000000000..9ba33ffb3d --- /dev/null +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -0,0 +1,661 @@ +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/emulated/int64_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" +#include "nbl/builtin/hlsl/portable/vector_t.hlsl" + +// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +namespace impl +{ + +// Valid dimension for 
a morton code
+template
+NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5;
+
+template && concepts::Scalar)
+NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val)
+{
+    NBL_IF_CONSTEXPR(is_signed_v)
+    {
+        // include the msb
+        NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((uint64_t(1) << (Bits-1)) - 1);
+        const bool allZero = ((val & mask) == 0);
+        const bool allOne = ((val & mask) == mask);
+        return allZero || allOne;
+    } else
+    {
+        NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((uint64_t(1) << Bits) - 1);
+        const bool allZero = ((val & mask) == 0);
+        return allZero;
+    }
+}
+
+template && concepts::Scalar)
+NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegralVec(vector vec)
+{
+    array_get, T> getter;
+    NBL_UNROLL
+    for (uint16_t i = 0; i < Dim; i++)
+        if (!verifyAnyBitIntegral(getter(vec, i))) return false;
+    return true;
+}
+
+
+// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS ---------------------------------------------------
+
+NBL_CONSTEXPR uint16_t CodingStages = 5;
+
+template
+struct coding_mask;
+
+template
+NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value);
+
+// constexpr vector is not supported since it is not a fundamental type, which means it cannot be stored or leaked outside of a constexpr context, it can only exist transiently. So the only way to return a vector is to make the function consteval. Thus, we use a macro to inline it where it is used.
+#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\
+    truncate >(\
+        vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3)))
+
+
+template
+struct sign_mask : integral_constant {};
+
+template
+NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value);
+
+#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\
+    truncate >(\
+        vector(sign_mask_v< DIM, BITS >,\
+            sign_mask_v< DIM, BITS > << 1,\
+            sign_mask_v< DIM, BITS > << 2,\
+            sign_mask_v< DIM, BITS > << 3)))
+
+// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage
+// mask also takes into account the total number of bits we're considering for a code (all other masks operate on a bit-agnostic basis).
+#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\
+{\
+    enum : uint64_t { _Bits = Bits };\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ?
(uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\
+};
+
+#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\
+{\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\
+};
+
+// Final stage mask also counts the exact number of bits, although maybe it's not necessary
+#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\
+{\
+    enum : uint64_t { _Bits = Bits };\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\
+};
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 on, 8 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused but here for completeness + likely keeps compiler from complaining)
+
+NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS
+
+#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS
+#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK
+#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK
+
+// ----------------------------------------------------------------- MORTON TRANSCODER ---------------------------------------------------
+template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>)
+struct Transcoder
+{
+    using decode_component_t = conditional_t<(Bits > 16), uint32_t, uint16_t>;
+    using decode_t = vector;
+
+    template )
+    /**
+    * @brief Interleaves each coordinate with `Dim - 1` zeros in between each bit, and left-shifts each by their coordinate index
+    *
+    * @param [in] decodedValue Cartesian coordinates to interleave and shift
+    */
+    NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(T) decodedValue)
+    {
+        left_shift_operator > leftShift;
+        portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v;
+
+        // Read this to understand how interleaving and spreading bits works https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
+        #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\
+        {\
+            interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\
+            interleaved = interleaved & coding_mask_v;\
+        }
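As a concrete reference before the stages are invoked below: for Dim == 2 and 16-bit components, the macro degenerates to the classic bit-spreading sequence from the linked fgiesen article, with each stage mask being the 32-bit truncation of the Dim == 2 coding masks above. A minimal standalone C++ sketch (helper names invented for illustration):

    #include <cassert>
    #include <cstdint>

    // Spread the low 16 bits of x so one zero bit separates consecutive bits,
    // walking the stages coarse-to-fine exactly like ENCODE_LOOP_ITERATION.
    static std::uint32_t spreadBits16(std::uint32_t x)
    {
        x &= 0x0000FFFFu;
        x = (x | (x << 8)) & 0x00FF00FFu; // stage 3 mask
        x = (x | (x << 4)) & 0x0F0F0F0Fu; // stage 2 mask
        x = (x | (x << 2)) & 0x33333333u; // stage 1 mask
        x = (x | (x << 1)) & 0x55555555u; // stage 0 mask
        return x;
    }

    // 2D Morton code: x occupies the even bits, y is shifted into the odd bits.
    static std::uint32_t morton2D(std::uint16_t x, std::uint16_t y)
    {
        return spreadBits16(x) | (spreadBits16(y) << 1);
    }

    int main()
    {
        assert(morton2D(0, 0) == 0x00000000u);
        assert(morton2D(1, 0) == 0x00000001u);
        assert(morton2D(0, 1) == 0x00000002u);
        assert(morton2D(0xFFFF, 0) == 0x55555555u);
    }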
+ ENCODE_LOOP_ITERATION(4) + ENCODE_LOOP_ITERATION(3) + ENCODE_LOOP_ITERATION(2) + ENCODE_LOOP_ITERATION(1) + ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION + + // After interleaving, shift each coordinate left by their index + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); + } + + template + /** + * @brief Encodes a vector of cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(T) decodedValue) + { + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + + NBL_UNROLL + for (uint16_t i = 1; i < Dim; i++) + encoded = encoded | getter(interleaveShifted, i); + + return encoded; + } + + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator encodedRightShift; + portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + NBL_UNROLL + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedRightShift(encodedValue, i)); + + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ + } + + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated + NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) + return _static_cast(decoded); + else + return _static_cast(decoded & coding_mask_v); + } +}; + +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Here because no partial specialization of methods +// `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted + +template +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); + +template +struct Equal; + +template +struct Equal +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t zeros = promote >(_static_cast(0)); + + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); + equal_to > _equal; + return _equal(xored, zeros); + } +}; + +template +struct Equal +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t >; + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); + return Equal::template __call(value, 
interleaved); + } +}; + +template +struct BaseComparison; + +// Aux variable that has only the sign bit for the first of D dimensions +template +NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1)); + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); + ComparisonOp comparison; + NBL_IF_CONSTEXPR(Signed) + { + // Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); + + return comparison(thisCoord, rhsCoord); + } + + } +}; + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t >; + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); + return BaseComparison::template __call(value, interleaved); + } +}; + +template +struct LessThan : BaseComparison > > {}; + +template +struct LessEqual : BaseComparison > > {}; + +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEqual : BaseComparison > > {}; + +} //namespace impl + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + using this_signed_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; + + using transcoder_t = impl::Transcoder; + using decode_component_t = conditional_t, + typename transcoder_t::decode_component_t>; + + storage_t value; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif + + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class
+    */
+    template
+    NBL_CONSTEXPR_STATIC enable_if_t , this_t>
+    create(NBL_CONST_REF_ARG(vector) cartesian)
+    {
+        this_t retVal;
+        assert((impl::verifyAnyBitIntegralVec(cartesian)));
+        using decode_t = typename transcoder_t::decode_t;
+        retVal.value = transcoder_t::encode(_static_cast(cartesian));
+        return retVal;
+    }
+
+    // CPP can also have an actual constructor
+    #ifndef __HLSL_VERSION
+
+    /**
+    * @brief Creates a Morton code from a set of cartesian coordinates
+    *
+    * @param [in] cartesian Coordinates to encode
+    */
+    template
+    inline explicit code(NBL_CONST_REF_ARG(vector) cartesian)
+    {
+        *this = create(cartesian);
+    }
+
+    /**
+    * @brief Decodes this Morton code back to a set of cartesian coordinates
+    */
+    template == Signed)
+    constexpr explicit operator vector() const noexcept;
+
+    #endif
+
+    // ------------------------------------------------------- BITWISE OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value & rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value | rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value ^ rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = ~value;
+        return retVal;
+    }
+
+    // Only valid in CPP
+    #ifndef __HLSL_VERSION
+
+    constexpr this_t operator<<(uint16_t bits) const;
+
+    constexpr this_t operator>>(uint16_t bits) const;
+
+    #endif
+
+    // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC
+    {
+        this_t zero;
+        zero.value = _static_cast(0);
+        #ifndef __HLSL_VERSION
+        return zero - *this;
+        #else
+        return zero - this;
+        #endif
+    }
+
+    // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS -------------------------------------------------
+
+    // put 1 bits everywhere in the bits the current axis is not using
+    // then extract just the axis bits for the right hand coordinate
+    // carry-1 will propagate the bits across the already set bits
+    // then clear out the bits not belonging to the current axis
+    // Note: It's possible to clear on `this` and fill on `rhs` but that will
+    // disable optimizations; we expect the compiler to optimize a lot if the
+    // value of `rhs` is known at compile time, e.g.
`static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + NBL_UNROLL + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; + } + + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + NBL_UNROLL + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::Equal::template __call(value, rhs); + } + + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value != rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return !equal(rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessEqual::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterEqual::template __call(value, rhs); + } + +}; + +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same 
signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar) > +{ + NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + +template +struct left_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator valueLeftShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueLeftShift(operand.value, bits * D); + // Previous shift might move bits to positions that storage has available but the morton code does not use + // Un-decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0 so we mask them off + const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0); + retVal.value = retVal.value & _static_cast(UsedBitsMask); + return retVal; + } +}; + +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + arithmetic_right_shift_operator valueArithmeticRightShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueArithmeticRightShift(operand.value, bits * D); + return retVal; + } +}; + +// This one's uglier - have to unpack to get the expected behaviour +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + vector cartesian = _static_cast >(operand); + // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate), + // then right-shift again to get correct sign on each coordinate + // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits + const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits); + cartesian <<= ShiftFactor; + cartesian >>= ShiftFactor + scalar_t(bits); + return type_t::create(cartesian); + } +}; + +#ifndef __HLSL_VERSION + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator<<(uint16_t bits) const +{ + left_shift_operator> leftShift; + return leftShift(*this, bits); +} + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator> rightShift; + return rightShift(*this, bits); +} + +template && D* Bits <= 64) +template == Signed) +constexpr morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code>(*this); +} + +#endif + +#undef NBL_MORTON_INTERLEAVE_MASKS +#undef NBL_MORTON_SIGN_MASKS + +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 8fb13db872..7734dea15f 100644 --- 
a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -41,7 +41,12 @@ struct countl_zero : impl::countl_zero static_assert(is_integral::value, "countl_zero type parameter must be an integral type"); }; template -NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T countl_zero_v = countl_zero::value; + +template +struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_pot_v = is_pot::value; template struct log2 @@ -49,7 +54,12 @@ struct log2 NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint64_t log2_v = log2::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_v = log2::value; + +template +struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -59,7 +69,7 @@ struct rotl NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X << r) | (X >> (N - r))) : (X >> (-r)) | (X << (N - (-r))); }; template -NBL_CONSTEXPR T rotl_v = rotl::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotl_v = rotl::value; template struct rotr @@ -69,7 +79,7 @@ struct rotr NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X >> r) | (X << (N - r))) : (X << (-r)) | (X >> (N - (-r))); }; template -NBL_CONSTEXPR T rotr_v = rotr::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotr_v = rotr::value; template struct align_up @@ -77,12 +87,7 @@ struct align_up NBL_CONSTEXPR_STATIC_INLINE uint64_t value = X ? (((X-1)/M+1)*M):0; }; template -NBL_CONSTEXPR uint64_t align_up_v = align_up::value; - -template -struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; -template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t align_up_v = align_up::value; template struct max @@ -90,7 +95,7 @@ struct max NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T max_v = max::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T max_v = max::value; template struct min @@ -98,7 +103,18 @@ struct min NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T min_v = min::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T min_v = min::value; + +template +struct round_up_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_up_to_pot_v = round_up_to_pot::value; + +// TODO: should rename log2 to log2_floor +template +struct round_down_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_down_to_pot_v = round_down_to_pot::value; template struct find_lsb @@ -106,7 +122,7 @@ struct find_lsb NBL_CONSTEXPR_STATIC_INLINE uint16_t value = log2::value; }; template -NBL_CONSTEXPR uint64_t find_lsb_v = find_lsb::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t find_lsb_v = find_lsb::value; } } } diff --git a/include/nbl/builtin/hlsl/numbers.hlsl b/include/nbl/builtin/hlsl/numbers.hlsl index 6671a44756..4594596590 100644 --- a/include/nbl/builtin/hlsl/numbers.hlsl +++ b/include/nbl/builtin/hlsl/numbers.hlsl @@ -11,33 +11,33 @@ namespace numbers { template -NBL_CONSTEXPR float_t e = float_t(2.718281828459045); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t e = float_t(2.718281828459045); template -NBL_CONSTEXPR float_t log2e = float_t(1.4426950408889634); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log2e = float_t(1.4426950408889634); template -NBL_CONSTEXPR float_t log10e = float_t(0.4342944819032518); 
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log10e = float_t(0.4342944819032518); template -NBL_CONSTEXPR float_t pi = float_t(3.141592653589793); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t pi = float_t(3.141592653589793); template -NBL_CONSTEXPR float_t inv_pi = float_t(0.3183098861837907); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_pi = float_t(0.3183098861837907); template -NBL_CONSTEXPR float_t inv_sqrtpi = float_t(0.5641895835477563); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrtpi = float_t(0.5641895835477563); template -NBL_CONSTEXPR float_t ln2 = float_t(0.6931471805599453); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln2 = float_t(0.6931471805599453); template -NBL_CONSTEXPR float_t inv_ln2 = float_t(1.44269504088896); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_ln2 = float_t(1.44269504088896); template -NBL_CONSTEXPR float_t ln10 = float_t(2.302585092994046); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln10 = float_t(2.302585092994046); template -NBL_CONSTEXPR float_t sqrt2 = float_t(1.4142135623730951); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt2 = float_t(1.4142135623730951); template -NBL_CONSTEXPR float_t sqrt3 = float_t(1.7320508075688772); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt3 = float_t(1.7320508075688772); template -NBL_CONSTEXPR float_t inv_sqrt3 = float_t(0.5773502691896257); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrt3 = float_t(0.5773502691896257); template -NBL_CONSTEXPR float_t egamma = float_t(0.5772156649015329); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t egamma = float_t(0.5772156649015329); template -NBL_CONSTEXPR float_t phi = float_t(1.618033988749895); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t phi = float_t(1.618033988749895); } } diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..2dffa40a2d --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +template +using portable_uint64_t = emulated_uint64_t; +template +using portable_int64_t = emulated_int64_t; +#else +template +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +template +using portable_uint64_t = uint64_t; +template +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -36,19 +37,53 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = 
portable_vector_t4 >; + +//Int +template +using portable_int64_t2 = portable_vector_t2 >; +template +using portable_int64_t3 = portable_vector_t3 >; +template +using portable_int64_t4 = portable_vector_t4 >; #else +// Float template using portable_float64_t2 = portable_vector_t2; template using portable_float64_t3 = portable_vector_t3; template using portable_float64_t4 = portable_vector_t4; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2; +template +using portable_uint64_t3 = portable_vector_t3; +template +using portable_uint64_t4 = portable_vector_t4; + +// Int +template +using portable_int64_t2 = portable_vector_t2; +template +using portable_int64_t3 = portable_vector_t3; +template +using portable_int64_t4 = portable_vector_t4; #endif } diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 593e267a26..9413bcee98 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -69,12 +69,9 @@ struct CascadeAccumulator // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp void addSample(uint32_t sampleCount, input_sample_type _sample) { - const float32_t2 unpackedParams = hlsl::unpackHalf2x16(splattingParameters.packedLog2); - const cascade_layer_scalar_type log2Start = unpackedParams[0]; - const cascade_layer_scalar_type log2Base = unpackedParams[1]; const cascade_layer_scalar_type luma = getLuma(_sample); const cascade_layer_scalar_type log2Luma = log2(luma); - const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base; + const cascade_layer_scalar_type cascade = log2Luma * splattingParameters.rcpLog2Base - splattingParameters.baseRootOfStart; const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1); // c<=0 -> 0, c>=Count-1 -> Count-1 uint32_t lowerCascadeIndex = floor(cascade); @@ -85,7 +82,7 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) - lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); + lowerCascadeWeight = splattingParameters.lastCascadeLuma / luma; accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index c549d83be6..a3a3520415 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" namespace nbl { @@ -12,10 +13,22 @@ namespace rwmc struct SplattingParameters { - // float16_t log2Start; 0 - // float16_t log2Base; 1 - // pack as Half2x16 - int32_t packedLog2; + using scalar_t = float; + + static SplattingParameters create(const scalar_t base, const scalar_t start, const uint32_t cascadeCount) + { + SplattingParameters retval; + const scalar_t log2Base = hlsl::log2(base); + const scalar_t log2Start = hlsl::log2(start); + retval.lastCascadeLuma = hlsl::exp2(log2Start + log2Base * (cascadeCount - 1)); + retval.rcpLog2Base = scalar_t(1.0) / log2Base; + retval.baseRootOfStart = log2Start * retval.rcpLog2Base; + return retval; + } + + scalar_t lastCascadeLuma; + scalar_t baseRootOfStart; + scalar_t rcpLog2Base; }; } 
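The CascadeAccumulator hunk earlier relies on the identity behind these precomputed constants: log2(luma)/log2(base) - log2(start)/log2(base) collapses to one multiply and one subtract, and the super-bright-sample weight exp2(log2Start + log2Base*(CascadeCount-1) - log2Luma) is just lastCascadeLuma/luma. A small standalone C++ check of that algebra (parameter values picked arbitrarily):

    #include <cassert>
    #include <cmath>

    // Mirrors SplattingParameters::create() from the hunk above.
    struct Params { float lastCascadeLuma, baseRootOfStart, rcpLog2Base; };

    static Params create(float base, float start, unsigned cascadeCount)
    {
        const float log2Base  = std::log2(base);
        const float log2Start = std::log2(start);
        Params p;
        p.lastCascadeLuma = std::exp2(log2Start + log2Base * float(cascadeCount - 1u));
        p.rcpLog2Base     = 1.0f / log2Base;
        p.baseRootOfStart = log2Start * p.rcpLog2Base;
        return p;
    }

    int main()
    {
        const Params p = create(8.0f, 1.0f, 6u);
        const float luma = 512.0f; // = start * base^3, so it lands exactly on cascade 3
        const float cascade = std::log2(luma) * p.rcpLog2Base - p.baseRootOfStart;
        assert(std::fabs(cascade - 3.0f) < 1e-5f);
        // old weight exp2(log2Start + log2Base*(Count-1) - log2Luma) == new lastCascadeLuma/luma
        assert(std::fabs(p.lastCascadeLuma / luma - std::exp2(0.0f + 3.0f * 5.0f - 9.0f)) < 1e-3f);
    }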
diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index d8f777d277..906cad512b 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -41,34 +41,6 @@ NBL_CONCEPT_END( template NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase && concepts::accessors::LoadableImage; -template -struct ResolveAccessorAdaptor -{ - using output_scalar_type = OutputScalar; - using output_type = vector; - NBL_CONSTEXPR int32_t image_dimension = 2; - - RWTexture2DArray cascade; - - float32_t calcLuma(NBL_REF_ARG(float32_t3) col) - { - return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); - } - - template - output_type get(vector uv, uint16_t layer) - { - uint32_t imgWidth, imgHeight, layers; - cascade.GetDimensions(imgWidth, imgHeight, layers); - int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight); - - if (any(uv < int16_t2(0, 0)) || any(uv > cascadeImageDimension)) - return vector(0, 0, 0, 0); - - return cascade.Load(int32_t3(uv, int32_t(layer))); - } -}; - template && ResolveAccessor) struct Resolver { diff --git a/include/nbl/builtin/hlsl/sampling/basic.hlsl b/include/nbl/builtin/hlsl/sampling/basic.hlsl index d0738dd930..9c575a22ce 100644 --- a/include/nbl/builtin/hlsl/sampling/basic.hlsl +++ b/include/nbl/builtin/hlsl/sampling/basic.hlsl @@ -19,14 +19,14 @@ template) struct PartitionRandVariable { using floating_point_type = T; - using uint_type = typename unsigned_integer_of_size::type; + using uint_type = unsigned_integer_of_size_t; - bool operator()(floating_point_type leftProb, NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) + bool operator()(NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) { - const floating_point_type NEXT_ULP_AFTER_UNITY = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); - const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; + const floating_point_type NextULPAfterUnity = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); + const bool pickRight = xi >= leftProb * NextULPAfterUnity; - // This is all 100% correct taking into account the above NEXT_ULP_AFTER_UNITY + // This is all 100% correct taking into account the above NextULPAfterUnity xi -= pickRight ? leftProb : floating_point_type(0.0); rcpChoiceProb = floating_point_type(1.0) / (pickRight ? 
(floating_point_type(1.0) - leftProb) : leftProb); @@ -34,6 +34,8 @@ struct PartitionRandVariable return pickRight; } + + floating_point_type leftProb; }; diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 746713e4c4..a74869990f 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -24,7 +24,7 @@ struct Bilinear using vector3_type = vector; using vector4_type = vector; - static Bilinear create(NBL_CONST_REF_ARG(vector4_type) bilinearCoeffs) + static Bilinear create(const vector4_type bilinearCoeffs) { Bilinear retval; retval.bilinearCoeffs = bilinearCoeffs; @@ -32,22 +32,22 @@ struct Bilinear return retval; } - vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) _u) + vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type _u) { - vector2_type u = _u; + vector2_type u; Linear lineary = Linear::create(twiceAreasUnderXCurve); - u.y = lineary.generate(u.y); + u.y = lineary.generate(_u.y); const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], u.y), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], u.y)); Linear linearx = Linear::create(ySliceEndPoints); - u.x = linearx.generate(u.x); + u.x = linearx.generate(_u.x); rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x)); return u; } - scalar_type pdf(NBL_CONST_REF_ARG(vector2_type) u) + scalar_type pdf(const vector2_type u) { return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); } diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index 93cea06ee0..9474642f4c 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -21,7 +21,7 @@ struct BoxMullerTransform using scalar_type = T; using vector2_type = vector; - vector2_type operator()(vector2_type xi) + vector2_type operator()(const vector2_type xi) { scalar_type sinPhi, cosPhi; math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index 1a5c96b6df..841fc9ff2d 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl +++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -17,7 +17,7 @@ namespace sampling { template -vector concentricMapping(vector _u) +vector concentricMapping(const vector _u) { //map [0;1]^2 to [-1;1]^2 vector u = 2.0f * _u - hlsl::promote >(1.0); diff --git a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl index 9f95bf2ee5..ddbb961300 100644 --- a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl @@ -22,26 +22,26 @@ struct ProjectedHemisphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { vector_t2 p = concentricMapping(_sample * T(0.99999) + T(0.000005)); T z = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - p.x * p.x 
- p.y * p.y)); return vector_t3(p.x, p.y, z); } - static T pdf(T L_z) + static T pdf(const T L_z) { return L_z * numbers::inv_pi; } template > - static sampling::quotient_and_pdf quotient_and_pdf(T L) + static sampling::quotient_and_pdf quotient_and_pdf(const T L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L)); } template > - static sampling::quotient_and_pdf quotient_and_pdf(vector_t3 L) + static sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); } @@ -77,7 +77,7 @@ struct ProjectedSphere } template > - static sampling::quotient_and_pdf quotient_and_pdf(vector_t3 L) + static sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); } diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index ddd7bcf8df..6c3cf1fad9 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -21,7 +21,7 @@ struct Linear using scalar_type = T; using vector2_type = vector; - static Linear create(NBL_CONST_REF_ARG(vector2_type) linearCoeffs) // start and end importance values (start, end) + static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end) { Linear retval; retval.linearCoeffStart = linearCoeffs[0]; @@ -32,7 +32,7 @@ struct Linear return retval; } - scalar_type generate(scalar_type u) + scalar_type generate(const scalar_type u) { return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); } diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index f2f29ed12b..e60fe28423 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -33,23 +33,23 @@ struct ProjectedSphericalTriangle return retval; } - vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF) + vector4_type computeBilinearPatch(const vector3_type receiverNormal, bool isBSDF) { const scalar_type minimumProjSolidAngle = 0.0; matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); - const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), (vector3_type)minimumProjSolidAngle); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); return bxdfPdfAtVertex.yyxz; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) _u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool isBSDF, const vector2_type _u) { vector2_type u; // pre-warp according to proj solid angle approximation vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); Bilinear bilinear = 
Bilinear::create(patch); - u = bilinear.generate(rcpPdf, u); + u = bilinear.generate(rcpPdf, _u); // now warp the points onto a spherical triangle const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); @@ -58,7 +58,7 @@ struct ProjectedSphericalTriangle return L; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; @@ -66,7 +66,7 @@ struct ProjectedSphericalTriangle return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); } - scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) { scalar_type pdf; const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); @@ -76,7 +76,7 @@ struct ProjectedSphericalTriangle return pdf * bilinear.pdf(u); } - scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) { scalar_type pdf; const vector2_type u = sphtri.generateInverse(pdf, L); diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl new file mode 100644 index 0000000000..8929609c34 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -0,0 +1,309 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
new file mode 100644
index 0000000000..8929609c34
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -0,0 +1,309 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_
+
+#include "nbl/builtin/hlsl/concepts/vector.hlsl"
+#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl"
+#include "nbl/builtin/hlsl/random/pcg.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+template
+struct QuantizedSequence;
+
+
+namespace impl
+{
+template
+struct unorm_constant;
+template<>
+struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; };
+template<>
+struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; };
+template<>
+struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; };
+template<>
+struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; };
+template<>
+struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; };
+template<>
+struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; };
+template<>
+struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
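+// The values above are IEEE-754 single-precision bit patterns of (roughly)
+// 1/(2^bits - 1), i.e. the usual UNORM reconstruction scale for a
+// `bits`-wide integer. Worked example for bits = 16:
+//   0x37800080 == 2^-16 * (1 + 128 * 2^-23) == 1.5259022e-05 ~= 1/65535
+// so 65535 * bit_cast<float32_t>(0x37800080u) reconstructs 1.0.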
+
+template
+struct decode_helper;
+
+template
+struct decode_helper
+{
+    using scalar_type = typename vector_traits::scalar_type;
+    using fp_type = typename float_of_size::type;
+    using uvec_type = vector;
+    using sequence_type = QuantizedSequence;
+    using return_type = vector;
+    NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value;
+
+    static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
+    {
+        uvec_type seqVal;
+        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
+            seqVal[i] = val.get(i) ^ scrambleKey[i];
+        return return_type(seqVal) * bit_cast(UNormConstant);
+    }
+};
+template
+struct decode_helper
+{
+    using scalar_type = typename vector_traits::scalar_type;
+    using fp_type = typename float_of_size::type;
+    using uvec_type = vector;
+    using sequence_type = QuantizedSequence;
+    using sequence_store_type = typename sequence_type::store_type;
+    using sequence_scalar_type = typename vector_traits::scalar_type;
+    using return_type = vector;
+    NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant;
+
+    static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
+    {
+        sequence_type scramble;
+        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
+            scramble.set(i, scrambleKey[i]);
+        scramble.data ^= val.data;
+
+        uvec_type seqVal;
+        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
+            seqVal[i] = scramble.get(i);
+        return return_type(seqVal) * bit_cast(UNormConstant);
+    }
+};
+}
+
+template
+vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey)
+{
+    return impl::decode_helper::__call(val, scrambleKey);
+}
+
+#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4
+
+// all Dim=1
+template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT)
+struct QuantizedSequence
+{
+    using store_type = T;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value;
+
+    store_type get(const uint16_t idx) { assert(idx >= 0 && idx < 1); return data; }
+    void set(const uint16_t idx, const store_type value) { assert(idx >= 0 && idx < 1); data = value; }
+
+    store_type data;
+};
+
+// uint16_t, uint32_t; Dim=2,3,4
+template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5)
+struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) >
+{
+    using store_type = T;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value;
+
+    store_type get(const uint16_t idx)
+    {
+        assert(idx >= 0 && idx < Dim);
+        return (data >> (BitsPerComponent * idx)) & Mask;
+    }
+
+    void set(const uint16_t idx, const store_type value)
+    {
+        assert(idx >= 0 && idx < Dim);
+        const uint16_t bits = (BitsPerComponent * idx);
+        data &= ~(Mask << bits);
+        data |= ((value >> DiscardBits) & Mask) << bits;
+    }
+
+    store_type data;
+};
+
+// Dim 2,3,4 matches vector dim
+template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5)
+struct QuantizedSequence::Dimension == Dim && Dim > 1 && Dim < 5) >
+{
+    using store_type = T;
+    using scalar_type = typename vector_traits::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
+
+    scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < Dim); return data[idx]; }
+    void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < Dim); data[idx] = value; }
+
+    store_type data;
+};
+
+// uint16_t2, uint32_t2; Dim=3
+template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 3)
+struct QuantizedSequence::Dimension == 2 && Dim == 3) >
+{
+    using store_type = T;
+    using scalar_type = typename vector_traits::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u);
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value;
+
+    scalar_type get(const uint16_t idx)
+    {
+        assert(idx >= 0 && idx < 3);
+        if (idx < 2)
+        {
+            return data[idx] & Mask;
+        }
+        else
+        {
+            const scalar_type zbits = scalar_type(DiscardBits);
+            const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u);
+            scalar_type z = (data[0] >> BitsPerComponent) & zmask;
+            z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits;
+            return z;
+        }
+    }
+
+    void set(const uint16_t idx, const scalar_type value)
+    {
+        assert(idx >= 0 && idx < 3);
+        if (idx < 2)
+        {
+            const scalar_type trunc_val = value >> DiscardBits;
+            data[idx] &= ~Mask;
+            data[idx] |= trunc_val & Mask;
+        }
+        else
+        {
+            const scalar_type zbits = scalar_type(DiscardBits);
+            const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u);
+            const scalar_type trunc_val = value >> DiscardBits;
+            data[0] &= Mask;
+            data[1] &= Mask;
+            data[0] |= (trunc_val & zmask) << BitsPerComponent;
+            data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent;
+        }
+    }
+
+    store_type data;
+};
+
+// uint16_t2, uint32_t2; Dim=4
+template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 4)
+struct QuantizedSequence::Dimension == 2 && Dim == 4) >
+{
+    using store_type = T;
+    using scalar_type = typename vector_traits::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value;
+
+    scalar_type get(const uint16_t idx)
+    {
+        assert(idx >= 0 && idx < 4);
+        const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u);
+        return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask;
+    }
+
+    void set(const uint16_t idx, const scalar_type value)
+    {
+        assert(idx >= 0 && idx < 4);
+        const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u);
+        const uint16_t odd = idx & uint16_t(1u);
+        data[i] &= hlsl::mix(~Mask, Mask, bool(odd));
+        data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd);
+    }
+
+    store_type data;
+};
+
+// uint16_t4, uint32_t4; Dim=2
+template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 2)
+struct QuantizedSequence::Dimension == 4 && Dim == 2) >
+{
+    using store_type = T;
+    using scalar_type = typename vector_traits::scalar_type;
+    using base_type = vector;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
+
+    base_type get(const uint16_t idx)
+    {
+        assert(idx >= 0 && idx < 2);
+        base_type a;
+        a[0] = data[uint16_t(2u) * idx];
+        a[1] = data[uint16_t(2u) * idx + 1];
+        return a;
+    }
+
+    void set(const uint16_t idx, const base_type value)
+    {
+        assert(idx >= 0 && idx < 2);
+        data[uint16_t(2u) * idx] = value[0];
+        data[uint16_t(2u) * idx + 1] = value[1];
+    }
+
+    store_type data;
+};
+
+// uint16_t4, uint32_t4; Dim=3
+// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y
+// uint32_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y
+template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 3)
+struct QuantizedSequence::Dimension == 4 && Dim == 3) >
+{
+    using store_type = T;
+    using scalar_type = typename vector_traits::scalar_type;
+    using base_type = vector;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v;
+    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u);
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
+
+    base_type get(const uint16_t idx)
+    {
+        assert(idx >= 0 && idx < 3);
+        base_type a;
+        a[0] = data[idx];
+        a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask;
+        return a;
+    }
+
+    void set(const uint16_t idx, const base_type value)
+    {
+        assert(idx >= 0 && idx < 3);
+        data[idx] = value[0];
+        data[3] &= ~(Mask << (LeftoverBitsPerComponent * idx));
+        data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx);
+    }
+
+    store_type data;
+};
+
+#undef SEQUENCE_SPECIALIZATION_CONCEPT
+
+}
+
+}
+}
+
+#endif
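Taken together, the new header packs low-bit-count sequence samples and rescales them to floats on decode. A usage sketch in Nabla's C++-compatible style (the template argument order QuantizedSequence<T, Dim> and the deduced decode() call are assumptions, since the template heads were mangled in the hunk above):

    #include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl"

    using namespace nbl::hlsl;

    // 3D sample packed into a single uint32_t, 10 bits per component
    // (the scalar-store, Dim=3 specialization above).
    float32_t3 decodeQuantized(sampling::QuantizedSequence<uint32_t, 3> q, uint32_t3 scrambleKey)
    {
        // decode() XOR-scrambles the stored bits against the key and
        // multiplies by UNormConstant to map them into [0, 1]
        return sampling::decode(q, scrambleKey);
    }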
diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
index f5c19fb864..f9e3d2f7ae 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
@@ -32,7 +32,7 @@ struct SphericalRectangle
         return retval;
     }
 
-    vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S)
+    vector2_type generate(const vector2_type rectangleExtents, const vector2_type uv, NBL_REF_ARG(scalar_type) S)
     {
         const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x);
         const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z);
diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
index 0c86b69793..5770403cd2 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
@@ -33,7 +33,7 @@ struct SphericalTriangle
     }
 
     // WARNING: can and will return NAN if one or three of the triangle edges are near zero length
-    vector3_type generate(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector2_type) u)
+    vector3_type generate(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u)
     {
         scalar_type negSinSubSolidAngle,negCosSubSolidAngle;
         math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle);
@@ -51,7 +51,7 @@ struct SphericalTriangle
     {
         const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]);
         if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f)
-            C_s += math::quaternion_t::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC);
+            C_s += math::quaternion::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC);
     }
 
     vector3_type retval = tri.vertex1;
@@ -61,12 +61,12 @@ struct SphericalTriangle
     {
         const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f);
         if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f)
-            retval += math::quaternion_t::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s);
+            retval += math::quaternion::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s);
     }
 
     return retval;
 }
 
-    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u)
+    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u)
     {
         scalar_type cos_a, cos_c, csc_b, csc_c;
         vector3_type cos_vertices, sin_vertices;
 
@@ -76,7 +76,7 @@ struct SphericalTriangle
         return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u);
     }
 
-    vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) L)
+    vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L)
     {
         pdf = 1.0 / solidAngle;
@@ -102,7 +102,7 @@ struct SphericalTriangle
         return vector2_type(u,v);
     }
 
-    vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(vector3_type) L)
+    vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L)
     {
         scalar_type cos_a, cos_c, csc_b, csc_c;
         vector3_type cos_vertices, sin_vertices;
diff --git a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl
index df4100db9b..5fc3bc7a0b 100644
--- a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl
@@ -23,7 +23,7 @@ struct UniformHemisphere
     using vector_t2 = vector;
     using vector_t3 = vector;
 
-    static vector_t3 generate(vector_t2 _sample)
+    static vector_t3 generate(const vector_t2 _sample)
     {
         T z = _sample.x;
         T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z));
@@ -49,7 +49,7 @@ struct UniformSphere
     using vector_t2 = vector;
     using vector_t3 = vector;
 
-    static vector_t3 generate(vector_t2 _sample)
+    static vector_t3 generate(const vector_t2 _sample)
     {
         T z = T(1.0) - T(2.0) * _sample.x;
         T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z));
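The archimedean mapping in UniformSphere::generate is easy to sanity-check standalone: z is uniform on [-1, 1] and (presumably, the azimuth handling sits below the hunk) phi = 2*pi*u.y, which together cover the sphere uniformly. A plain C++ sketch, not Nabla code:

    #include <cassert>
    #include <cmath>

    // Maps two uniform [0,1) numbers to a point on the unit sphere; the z/r
    // part mirrors the hunk above exactly, the azimuth is assumed.
    void uniformSphere(const double u0, const double u1, double out[3])
    {
        const double z = 1.0 - 2.0 * u0; // uniform in [-1, 1]
        const double r = std::sqrt(std::fmax(0.0, 1.0 - z * z));
        const double phi = 2.0 * 3.14159265358979323846 * u1;
        out[0] = r * std::cos(phi);
        out[1] = r * std::sin(phi);
        out[2] = z;
    }

    int main()
    {
        double v[3];
        uniformSphere(0.25, 0.125, v);
        // always lands on the unit sphere
        const double len2 = v[0] * v[0] + v[1] * v[1] + v[2] * v[2];
        assert(std::abs(len2 - 1.0) < 1e-12);
    }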
diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl
index daeb3175c3..11442bef7c 100644
--- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl
@@ -25,14 +25,14 @@ struct SphericalRectangle
     using vector4_type = vector;
     using matrix3x3_type = matrix;
 
-    static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis)
+    static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const matrix3x3_type basis)
     {
         SphericalRectangle retval;
         retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer);
         return retval;
     }
 
-    static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N)
+    static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const vector3_type T, const vector3_type B, const vector3_type N)
     {
         SphericalRectangle retval;
         matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, N));
@@ -40,7 +40,7 @@ struct SphericalRectangle
         return retval;
     }
 
-    scalar_type solidAngleOfRectangle(NBL_CONST_REF_ARG(vector) rectangleExtents)
+    scalar_type solidAngleOfRectangle(const vector rectangleExtents)
     {
         const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x);
         const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z);
diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl
index f0b184d057..f574b106ce 100644
--- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl
@@ -25,7 +25,7 @@ struct SphericalTriangle
     using scalar_type = T;
     using vector3_type = vector;
 
-    static SphericalTriangle create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, NBL_CONST_REF_ARG(vector3_type) origin)
+    static SphericalTriangle create(const vector3_type vertex0, const vector3_type vertex1, const vector3_type vertex2, const vector3_type origin)
     {
         SphericalTriangle retval;
         retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin);
@@ -72,7 +72,7 @@ struct SphericalTriangle
         return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5);
     }
 
-    scalar_type projectedSolidAngleOfTriangle(NBL_CONST_REF_ARG(vector3_type) receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices)
+    scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices)
     {
         if (pyramidAngles())
             return 0.f;
@@ -102,29 +102,6 @@ struct SphericalTriangle
     vector3_type csc_sides;
 };
 
-namespace util
-{
-    // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3.
-    template
-    vector compInternalAngle(NBL_CONST_REF_ARG(vector) e0, NBL_CONST_REF_ARG(vector) e1, NBL_CONST_REF_ARG(vector) e2)
-    {
-        // Calculate this triangle's weight for each of its three m_vertices
-        // start by calculating the lengths of its sides
-        const float_t a = hlsl::dot(e0, e0);
-        const float_t asqrt = hlsl::sqrt(a);
-        const float_t b = hlsl::dot(e1, e1);
-        const float_t bsqrt = hlsl::sqrt(b);
-        const float_t c = hlsl::dot(e2, e2);
-        const float_t csqrt = hlsl::sqrt(c);
-
-        const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt));
-        const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt));
-        const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1);
-        // use them to find the angle at each vertex
-        return vector(angle0, angle1, angle2);
-    }
-}
-
 }
 }
 }
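As an aside, solidAngleOfTriangle is a spherical-excess quantity, so any known triangle can serve as a sanity check via Girard's theorem, Omega = alpha + beta + gamma - pi. A plain C++ check for the unit-axis octant triangle (not Nabla code):

    #include <cassert>
    #include <cmath>

    int main()
    {
        // Spherical triangle with vertices at +X, +Y, +Z on the unit sphere:
        // every internal angle is pi/2, so Girard gives
        //   solidAngle = 3 * (pi/2) - pi = pi/2,
        // exactly one eighth of the full 4*pi sphere.
        const double pi = 3.14159265358979323846;
        const double solidAngle = 3.0 * (pi / 2.0) - pi;
        assert(std::abs(solidAngle - 4.0 * pi / 8.0) < 1e-12);
    }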
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
new file mode 100644
index 0000000000..b2f4170f70
--- /dev/null
+++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
@@ -0,0 +1,46 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_
+
+#include
+#include
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace shapes
+{
+
+namespace util
+{
+// Use this convention: e_i = v_{i+2} - v_{i+1}, with vertex indices taken modulo 3.
+template
+vector anglesFromTriangleEdges(const vector e0, const vector e1, const vector e2)
+{
+    // Calculate this triangle's weight for each of its three vertices
+    // start by calculating the lengths of its sides
+    const float_t a = hlsl::dot(e0, e0);
+    const float_t asqrt = hlsl::sqrt(a);
+    const float_t b = hlsl::dot(e1, e1);
+    const float_t bsqrt = hlsl::sqrt(b);
+    const float_t c = hlsl::dot(e2, e2);
+    const float_t csqrt = hlsl::sqrt(c);
+
+    const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt));
+    const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt));
+    const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1);
+    // use them to find the angle at each vertex
+    return vector(angle0, angle1, angle2);
+}
+}
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl
index a7614469dd..9190a4ec73 100644
--- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl
+++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl
@@ -4,6 +4,8 @@
 #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_
 #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_
 
+#include
+
 #ifdef __HLSL_VERSION
 // TODO: AnastZIuk fix public search paths so we don't choke
 #include "spirv/unified1/spirv.hpp"
@@ -11,7 +13,6 @@
 #include
 #include
 #include
-#include
 
 namespace nbl
 {
@@ -115,7 +116,12 @@ NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer::value;
 
 //! General Operations
-
+
+//! Miscellaneous Instructions
+template
+[[vk::ext_instruction(spv::OpUndef)]]
+T undef();
+
 //
 template
 [[vk::ext_instruction(spv::OpAccessChain)]]
@@ -382,7 +388,8 @@ template && (!conc
 [[vk::ext_instruction(spv::OpSelect)]]
 T select(U a, T x, T y);
 
-NBL_VALID_EXPRESSION(SelectIsCallable, (T)(U), select(experimental::declval(),experimental::declval(),experimental::declval()));
+// need to use `spirv::` even in the namespace because it matches the HLSL intrinsic which is not namespaced at all, and will happily match anything
+NBL_VALID_EXPRESSION(SelectIsCallable, (T)(U), spirv::select(experimental::declval(),experimental::declval(),experimental::declval()));
 
 }
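The law-of-cosines identities inside anglesFromTriangleEdges (added above in shapes/triangle.hlsl) can be verified with a 3-4-5 right triangle; a standalone C++ sketch using squared side lengths a, b, c exactly as the helper does:

    #include <cassert>
    #include <cmath>

    int main()
    {
        // Squared side lengths of a 3-4-5 right triangle, in the helper's naming:
        const double a = 3.0 * 3.0, b = 4.0 * 4.0, c = 5.0 * 5.0;
        const double pi = 3.14159265358979323846;

        const double angle0 = std::acos((b + c - a) / (2.0 * std::sqrt(b) * std::sqrt(c)));  // ~36.87 deg
        const double angle1 = std::acos((-b + c + a) / (2.0 * std::sqrt(a) * std::sqrt(c))); // ~53.13 deg
        const double angle2 = pi - (angle0 + angle1);                                        // the right angle

        assert(std::abs(angle2 - pi / 2.0) < 1e-12);
    }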
diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl
index a9701619dd..257a753129 100644
--- a/include/nbl/builtin/hlsl/type_traits.hlsl
+++ b/include/nbl/builtin/hlsl/type_traits.hlsl
@@ -636,28 +636,39 @@ template
 using conditional_t = typename conditional::type;
 
-// Template variables
+// Template Variables
+template
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T integral_constant_v = integral_constant::value;
 template
-NBL_CONSTEXPR bool is_same_v = is_same::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_same_v = is_same::value;
 template
-NBL_CONSTEXPR bool is_unsigned_v = is_unsigned::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_unsigned_v = is_unsigned::value;
 template
-NBL_CONSTEXPR bool is_integral_v = is_integral::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_integral_v = is_integral::value;
 template
-NBL_CONSTEXPR bool is_floating_point_v = is_floating_point::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_floating_point_v = is_floating_point::value;
 template
-NBL_CONSTEXPR bool is_signed_v = is_signed::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_signed_v = is_signed::value;
 template
-NBL_CONSTEXPR bool is_scalar_v = is_scalar::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_scalar_v = is_scalar::value;
 template
-NBL_CONSTEXPR uint64_t size_of_v = size_of::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t size_of_v = size_of::value;
 template
-NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t alignment_of_v = alignment_of::value;
+template
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_fundamental_v = is_fundamental::value;
+
 
 // Overlapping definitions
 template
 using make_void_t = typename make_void::type;
 
+template
+using make_signed_t = typename make_signed::type;
+
+template
+using make_unsigned_t = typename make_unsigned::type;
+
 template
 struct conditional_value
 {
@@ -674,7 +685,7 @@ template
 struct is_vector > : bool_constant {};
 
 template
-NBL_CONSTEXPR bool is_vector_v = is_vector::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_vector_v = is_vector::value;
 
 #ifndef __HLSL_VERSION
 template
@@ -685,7 +696,7 @@ template
 struct is_matrix > : bool_constant {};
 
 template
-NBL_CONSTEXPR bool is_matrix_v = is_matrix::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_matrix_v = is_matrix::value;
 
 template
 
@@ -721,16 +732,16 @@ struct extent : integral_constant::value> {};
 template
 struct extent : integral_constant::value> {};
 
-template
-struct extent, 0> : integral_constant {};
+template
+struct extent, I> : extent {};
 
 template
-struct extent, I> : integral_constant::value> {};
+struct extent, I> : extent {};
 
 // Template Variables
 template
-NBL_CONSTEXPR uint64_t extent_v = extent::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t extent_v = extent::value;
 
 template::value>
 
@@ -844,15 +855,6 @@ struct float_of_size<8>
 template
 using float_of_size_t = typename float_of_size::type;
 
-template
-struct extent, 0> : integral_constant {};
-
-template
-struct extent, 0> : integral_constant {};
-
-template
-struct extent, 1> : integral_constant {};
-
 }
 }
diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
index 03ccd64d4e..22c93ce193 100644
--- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
+++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
@@ -225,7 +225,7 @@ template
 struct is_configuration > : bool_constant {};
 
 template
-NBL_CONSTEXPR bool is_configuration_v = is_configuration::value;
+NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_configuration_v = is_configuration::value;
 
 }
 }
diff --git a/include/nbl/core/sampling/RandomSampler.h b/include/nbl/core/sampling/RandomSampler.h
index 39832dc8f1..b692ef5e08 100644
--- a/include/nbl/core/sampling/RandomSampler.h
+++ b/include/nbl/core/sampling/RandomSampler.h
@@ -11,8 +11,8 @@
 namespace nbl::core
 {
 
-class RandomSampler
-{
+    class RandomSampler
+    {
     public:
        RandomSampler(uint32_t _seed)
        {
@@ -25,9 +25,24 @@
            return mersenneTwister();
        }
 
+        // Returns a float in [0, 1)
+        inline float nextFloat()
+        {
+            // use only the top 24 bits: the int-to-float conversion is then
+            // exact, so the product can never round up to 1.0f
+            constexpr float norm = 1.0f / 16777216.0f; // 1 / 2^24
+            return (mersenneTwister() >> 8) * norm;
+        }
+
+        // Returns a float in [min, max)
+        inline float nextFloat(float min, float max)
+        {
+            return min + nextFloat() * (max - min);
+        }
+
     protected:
        std::mt19937 mersenneTwister;
-};
+    };
 
 }
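A quick usage sketch for the new RandomSampler helpers (plain C++; the jitter use case is illustrative):

    #include "nbl/core/sampling/RandomSampler.h"

    int main()
    {
        nbl::core::RandomSampler sampler(0xdeadbeefu);

        const float u = sampler.nextFloat();                  // canonical [0, 1) draw
        const float jitter = sampler.nextFloat(-0.5f, 0.5f);  // e.g. subpixel offset
        (void)u; (void)jitter;
    }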
diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h
new file mode 100644
index 0000000000..92888704c0
--- /dev/null
+++ b/include/nbl/system/to_string.h
@@ -0,0 +1,52 @@
+#ifndef _NBL_SYSTEM_TO_STRING_INCLUDED_
+#define _NBL_SYSTEM_TO_STRING_INCLUDED_
+
+#include <sstream>
+#include <string>
+
+namespace nbl
+{
+namespace system
+{
+namespace impl
+{
+
+template
+struct to_string_helper
+{
+    static std::string __call(const T& value)
+    {
+        return std::to_string(value);
+    }
+};
+
+template
+struct to_string_helper>
+{
+    static std::string __call(const hlsl::vector& value)
+    {
+        std::stringstream output;
+        output << "{ ";
+        for (int i = 0; i < N; ++i)
+        {
+            output << to_string_helper::__call(value[i]);
+
+            if (i < N - 1)
+                output << ", ";
+        }
+        output << " }";
+
+        return output.str();
+    }
+};
+
+}
+
+template
+std::string to_string(T value)
+{
+    return impl::to_string_helper::__call(value);
+}
+}
+}
+
+#endif
\ No newline at end of file
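Expected behavior of the new helper, sketched in plain C++ (the vector construction and the cpp_compat include are assumptions; the exact hlsl::vector spelling in the specialization was mangled above):

    #include <cstdio>
    #include "nbl/builtin/hlsl/cpp_compat.hlsl" // assumed to provide nbl::hlsl::float32_t3
    #include "nbl/system/to_string.h"

    int main()
    {
        // scalars forward to std::to_string
        std::printf("%s\n", nbl::system::to_string(42).c_str());   // "42"
        // vectors print recursively as "{ x, y, z }"
        const nbl::hlsl::float32_t3 v(1.f, 2.f, 0.5f);
        std::printf("%s\n", nbl::system::to_string(v).c_str());    // "{ 1.000000, 2.000000, 0.500000 }"
    }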
diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp
index 306d2f60de..d36ecfa1cb 100644
--- a/src/nbl/asset/utils/CHLSLCompiler.cpp
+++ b/src/nbl/asset/utils/CHLSLCompiler.cpp
@@ -115,11 +115,11 @@ static bool fixup_spirv_target_ver(std::vector& arguments, system:
         const auto found = AllowedSuffices.find(suffix);
         if (found!=AllowedSuffices.end())
             return true;
-        logger.log("Compile flag error: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage");
+        logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage");
         return false;
     }
 
-    logger.log("Compile flag error: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING);
+    logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING);
     arguments.push_back(L"-fspv-target-env=vulkan1.3");
     return true;
 }
@@ -148,7 +148,7 @@ static void try_upgrade_hlsl_version(std::vector& arguments, syste
     }
     else
     {
-        logger.log("Compile flag error: Required compile flag not found -HV. Force enabling -HV 202x, as it is required by Nabla.", system::ILogger::ELL_WARNING);
+        logger.log("Compile flag warning: Required compile flag not found -HV. Force enabling -HV 202x, as it is required by Nabla.", system::ILogger::ELL_WARNING);
         arguments.push_back(L"-HV");
         arguments.push_back(L"202x");
     }
@@ -254,7 +254,7 @@ static void add_required_arguments_if_not_present(std::vector& arg
     {
         bool missing = set.find(required[j]) == set.end();
         if (missing) {
-            logger.log("Compile flag error: Required compile flag not found %ls. This flag will be force enabled, as it is required by Nabla.", system::ILogger::ELL_WARNING, required[j]);
+            logger.log("Compile flag warning: Required compile flag not found %ls. This flag will be force enabled, as it is required by Nabla.", system::ILogger::ELL_WARNING, required[j]);
             arguments.push_back(required[j]);
         }
     }
@@ -534,4 +534,4 @@ void CHLSLCompiler::insertIntoStart(std::string& code, std::ostringstream&& ins)
     code.insert(0u, ins.str());
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp
index 43413152a8..f8bc45a317 100644
--- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp
+++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp
@@ -5,7 +5,7 @@
 #include "CSmoothNormalGenerator.h"
 
 #include "nbl/core/declarations.h"
-#include "nbl/builtin/hlsl/shapes/spherical_triangle.hlsl"
+#include "nbl/builtin/hlsl/shapes/triangle.hlsl"
 
 #include
 
@@ -58,7 +58,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as
         const auto faceNormal = normalize(cross(v1 - v0, v2 - v0));
 
         //set data for m_vertices
-        const auto angleWages = hlsl::shapes::util::compInternalAngle(v2 - v1, v0 - v2, v1 - v2);
+        const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdges(v2 - v1, v0 - v2, v1 - v2);
 
         vertices.add({ i, 0, faceNormal * angleWages.x, v0});
         vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1});
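The angle-weighting scheme used here (Thuermer & Wuethrich style: each face contributes its normal scaled by the interior angle at the vertex, and the accumulated sum is normalized afterwards) is easy to state standalone; a hedged C++ sketch with made-up types, not the Nabla API:

    #include <cmath>

    struct Vec3 { float x, y, z; };

    Vec3 sub(Vec3 a, Vec3 b) { return { a.x - b.x, a.y - b.y, a.z - b.z }; }
    Vec3 scale(Vec3 a, float s) { return { a.x * s, a.y * s, a.z * s }; }
    float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

    // Interior angle at vertex v0 of triangle (v0, v1, v2), law of cosines.
    float angleAt(Vec3 v0, Vec3 v1, Vec3 v2)
    {
        const Vec3 e1 = sub(v1, v0), e2 = sub(v2, v0);
        return std::acos(dot(e1, e2) / std::sqrt(dot(e1, e1) * dot(e2, e2)));
    }

    // A face's contribution to the smooth normal at v0: the face normal
    // weighted by the interior angle there.
    Vec3 contribution(Vec3 faceNormal, Vec3 v0, Vec3 v1, Vec3 v2)
    {
        return scale(faceNormal, angleAt(v0, v1, v2));
    }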
"hlsl/shapes/aabb.hlsl") @@ -256,6 +261,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/basic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/bilinear.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quantized_sequence.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_triangle.hlsl") @@ -365,5 +371,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/Resolve.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/CascadeAccumulator.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/SplattingParameters.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/ResolveParameters.hlsl") +#morton codes +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") -ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") \ No newline at end of file +ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")