Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
f2ea51d
Morton code tests
Fletterio Mar 23, 2025
8f4e452
Morton codes creating properly
Fletterio Mar 25, 2025
0aedfd9
All tests passing, HLSL compiles fine!
Fletterio Mar 28, 2025
ea42d5b
Rename example
Fletterio Apr 1, 2025
2ba08a4
Add tests for AddCarry and SUbBorrow intrinsics
Fletterio Apr 1, 2025
f00bbf6
Disable intrinsic tests for uSUbBorrow for the time being, start copy…
Fletterio Apr 7, 2025
b2d87c3
Added extensive tests for Morton codes
Fletterio Apr 24, 2025
c68c336
Done with tests
Fletterio Apr 28, 2025
d906bb2
Merged master
Fletterio Apr 28, 2025
f05dec4
Clarifying comment for blocker issue
Fletterio Apr 28, 2025
b13fdc7
Merge branch 'master' into mortons
Nov 19, 2025
08c898d
Reindex mortons example from 12 to 73
Nov 27, 2025
7f8dd73
Global variable of hlsl to use NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR
Nov 28, 2025
43b8634
Add test for operator-
Dec 1, 2025
ba6641f
Use 1, 1, 1 workgroup dimension
Dec 1, 2025
e830c34
Enable previously failed test because of bug in glm
Dec 1, 2025
e35e61d
Example 73 to 15 and fix compile error
Dec 5, 2025
2e5642a
Remove example 73 mortons
Dec 8, 2025
197b46a
Enable second test set
Dec 9, 2025
6692311
Delete fillSecondTestValues
Dec 9, 2025
4287ed1
Fix morton test
Dec 10, 2025
6a7b003
Remove unnecessary code
Dec 10, 2025
f012a1a
Add some comment for the reason we have to CTester
Dec 10, 2025
f415e8c
Remove dummy code
Dec 10, 2025
8f72b9e
Fix compiler warning for shader compilation
Dec 11, 2025
3042409
Add back second test to first in commented form
Dec 12, 2025
7011ea0
Fix example 28 to use select instead of ternary_op
Dec 12, 2025
02eed2e
Fix example 28
Dec 12, 2025
30b4f52
prefix select with hlsl::
Dec 12, 2025
3e443b1
Add nbl prefix to hlsl::select
Dec 12, 2025
84eb1f7
Merge branch 'master' into mortons
Dec 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions 07_StagingAndMultipleQueues/app_resources/common.hlsl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#include "nbl/builtin/hlsl/cpp_compat.hlsl"

// NOTE(review): this hunk is a rendered diff (3 additions & 3 deletions), so both the old and the
// new spelling of each constant appear; the NBL_CONSTEXPR lines look like the removed side - confirm.
NBL_CONSTEXPR uint32_t WorkgroupSizeX = 16;
NBL_CONSTEXPR uint32_t WorkgroupSizeY = 16;
NBL_CONSTEXPR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY;
// Same three constants declared through NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR instead of NBL_CONSTEXPR.
// WorkgroupSize is the product of the X and Y extents (16*16).
NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeX = 16;
NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeY = 16;
NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY;

// Constant named for the number of frames in flight (3).
static const uint32_t FRAMES_IN_FLIGHT = 3u;

Expand Down
24 changes: 24 additions & 0 deletions 14_Mortons/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build script for the 14_Mortons example executable.

# Pull in the shared "common" CMake module; RES receives the result of the include and the
# configure step is aborted when it evaluates to false.
include(common RESULT_VARIABLE RES)
if(NOT RES)
message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Project-provided macro that declares the executable target; only the PCH target argument is non-empty.
# NOTE(review): presumably this is what defines EXECUTABLE_NAME used below - confirm against common.cmake.
nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")

# Optional: embed everything under app_resources/ into the binary as builtin resources.
if(NBL_EMBED_BUILTIN_RESOURCES)
# Name of the helper target that carries the embedded resource data.
set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
set(RESOURCE_DIR "app_resources")

# Absolute paths: where resources are searched for, and where the generated sources/headers go.
get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)

# Collect every file below the resource directory (paths relative to it); CONFIGURE_DEPENDS makes
# the build re-check the glob so newly added resource files are picked up.
file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
# Register each globbed file in the RESOURCES_TO_EMBED list.
foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
endforeach()

# Create the builtin-resource target, passing "nbl::this_example::builtin" as its namespace argument.
ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")

# Link the generated resource target into the example executable.
LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
521 changes: 521 additions & 0 deletions 14_Mortons/CTester.h

Large diffs are not rendered by default.

279 changes: 279 additions & 0 deletions 14_Mortons/ITester.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_
#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_

#include <nabla.h>
#include "app_resources/common.hlsl"
#include "nbl/application_templates/MonoDeviceApplication.hpp"

using namespace nbl;

class ITester
{
public:
virtual ~ITester()
{
m_outputBufferAllocation.memory->unmap();
};

struct PipelineSetupData
{
std::string testShaderPath;
core::smart_refctd_ptr<video::ILogicalDevice> device;
core::smart_refctd_ptr<video::CVulkanConnection> api;
core::smart_refctd_ptr<asset::IAssetManager> assetMgr;
core::smart_refctd_ptr<system::ILogger> logger;
video::IPhysicalDevice* physicalDevice;
uint32_t computeFamilyIndex;
};

template<typename InputStruct, typename OutputStruct>
void setupPipeline(const PipelineSetupData& pipleineSetupData)
{
// setting up pipeline in the constructor
m_device = core::smart_refctd_ptr(pipleineSetupData.device);
m_physicalDevice = pipleineSetupData.physicalDevice;
m_api = core::smart_refctd_ptr(pipleineSetupData.api);
m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr);
m_logger = core::smart_refctd_ptr(pipleineSetupData.logger);
m_queueFamily = pipleineSetupData.computeFamilyIndex;
m_semaphoreCounter = 0;
m_semaphore = m_device->createSemaphore(0);
m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT);
if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf))
logFail("Failed to create Command Buffers!\n");

// Load shaders, set up pipeline
core::smart_refctd_ptr<asset::IShader> shader;
{
asset::IAssetLoader::SAssetLoadParams lp = {};
lp.logger = m_logger.get();
lp.workingDirectory = ""; // virtual root
auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp);
const auto assets = assetBundle.getContents();
if (assets.empty())
return logFail("Could not load shader!");

// It would be super weird if loading a shader from a file produced more than 1 asset
assert(assets.size() == 1);
core::smart_refctd_ptr<asset::IShader> source = asset::IAsset::castDown<asset::IShader>(assets[0]);

shader = m_device->compileShader({source.get()});
}

if (!shader)
logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n");

video::IGPUDescriptorSetLayout::SBinding bindings[2] = {
{
.binding = 0,
.type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
.createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
.stageFlags = ShaderStage::ESS_COMPUTE,
.count = 1
},
{
.binding = 1,
.type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
.createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
.stageFlags = ShaderStage::ESS_COMPUTE,
.count = 1
}
};

core::smart_refctd_ptr<video::IGPUDescriptorSetLayout> dsLayout = m_device->createDescriptorSetLayout(bindings);
if (!dsLayout)
logFail("Failed to create a Descriptor Layout!\n");

m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout));
if (!m_pplnLayout)
logFail("Failed to create a Pipeline Layout!\n");

{
video::IGPUComputePipeline::SCreationParams params = {};
params.layout = m_pplnLayout.get();
params.shader.entryPoint = "main";
params.shader.shader = shader.get();
if (!m_device->createComputePipelines(nullptr, { &params,1 }, &m_pipeline))
logFail("Failed to create pipelines (compile & link shaders)!\n");
}

// Allocate memory of the input buffer
{
constexpr size_t BufferSize = sizeof(InputStruct);

video::IGPUBuffer::SCreationParams params = {};
params.size = BufferSize;
params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT;
core::smart_refctd_ptr<video::IGPUBuffer> inputBuff = m_device->createBuffer(std::move(params));
if (!inputBuff)
logFail("Failed to create a GPU Buffer of size %d!\n", params.size);

inputBuff->setObjectDebugName("emulated_float64_t output buffer");

video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs();
reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits();

m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE);
if (!m_inputBufferAllocation.isValid())
logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n");

assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get());
core::smart_refctd_ptr<video::IDescriptorPool> pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 });

m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout));
{
video::IGPUDescriptorSet::SDescriptorInfo info[1];
info[0].desc = core::smart_refctd_ptr(inputBuff);
info[0].info.buffer = { .offset = 0,.size = BufferSize };
video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = {
{.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info}
};
m_device->updateDescriptorSets(writes, {});
}
}

// Allocate memory of the output buffer
{
constexpr size_t BufferSize = sizeof(OutputStruct);

video::IGPUBuffer::SCreationParams params = {};
params.size = BufferSize;
params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT;
core::smart_refctd_ptr<video::IGPUBuffer> outputBuff = m_device->createBuffer(std::move(params));
if (!outputBuff)
logFail("Failed to create a GPU Buffer of size %d!\n", params.size);

outputBuff->setObjectDebugName("emulated_float64_t output buffer");

video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs();
reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits();

m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE);
if (!m_outputBufferAllocation.isValid())
logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n");

assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get());
core::smart_refctd_ptr<video::IDescriptorPool> pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 });

{
video::IGPUDescriptorSet::SDescriptorInfo info[1];
info[0].desc = core::smart_refctd_ptr(outputBuff);
info[0].info.buffer = { .offset = 0,.size = BufferSize };
video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = {
{.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info}
};
m_device->updateDescriptorSets(writes, {});
}
}

if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ))
logFail("Failed to map the Device Memory!\n");

// if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches
const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize());
if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT))
m_device->invalidateMappedMemoryRanges(1, &memoryRange);

assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct));

m_queue = m_device->getQueue(m_queueFamily, 0);
}

enum class TestType
{
CPU,
GPU
};

template<typename T>
void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType)
{
if (expectedVal == testVal)
return;

std::stringstream ss;
switch (testType)
{
case TestType::CPU:
ss << "CPU TEST ERROR:\n";
break;
case TestType::GPU:
ss << "GPU TEST ERROR:\n";
}

ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n';

m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR);
}

protected:
uint32_t m_queueFamily;
core::smart_refctd_ptr<video::ILogicalDevice> m_device;
core::smart_refctd_ptr<video::CVulkanConnection> m_api;
video::IPhysicalDevice* m_physicalDevice;
core::smart_refctd_ptr<asset::IAssetManager> m_assetMgr;
core::smart_refctd_ptr<system::ILogger> m_logger;
video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {};
video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {};
core::smart_refctd_ptr<video::IGPUCommandBuffer> m_cmdbuf = nullptr;
core::smart_refctd_ptr<video::IGPUCommandPool> m_cmdpool = nullptr;
core::smart_refctd_ptr<video::IGPUDescriptorSet> m_ds = nullptr;
core::smart_refctd_ptr<video::IGPUPipelineLayout> m_pplnLayout = nullptr;
core::smart_refctd_ptr<video::IGPUComputePipeline> m_pipeline;
core::smart_refctd_ptr<video::ISemaphore> m_semaphore;
video::IQueue* m_queue;
uint64_t m_semaphoreCounter;

template<typename InputStruct, typename OutputStruct>
OutputStruct dispatch(const InputStruct& input)
{
// Update input buffer
if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ))
logFail("Failed to map the Device Memory!\n");

const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize());
if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT))
m_device->invalidateMappedMemoryRanges(1, &memoryRange);

std::memcpy(static_cast<InputStruct*>(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct));

m_inputBufferAllocation.memory->unmap();

// record command buffer
m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE);
m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE);
m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1));
m_cmdbuf->bindComputePipeline(m_pipeline.get());
m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get());
m_cmdbuf->dispatch(1, 1, 1);
m_cmdbuf->endDebugMarker();
m_cmdbuf->end();

video::IQueue::SSubmitInfo submitInfos[1] = {};
const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} };
submitInfos[0].commandBuffers = cmdbufs;
const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} };
submitInfos[0].signalSemaphores = signals;

m_api->startCapture();
m_queue->submit(submitInfos);
m_api->endCapture();

m_device->waitIdle();
OutputStruct output;
std::memcpy(&output, static_cast<OutputStruct*>(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct));
m_device->waitIdle();

return output;
}

private:
template<typename... Args>
inline void logFail(const char* msg, Args&&... args)
{
m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward<Args>(args)...);
exit(-1);
}
};

#endif
Loading