Vulkan Schnee 0.0.1
High-performance rendering engine
Loading...
Searching...
No Matches
Renderer.cpp
Go to the documentation of this file.
5#include "Engine/Files/Path.h"
17
18#ifdef COMPUTE_DEBUG
20#endif
21
22#include <array>
23#include <cmath>
24#include <glm/gtc/matrix_transform.hpp>
26#include <BS_tracy_thread_pool.hpp>
31#include <Engine/Mesh/Vertex.h>
35#include <plog/Log.h>
36#include <sstream>
37#include <vector>
38#include <vulkan/vulkan_core.h>
39
40#ifdef ENABLE_TRACY
42#endif
43
44namespace EngineCore
45{
// Build-mode switches. Exactly one of the three branches below defines COMPUTE_SHADER_NAME,
// COMPUTE_GUARD and GRAPHICS_GUARD; HEADLESS is tested first and therefore wins over COMPUTE_DEBUG.
//   COMPUTE_SHADER_NAME(name)  - expression to use as the compute shader file name.
//   COMPUTE_GUARD()            - expands to nothing or to `return;`.
//   GRAPHICS_GUARD()           - expands to nothing or to `return;`.
// NOTE(review): the guards presumably sit at the top of void functions to skip compute / graphics
// work in restricted builds - their call sites are not visible in this part of the file, confirm.
53#if defined(HEADLESS)
// HEADLESS: the requested name is ignored in favour of the placeholder shader; both guards return.
54 #define COMPUTE_SHADER_NAME(name) "Output/Placeholder.comp.spv"
55 #define COMPUTE_GUARD() return;
56 #define GRAPHICS_GUARD() return;
57#elif defined(COMPUTE_DEBUG)
// COMPUTE_DEBUG: shader names pass through unchanged, compute is not guarded, graphics returns.
58 #define COMPUTE_SHADER_NAME(name) name
59 #define COMPUTE_GUARD() // No-op: compute runs in debug mode
60 #define GRAPHICS_GUARD() return; // Skip graphics in debug mode
61#else
// Default build: shader names pass through unchanged and both guards expand to nothing.
62 #define COMPUTE_SHADER_NAME(name) name
63 #define COMPUTE_GUARD()
64 #define GRAPHICS_GUARD()
65#endif
66
70 const Engine * engine
71 )
72 : updateThreadPool( 2, [](std::size_t) {}, "Renderer" )
73 , engine( engine )
74 , context( context )
75 , headset( headset )
76 {
77 TRACY_ZONE_SCOPED_NAMED( "Renderer Constructor" );
78
79 renderingDataManager = std::make_unique<RenderingDataManager>(engine, context);
80
81 {
82 TRACY_ZONE_SCOPED_NAMED( "Initialize Frame Indices" );
83 initializeFrameIndices();
84 }
85
86 {
87 TRACY_ZONE_SCOPED_NAMED("Creating buffers");
88 createDispatchBuffer();
89 createPlaceholderBuffer();
90 createPlaceholderUniformBuffer();
91 createCounterBuffer();
92 createObjectIDsBuffer();
93 createObjectCullingDataBuffer();
94 createObjectMeshletDataBuffer();
95 createMeshUnpackingDataBuffer();
96
97 // create main stages
98 createPrimitiveCullingResources();
99 createBinningAllocatorResources(); // Stage 1: Binning Allocator
100 createMeshletUnpackingResources(); // Stage 2: Meshlet Unpacking V2
101 createMeshletCullingResources();
102 createPrepareDrawResources();
103
104 // create dispatcher stages. This should only be placed after the actual creation of the others as it
105 // requires the pipelines to be setup already. It fetches the thread count from the pipeline to set
106 // the target size in the next shader
107 createMeshletUnpackingDispatcherResources();
108 createMeshletCullingDispatcherResources();
109
110 // Hi-Z occlusion culling resources
111 createHiZGenerationResources(); // Legacy multi-pass (fallback)
112 createHiZSPDResources(); // SPD single-pass (default)
113 // NOTE: createHiZMipDescriptorSets() and createHiZSPDDescriptorSets() are called after renderProcesses are created
114 // NOTE: createVertexShaderPathResources() is called after graphicsPipelineLayout is created
115 }
116
118 {
119 TRACY_ZONE_SCOPED_NAMED( "Create Graphics Command Pool" );
120 VkCommandPoolCreateInfo commandPoolCreateInfo{
121 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
122 .pNext = nullptr,
123 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
124 .queueFamilyIndex = context->getVkGraphicsQueueFamilyIndex()
125 };
126 PLOG_THROW_FN_VK( vkCreateCommandPool(
127 context->getVkDevice(), &commandPoolCreateInfo, nullptr, &vkGraphicsCommandPool
128 ) )
129 }
130
132 {
133 TRACY_ZONE_SCOPED_NAMED( "Create Transfer Command Pool" );
134 VkCommandPoolCreateInfo commandPoolCreateInfo{
135 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
136 .pNext = nullptr,
137 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
138 .queueFamilyIndex = context->getVkTransferQueueFamilyIndex()
139 };
140 PLOG_THROW_FN_VK( vkCreateCommandPool(
141 context->getVkDevice(), &commandPoolCreateInfo, nullptr, &vkTransferCommandPool
142 ) )
143 }
144
146 {
147 TRACY_ZONE_SCOPED_NAMED( "Create Descriptor Pool" );
148 std::array<VkDescriptorPoolSize, 3u> descriptorPoolSizes{};
149
150 descriptorPoolSizes.at( 0 ) = {
151 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
152 .descriptorCount =
153 static_cast<uint32_t>( MAX_FRAMES_IN_FLIGHT ) * 22 // for binding 3 and frustumUBOs + Hi-Z viewproj UBO
154 };
155
156 descriptorPoolSizes.at( 1 ) = { .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
157 .descriptorCount = (MAX_TEXTURE_COUNT + 2) * MAX_FRAMES_IN_FLIGHT }; // +2 for Hi-Z pyramid in primitive culling
158
159 descriptorPoolSizes.at( 2 ) = {
160 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
161 .descriptorCount =
162 static_cast<uint32_t>( MAX_FRAMES_IN_FLIGHT ) *
163 ( 83 ) // 8 for graphics + 8 for primitive culling (including Hi-Z) + 4 for binning allocator
164 // + 3 for meshlet unpacking V2 + 5 for meshlet culling + 2 for prepare draw
165 // + dispatchers + 8 materials + extra buffer
166 // + 13 for VS instanced drawing (6 binning + 3 unpacking + 4 prepare draw)
167 };
168
169 VkDescriptorPoolCreateInfo descriptorPoolCreateInfo{
170 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
171 .pNext = nullptr,
172 .flags = 0,
173 .maxSets = static_cast<uint32_t>( MAX_FRAMES_IN_FLIGHT ) *
174 (7+1+3), // One for graphics, seven for compute, +3 for VS instanced drawing pipeline
175 .poolSizeCount = static_cast<uint32_t>( descriptorPoolSizes.size() ),
176 .pPoolSizes = descriptorPoolSizes.data()
177 };
178
180 context->getVkDevice(), &descriptorPoolCreateInfo, nullptr, &descriptorPool, "Main"
181 );
182 }
183
185 {
186 TRACY_ZONE_SCOPED_NAMED( "Create Timeline Synchronizer" );
187 timelineSynchronizer_ = std::make_unique<TimelineSynchronizer>(context, MAX_FRAMES_IN_FLIGHT);
188 }
189
191 {
192 TRACY_ZONE_SCOPED_NAMED( "Create Render Finished Semaphores" );
193 renderFinishedSemaphores.resize( headset->getSwapchainRenderTargets().size() );
194 for ( auto & sem : renderFinishedSemaphores )
195 {
196 VkSemaphoreCreateInfo semInfo{ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
197 vkCreateSemaphore( context->getVkDevice(), &semInfo, nullptr, &sem );
198 }
199 }
200 }
201
203 {
204 // TODO: Check if this is actually necessary during destruction as i dont think it is necessary. If it
205 // isn't remove it and note in comment that it needs to be cleaned up manually.
206 cleanup();
207 }
208
210 {
211 TRACY_ZONE_SCOPED_NAMED( "Allocate Descriptors" );
212
214 {
215 TRACY_ZONE_SCOPED_NAMED( "Create Descriptor Set Layout Bindings" );
216 auto builder = LayoutBindingsBuilder();
217
218#if !defined(HEADLESS) && !defined(COMPUTE_DEBUG)
219 #define MESH_STAGE VK_SHADER_STAGE_MESH_BIT_EXT
220 #define VERTEX_STAGE VK_SHADER_STAGE_VERTEX_BIT // though not used in mesh shader path
221#else
222 #define MESH_STAGE VK_SHADER_STAGE_VERTEX_BIT
223 #define VERTEX_STAGE VK_SHADER_STAGE_VERTEX_BIT
224#endif
225
226 // VS path needs VERTEX_BIT for bindings 1 (VIEW_PROJECTION_UBO) and 5 (PER_OBJECT_SSBO)
227 constexpr VkShaderStageFlags MESH_AND_VERTEX_STAGE = MESH_STAGE | VK_SHADER_STAGE_VERTEX_BIT;
228
229 builder.addStorageBuffer( ShaderStage::Graphics::VERTEX_BUFFER, MESH_STAGE )
230 .addUniformBuffer( ShaderStage::Graphics::VIEW_PROJECTION_UBO, MESH_AND_VERTEX_STAGE )
234 .addStorageBuffer( ShaderStage::Graphics::PER_OBJECT_SSBO, MESH_AND_VERTEX_STAGE )
236 .addStorageBuffer( ShaderStage::Graphics::MESH_PRIMITIVE_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT )
238 // MATERIAL DATA
239 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_DIFFUSE_FLAT_COLOR, VK_SHADER_STAGE_FRAGMENT_BIT )
240 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_DIFFUSE_SHADER, VK_SHADER_STAGE_FRAGMENT_BIT )
241 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_MOVABLE_DIFFUSE, VK_SHADER_STAGE_FRAGMENT_BIT )
242 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_NORMALS, VK_SHADER_STAGE_FRAGMENT_BIT )
243 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_L0, VK_SHADER_STAGE_FRAGMENT_BIT )
244 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_L1, VK_SHADER_STAGE_FRAGMENT_BIT )
245 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_L2, VK_SHADER_STAGE_FRAGMENT_BIT )
246 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_DYNAMIC_TEXTURES, VK_SHADER_STAGE_FRAGMENT_BIT )
247 .addStorageBuffer( ShaderStage::Graphics::MATERIAL_STATIC_LIGHTMAP, VK_SHADER_STAGE_FRAGMENT_BIT )
248 // VS instanced drawing: instance ID buffer for vertex shader lookup
249 // NOTE: Must be added BEFORE texture array because texture array uses
250 // VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT which must be last
251 .addStorageBuffer( ShaderStage::Graphics::VS_INSTANCE_IDS, VK_SHADER_STAGE_VERTEX_BIT )
252 .addTextureBuffer( ShaderStage::Graphics::TEXTURE_ARRAY, VK_SHADER_STAGE_FRAGMENT_BIT )
253 .build();
254
255#undef MESH_STAGE
256#undef VERTEX_STAGE
257
258 VkDescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsInfo{
259 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO,
260 .bindingCount = static_cast<uint32_t>( builder.getFlags().size() ),
261 .pBindingFlags = builder.getFlags().data()
262 };
263
264 // Now create the descriptor set layout.
265 VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo{
266 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
267 .pNext = &bindingFlagsInfo,
268 .flags = 0,
269 .bindingCount = static_cast<uint32_t>( builder.getBindings().size() ),
270 .pBindings = builder.getBindings().data()
271 };
273 context->getVkDevice(),
274 &descriptorSetLayoutCreateInfo,
275 nullptr,
277 "Graphics"
278 );
279 assert(graphicsDescriptorSetLayout != nullptr);
280 }
281
283 {
284#if !defined(HEADLESS) && !defined(COMPUTE_DEBUG)
285 TRACY_ZONE_SCOPED_NAMED( "Create Push Constant Range" );
286 VkPushConstantRange pushConstantRange{ .stageFlags = VK_SHADER_STAGE_MESH_BIT_EXT |
287 VK_SHADER_STAGE_FRAGMENT_BIT,
288 .offset = 0,
289 .size = sizeof( TaskConstants ) };
290 pushConstants = { pushConstantRange };
291#endif
292 }
293
295 {
296 TRACY_ZONE_SCOPED_NAMED( "Create Pipeline Layout" );
297 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{
298 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
299 .setLayoutCount = 1u,
300 .pSetLayouts = &graphicsDescriptorSetLayout,
301#if !defined(HEADLESS) && !defined(COMPUTE_DEBUG)
302 .pushConstantRangeCount = static_cast<uint32_t>( pushConstants.size() ),
303 .pPushConstantRanges = pushConstants.data()
304#else
305 .pushConstantRangeCount = 0,
306 .pPushConstantRanges = nullptr
307#endif
308 };
310 context->getVkDevice(),
311 &pipelineLayoutCreateInfo,
312 nullptr,
314 "Graphics"
315 );
316 }
317
318 // Create vertex shader path pipeline (needs graphicsPipelineLayout)
320 createVSInstancedDrawingResources(); // VS instanced drawing compute passes
321
322 // Geometry and material buffers are now managed by RenderingDataManager
323 // They will be populated automatically when scenes load via the event-driven system
324 // For initial empty scene, RenderingDataManager creates placeholder buffers
325
326 // Verify RenderingDataManager buffers were created successfully
327 PLOGI << "RenderingDataManager buffer creation complete:";
328 PLOGI << " Vertex buffer: "
329 << VulkanHelper::strIsValid( renderingDataManager->getVertexBuffer().getBuffer() );
330 PLOGI << " Meshlet buffer: "
331 << VulkanHelper::strIsValid( renderingDataManager->getMeshletBuffer().getBuffer() );
332 PLOGI << " Triangle buffer: "
333 << VulkanHelper::strIsValid( renderingDataManager->getTriangleBuffer().getBuffer() );
334 PLOGI << " Primitive render data buffer: "
335 << VulkanHelper::strIsValid( renderingDataManager->getPrimitiveRenderDataBuffer().getBuffer() );
336
338 {
339 TRACY_ZONE_SCOPED_NAMED( "Create Render Processes" );
341
342 for ( uint32_t eyeIndex = 0u; eyeIndex < renderProcesses.size(); eyeIndex++ )
343 {
344 {
345 TRACY_ZONE_SCOPED_NAMED( "Create RenderProcess" );
346 PLOGI << "Creating RenderProcess " << eyeIndex;
347 renderProcesses.at( eyeIndex ) = new RenderProcess(
348 context,
349 context->getUniformBufferOffsetAlignment(),
354 eyeIndex,
355 this,
356 engine
357 );
358 }
359 }
360 }
361
362 // Hi-Z descriptor sets need renderProcesses to exist for the image views
363 createHiZMipDescriptorSets(); // Legacy multi-pass
364 createHiZSPDDescriptorSets(); // SPD single-pass
365
367 {
368 TRACY_ZONE_SCOPED_NAMED( "Create Graphics Pipelines" );
369 VkVertexInputBindingDescription vertexInputBindingDescription{
370 .binding = 0u, .stride = sizeof( Vertex ), .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
371 };
372
373 VkVertexInputAttributeDescription vertexInputAttributePosition{
374 .location = 0u,
375 .binding = 0u,
376 .format = VK_FORMAT_R32G32B32_SFLOAT,
377 .offset = offsetof( Vertex, position )
378 };
379
380 VkVertexInputAttributeDescription vertexInputAttributeNormal{
381 .location = 1u,
382 .binding = 0u,
383 .format = VK_FORMAT_R32G32B32_SFLOAT,
384 .offset = offsetof( Vertex, normal ),
385 };
386
387 VkVertexInputAttributeDescription vertexInputAttributeTextureCoordinate{
388 .location = 2u,
389 .binding = 0u,
390 .format = VK_FORMAT_R32G32_SFLOAT,
391 .offset = offsetof( Vertex, textureCoordinate )
392 };
393
394 // Create graphics pipelines from static PipelineNames configuration
395 {
396 TRACY_ZONE_SCOPED_NAMED( "Create Graphics Pipelines" );
397
398 // List of all pipeline types to create
399 const std::vector<PipelineNames> pipelinesToCreate = {
404 L0_SHADER,
405 L1_SHADER,
406 L2_SHADER,
409 };
410
411 graphicsPipelines.reserve( pipelinesToCreate.size() );
412
413 // Create mesh shader specialization with actual screen resolution from headset
414 VkExtent2D eyeResolution = headset->getEyeResolution(0);
415 MeshShaderSpecializationData meshSpecData(
416 static_cast<float>(eyeResolution.width),
417 static_cast<float>(eyeResolution.height)
418 );
419 PLOGI << "Mesh shader screen resolution: " << eyeResolution.width << "x" << eyeResolution.height;
420
421 for ( PipelineNames pipelineName : pipelinesToCreate )
422 {
423 PipelineConfig config = getPipelineConfig( pipelineName );
424 TRACY_ZONE_SCOPED_NAMED_D( "Create Pipeline " << static_cast<int>(pipelineName) );
425
426 GraphicsPipeline * pipeline = nullptr;
427
428#if !defined(HEADLESS) && !defined(COMPUTE_DEBUG)
429 pipeline = new GraphicsPipeline(
430 context->getVkDevice(),
431 context->getMultisampleCount(),
433 VK_NULL_HANDLE, // Use dynamic rendering (vkCmdBeginRendering)
434 config.meshShaderPath,
435 config.fragmentShaderPath,
436 {},
437 { vertexInputAttributePosition,
438 vertexInputAttributeNormal,
439 vertexInputAttributeTextureCoordinate },
440 config.pipelineData,
441 &meshSpecData // Pass screen resolution for small triangle culling
442 );
443#else
444 pipeline = new GraphicsPipeline(
445 context->getVkDevice(),
446 context->getMultisampleCount(),
448 headset->getRenderPass(),
449 Path::engineShaders() / "Output/Placeholder.vert.spv",
450 Path::engineShaders() / "Output/Placeholder.frag.spv"
451 );
452#endif
453
454 graphicsPipelines.push_back( pipeline );
455 pipelinesByName[pipelineName] = pipeline;
456 }
457
458 PLOGI << "Created " << graphicsPipelines.size() << " graphics pipelines";
459 }
460
461 // Create depth-only pipeline variants for Z-prepass (Pass 1)
462 // These have colorAttachmentCount = 0, enabling "Double-Speed Z" on NVIDIA/AMD
463 {
464 TRACY_ZONE_SCOPED_NAMED( "Create Depth-Only Pipelines" );
465
467
468 // Depth-only fragment shader path
469 const auto depthOnlyFragPath = Path::engineShaders() / "Output/depth_only.frag.spv";
470
471 VkExtent2D eyeResolution = headset->getEyeResolution(0);
472 MeshShaderSpecializationData meshSpecData(
473 static_cast<float>(eyeResolution.width),
474 static_cast<float>(eyeResolution.height)
475 );
476
477 // Create depth-only variants for each pipeline type
478 const std::vector<PipelineNames> pipelinesToCreate = {
483 L0_SHADER,
484 L1_SHADER,
485 L2_SHADER,
488 };
489
490 for ( PipelineNames pipelineName : pipelinesToCreate )
491 {
492 PipelineConfig config = getPipelineConfig( pipelineName );
493
494#if !defined(HEADLESS) && !defined(COMPUTE_DEBUG)
495 GraphicsPipeline * depthOnlyPipeline = new GraphicsPipeline(
496 context->getVkDevice(),
497 context->getMultisampleCount(),
499 VK_NULL_HANDLE, // Use dynamic rendering
500 config.meshShaderPath,
501 depthOnlyFragPath.string(), // Use depth-only fragment shader
502 {},
503 {},
504 config.pipelineData,
505 &meshSpecData,
506 true // depthOnly = true
507 );
508 depthOnlyGraphicsPipelines.push_back( depthOnlyPipeline );
509#endif
510 }
511
512 PLOGI << "Created " << depthOnlyGraphicsPipelines.size() << " depth-only pipelines for Z-prepass";
513 }
514
515 // Build pipeline index map for fast lookup
516 {
517 TRACY_ZONE_SCOPED_NAMED( "Build Pipeline Index Map" );
518 pipelineIndices.clear();
519 for ( uint32_t i = 0; i < graphicsPipelines.size(); ++i )
520 {
522 }
523 }
524 }
525
526 // setup tracy with calibrated timestamps
527 {
528#ifdef ENABLE_TRACY
529 TRACY_ZONE_SCOPED_NAMED( "Setup Tracy Profiling" );
530 // Get function pointers for calibrated timestamps
531 for ( size_t i = 0; i < renderProcesses.size(); i++ )
532 {
533 {
534 TRACY_ZONE_SCOPED_NAMED( "Create Tracy Context" );
535 VkCommandBuffer commandBuffer = renderProcesses[i]->getGraphicsCommandBuffer();
536
537 TracyVkCtx tracyContext = TracyVkContext(
538 context->getVkPhysicalDevice(),
539 context->getVkDevice(),
540 context->getGraphicsQueue(),
541 commandBuffer
542 );
543
544 std::stringstream contextNameStream{};
545 contextNameStream << "Vulkan Context " << i;
546 std::string contextName = contextNameStream.str();
547
548 TracyVkContextName( tracyContext, contextName.c_str(), contextName.size() );
549
550 tracyVkContext.push_back( tracyContext );
551 }
552 }
553#endif
554 }
555 }
556
558 {
559 TRACY_ZONE_SCOPED_NAMED( "Initialize GPU Buffers" );
560 PLOGI << "Populating staging buffers with initial data for all frames";
561
562 // Skip initialization if engine is not available (e.g., during testing)
563 if (engine == nullptr) {
564 PLOGD << "Engine is null, skipping GPU buffer initialization (test mode)";
565 return;
566 }
567
568 for ( RenderProcess * renderProcess : renderProcesses )
569 {
570 {
571 TRACY_ZONE_SCOPED_NAMED( "Update Render Process Data" );
572 renderProcess->updateSSBO( this );
573 renderProcess->updateMeshletDataBuffer();
574 renderProcess->updateMeshRenderingData();
575
576 renderProcess->uploadSSBOStagingBuffer();
577 renderProcess->uploadMeshRenderingStagingData();
578 }
579 }
580 }
581
582 void Renderer::prepareTransferSubmission( const uint32_t frameIndex ) const
583 {
584 TRACY_ZONE_SCOPED_NAMED( "Prepare Transfer Submission" );
585 std::vector<VkBufferMemoryBarrier2> barriers{};
586
587 // Get the transfer command buffer for the SPECIFIED frame
588 VkCommandBuffer transferCommandBuffer = renderProcesses[frameIndex]->getTransferCommandBuffer();
589
590 // Reset and begin recording
591 {
592 TRACY_ZONE_SCOPED_NAMED( "Record transfer command buffer n+1" );
593 PLOG_RETURN_FN_VK( vkResetCommandBuffer( transferCommandBuffer, 0u ) )
594
595 VkCommandBufferBeginInfo commandBufferBeginInfo{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
596 PLOG_RETURN_FN_VK( vkBeginCommandBuffer( transferCommandBuffer, &commandBufferBeginInfo ) )
597 }
598
599 // Record the copy commands
600 {
602 getCurrentTracyVkContext(), transferCommandBuffer, "Transfer Buffer Copies", Colors::Green
603 )
604 for ( const auto & copyObject : syncCopyObjects )
605 {
606 VkBufferCopy copyRegion{};
607 copyRegion.srcOffset = copyObject.copyObject.srcOffset;
608 copyRegion.dstOffset = copyObject.copyObject.dstOffset;
609 copyRegion.size = copyObject.copyObject.size;
610 if (copyObject.copyObject.size == 0)
611 {
612 PLOGW << "Trying to copy copy object of size 0. Skipping copy command";
613 continue;
614 }
615 vkCmdCopyBuffer(
616 transferCommandBuffer,
617 copyObject.copyObject.sourceBuffer,
618 copyObject.copyObject.destinationBuffer,
619 1,
620 &copyRegion
621 );
622 }
623
624 // NEW: Memory barrier after copies to ensure writes are complete before release.
625 VkMemoryBarrier2 copyBarrier{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
626 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
627 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
628 .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
629 .dstAccessMask = VK_ACCESS_2_NONE };
630
631 VkDependencyInfo copyDep{
632 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
633 .memoryBarrierCount = 1,
634 .pMemoryBarriers = &copyBarrier,
635 };
636 vkCmdPipelineBarrier2( transferCommandBuffer, &copyDep );
637
638 // NEW: Record release barriers here (moved from render()).
639 // For each syncCopyObject, create a release barrier. Assume syncCopyObjects already have the
640 // needed data.
641 std::vector<VkBufferMemoryBarrier2> releaseBarriers;
642 for ( const auto & obj : syncCopyObjects )
643 {
644 VkBufferMemoryBarrier2 rel = {
645 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
646 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
647 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
648 .dstStageMask = VK_PIPELINE_STAGE_2_NONE, // Not needed for release
649 .dstAccessMask = VK_ACCESS_2_NONE,
650 .srcQueueFamilyIndex = context->getVkTransferQueueFamilyIndex(),
651 .dstQueueFamilyIndex = context->getVkGraphicsQueueFamilyIndex(),
652 .buffer = obj.copyObject.destinationBuffer,
653 .offset = 0,
654 .size = VK_WHOLE_SIZE,
655 };
656 releaseBarriers.push_back( rel );
657 }
658 if ( !releaseBarriers.empty() )
659 {
660 VkDependencyInfo dep{
661 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
662 .bufferMemoryBarrierCount = static_cast<uint32_t>( releaseBarriers.size() ),
663 .pBufferMemoryBarriers = releaseBarriers.data(),
664 };
665 vkCmdPipelineBarrier2( transferCommandBuffer, &dep );
666 }
667
668 if ( !barriers.empty() )
669 {
670 VkDependencyInfo dependencyInfo{
671 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
672 .bufferMemoryBarrierCount = static_cast<uint32_t>( barriers.size() ),
673 .pBufferMemoryBarriers = barriers.data(),
674 };
675 vkCmdPipelineBarrier2( transferCommandBuffer, &dependencyInfo );
676 }
677
678 if ( !syncCopyObjects.empty() )
679 {
680 VkMemoryBarrier2 memoryBarrier{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
681 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
682 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
683 .dstStageMask = VK_PIPELINE_STAGE_2_NONE,
684 .dstAccessMask = 0 };
685
686 VkDependencyInfo dependencyInfo{
687 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
688 .memoryBarrierCount = 1,
689 .pMemoryBarriers = &memoryBarrier,
690 };
691 vkCmdPipelineBarrier2( transferCommandBuffer, &dependencyInfo );
692 }
693
694 vkEndCommandBuffer( transferCommandBuffer );
695 }
696 }
697
699 {
700 TRACY_ZONE_SCOPED_NAMED( "Renderer::updateViewMatrix" );
701 // Always update view-projection matrices for rendering (follows camera)
703
704 // Only update culling data (frustum planes, Hi-Z VP) if not frozen
705 // When frozen, culling uses the viewpoint from when freeze was enabled
706 if ( !freezeCulling_ )
707 {
710 }
711 }
712
713 void Renderer::setFreezeCulling( bool freeze )
714 {
715 freezeCulling_ = freeze;
716 PLOGI << "Culling freeze " << ( freeze ? "enabled" : "disabled" );
717 }
718
719 void Renderer::uploadFrameData( float time )
720 {
723 }
724
// Records the transfer command buffer of the current render process.
//
// Visible steps, in order:
//   1. reset the command buffer and begin recording,
//   2. one vkCmdCopyBuffer per entry of syncCopyObjects (zero-sized entries are skipped),
//   3. if syncCopyObjects is not empty: a memory barrier after the copies, then one queue family
//      release barrier (transfer family -> graphics family) per entry,
//   4. a data upload command for every texture returned by renderingDataManager->getTexturesToUpload(),
//      and registration of those textures with every render process,
//   5. end recording.
//
// @param time NOTE(review): not referenced by any statement visible in this block - confirm whether it
//             is consumed by a statement that is not shown here.
//
// NOTE(review): PLOG_RETURN_FN_VK presumably logs a failing VkResult and returns from this function -
// confirm against the macro definition.
725 void Renderer::recordTransfer( float time )
726 {
728
729 RenderProcess * currentRenderProcess = getCurrentRenderProcess();
730 const VkCommandBuffer transferCommandBuffer = currentRenderProcess->getTransferCommandBuffer();
731
732 // Begin transfer command buffer recording
733 {
734 TRACY_ZONE_SCOPED_NAMED( "Begin transfer command buffer" );
735 PLOG_RETURN_FN_VK( vkResetCommandBuffer( transferCommandBuffer, 0u ) )
736
    // Only sType is set; every other begin-info field is left value-initialised.
737 constexpr VkCommandBufferBeginInfo commandBufferBeginInfo{
738 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO
739 };
740 PLOG_RETURN_FN_VK( vkBeginCommandBuffer( transferCommandBuffer, &commandBufferBeginInfo ) )
741 }
742
743 // Record buffer copy commands
744 {
745 TRACY_ZONE_SCOPED_NAMED( "Buffer transfer processing" );
747 getCurrentTracyVkContext(), transferCommandBuffer, "Buffer Copy Operations", Colors::Green
748 );
749
    // One single-region copy per sync object, taken from its embedded copyObject.
750 for ( const auto & copyObject : syncCopyObjects )
751 {
752 VkBufferCopy copyRegion{};
753 copyRegion.srcOffset = copyObject.copyObject.srcOffset;
754 copyRegion.dstOffset = copyObject.copyObject.dstOffset;
755 copyRegion.size = copyObject.copyObject.size;
    // A zero-sized entry is not copied; it is only warned about.
    // NOTE(review): such an entry still gets a release barrier in the loop further down.
756 if (copyRegion.size == 0)
757 {
758 PLOGW << "Trying to upload a copy buffer with size 0. Skipping copy command";
759 continue;
760 }
761 vkCmdCopyBuffer(
762 transferCommandBuffer,
763 copyObject.copyObject.sourceBuffer,
764 copyObject.copyObject.destinationBuffer,
765 1,
766 &copyRegion
767 );
768 }
769
770 // Memory barrier after copies to ensure writes are complete
771 if ( !syncCopyObjects.empty() )
772 {
    // Source scope: transfer-stage writes. Destination scope: bottom of pipe with no access.
773 VkMemoryBarrier2 copyBarrier{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
774 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
775 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
776 .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
777 .dstAccessMask = VK_ACCESS_2_NONE };
778
779 VkDependencyInfo copyDep{
780 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
781 .memoryBarrierCount = 1,
782 .pMemoryBarriers = &copyBarrier,
783 };
784 vkCmdPipelineBarrier2( transferCommandBuffer, &copyDep );
785
786 // Queue family ownership transfer: Release barriers from Transfer to Graphics queue
787 // The acquire barriers are recorded in renderToXr() on the graphics command buffer
788 std::vector<VkBufferMemoryBarrier2> releaseBarriers;
789 releaseBarriers.reserve( syncCopyObjects.size() );
790 for ( const auto & obj : syncCopyObjects )
791 {
    // Each barrier spans the whole destination buffer (offset 0, VK_WHOLE_SIZE), not just the
    // copied region, and names the transfer family as source and the graphics family as target.
792 VkBufferMemoryBarrier2 rel = {
793 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
794 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
795 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
796 .dstStageMask = VK_PIPELINE_STAGE_2_NONE,
797 .dstAccessMask = VK_ACCESS_2_NONE,
798 .srcQueueFamilyIndex = context->getVkTransferQueueFamilyIndex(),
799 .dstQueueFamilyIndex = context->getVkGraphicsQueueFamilyIndex(),
800 .buffer = obj.copyObject.destinationBuffer,
801 .offset = 0,
802 .size = VK_WHOLE_SIZE,
803 };
804 releaseBarriers.push_back( rel );
805 }
806
    // All release barriers are submitted with a single pipeline barrier call.
807 VkDependencyInfo releaseDep{
808 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
809 .bufferMemoryBarrierCount = static_cast<uint32_t>( releaseBarriers.size() ),
810 .pBufferMemoryBarriers = releaseBarriers.data(),
811 };
812 vkCmdPipelineBarrier2( transferCommandBuffer, &releaseDep );
813 }
814 }
815
817 {
818 TRACY_ZONE_SCOPED_NAMED( "Texture transfer processing" );
820 getCurrentTracyVkContext(), transferCommandBuffer, "Texture Upload Commands", Colors::Orange
821 ) // Orange
822
    // Each pending texture records its own upload command into the transfer command buffer.
823 for ( Texture * texture : renderingDataManager->getTexturesToUpload() )
824 {
825 TRACY_ZONE_SCOPED_NAMED("Creating texture upload command");
826 PLOGI << "Recording texture transfer command for texture: " << texture->getDescriptorIndex();
827 texture->createDataUploadCommand( transferCommandBuffer, this );
828 }
829
830 if ( !renderingDataManager->getTexturesToUpload().empty() )
831 {
832 TRACY_ZONE_SCOPED_NAMED( "Adding textures to upload" );
    // Every render process is handed every pending texture, one at a time, together with the
    // texture's descriptor index.
833 for ( RenderProcess * renderProcessPtr : renderProcesses )
834 {
835 for ( Texture * texture : renderingDataManager->getTexturesToUpload() )
836 {
837 renderProcessPtr->addTexturesToUpload( { texture }, texture->getDescriptorIndex() );
838 }
839 }
840 }
841
842 // Textures will be cleared after graphics queue processing completes
843 }
844
845 // End transfer command buffer recording
846 PLOG_RETURN_FN_VK( vkEndCommandBuffer( transferCommandBuffer ) )
847 }
848
849 void Renderer::renderToXr( size_t swapChainImageIndex, float time )
850 {
851 PLOGV << "Swap chain image index: " << swapChainImageIndex << " currentFrame: " << currentFrame;
853
854 // updateCpuRenderResources(time); // Moved to recordTransfer
855
856 RenderProcess * currentRenderProcess = renderProcesses.at( currentFrame );
857 std::vector<VkBufferMemoryBarrier2> acquireBarriers{};
858 std::vector<BufferCopyObject> copyObjects{};
859
860 for ( const auto & [releaseBarrier, acquireBarrier, copyObject] : syncCopyObjects )
861 {
862 acquireBarriers.push_back( acquireBarrier );
863 copyObjects.push_back( copyObject );
864 }
865
867
869 {
870 TRACY_ZONE_SCOPED_NAMED( "Acquire transfer resources" );
871 if ( !acquireBarriers.empty() )
872 {
873 VkDependencyInfo acquireDependency{
874 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
875 .bufferMemoryBarrierCount = static_cast<uint32_t>( acquireBarriers.size() ),
876 .pBufferMemoryBarriers = acquireBarriers.data(),
877 };
878
879 vkCmdPipelineBarrier2( currentRenderProcess->getGraphicsCommandBuffer(), &acquireDependency );
880 }
881 }
882
883 // Process texture uploads on graphics queue (after transfer data is available)
884 currentRenderProcess->processTextureUpload();
885
886 // Transition textures from transfer layout to shader-readable layout
887 if ( !renderingDataManager->getTexturesToUpload().empty() )
888 {
889 TRACY_ZONE_SCOPED_NAMED( "Texture layout transitions" );
892 currentRenderProcess->getGraphicsCommandBuffer(),
893 "Texture Layout Transitions",
895 ) // Blue
896
897 for ( Texture * texture : renderingDataManager->getTexturesToUpload() )
898 {
899 VkImageMemoryBarrier2 imageBarrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
900 .pNext = nullptr,
901 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
902 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
903 .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
904 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT,
905 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
906 .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
907 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
908 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
909 .image = texture->getVkImage(),
910 .subresourceRange = { .aspectMask =
911 VK_IMAGE_ASPECT_COLOR_BIT,
912 .baseMipLevel = 0,
913 .levelCount = 1,
914 .baseArrayLayer = 0,
915 .layerCount = 1 } };
916
917 VkDependencyInfo depInfo{
918 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
919 .pNext = nullptr,
920 .imageMemoryBarrierCount = 1u,
921 .pImageMemoryBarriers = &imageBarrier,
922 };
923 vkCmdPipelineBarrier2( currentRenderProcess->getGraphicsCommandBuffer(), &depInfo );
924 }
925
926 // Now we can clear the textures to upload since processing is complete
927 renderingDataManager->clearTexturesToUpload();
928 }
929
932 currentRenderProcess->getGraphicsCommandBuffer(),
933 "Draw Frame",
935 )
936
937 // Reset pipeline counters and bin offsets buffers before compute passes
938 {
939 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Reset Pipeline Buffers", Colors::Red )
940 // Zero pipeline counters (used as atomic counters in binning allocator)
941 vkCmdFillBuffer(
943 currentRenderProcess->getPipelineCountersBuffer().getBuffer(),
944 0,
945 VK_WHOLE_SIZE,
946 0
947 );
948 // Zero bin offsets (pipeline 0 starts at offset 0, others need prefix sum)
949 // TODO: Add proper prefix sum computation for multi-pipeline support
950 vkCmdFillBuffer(
952 currentRenderProcess->getPipelineBinOffsetsBuffer().getBuffer(),
953 0,
954 VK_WHOLE_SIZE,
955 0
956 );
957 // Zero primitive culling survivor counter (accumulates via atomicAdd in PrimitiveCulling)
958 vkCmdFillBuffer(
960 counterBuffer->getBuffer(),
961 0,
962 sizeof(uint32_t),
963 0
964 );
965 // Zero culling failed counter (two-pass occlusion: stores objects that failed Pass 1 Hi-Z)
966 vkCmdFillBuffer(
968 currentRenderProcess->getCullingFailedCounterBuffer().getBuffer(),
969 0,
970 sizeof(uint32_t),
971 0
972 );
973 // Zero VS indirect draw count (vertex shader path for single-meshlet geometry - legacy)
974 vkCmdFillBuffer(
976 currentRenderProcess->getVSIndirectDrawCountBuffer().getBuffer(),
977 0,
978 sizeof(uint32_t),
979 0
980 );
981 // Zero VS visible count (VS instanced drawing pipeline - PrimitiveCulling writes this)
982 vkCmdFillBuffer(
984 currentRenderProcess->getVSVisibleCountBuffer().getBuffer(),
985 0,
986 sizeof(uint32_t),
987 0
988 );
989 // Zero LOD cluster survivor count (PrimitiveCulling writes this via atomicAdd for LOD path)
990 // Without this reset, garbage values cause MeshletBinningAllocator/Unpacking to read invalid data
991 vkCmdFillBuffer(
993 currentRenderProcess->getLodClusterSurvivorCountBuffer().getBuffer(),
994 0,
995 sizeof(uint32_t),
996 0
997 );
998 // Barrier to ensure fills complete before compute passes read/write
999 VkMemoryBarrier2 fillBarrier{
1000 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
1001 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
1002 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
1003 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1004 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT
1005 };
1006 VkDependencyInfo fillDep{
1007 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1008 .memoryBarrierCount = 1,
1009 .pMemoryBarriers = &fillBarrier
1010 };
1011 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &fillDep );
1012 }
1013
1014 // Update culling descriptor set with PREVIOUS frame's Hi-Z pyramid
1015 // The Hi-Z was generated at the end of the previous frame, so culling reads from it
1016 {
1017 const uint32_t previousFrame = (currentFrame + MAX_FRAMES_IN_FLIGHT - 1) % MAX_FRAMES_IN_FLIGHT;
1018 RenderProcess* previousRenderProcess = renderProcesses[previousFrame];
1019 currentRenderProcess->updateComputeObjectCullingDescriptorSets(
1020 previousRenderProcess->getHiZPyramidFullView()
1021 );
1022 }
1023
1024 // Update LOD configuration with camera and screen info for cluster-based LOD selection
1025 {
1026 const uint32_t eyeIndex = currentRenderProcess->getEyeIndex();
1027 const glm::mat4 viewMatrix = headset->getEyeViewMatrix(eyeIndex);
1028 const glm::mat4 projMatrix = headset->getEyeProjectionMatrix(eyeIndex);
1029 const VkExtent2D resolution = headset->getEyeResolution(eyeIndex);
1030
1031 // Extract camera position from inverse view matrix (column 3 of inverse = world position)
1032 const glm::mat4 invView = glm::inverse(viewMatrix);
1033 const glm::vec3 cameraPosition = glm::vec3(invView[3]);
1034
1035 // Use default near plane for VR (0.01m is common)
1036 constexpr float nearPlane = 0.01f;
1037
1038 // Update LOD config UBO
1039 currentRenderProcess->updateLodConfig(
1040 cameraPosition,
1041 projMatrix,
1042 nearPlane,
1043 static_cast<float>(resolution.width),
1044 static_cast<float>(resolution.height),
1045 1.0f, // errorThresholdPixels - 1 pixel error target
1046 true // ditherEnabled
1047 );
1048
1049 // Upload to GPU (staged buffer)
1050 currentRenderProcess->uploadLodConfigBuffer();
1051
1052 // Log LOD config once at startup
1053 VS_LOG_ONCE(LogLOD, Warning, "[DIAG] LOD Config: cameraPos=({:.2f},{:.2f},{:.2f}) proj11={:.4f} nearPlane={} screenSize={}x{} errorThreshold=1.0",
1054 cameraPosition.x, cameraPosition.y, cameraPosition.z,
1055 projMatrix[1][1], nearPlane, resolution.width, resolution.height);
1056 }
1057
1058 // Stage 0: Primitive Culling - PASS 1
1059 // Uses previous frame's Hi-Z, may have false negatives which Pass 2 will catch
1060 {
1061 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Primitive Culling Pass 1", Colors::Red )
1062 const uint32_t threadCount = getObjectCullingComputePass().getThreadCount();
1063 const uint32_t primitiveCount = renderingDataManager->getPrimitiveCount();
1064
1065 // Push constants: {primitiveCount, cullingPass}
1066 struct CullingPushConstants {
1067 uint32_t primitiveCount;
1068 uint32_t cullingPass; // 0 = Pass 1
1069 } pushData = { primitiveCount, 0 };
1070
1073 PLOGI << "Starting Pass 1 culling with " << primitiveCount << " primitives";
1074 VkPushConstantsInfo pushConstantsInfo = getObjectCullingComputePass().createPushConstantsInfo( sizeof( CullingPushConstants ), &pushData );
1075 vkCmdPushConstants2( getCurrentRenderingCommandBuffer(), &pushConstantsInfo );
1076 if ( primitiveCount > 0 )
1077 {
1078 vkCmdDispatch( getCurrentRenderingCommandBuffer(), ( primitiveCount + threadCount - 1 ) / threadCount, 1, 1 );
1079 }
1080 }
1081 {
1082 PLOGI << "Barrier: Primitive Culling -> Binning Allocator";
1083 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Primitive Culling -> Binning Allocator", Colors::Red )
1084 Vulkan::BarrierBundle bundle;
1085 VkDependencyInfo dependencyInfo = getObjectCullingToMeshletUnpackingDispatchBarriers(bundle);
1086 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &dependencyInfo );
1087 }
1088
1089 // Stage 1: Binning Allocator
1090 // BinningAllocator processes both regular primitive survivors AND LOD cluster survivors
1091 // Total work = survivor_count + lod_cluster_survivor_count (computed in shader)
1092 // Dispatch bound: primitiveCount (max regular survivors) + clusterCount (max LOD cluster survivors)
1093 const uint32_t primitiveCount = renderingDataManager->getPrimitiveCount();
1094 const uint32_t clusterCount = renderingDataManager->getClusterCount();
1095 const uint32_t maxBinningWorkItems = primitiveCount + clusterCount;
1096
1097 // LOD diagnostic logging (every 60 frames to avoid spam)
1098 static uint32_t lodDiagFrameCounter = 0;
1099 if (++lodDiagFrameCounter >= 60) {
1100 lodDiagFrameCounter = 0;
1101 VS_LOG(LogLOD, Warning, "[DIAG] Renderer: primitiveCount={} clusterCount={} hasLodData={} clusterGroupCount={}",
1102 primitiveCount, clusterCount, renderingDataManager->hasLodData(), renderingDataManager->getClusterGroupCount());
1103 }
1104
1105 {
1106 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Binning Allocator", Colors::Red )
1107 const uint32_t threadCount = getBinningAllocatorComputePass().getThreadCount();
1108
1111 PLOGI << "Starting binning allocator with max " << primitiveCount << " primitives + " << clusterCount << " LOD clusters";
1112 if ( maxBinningWorkItems > 0 )
1113 {
1114 vkCmdDispatch( getCurrentRenderingCommandBuffer(), ( maxBinningWorkItems + threadCount - 1 ) / threadCount, 1, 1 );
1115 }
1116 }
1117 {
1118 PLOGI << "Barrier: Binning Allocator -> Meshlet Unpacking Dispatcher";
1119 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Binning Allocator -> Meshlet Unpacking Dispatcher", Colors::Red )
1120 // Barrier to ensure binning allocator writes are visible to dispatcher
1121 VkMemoryBarrier2 binningBarrier{
1122 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
1123 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1124 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
1125 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1126 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
1127 };
1128 VkDependencyInfo binningDep{
1129 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1130 .memoryBarrierCount = 1,
1131 .pMemoryBarriers = &binningBarrier
1132 };
1133 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &binningDep );
1134 }
1135 {
1136 PLOGI << "Meshlet Unpacking Dispatcher";
1137 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Meshlet Unpacking Dispatcher", Colors::Red)
1139 getMeshletUnpackingDispatchComputePass().bindDescriptorSets(currentRenderProcess->getGraphicsCommandBuffer(), currentRenderProcess->getEyeIndex());
1140 vkCmdDispatch( getCurrentRenderingCommandBuffer(), 1, 1, 1 );
1141 }
1142 {
1143 PLOGI << "Barrier: Meshlet Unpacking Dispatcher -> Meshlet Unpacking";
1144 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Meshlet Unpacking Dispatcher -> Meshlet Unpacking", Colors::Red)
1145 Vulkan::BarrierBundle bundle;
1146 VkDependencyInfo dependencyInfo = getMeshletUnpackingDispatchToMeshletUnpackingBarriers(bundle);
1147 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &dependencyInfo );
1148 }
1149
1150 // Stage 2: Meshlet Unpacking (V2)
1151 // MeshletUnpacking reads survivor_count directly from the counter buffer (GPU-driven)
1152 {
1153 PLOGI << "Meshlet Unpacking (V2)";
1154 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Meshlet Unpacking", Colors::Red)
1157 vkCmdDispatchIndirect( getCurrentRenderingCommandBuffer(), getDispatchBuffer().getBuffer(), 0 );
1158 }
1159 // NOTE: MeshletCulling stage removed - BinningAllocator already computes per-pipeline counts
1160 // in pipelineCountersBuffer, which PrepareDraw now reads directly
1161 {
1162 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Meshlet Unpacking -> Prepare Draw", Colors::Red)
1163 // Barrier: MeshletUnpacking writes to binnedVisibleMeshletIndexBuffer
1164 // PrepareDraw reads from pipelineCountersBuffer (written by BinningAllocator)
1165 // Both are compute shader operations
1166 VkMemoryBarrier2 memBarrier{
1167 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
1168 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1169 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
1170 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1171 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
1172 };
1173 VkDependencyInfo depInfo{
1174 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1175 .memoryBarrierCount = 1,
1176 .pMemoryBarriers = &memBarrier
1177 };
1178 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &depInfo );
1179 }
1180 {
1181 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), currentRenderProcess->getGraphicsCommandBuffer(), "Prepare Draw", Colors::Red)
1184 vkCmdDispatch( getCurrentRenderingCommandBuffer(), 1, 1, 1 );
1185 }
1186
1187 // VS Instanced Drawing Pipeline: convert ~5k VS draws to ~1 draw per geometry type
1188 // This reads vs_visible_instances from PrimitiveCulling and outputs instanced draw commands
1190
1191#ifdef COMPUTE_DEBUG
1192 // Debug logging for compute pipeline results
1193 {
1194 const uint32_t objectCount = engine->getRenderableSceneObjectCount();
1195 PLOGI << "[COMPUTE_DEBUG] Compute pipeline completed for " << objectCount << " objects";
1196 PLOGI << "[COMPUTE_DEBUG] To read back buffer values, wait for GPU completion and use ComputeDebug::readbackBuffer()";
1197 }
1198#endif
1199
1200 // Graphics barriers and render pass - skip in COMPUTE_DEBUG mode
1201#if !defined(HEADLESS) && !defined(COMPUTE_DEBUG)
1202 // Add barriers after compute pass (before image barriers/render pass)
1203 {
1206 currentRenderProcess->getGraphicsCommandBuffer(),
1207 "Compute-Graphics Barriers",
1209 )
1210 // Barrier 1: Culling compute write to mesh shader read (visible meshlet buffer)
1211 VkBufferMemoryBarrier2 visibleBarrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
1212 visibleBarrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
1213 visibleBarrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT;
1214 visibleBarrier.dstStageMask = VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT;
1215 visibleBarrier.dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT;
1216 visibleBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1217 visibleBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1218 visibleBarrier.buffer =
1220 visibleBarrier.offset = 0;
1221 visibleBarrier.size = VK_WHOLE_SIZE;
1222
1223 // Barrier 2: Prepare draw compute write to indirect draw read (indirect buffer)
1224 VkBufferMemoryBarrier2 indirectBarrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
1225 indirectBarrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
1226 indirectBarrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT;
1227 indirectBarrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
1228 indirectBarrier.dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT;
1229 indirectBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1230 indirectBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1231 indirectBarrier.buffer = getCurrentRenderProcess()->getIndirectDrawBuffer().getBuffer();
1232 indirectBarrier.offset = 0;
1233 indirectBarrier.size = VK_WHOLE_SIZE;
1234
1235 // Barrier 3: VS indirect draw buffer (single-meshlet geometry path)
1236 VkBufferMemoryBarrier2 vsIndirectBarrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
1237 vsIndirectBarrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
1238 vsIndirectBarrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT;
1239 vsIndirectBarrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
1240 vsIndirectBarrier.dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT;
1241 vsIndirectBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1242 vsIndirectBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1243 vsIndirectBarrier.buffer = getCurrentRenderProcess()->getVSIndirectDrawBuffer().getBuffer();
1244 vsIndirectBarrier.offset = 0;
1245 vsIndirectBarrier.size = VK_WHOLE_SIZE;
1246
1247 // Barrier 4: VS indirect draw count buffer (single-meshlet geometry path)
1248 VkBufferMemoryBarrier2 vsCountBarrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
1249 vsCountBarrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
1250 vsCountBarrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT;
1251 vsCountBarrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
1252 vsCountBarrier.dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT;
1253 vsCountBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1254 vsCountBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1255 vsCountBarrier.buffer = getCurrentRenderProcess()->getVSIndirectDrawCountBuffer().getBuffer();
1256 vsCountBarrier.offset = 0;
1257 vsCountBarrier.size = VK_WHOLE_SIZE;
1258
1259 // Barrier 5: VS instanced draw commands buffer (new instanced drawing path)
1260 VkBufferMemoryBarrier2 vsInstancedDrawBarrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
1261 vsInstancedDrawBarrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
1262 vsInstancedDrawBarrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT;
1263 vsInstancedDrawBarrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
1264 vsInstancedDrawBarrier.dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT;
1265 vsInstancedDrawBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1266 vsInstancedDrawBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1267 vsInstancedDrawBarrier.buffer = getCurrentRenderProcess()->getVSDrawCommandsBuffer().getBuffer();
1268 vsInstancedDrawBarrier.offset = 0;
1269 vsInstancedDrawBarrier.size = VK_WHOLE_SIZE;
1270
1271 // Barrier 6: VS instanced draw count buffer (new instanced drawing path)
1272 VkBufferMemoryBarrier2 vsInstancedCountBarrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
1273 vsInstancedCountBarrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
1274 vsInstancedCountBarrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT;
1275 vsInstancedCountBarrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
1276 vsInstancedCountBarrier.dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT;
1277 vsInstancedCountBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1278 vsInstancedCountBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1279 vsInstancedCountBarrier.buffer = getCurrentRenderProcess()->getVSDrawCountBuffer().getBuffer();
1280 vsInstancedCountBarrier.offset = 0;
1281 vsInstancedCountBarrier.size = VK_WHOLE_SIZE;
1282
1283 VkBufferMemoryBarrier2 barriers[] = { visibleBarrier, indirectBarrier, vsIndirectBarrier, vsCountBarrier, vsInstancedDrawBarrier, vsInstancedCountBarrier };
1284
1285 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
1286 depInfo.bufferMemoryBarrierCount = 6;
1287 depInfo.pBufferMemoryBarriers = barriers;
1288 vkCmdPipelineBarrier2( currentRenderProcess->getGraphicsCommandBuffer(), &depInfo );
1289 }
1290
1291 recordXrSwapchainImageWritableBarrier( swapChainImageIndex );
1292 recordRenderPass( swapChainImageIndex );
1293 // Generate Hi-Z pyramid for occlusion culling (uses depth from just-completed render pass)
1295
1296 // =====================================================================
1297 // TWO-PASS OCCLUSION CULLING - PASS 2
1298 // =====================================================================
1299 // Pass 2 re-tests objects that failed Pass 1 Hi-Z against the NEW Hi-Z
1300 // (generated from Pass 1 rendering). This catches objects that were
1301 // incorrectly culled due to camera movement (temporal disocclusion).
1302 //
1303 // Note: Pass 2 survivors are APPENDED to culling_survivors (via atomicAdd
1304 // to cull_count). We then re-run the full pipeline (binning, unpacking,
1305 // prepare draw) for ALL survivors and render everything with LOAD_OP_CLEAR.
1306 // The resolve overwrites the swapchain with the complete final image.
1307 // =====================================================================
1308 recordPass2Culling( swapChainImageIndex );
1309
1310 // Synchronize swapchain image for MirrorView blit and SteamVR compositor copy.
1311 // This makes the rendered data available for TRANSFER_READ operations while
1312 // keeping the image in COLOR_ATTACHMENT_OPTIMAL as required by OpenXR spec.
1313 recordXrSwapchainImageFinishedWritingBarrier( swapChainImageIndex );
1314#endif
1315
1317 }
1318
1319 /*void Renderer::resetCounterBuffer() const
1320 {
1321 const VkBufferMemoryBarrier2 bufferFillBarrier{
1322 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
1323 .pNext = nullptr,
1324 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
1325 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
1326 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1327 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
1328 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1329 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1330 .buffer = getCurrentRenderProcess()->getMeshletCounterBuffer().getBuffer(),
1331 .offset = 0,
1332 .size = VK_WHOLE_SIZE,
1333 };
1334
1335 // Reset the draw counter buffer to 0 for the current frame
1336 vkCmdFillBuffer(
1337 getCurrentRenderingCommandBuffer(),
1338 getCurrentRenderProcess()->getMeshletCounterBuffer().getBuffer(),
1339 0,
1340 VK_WHOLE_SIZE,
1341 0
1342 );
1343
1344 VkDependencyInfo dependencyInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
1345 dependencyInfo.bufferMemoryBarrierCount = 1;
1346 dependencyInfo.pBufferMemoryBarriers = &bufferFillBarrier;
1347 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &dependencyInfo );
1348 }
1349
1350 void Renderer::recordComputeMeshletCulling()
1351 {
1352 TRACY_ZONE_SCOPED_NAMED( "Compute Meshlet Culling Pass" )
1353
1354 const VkCommandBuffer commandBuffer = getCurrentRenderProcess()->getGraphicsCommandBuffer();
1355 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Meshlet Culling Compute", Colors::Purple ) // Purple
1356
1357 resetCounterBuffer();
1358
1359 // Bind compute pipeline and descriptor sets
1360 objectCullingComputePipeline->bind( commandBuffer );
1361 const VkDescriptorSet computeDescriptorSet =
1362 getCurrentRenderProcess()->getComputeCullingDescriptorSet();
1363 vkCmdBindDescriptorSets(
1364 commandBuffer,
1365 VK_PIPELINE_BIND_POINT_COMPUTE,
1366 computeMeshletCullingPipelineLayout,
1367 0,
1368 1,
1369 &computeDescriptorSet,
1370 0,
1371 nullptr
1372 );
1373
1374 // dispatch compute shader
1375 const uint32_t numObjects =
1376 static_cast<uint32_t>( engine->getSceneManager()->getGameObjectCount() );
1377 PLOGI << "Dispatching Meshlet culling compute shader with " << numObjects << " objects";
1378 if ( numObjects > 0 )
1379 {
1380 const uint32_t numWorkgroups = ( numObjects + 255 ) / 256; // Divide by 256, round up
1381 vkCmdDispatch( commandBuffer, numWorkgroups, 1, 1 );
1382 }
1383
1384 // Debug: Read back the draw counter value
1385 PLOGI << "Dispatched compute shader for culling. Draw counter buffer at "
1386 << getCurrentRenderProcess()->getMeshletCounterBuffer().getBuffer();
1387 }*/
1388
1389 /*void Renderer::recordComputeDrawCommands() const
1390 {
1391 VkCommandBuffer cmdBuffer = getCurrentRenderingCommandBuffer();
1392
1393 TRACY_VK_ZONE_C( tracyVkContext[currentFrame], cmdBuffer, "Compute Draw Commands", Colors::Cyan );
1394
1395 // meshlet counter buffer
1396 VkBufferMemoryBarrier2 bufferBarrier{
1397 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
1398 .pNext = nullptr,
1399 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1400 .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
1401 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1402 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
1403 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1404 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1405 .buffer = getCurrentRenderProcess()->getMeshletCounterBuffer().getBuffer(),
1406 .offset = 0,
1407 .size = VK_WHOLE_SIZE,
1408 };
1409
1410 VkDependencyInfo dependencyInfo{ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1411 .bufferMemoryBarrierCount = 1u,
1412 .pBufferMemoryBarriers = &bufferBarrier };
1413 vkCmdPipelineBarrier2( cmdBuffer, &dependencyInfo );
1414
1415 prepareDrawsComputePipeline->bind( cmdBuffer );
1416 VkDescriptorSet prepareDrawComputeDescriptorSet =
1417 getCurrentRenderProcess()->getComputePrepareDrawDescriptorSet();
1418 vkCmdBindDescriptorSets(
1419 cmdBuffer,
1420 VK_PIPELINE_BIND_POINT_COMPUTE,
1421 prepareDrawsComputePipelineLayout,
1422 0,
1423 1,
1424 &prepareDrawComputeDescriptorSet,
1425 0,
1426 nullptr
1427 );
1428
1429 // Dispatch one thread per pipeline
1430 // vkCmdDispatchIndirect()
1431 vkCmdDispatch( getCurrentRenderProcess()->getGraphicsCommandBuffer(), MAX_PIPELINES, 1, 1 );
1432 }*/
1433
1435 {
1436 TRACY_ZONE_SCOPED_NAMED( "Submit Initial Transfers" );
1437 PLOGI << "Submitting initial transfers for all frames in flight";
1438
1439 for ( uint32_t frameIndex = 0; frameIndex < MAX_FRAMES_IN_FLIGHT; frameIndex++ )
1440 {
1441 {
1442 TRACY_ZONE_SCOPED_NAMED( "Submit Initial Transfer Frame" );
1443 // First, record the command buffer using the data populated in initializeGpuBuffers()
1444 prepareTransferSubmission( frameIndex );
1445
1446 RenderProcess * currentProcess = renderProcesses[frameIndex];
1447 VkCommandBuffer transferCmdBuffer = getCurrentTransferCommandBuffer();
1448
1449 // Use the new QueueSubmitBuilder for clean submission
1450 uint64_t signalValue = timelineSynchronizer_->getStageValue(
1452
1453 PLOGI << "Submitting initial transfer for frame " << frameIndex << " signaling value "
1454 << signalValue;
1455
1456 QueueSubmitBuilder builder(*timelineSynchronizer_, frameIndex);
1457 VkResult result = builder
1458 .withCommandBuffer(transferCmdBuffer)
1460 .submit(context->getTransferQueue(), currentProcess->getTransferCompleteFence());
1461
1462 if (result != VK_SUCCESS)
1463 {
1464 PLOGE << "Failed to submit initial transfer for frame " << frameIndex;
1465 }
1466 currentProcess->setTransferFenceSubmitted( true );
1467 }
1468 }
1469
1470 // Wait for all initial fences to ensure completion before main loop
1471 {
1472 TRACY_ZONE_SCOPED_NAMED( "Wait for Initial Transfer Completion" );
1473 for ( uint32_t frameIndex = 0; frameIndex < MAX_FRAMES_IN_FLIGHT; frameIndex++ )
1474 {
1475 {
1476 TRACY_ZONE_SCOPED_NAMED( "Wait for Transfer Fence" );
1477 RenderProcess * process = renderProcesses[frameIndex];
1478 vkWaitForFences(
1479 context->getVkDevice(), 1, &process->getTransferCompleteFence(), VK_TRUE, UINT64_MAX
1480 );
1481 vkResetFences( context->getVkDevice(), 1, &process->getTransferCompleteFence() );
1482 process->setTransferFenceSubmitted( false );
1483 }
1484 }
1485 }
1486
1488 PLOGI << "All initial transfers submitted.";
1489 }
1490
1492 {
1493 TRACY_ZONE_SCOPED_NAMED( "Submit Transfer" );
1494 RenderProcess * transferProcess = renderProcesses[currentFrame];
1495
1496 if ( transferProcess->getTransferFenceSubmitted() )
1497 {
1498 {
1499 TRACY_ZONE_SCOPED_NAMED( "Wait for Transfer Fence" );
1500 vkWaitForFences(
1501 context->getVkDevice(),
1502 1,
1503 &transferProcess->getTransferCompleteFence(),
1504 VK_TRUE,
1505 UINT64_MAX
1506 );
1507 vkResetFences( context->getVkDevice(), 1, &transferProcess->getTransferCompleteFence() );
1508 transferProcess->setTransferFenceSubmitted( false );
1509 }
1510
1511 // Process deferred buffer deletions AFTER fence wait to ensure
1512 // previous GPU work using this frame slot is complete
1513 renderingDataManager->processPendingDeletions();
1514
1515 // Cleanup staging buffers AFTER fence wait - these buffers were used
1516 // by the transfer command buffer that just completed
1517 transferProcess->cleanupStagingBuffer();
1518 }
1519
1520 // Use QueueSubmitBuilder for clean, correct synchronization
1521 // Wait for resource reuse: wait for the frame that last used this slot to complete graphics
1523
1524 builder.waitForResourceReuse(
1526 VK_PIPELINE_STAGE_2_TRANSFER_BIT
1527 );
1528
1529 uint64_t signalValue = timelineSynchronizer_->getStageValue(
1531 PLOGI << "Transfer: frame " << renderedFrameCounter << " signal " << signalValue;
1532
1533 VkResult result = builder
1534 .withCommandBuffer(transferProcess->getTransferCommandBuffer())
1536 .submit(context->getTransferQueue(), transferProcess->getTransferCompleteFence());
1537
1538 if (result != VK_SUCCESS)
1539 {
1540 PLOGE << "Failed to submit transfer for frame " << renderedFrameCounter;
1541 }
1542
1543 transferProcess->setTransferFenceSubmitted( true );
1544 }
1545
// Ends the current render process's graphics command buffer and submits it to
// the graphics queue through `builder`.
//
// swapchainImageIndex    - index into renderFinishedSemaphores; selects the binary
//                          semaphore this submit may signal. Used unchecked.
// mirrorAcquireSemaphore - when not VK_NULL_HANDLE, the submit additionally waits on
//                          it at the transfer stage and signals the selected binary
//                          semaphore; when VK_NULL_HANDLE neither is added.
//
// NOTE(review): several lines of this function are not visible in this listing
// (the declaration of `builder`, the first argument of each waitFor* call, the
// arguments of getStageValue, and the tail of the withCommandBuffer chain). The
// comments added here describe only the statements that are shown.
1546 void Renderer::submitGraphics( uint32_t swapchainImageIndex, VkSemaphore mirrorAcquireSemaphore )
1547 {
1548 TRACY_ZONE_SCOPED_NAMED( "Submit Graphics" );
1549 RenderProcess * graphicsProcess = getCurrentRenderProcess();
1550
1551 {
1552 TRACY_ZONE_SCOPED_NAMED( "End Graphics Command Buffer" );
// NOTE(review): the VkResult returned by vkEndCommandBuffer is discarded here.
1553 vkEndCommandBuffer( graphicsProcess->getGraphicsCommandBuffer() );
1554 }
1555
1556 // Get the binary semaphore for presentation
1557 VkSemaphore binarySemaphore = renderFinishedSemaphores[swapchainImageIndex];
// NOTE(review): a null handle is only logged; execution continues and the same
// handle can still be passed to signalBinary() further down.
1558 if ( binarySemaphore == VK_NULL_HANDLE )
1559 {
1560 PLOGE << "Invalid render finished semaphore";
1561 }
1562
1563 // Use QueueSubmitBuilder for clean, unified submission
1564 // This combines the timeline signal AND binary semaphore signal in a single submit
1566
1567 // Wait for transfer completion from current frame
// The two preprocessor branches differ in the stage mask (the task-shader stage is
// only included in the first one) and in the extra waitForPrevious() call.
1568#if !defined(HEADLESS) && !defined(COMPUTE_DEBUG)
1569 builder.waitForCurrent(
1571 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT |
1572 VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT
1573 );
1574 // Wait for previous frame's Hi-Z generation to complete before culling reads it.
1575 // Hi-Z is generated at the end of the graphics pass, so we wait for GraphicsComplete.
1576 // Without this, the culling compute shader may read incomplete Hi-Z data causing flickering.
// Skipped while renderedFrameCounter is 0, i.e. when no earlier frame has been counted yet.
1577 if (renderedFrameCounter > 0) {
1578 builder.waitForPrevious(
1580 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT
1581 );
1582 }
1583#else
1584 builder.waitForCurrent(
1586 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
1587 VK_PIPELINE_STAGE_2_TRANSFER_BIT
1588 );
1589#endif
1590
1591 // Wait for mirror acquire semaphore if provided
1592 if ( mirrorAcquireSemaphore != VK_NULL_HANDLE )
1593 {
1594 builder.waitForBinary(mirrorAcquireSemaphore, VK_PIPELINE_STAGE_2_TRANSFER_BIT);
1595 }
1596
// signalValue is only used for the log line among the statements visible here.
1597 uint64_t signalValue = timelineSynchronizer_->getStageValue(
1599 PLOGI << "Submitting graphics frame " << renderedFrameCounter << " signaling " << signalValue;
1600
1601 // Single submit that signals timeline semaphore, and binary semaphore only if presenting
1602 builder.withCommandBuffer(graphicsProcess->getGraphicsCommandBuffer())
1604
1605 // Only signal binary semaphore if we're going to present (mirror view was rendered)
1606 if ( mirrorAcquireSemaphore != VK_NULL_HANDLE )
1607 {
1608 builder.signalBinary(binarySemaphore); // For vkQueuePresentKHR
1609 }
1610
// The render process's graphics-complete fence is handed to submit() together with the queue.
1611 VkResult result = builder.submit(context->getGraphicsQueue(), graphicsProcess->getGraphicsCompleteFence());
1612
1613 if (result != VK_SUCCESS)
1614 {
1615 PLOGE << "Failed to submit graphics for frame " << renderedFrameCounter;
1616 }
1617
// NOTE(review): the flag is set even when submit() reported a failure above; confirm
// that code waiting on the graphics fence cannot stall on a fence that was never submitted.
1618 graphicsProcess->setGraphicsFenceSubmitted( true );
1619 }
1620
1622 {
1623 return timelineSynchronizer_->getCurrentValue();
1624 }
1625
1630
1635
1640
1641 VkSemaphore Renderer::getCurrentPresentableSemaphore( uint32_t swapchainImageIndex ) const
1642 {
1643 return renderFinishedSemaphores[swapchainImageIndex];
1644 }
1645
1646 std::vector<VkCommandBuffer> Renderer::getGraphicsCommandBuffers() const
1647 {
1648 std::vector<VkCommandBuffer> commandBuffers;
1649 for ( RenderProcess * renderProcess : renderProcesses )
1650 {
1651 commandBuffers.push_back( renderProcess->getGraphicsCommandBuffer() );
1652 }
1653 return commandBuffers;
1654 }
1655
1657 {
1658 return vkGraphicsCommandPool;
1659 }
1660
1662 {
1663 return vkTransferCommandPool;
1664 }
1665
1667 {
1668 TRACY_ZONE_SCOPED_NAMED( "Renderer Cleanup" );
1669#ifdef ENABLE_TRACY
1670 destroyTracyContexts();
1671#endif
1672
1673 for ( GraphicsPipeline * pipeline : graphicsPipelines )
1674 {
1675 delete pipeline; // Delete the pipeline object to free the memory
1676 }
1677 graphicsPipelines.clear();
1678
1679 // Cleanup depth-only pipelines
1680 for ( GraphicsPipeline * pipeline : depthOnlyGraphicsPipelines )
1681 {
1682 delete pipeline;
1683 }
1685
1686 // Cleanup depth-only vertex shader pipeline
1687 if (vsDepthOnlyPipeline_ != VK_NULL_HANDLE) {
1688 vkDestroyPipeline(context->getVkDevice(), vsDepthOnlyPipeline_, nullptr);
1689 vsDepthOnlyPipeline_ = VK_NULL_HANDLE;
1690 }
1691
1692 // Cleanup vertex shader path pipeline
1693 if (vsGraphicsPipeline_ != VK_NULL_HANDLE) {
1694 vkDestroyPipeline(context->getVkDevice(), vsGraphicsPipeline_, nullptr);
1695 vsGraphicsPipeline_ = VK_NULL_HANDLE;
1696 }
1697
1698 for ( RenderProcess * process : renderProcesses )
1699 {
1700 process->cleanup();
1701 delete process;
1702 }
1703 renderProcesses.clear();
1704
1705 // Cleanup compute passes
1706 /*if (objectCullingComputePass.has_value()) {
1707 objectCullingComputePass->cleanup(context);
1708 objectCullingComputePass.reset();
1709 }
1710 if (meshletUnpackingDispatchComputePass.has_value()) {
1711 meshletUnpackingDispatchComputePass->cleanup(context);
1712 meshletUnpackingDispatchComputePass.reset();
1713 }
1714 if (meshletUnpackingComputePass.has_value()) {
1715 meshletUnpackingComputePass->cleanup(context);
1716 meshletUnpackingComputePass.reset();
1717 }
1718 if (meshletCullingDispatchComputePass.has_value()) {
1719 meshletCullingDispatchComputePass->cleanup(context);
1720 meshletCullingDispatchComputePass.reset();
1721 }
1722 if (meshletCullingComputePass.has_value()) {
1723 meshletCullingComputePass->cleanup(context);
1724 meshletCullingComputePass.reset();
1725 }
1726 if (drawPreparationComputePass.has_value()) {
1727 drawPreparationComputePass->cleanup(context);
1728 drawPreparationComputePass.reset();
1729 }*/
1730
1731 if (placeholderBuffer.has_value())
1732 placeholderBuffer->destroy();
1733 if (placeholderUniformBuffer.has_value())
1734 placeholderUniformBuffer->destroy();
1735 if (counterBuffer.has_value())
1736 counterBuffer->destroy();
1737 if (dispatchBuffer.has_value())
1738 dispatchBuffer->destroy();
1739 if (objectIDsBuffer.has_value())
1740 objectIDsBuffer->destroy();
1741 if (objectCullingDataBuffer.has_value())
1742 objectCullingDataBuffer->destroy();
1743 if (objectMeshletDataBuffer.has_value())
1744 objectMeshletDataBuffer->destroy();
1745 if (meshUnpackingDataBuffer.has_value())
1746 meshUnpackingDataBuffer->destroy();
1747
1748 vkDestroyCommandPool( context->getVkDevice(), vkGraphicsCommandPool, nullptr );
1749 vkGraphicsCommandPool = VK_NULL_HANDLE; // Set to NULL after destroying
1750 vkDestroyCommandPool( context->getVkDevice(), vkTransferCommandPool, nullptr );
1751 vkTransferCommandPool = VK_NULL_HANDLE;
1752
1753 vkDestroyDescriptorSetLayout( context->getVkDevice(), graphicsDescriptorSetLayout, nullptr );
1754 graphicsDescriptorSetLayout = VK_NULL_HANDLE;
1755
1756 vkDestroyPipelineLayout( context->getVkDevice(), prepareDrawsComputePipelineLayout, nullptr );
1757 prepareDrawsComputePipelineLayout = VK_NULL_HANDLE;
1758 vkDestroyPipelineLayout( context->getVkDevice(), computeObjectCullingPipelineLayout, nullptr );
1759 computeObjectCullingPipelineLayout = VK_NULL_HANDLE;
1760 vkDestroyPipelineLayout( context->getVkDevice(), graphicsPipelineLayout, nullptr );
1761 graphicsPipelineLayout = VK_NULL_HANDLE;
1762
1763 vkDestroyDescriptorPool( context->getVkDevice(), descriptorPool, nullptr );
1764 descriptorPool = VK_NULL_HANDLE;
1765
1766 // Clean up Hi-Z descriptor pool
1767 if (hiZDescriptorPool != VK_NULL_HANDLE)
1768 {
1769 vkDestroyDescriptorPool( context->getVkDevice(), hiZDescriptorPool, nullptr );
1770 hiZDescriptorPool = VK_NULL_HANDLE;
1771 }
1772
1773 // TimelineSynchronizer handles its own cleanup via RAII
1774 timelineSynchronizer_.reset();
1775
1776 for ( auto sem : renderFinishedSemaphores )
1777 vkDestroySemaphore( context->getVkDevice(), sem, nullptr );
1779 }
1780
1785
// Resets per-frame bookkeeping: the visible body sets the frame index (currentFrame) back to 0.
1787 {
1788 currentFrame = 0;
1790 // Timeline values are now computed on-demand by TimelineSynchronizer
1791 }
1792
// Moves every swapchain render target from VK_IMAGE_LAYOUT_UNDEFINED to
// VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL. One VkImageMemoryBarrier2 is built per
// render target and all of them are recorded with a single vkCmdPipelineBarrier2 call.
1794 {
1795 VkCommandBuffer initialImageTransition =
1797
1798 std::vector<VkImageMemoryBarrier2> swapchainImageBarriers;
1799
1800 for ( const auto & renderTarget : headset->getSwapchainRenderTargets() )
1801 {
// No earlier access is waited on (srcAccessMask = 0); the destination scope is
// color-attachment writes at the color-attachment-output stage.
1802 VkImageMemoryBarrier2 swapchainImageMemoryBarrier{
1803 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
1804 .srcStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
1805 .srcAccessMask = 0,
1806 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
1807 .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
1808 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
1809 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1810 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1811 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1812 .image = renderTarget.image,
// Color aspect, first mip level only, one array layer per eye.
1813 .subresourceRange{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1814 .baseMipLevel = 0u,
1815 .levelCount = 1u,
1816 .baseArrayLayer = 0u,
1817 .layerCount = headset->getEyeCount() }
1818 };
1819 swapchainImageBarriers.push_back( swapchainImageMemoryBarrier );
1820 }
1821
// Only image barriers are supplied; no global or buffer barriers.
1822 VkDependencyInfo dependencyInfo{
1823 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1824 .dependencyFlags = 0,
1825 .imageMemoryBarrierCount = static_cast<uint32_t>( swapchainImageBarriers.size() ),
1826 .pImageMemoryBarriers = swapchainImageBarriers.data(),
1827 };
1828
1829 vkCmdPipelineBarrier2( initialImageTransition, &dependencyInfo );
// NOTE(review): the callee receiving the device, graphics queue and command buffer below is
// not visible here — presumably it ends and submits the one-time command buffer; confirm.
1831 context->getVkDevice(),
1832 context->getGraphicsQueue(),
1834 initialImageTransition
1835 );
1836 }
1837
// NOTE(review): only the trailing comment of this body is visible here; the statements that
// precede it are not shown, so their effect must be confirmed against the full source.
1839 {
1842 // Timeline values are now computed on-demand by TimelineSynchronizer
1843 }
1844
// Re-aligns renderedFrameCounter with the timeline semaphore so that the first signal value
// of the next frame is greater than the semaphore's current value. The counter is only ever
// moved forward; the outcome (adjusted or not) is logged at info level.
1846 {
1847 // Get the current timeline semaphore value
1848 uint64_t currentValue = timelineSynchronizer_->getCurrentValue();
1849
1850 // Calculate the minimum frame number that would produce valid signals.
1851 // Timeline signal formula: value = frameNumber * PIPELINE_STAGE_COUNT + stage + 1
1852 // We need nextSignal > currentValue, so:
1853 // (nextFrame * PIPELINE_STAGE_COUNT + 0 + 1) > currentValue
1854 // nextFrame > (currentValue - 1) / PIPELINE_STAGE_COUNT
1855 // nextFrame = ceil((currentValue) / PIPELINE_STAGE_COUNT)
1856 // Use the actual PIPELINE_STAGE_COUNT from PipelineStages.h (currently 5)
1857 constexpr uint64_t stageCount = static_cast<uint64_t>(PIPELINE_STAGE_COUNT);
// Integer ceiling division: (a + n - 1) / n == ceil(a / n) for unsigned operands.
1858 uint64_t minFrameForValidSignal = (currentValue + stageCount - 1) / stageCount;
1859
1860 if (renderedFrameCounter < minFrameForValidSignal)
1861 {
1862 PLOGI << "Timeline sync after pause: advancing frame counter from "
1863 << renderedFrameCounter << " to " << minFrameForValidSignal
1864 << " (timeline at " << currentValue << ")";
1865 renderedFrameCounter = minFrameForValidSignal;
// NOTE(review): the statements the next two comments describe are not visible here.
1866 // Also advance currentFrame to match, wrapping around MAX_FRAMES_IN_FLIGHT
1868 // Skip mirror view for multiple frames to cycle through all swapchain images
1869 // This prevents WRITE_AFTER_PRESENT hazard on any swapchain image
1871 }
1872 else
1873 {
1874 PLOGI << "Timeline sync after pause: no adjustment needed (frame "
1875 << renderedFrameCounter << ", timeline at " << currentValue << ")";
1876 }
1877 }
1878
// Reports whether the mirror view should be skipped for this frame.
// Returns true (after logging the remaining skip count) inside the guarded block, false otherwise.
// NOTE(review): the condition guarding the inner block and one statement inside it are not
// visible here — presumably a check on, and a decrement of, skipMirrorFramesRemaining_; confirm.
1880 {
1881 // Check skip counter (set by timeline sync or stall detection in markFrameStart)
1883 {
1884 PLOGI << "Skipping mirror view (stall recovery, " << skipMirrorFramesRemaining_ << " frames remaining)";
1886 return true;
1887 }
1888
1889 return false;
1890 }
1891
// Marks the start of a frame. Logs the time elapsed since the previous frame start and, when
// that time exceeds STALL_THRESHOLD_MS, waits for the device to become idle before the new
// start time is stored in lastFrameStartTime_.
1893 {
1894 auto now = std::chrono::steady_clock::now();
// duration_cast to milliseconds truncates, so elapsed holds whole milliseconds.
1895 auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - lastFrameStartTime_);
1896 PLOGI << "[FRAME_TIME] Frame start - elapsed since last frame: " << elapsed.count() << "ms";
1897
1898 // If there was a long stall, skip mirror view for multiple frames to cycle through
1899 // all swapchain images safely
1900 if (elapsed > STALL_THRESHOLD_MS)
1901 {
1902 PLOGI << "[FRAME_TIME] Stall detected (" << elapsed.count() << "ms > "
1903 << STALL_THRESHOLD_MS.count() << "ms) - will skip mirror view for "
1904 << SKIP_FRAMES_AFTER_STALL << " frames and sync GPU";
1905
// NOTE(review): the statement described by the next comment is not visible here.
1906 // Always skip mirror frames after a stall to prevent WRITE_AFTER_PRESENT hazard
1908
1909 // Wait for all GPU work to complete to ensure clean state after stall
1910 vkDeviceWaitIdle(context->getVkDevice());
1911 PLOGI << "[FRAME_TIME] GPU sync completed after stall";
1912
// NOTE(review): the call described by the following comment is not visible here.
1913 // Sync the timeline semaphore and frame counter after GPU idle.
1914 // This ensures the frame counter is advanced to produce valid timeline values.
1915 // Without this, the frame counter may be too low, causing submitTransfer to
1916 // wait for timeline values that will never be signaled (deadlock).
1918 }
1919
// Stored unconditionally so the next call measures from this frame's start.
1920 lastFrameStartTime_ = now;
1921 }
1922
// Returns the time elapsed since lastFrameStartTime_ in whole milliseconds, measured on the
// steady clock.
1924 {
1925 auto now = std::chrono::steady_clock::now();
1926 return std::chrono::duration_cast<std::chrono::milliseconds>(now - lastFrameStartTime_).count();
1927 }
1928
// True while skipMirrorFramesRemaining_ is greater than zero.
1930 {
1931 return skipMirrorFramesRemaining_ > 0;
1932 }
1933
// Returns the semaphore exposed by timelineSynchronizer_.
// The synchronizer is dereferenced without a null check.
1935 {
1936 return timelineSynchronizer_->getSemaphore();
1937 }
1938
1943
1944 const std::vector<GraphicsPipeline *> & Renderer::getGraphicsPipelines() const
1945 {
1946 return graphicsPipelines;
1947 }
1948
1949#ifdef ENABLE_TRACY
1950
1951 void Renderer::destroyTracyContexts()
1952 {
1953 TRACY_ZONE_SCOPED_NAMED( "Cleanup Tracy Contexts" );
1954
1955 PLOGI << "Cleaning up " << tracyVkContext.size() << " Tracy Vulkan contexts";
1956
1957 for ( TracyVkCtx ctx : tracyVkContext )
1958 {
1959 if ( ctx == nullptr )
1960 {
1961 continue;
1962 }
1963
1964 PLOGI << "Destroying Tracy context: " << static_cast<void *>( ctx );
1965 TracyVkDestroy( ctx );
1966 PLOGI << "Tracy context destroyed";
1967 }
1968
1969 tracyVkContext.clear();
1970 PLOGI << "Tracy contexts cleanup complete";
1971 }
1972
1973 void Renderer::setupTracy( const std::vector<TracyVkCtx> & tracyVulkanContext )
1974 {
1975 if ( !tracyVkContext.empty() )
1976 {
1977 destroyTracyContexts();
1978 }
1979 tracyVkContext = tracyVulkanContext;
1980 }
1981
1982 void Renderer::resetTracyContexts()
1983 {
1984 TRACY_ZONE_SCOPED_NAMED( "Reset Tracy Contexts" );
1985 PLOGI << "Resetting Tracy Vulkan contexts for state transition";
1986
1987 // Destroy existing contexts
1988 destroyTracyContexts();
1989
1990 // Recreate Tracy contexts using the same pattern as the constructor
1991 for ( size_t i = 0; i < renderProcesses.size(); i++ )
1992 {
1993 VkCommandBuffer commandBuffer = renderProcesses[i]->getGraphicsCommandBuffer();
1994
1995 TracyVkCtx tracyContext = TracyVkContext(
1996 context->getVkPhysicalDevice(),
1997 context->getVkDevice(),
1998 context->getGraphicsQueue(),
1999 commandBuffer
2000 );
2001
2002 std::stringstream contextNameStream{};
2003 contextNameStream << "Vulkan Context " << i;
2004 std::string contextName = contextNameStream.str();
2005
2006 TracyVkContextName( tracyContext, contextName.c_str(), contextName.size() );
2007
2008 tracyVkContext.push_back( tracyContext );
2009 }
2010
2011 PLOGI << "Tracy contexts reset complete, created " << tracyVkContext.size() << " contexts";
2012 }
2013
2014#endif
2015
// Looks up an already-created graphics pipeline by mesh and fragment shader name.
// Performs a linear scan of graphicsPipelines and returns the index of the first entry whose
// two shader names both match, or -1 when none matches. pipelineData is not consulted by the
// visible body.
2017 const std::string & meshShader,
2018 const std::string & fragShader,
2019 const PipelineMaterialPayload & pipelineData
2020 ) const
2021 {
2022 PLOGI << "Trying to find existing pipeline for mesh shader: " << meshShader
2023 << " frag shader: " << fragShader;
2024 for ( size_t i = 0; i < graphicsPipelines.size(); i++ )
2025 {
2026 if ( graphicsPipelines[i]->getMeshShaderName() == meshShader &&
2027 graphicsPipelines[i]->getFragShaderName() == fragShader )
2028 {
2029 PLOGI << "Found existing pipeline at " << i << "!";
// The size_t index is narrowed to int to share the return type with the -1 sentinel.
2030 return static_cast<int>( i );
2031 }
2032 }
2033 PLOGI << "No existing pipeline!";
2034 return -1;
2035 }
2036
// Returns the primitive (object) culling compute pass.
// Throws std::runtime_error when objectCullingComputePass holds no value.
2038 {
2039 if (!objectCullingComputePass.has_value() ) throw std::runtime_error( "No primitive culling compute pass found!" );
2040 return objectCullingComputePass.value();
2041 }
2042
// Returns the binning allocator compute pass.
// Throws std::runtime_error when binningAllocatorComputePass holds no value.
2044 {
2045 if (!binningAllocatorComputePass.has_value()) throw std::runtime_error("No binning allocator compute pass found!");
2046 return binningAllocatorComputePass.value();
2047 }
2048
// Accessor for the meshlet unpacking dispatcher compute pass; throws std::runtime_error on
// the failure path.
// NOTE(review): the statement guarded by the has_value() check is not visible here —
// presumably it returns the contained pass, as the sibling accessors do; confirm.
2050 {
2051 if ( meshletUnpackingDispatchComputePass.has_value() )
2053 throw std::runtime_error( "No object for object unpacking compute pass found!" );
2054 }
2055
// Returns the meshlet unpacking compute pass.
// Throws std::runtime_error when meshletUnpackingComputePass holds no value.
2057 {
2058 if ( meshletUnpackingComputePass.has_value() )
2059 return meshletUnpackingComputePass.value();
2060 throw std::runtime_error( "No object for meshlet unpacking compute pass found!" );
2061 }
2062
// Returns the meshlet culling dispatcher compute pass.
// Throws std::runtime_error when meshletCullingDispatchComputePass holds no value.
// NOTE(review): the exception text is identical to the one thrown for meshletCullingComputePass,
// so the two failures cannot be told apart from the message alone.
2064 {
2065 if ( meshletCullingDispatchComputePass.has_value() )
2066 return meshletCullingDispatchComputePass.value();
2067 throw std::runtime_error( "No object for meshlet culling compute pass found!" );
2068 }
2069
// Returns the meshlet culling compute pass.
// Throws std::runtime_error when meshletCullingComputePass holds no value.
2071 {
2072 if ( meshletCullingComputePass.has_value() )
2073 return meshletCullingComputePass.value();
2074 throw std::runtime_error( "No object for meshlet culling compute pass found!" );
2075 }
2076
// Returns the draw preparation compute pass.
// Throws std::runtime_error when drawPreparationComputePass holds no value.
2078 {
2079 if ( drawPreparationComputePass.has_value() )
2080 return drawPreparationComputePass.value();
2081 throw std::runtime_error( "No object for draw preparation compute pass found!" );
2082 }
2083
// Returns the vertex-shader-path binning allocator compute pass.
// Throws std::runtime_error when vsBinningAllocatorComputePass_ holds no value.
2085 {
2086 if ( vsBinningAllocatorComputePass_.has_value() )
2087 return vsBinningAllocatorComputePass_.value();
2088 throw std::runtime_error( "No VS binning allocator compute pass found!" );
2089 }
2090
// Returns the vertex-shader-path instance unpacking compute pass.
// Throws std::runtime_error when vsInstanceUnpackingComputePass_ holds no value.
2092 {
2093 if ( vsInstanceUnpackingComputePass_.has_value() )
2094 return vsInstanceUnpackingComputePass_.value();
2095 throw std::runtime_error( "No VS instance unpacking compute pass found!" );
2096 }
2097
// Returns the vertex-shader-path prepare-draw compute pass.
// Throws std::runtime_error when vsPrepareDrawComputePass_ holds no value.
2099 {
2100 if ( vsPrepareDrawComputePass_.has_value() )
2101 return vsPrepareDrawComputePass_.value();
2102 throw std::runtime_error( "No VS prepare draw compute pass found!" );
2103 }
2104
// Describes the descriptor set layout, push-constant range and pipeline layout for the
// primitive culling compute pass and creates the pass ("Primitive Culling") from
// Output/PrimitiveCulling.comp.spv. When HEADLESS is defined the bindings between
// #if/#endif are compiled out.
2106 {
2107 TRACY_ZONE_SCOPED_NAMED( "Create Object Culling Resources" );
2108
2109 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
2111 #if !defined(HEADLESS) // COMPUTE_DEBUG uses real compute shaders
2115 .addStorageBuffer( ShaderStage::PrimitiveCulling::PRIMITIVE_IDS, VK_SHADER_STAGE_COMPUTE_BIT )
2118 // Hi-Z Occlusion Culling bindings (two pyramids for two-pass culling)
2119 .addCombinedImageSampler( ShaderStage::HiZCulling::HIZ_PYRAMID, VK_SHADER_STAGE_COMPUTE_BIT ) // Previous frame's Hi-Z
2120 .addUniformBuffer( ShaderStage::HiZCulling::HIZ_VIEW_PROJECTION, VK_SHADER_STAGE_COMPUTE_BIT )
2121 .addCombinedImageSampler( ShaderStage::HiZCulling::HIZ_PYRAMID_CURRENT, VK_SHADER_STAGE_COMPUTE_BIT ) // Current frame's Hi-Z
2122 // GPU Transform Optimization: local bounds + per-object transforms for GPU-side world space computation
2125 // Two-pass occlusion culling: failed objects buffer for Pass 2 re-testing
2128 // Vertex shader path for single-meshlet geometry optimization
2134 #endif
2135 .build();
2136
2137 VkDescriptorSetLayoutCreateInfo computeLayoutCreateInfo = {
2138 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2139 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
2140 .pBindings = computeLayoutBindings.data()
2141 };
2142
2143 // Push constants: primitiveCount (uint32_t) + cullingPass (uint32_t)
2144 VkPushConstantRange pushConstantRange{
2145 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
2146 .offset = 0,
2147 .size = 2 * sizeof( uint32_t )
2148 };
2149
// pSetLayouts is left null although setLayoutCount is 1.
// NOTE(review): presumably the pass's create call supplies the layout pointer — confirm.
2150 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
2151 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2152 .pNext = nullptr,
2153 .flags = 0,
2154 .setLayoutCount = 1u,
2155 .pSetLayouts = nullptr,
2156 .pushConstantRangeCount = 1u,
2157 .pPushConstantRanges = &pushConstantRange,
2158 };
2159
// NOTE(review): raw new/delete — the allocation leaks if anything before the delete throws.
2160 auto* specialization = new ComputePipelineSpecializationData(32);
2161 const auto shaderPath = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/PrimitiveCulling.comp.spv");
2162 objectCullingComputePass.emplace( ComputePass( "Primitive Culling" ) );
2164 context->getVkDevice(),
2165 &pipelineLayoutCreateInfo,
2166 &computeLayoutCreateInfo,
2167 shaderPath.string(),
2168 specialization
2169 );
2170 delete specialization;
2171 }
2172
// Describes the descriptor set layout and pipeline layout (no push constants) for the
// binning allocator compute pass and creates the pass ("Binning Allocator") from
// Output/MeshletBinningAllocator.comp.spv.
2174 {
2175 TRACY_ZONE_SCOPED_NAMED( "Create Binning Allocator Resources" );
2176
2177 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
2179 #if !defined(HEADLESS) // COMPUTE_DEBUG uses real compute shaders
2185 #endif
2186 .build();
2187
2188 VkDescriptorSetLayoutCreateInfo computeLayoutCreateInfo = {
2189 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2190 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
2191 .pBindings = computeLayoutBindings.data()
2192 };
2193
// pSetLayouts is left null although setLayoutCount is 1.
// NOTE(review): presumably the pass's create call supplies the layout pointer — confirm.
2194 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
2195 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2196 .pNext = nullptr,
2197 .flags = 0,
2198 .setLayoutCount = 1u,
2199 .pSetLayouts = nullptr,
2200 .pushConstantRangeCount = 0,
2201 .pPushConstantRanges = nullptr,
2202 };
2203
// NOTE(review): raw new/delete — the allocation leaks if anything before the delete throws.
2204 auto* specialization = new ComputePipelineSpecializationData(32);
2205 const auto shaderPath = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/MeshletBinningAllocator.comp.spv");
2206 binningAllocatorComputePass.emplace( ComputePass( "Binning Allocator" ) );
2208 context->getVkDevice(),
2209 &pipelineLayoutCreateInfo,
2210 &computeLayoutCreateInfo,
2211 shaderPath.string(),
2212 specialization
2213 );
2214 delete specialization;
2215 }
2216
// Creates the dispatcher compute pass ("Meshlet Unpacking Dispatcher") from
// Output/CountDispatcher.comp.spv. Its specialization data is built from the thread count of
// meshletUnpackingComputePass, which is dereferenced without a has_value() check and
// therefore must already be created when this runs.
2218 {
2219 TRACY_ZONE_SCOPED_NAMED( "Create Meshlet Unpacking Dispatcher" );
2220
2221 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
2223 #if !defined(HEADLESS) // COMPUTE_DEBUG uses real compute shaders
2227 #endif
2228 .build();
2229
// No push constants; pSetLayouts is left null although setLayoutCount is 1.
2230 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{
2231 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2232 .setLayoutCount = 1u,
2233 .pSetLayouts = nullptr,
2234 };
2235
2236 VkDescriptorSetLayoutCreateInfo computeLayoutCreateInfo{
2237 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2238 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
2239 .pBindings = computeLayoutBindings.data(),
2240 };
2241
// NOTE(review): raw new/delete — the allocation leaks if anything before the delete throws.
2242 auto* specialization = new DispatcherComputePipelineSpecializationData(1, meshletUnpackingComputePass->getThreadCount());
2243 const auto shaderName = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/CountDispatcher.comp.spv");
2244 meshletUnpackingDispatchComputePass.emplace( DispatcherComputePass( "Meshlet Unpacking Dispatcher" ) );
2246 context->getVkDevice(),
2247 &pipelineLayoutCreateInfo,
2248 &computeLayoutCreateInfo,
2249 shaderName.string(),
2250 specialization
2251 );
2252 delete specialization;
2253 }
2254
// Creates the dispatcher compute pass ("Meshlet Culling Dispatcher") from
// Output/CountDispatcher.comp.spv. Its specialization data is built from the thread count of
// meshletCullingComputePass, which is dereferenced without a has_value() check and therefore
// must already be created when this runs.
2256 {
// NOTE(review): the zone name below says "Unpacking" although this body creates the meshlet
// culling dispatcher — looks like a copy-paste leftover; profiler captures will show two
// zones with the same name.
2257 TRACY_ZONE_SCOPED_NAMED( "Create Meshlet Unpacking Dispatcher" );
2258
2259 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
2261#if !defined(HEADLESS) // COMPUTE_DEBUG uses real compute shaders
2264#endif
2265 .build();
2266
// No push constants; pSetLayouts is left null although setLayoutCount is 1.
2267 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{
2268 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2269 .setLayoutCount = 1u,
2270 .pSetLayouts = nullptr,
2271 };
2272
2273 VkDescriptorSetLayoutCreateInfo computeLayoutCreateInfo{
2274 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2275 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
2276 .pBindings = computeLayoutBindings.data(),
2277 };
2278
// NOTE(review): raw new/delete — the allocation leaks if anything before the delete throws.
2279 auto* specialization = new DispatcherComputePipelineSpecializationData(1, meshletCullingComputePass->getThreadCount());
2280 const auto shaderName = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/CountDispatcher.comp.spv");
2281 meshletCullingDispatchComputePass.emplace( DispatcherComputePass( "Meshlet Culling Dispatcher" ) );
2283 context->getVkDevice(),
2284 &pipelineLayoutCreateInfo,
2285 &computeLayoutCreateInfo,
2286 shaderName.string(),
2287 specialization
2288 );
2289 delete specialization;
2290 }
2291
// Describes the descriptor set layout and pipeline layout (no push constants) for the
// meshlet unpacking compute pass and creates the pass ("Meshlet Unpacking") from
// Output/MeshletUnpacking.comp.spv, using specialization data constructed with 128.
2293 {
2294 // Stage 2: Meshlet Unpacking V2 bindings
2295 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
2297#if !defined(HEADLESS) // COMPUTE_DEBUG uses real compute shaders
2302#endif
2303 .build();
2304
2305
2306 VkDescriptorSetLayoutCreateInfo computeLayoutCreateInfo = {
2307 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2308 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
2309 .pBindings = computeLayoutBindings.data()
2310 };
2311
// pSetLayouts is left null although setLayoutCount is 1.
// NOTE(review): presumably the pass's create call supplies the layout pointer — confirm.
2312 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
2313 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2314 .pNext = nullptr,
2315 .flags = 0,
2316 .setLayoutCount = 1u,
2317 .pSetLayouts = nullptr,
2318 .pushConstantRangeCount = 0,
2319 .pPushConstantRanges = nullptr,
2320 };
2321
// NOTE(review): raw new/delete — the allocation leaks if anything before the delete throws.
2322 auto* specialization = new ComputePipelineSpecializationData(128);
2323 const auto shaderName = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/MeshletUnpacking.comp.spv");
2324
2325 meshletUnpackingComputePass.emplace(ComputePass("Meshlet Unpacking"));
2327 context->getVkDevice(),
2328 &pipelineLayoutCreateInfo,
2329 &computeLayoutCreateInfo,
2330 shaderName.string(),
2331 specialization
2332 );
2333 delete specialization;
2334 }
2335
// Describes the descriptor set layout and pipeline layout (no push constants) for the
// meshlet culling compute pass and creates the pass ("Meshlet Culling") from
// Output/MeshletCulling.comp.spv, using specialization data constructed with 32.
2337 {
2338 TRACY_ZONE_SCOPED_NAMED( "Create Meshlet Culling Resources" );
2339 VkDevice device = context->getVkDevice();
2340
2341 // Create computeDescriptorSetLayout
2342 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
2344#if !defined(HEADLESS)
2350#endif
2351 .build();
2352
2353 VkDescriptorSetLayoutCreateInfo computeSetLayoutInfo{
2354 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2355 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
2356 .pBindings = computeLayoutBindings.data()
2357 };
2358
2359 // Create computePipelineLayout
2360 VkPipelineLayoutCreateInfo computePipelineLayoutInfo{
2361 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2362 .setLayoutCount = 1,
2363 .pSetLayouts = nullptr,
2364 .pushConstantRangeCount = 0,
2365 .pPushConstantRanges = nullptr
2366 };
2367
// NOTE(review): raw new/delete — the allocation leaks if anything before the delete throws.
2368 auto* specialization = new ComputePipelineSpecializationData(32);
// Assigned directly here, whereas the sibling setup bodies use emplace().
2369 meshletCullingComputePass = ComputePass( "Meshlet Culling" );
2371 device,
2372 &computePipelineLayoutInfo,
2373 &computeSetLayoutInfo,
2374 ( Path::engineShaders() / COMPUTE_SHADER_NAME("Output/MeshletCulling.comp.spv") ).string(),
2375 specialization
2376 );
2377 delete specialization;
2378 }
2379
// Describes the descriptor set layout and pipeline layout (no push constants) for the draw
// preparation compute pass and creates the pass ("Draw Preparation") from
// Output/PrepareDraw.comp.spv. The specialization data is constructed with MAX_PIPELINES.
2381 {
2382 TRACY_ZONE_SCOPED_NAMED( "Create Prepare Draw Resources" );
2383
2384 std::vector<VkDescriptorSetLayoutBinding> prepareBindings =
2386 #if !defined(HEADLESS) // COMPUTE_DEBUG uses real compute shaders
2389 #endif
2390 .build();
2391
2392 VkDescriptorSetLayoutCreateInfo prepareLayoutInfo{
2393 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2394 .bindingCount = static_cast<uint32_t>( prepareBindings.size() ),
2395 .pBindings = prepareBindings.data()
2396 };
2397
2398 VkPipelineLayoutCreateInfo pipelineLayoutInfo{ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2399 .setLayoutCount = 1,
2400 .pSetLayouts = nullptr};
2401
// NOTE(review): raw new/delete — the allocation leaks if anything before the delete throws.
2402 auto* specialization = new ComputePipelineSpecializationData( MAX_PIPELINES );
2403 const auto shaderName = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/PrepareDraw.comp.spv");
2404 drawPreparationComputePass.emplace( ComputePass( "Draw Preparation" ) );
2406 context->getVkDevice(),
2407 &pipelineLayoutInfo,
2408 &prepareLayoutInfo,
2409 shaderName.string(),
2410 specialization
2411 );
2412 delete specialization;
2413 }
2414
// Describes the descriptor set layout (two combined image samplers and one storage image),
// a 16-byte compute push-constant range and the pipeline layout for per-mip Hi-Z generation,
// then creates the pass ("Hi-Z Generation") from Output/HiZGeneration.comp.spv.
2416 {
2417 TRACY_ZONE_SCOPED_NAMED( "Create Hi-Z Generation Resources" );
2418
2419 // Hi-Z generation shader bindings:
2420 // binding 0: Source texture (previous mip or depth buffer) - combined image sampler
2421 // binding 1: Source MSAA depth buffer (for mip 0) - combined image sampler (MS)
2422 // binding 2: Destination mip (storage image)
2423 std::vector<VkDescriptorSetLayoutBinding> hiZBindings =
2425#if !defined(HEADLESS)
2426 .addCombinedImageSampler( 0, VK_SHADER_STAGE_COMPUTE_BIT ) // u_srcTexture
2427 .addCombinedImageSampler( 1, VK_SHADER_STAGE_COMPUTE_BIT ) // u_srcDepthMS (placeholder for now)
2428 .addStorageImage( 2, VK_SHADER_STAGE_COMPUTE_BIT ) // u_dstMip
2429#endif
2430 .build();
2431
2432 VkDescriptorSetLayoutCreateInfo hiZLayoutInfo{
2433 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2434 .bindingCount = static_cast<uint32_t>( hiZBindings.size() ),
2435 .pBindings = hiZBindings.data()
2436 };
2437
2438 // Push constants for mip level, source dimensions, and sample count
2439 VkPushConstantRange pushConstantRange{
2440 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
2441 .offset = 0,
2442 .size = 4 * sizeof( uint32_t ) // mipLevel, srcWidth, srcHeight, sampleCount
2443 };
2444
2445 VkPipelineLayoutCreateInfo pipelineLayoutInfo{
2446 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2447 .setLayoutCount = 1,
2448 .pSetLayouts = nullptr,
2449 .pushConstantRangeCount = 1,
2450 .pPushConstantRanges = &pushConstantRange
2451 };
2452
// NOTE(review): raw new/delete — the allocation leaks if anything before the delete throws.
2453 auto* specialization = new ComputePipelineSpecializationData( 64 ); // 8x8 workgroups
2454 const auto shaderName = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/HiZGeneration.comp.spv");
2455 hiZGenerationComputePass.emplace( ComputePass( "Hi-Z Generation" ) );
2457 context->getVkDevice(),
2458 &pipelineLayoutInfo,
2459 &hiZLayoutInfo,
2460 shaderName.string(),
2461 specialization
2462 );
2463 delete specialization;
2464
2465 PLOGI << "Hi-Z generation compute pass created";
2466 }
2467
// Allocates one descriptor set per (frame in flight, Hi-Z mip level) from a dedicated pool
// (hiZDescriptorPool), using the Hi-Z generation pass's descriptor set layout. Returns early
// with a warning when that pass has not been created.
2469 {
2470 TRACY_ZONE_SCOPED_NAMED( "Create Hi-Z Mip Descriptor Sets" );
2471
2472 if (!hiZGenerationComputePass.has_value())
2473 {
2474 PLOGW << "Hi-Z generation compute pass not available, skipping descriptor set creation";
2475 return;
2476 }
2477
2478 // Calculate mip levels from eye resolution (same formula as Hi-Z pyramid creation)
// mipLevels = floor(log2(max(width, height))) + 1, based on the resolution of eye 0.
2479 const VkExtent2D eyeResolution = headset->getEyeResolution(0);
2480 const uint32_t mipLevels = static_cast<uint32_t>(
2481 std::floor(std::log2(std::max(eyeResolution.width, eyeResolution.height)))) + 1;
// NOTE(review): because of the "+ 1" above, this guard looks unreachable for any non-zero
// extent; a zero extent is not handled separately.
2482 if (mipLevels == 0)
2483 {
2484 PLOGW << "Hi-Z pyramid has 0 mip levels, skipping descriptor set creation";
2485 return;
2486 }
2487
2488 // Create a dedicated descriptor pool for Hi-Z mip descriptor sets
2489 std::array<VkDescriptorPoolSize, 2u> poolSizes{};
2490 poolSizes.at(0) = {
2491 .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
2492 .descriptorCount = static_cast<uint32_t>(MAX_FRAMES_IN_FLIGHT) * mipLevels * 2 // 2 samplers per mip
2493 };
2494 poolSizes.at(1) = {
2495 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2496 .descriptorCount = static_cast<uint32_t>(MAX_FRAMES_IN_FLIGHT) * mipLevels
2497 };
2498
// One set per frame and mip level.
2499 VkDescriptorPoolCreateInfo poolInfo{
2500 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
2501 .maxSets = static_cast<uint32_t>(MAX_FRAMES_IN_FLIGHT) * mipLevels,
2502 .poolSizeCount = static_cast<uint32_t>(poolSizes.size()),
2503 .pPoolSizes = poolSizes.data()
2504 };
2505
// NOTE(review): the callee that receives these arguments is not visible here — presumably a
// descriptor pool creation helper that also applies the "Hi-Z Mip" label; confirm.
2507 context->getVkDevice(), &poolInfo, nullptr, &hiZDescriptorPool, "Hi-Z Mip"
2508 );
2509
2510 // Allocate descriptor sets for each frame and mip level
2511 VkDescriptorSetLayout layout = hiZGenerationComputePass->getDescriptorSetLayout();
2512 for (uint32_t frame = 0; frame < MAX_FRAMES_IN_FLIGHT; frame++)
2513 {
2514 for (uint32_t mip = 0; mip < mipLevels; mip++)
2515 {
2516 VkDescriptorSetAllocateInfo allocInfo{
2517 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2518 .descriptorPool = hiZDescriptorPool,
2519 .descriptorSetCount = 1,
2520 .pSetLayouts = &layout
2521 };
2522
// NOTE(review): assumes hiZMipDescriptorSets[frame] has room for mipLevels entries — confirm.
2523 VkResult result = vkAllocateDescriptorSets(
2524 context->getVkDevice(), &allocInfo, &hiZMipDescriptorSets[frame][mip]
2525 );
// NOTE(review): a failed allocation is only logged; the loop carries on with this slot.
2526 if (result != VK_SUCCESS)
2527 {
2528 PLOGE << "Failed to allocate Hi-Z descriptor set for frame " << frame << " mip " << mip;
2529 }
2530 }
2531
2532 // Update descriptor sets with image bindings
2534 }
2535
2536 PLOGI << "Created Hi-Z mip descriptor sets: " << mipLevels << " mips x " << MAX_FRAMES_IN_FLIGHT << " frames";
2537 }
2538
// Fills in the image descriptors for every Hi-Z mip level of the frame selected by
// frameIndex. Does nothing when the Hi-Z generation pass has not been created.
// Per mip: binding 0 reads the previous mip (mip 0 points at itself as a placeholder),
// binding 1 reads the headset depth buffer view, binding 2 is the destination mip.
2540 {
2541 if (!hiZGenerationComputePass.has_value())
2542 return;
2543
2544 // Use per-frame Hi-Z pyramid from the RenderProcess
// renderProcesses is indexed without a bounds check.
2545 RenderProcess* renderProcess = renderProcesses[frameIndex];
2546 const uint32_t mipLevels = renderProcess->getHiZMipLevels();
2547
2548 for (uint32_t mip = 0; mip < mipLevels; mip++)
2549 {
2550 // Storage for image infos (must persist until vkUpdateDescriptorSets)
2551 VkDescriptorImageInfo srcTextureInfo{};
2552 VkDescriptorImageInfo srcDepthMSInfo{};
2553 VkDescriptorImageInfo dstMipInfo{};
2554
2555 // Binding 0: Source texture (previous mip level - NOT used for mip 0)
2556 if (mip == 0)
2557 {
2558 // For mip 0, binding 0 is unused (shader uses binding 1 for MSAA depth)
2559 // Use mip 0 of the Hi-Z pyramid itself as a placeholder
2560 srcTextureInfo = {
2561 .sampler = headset->getHiZSampler(),
2562 .imageView = renderProcess->getHiZPyramidMipView(0),
2563 .imageLayout = VK_IMAGE_LAYOUT_GENERAL
2564 };
2565 }
2566 else
2567 {
2568 // For other mips, we read from the previous mip level
2569 srcTextureInfo = {
2570 .sampler = headset->getHiZSampler(),
2571 .imageView = renderProcess->getHiZPyramidMipView(mip - 1),
2572 .imageLayout = VK_IMAGE_LAYOUT_GENERAL
2573 };
2574 }
2575
2576 // Binding 1: MSAA depth buffer (used for mip 0 generation)
2577 srcDepthMSInfo = {
2578 .sampler = headset->getHiZSampler(),
2579 .imageView = headset->getDepthBufferView(),
2580 .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
2581 };
2582
2583 // Binding 2: Destination mip as storage image (write to current frame's Hi-Z)
2584 dstMipInfo = {
2585 .sampler = VK_NULL_HANDLE,
2586 .imageView = renderProcess->getHiZPyramidMipView(mip),
2587 .imageLayout = VK_IMAGE_LAYOUT_GENERAL
2588 };
2589
// NOTE(review): the expression that update() is called on is not visible here — presumably
// a descriptor writer fed with the three image infos above; confirm.
2594 .update(context->getVkDevice());
2595 }
2596 }
2597
2598 //==============================================================================
2599 // Hi-Z SPD (Single Pass Downsampler) Implementation
2600 //==============================================================================
2601
// Creates the resources for the single-pass Hi-Z downsampler (SPD): a device-local storage
// buffer holding six uint32_t atomic counters, and the compute pass ("Hi-Z SPD Generation")
// built from Output/HiZGenerationSPD.comp.spv with a four-binding descriptor set layout and
// one compute push-constant range.
2603 {
2604 TRACY_ZONE_SCOPED_NAMED( "Create Hi-Z SPD Resources" );
2605
2606 // Create atomic counter buffer for SPD workgroup synchronization
2607 // SPD needs one counter per slice (we have 2 slices for stereo)
2608 hiZSPDAtomicBuffer.emplace(context);
2609 constexpr VkDeviceSize atomicBufferSize = sizeof(uint32_t) * 6; // 6 counters max (SPD supports up to 6 slices)
2610 hiZSPDAtomicBuffer->create(
2611 atomicBufferSize,
2612 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
2613 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
2614 );
2615 hiZSPDAtomicBuffer->setDebugName("Hi-Z SPD Atomic Counter Buffer");
2616
2617 // SPD shader bindings:
2618 // binding 0: Source MSAA depth buffer (combined image sampler)
2619 // binding 1: Destination mip levels array [12] (storage images)
2620 // binding 2: Mip level 5 - globally coherent (storage image)
2621 // binding 3: Global atomic counter buffer
2622 std::vector<VkDescriptorSetLayoutBinding> spdBindings = DescriptorSetLayoutBuilder()
2623#if !defined(HEADLESS)
2624 .addCombinedImageSampler(0, VK_SHADER_STAGE_COMPUTE_BIT) // imgSrcDepthMS
2625 .addStorageImageArray(1, 12, VK_SHADER_STAGE_COMPUTE_BIT) // imgDst[12]
2626 .addStorageImage(2, VK_SHADER_STAGE_COMPUTE_BIT) // imgDst5 (coherent)
2627 .addStorageBuffer(3, VK_SHADER_STAGE_COMPUTE_BIT) // spdGlobalAtomic
2628#endif
2629 .build();
2630
2631 VkDescriptorSetLayoutCreateInfo spdLayoutInfo{
2632 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2633 .bindingCount = static_cast<uint32_t>(spdBindings.size()),
2634 .pBindings = spdBindings.data()
2635 };
2636
2637 // Push constants: mips, numWorkGroups, workGroupOffset, sampleCount
2638 VkPushConstantRange pushConstantRange{
2639 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
2640 .offset = 0,
2641 .size = sizeof(uint32_t) * 2 + sizeof(int32_t) * 2 + sizeof(uint32_t) // mips, numWorkGroups, workGroupOffset.xy, sampleCount
2642 };
2643
2644 VkPipelineLayoutCreateInfo pipelineLayoutInfo{
2645 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
2646 .setLayoutCount = 1,
2647 .pSetLayouts = nullptr, // Will be set by ComputePass
2648 .pushConstantRangeCount = 1,
2649 .pPushConstantRanges = &pushConstantRange
2650 };
2651
// Automatic storage instead of new/delete: the specialization data is only needed for the
// duration of create() and is now released on every exit path, including exceptions.
2652 ComputePipelineSpecializationData specialization(256); // SPD uses 256 threads
2653 const auto shaderName = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/HiZGenerationSPD.comp.spv");
2654 hiZSPDComputePass.emplace(ComputePass("Hi-Z SPD Generation"));
2655 hiZSPDComputePass->create(
2656 context->getVkDevice(),
2657 &pipelineLayoutInfo,
2658 &spdLayoutInfo,
2659 shaderName.string(),
2660 &specialization
2661 );
2663
2664 PLOGI << "Hi-Z SPD compute pass created";
2665 }
2666
// Allocates one SPD descriptor set per frame in flight from a descriptor pool sized for the
// SPD layout (1 combined image sampler, 13 storage images and 1 storage buffer per frame).
// Clears useSPDHiZ_ and returns when the SPD pass is missing or an allocation fails.
2668 {
2669 TRACY_ZONE_SCOPED_NAMED("Create Hi-Z SPD Descriptor Sets");
2670
2671 if (!hiZSPDComputePass.has_value())
2672 {
2673 PLOGW << "Hi-Z SPD compute pass not available, skipping descriptor set creation";
2674 useSPDHiZ_ = false;
2675 return;
2676 }
2677
2678 // Create descriptor pool for SPD
2679 // Per frame: 1 combined image sampler, 12 storage images, 1 storage image (coherent), 1 storage buffer
2680 std::array<VkDescriptorPoolSize, 3u> poolSizes{};
2681 poolSizes[0] = {
2682 .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
2683 .descriptorCount = MAX_FRAMES_IN_FLIGHT
2684 };
2685 poolSizes[1] = {
2686 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2687 .descriptorCount = MAX_FRAMES_IN_FLIGHT * 13 // 12 + 1 coherent
2688 };
2689 poolSizes[2] = {
2690 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
2691 .descriptorCount = MAX_FRAMES_IN_FLIGHT
2692 };
2693
2694 VkDescriptorPoolCreateInfo poolInfo{
2695 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
2696 .maxSets = MAX_FRAMES_IN_FLIGHT,
2697 .poolSizeCount = static_cast<uint32_t>(poolSizes.size()),
2698 .pPoolSizes = poolSizes.data()
2699 };
2700
// NOTE(review): the pool handle lives only in this local; nothing visible in this body
// stores it in a member, so confirm that it is destroyed somewhere, otherwise the pool
// outlives the renderer's cleanup.
2701 VkDescriptorPool spdDescriptorPool = VK_NULL_HANDLE;
2703 context->getVkDevice(), &poolInfo, nullptr, &spdDescriptorPool, "Hi-Z SPD"
2704 );
2705
2706 // Allocate descriptor sets
2707 VkDescriptorSetLayout layout = hiZSPDComputePass->getDescriptorSetLayout();
2708 for (uint32_t frame = 0; frame < MAX_FRAMES_IN_FLIGHT; frame++)
2709 {
2710 VkDescriptorSetAllocateInfo allocInfo{
2711 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2712 .descriptorPool = spdDescriptorPool,
2713 .descriptorSetCount = 1,
2714 .pSetLayouts = &layout
2715 };
2716
2717 VkResult result = vkAllocateDescriptorSets(
2718 context->getVkDevice(), &allocInfo, &hiZSPDDescriptorSets[frame]
2719 );
// On failure the SPD path is disabled and the remaining frames are not allocated.
2720 if (result != VK_SUCCESS)
2721 {
2722 PLOGE << "Failed to allocate Hi-Z SPD descriptor set for frame " << frame;
2723 useSPDHiZ_ = false;
2724 return;
2725 }
2726
2728 }
2729
2730 PLOGI << "Created Hi-Z SPD descriptor sets for " << MAX_FRAMES_IN_FLIGHT << " frames";
2731 }
2732
2734 {
2735 if (!hiZSPDComputePass.has_value())
2736 return;
2737
2738 RenderProcess* renderProcess = renderProcesses[frameIndex];
2739 const uint32_t mipLevels = renderProcess->getHiZMipLevels();
2740
2741 // Binding 0: MSAA depth buffer
2742 VkDescriptorImageInfo depthMSInfo{
2743 .sampler = headset->getHiZSampler(),
2744 .imageView = headset->getDepthBufferView(),
2745 .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
2746 };
2747
2748 // Binding 1: Destination mip levels array (12 images, use mip views or placeholders)
2749 std::array<VkDescriptorImageInfo, 12> dstMipInfos{};
2750 for (uint32_t mip = 0; mip < 12; mip++)
2751 {
2752 if (mip < mipLevels)
2753 {
2754 dstMipInfos[mip] = {
2755 .sampler = VK_NULL_HANDLE,
2756 .imageView = renderProcess->getHiZPyramidMipView(mip),
2757 .imageLayout = VK_IMAGE_LAYOUT_GENERAL
2758 };
2759 }
2760 else
2761 {
2762 // Use mip 0 as placeholder for unused mip slots
2763 dstMipInfos[mip] = {
2764 .sampler = VK_NULL_HANDLE,
2765 .imageView = renderProcess->getHiZPyramidMipView(0),
2766 .imageLayout = VK_IMAGE_LAYOUT_GENERAL
2767 };
2768 }
2769 }
2770
2771 // Binding 2: Mip level 5 (globally coherent)
2772 VkDescriptorImageInfo dst5Info{
2773 .sampler = VK_NULL_HANDLE,
2774 .imageView = (mipLevels > 5) ? renderProcess->getHiZPyramidMipView(5) : renderProcess->getHiZPyramidMipView(0),
2775 .imageLayout = VK_IMAGE_LAYOUT_GENERAL
2776 };
2777
2778 // Binding 3: Atomic counter buffer
2779 VkDescriptorBufferInfo atomicBufferInfo = VulkanHelper::fullBufferInfo(hiZSPDAtomicBuffer->getBuffer());
2780
2781 // Update descriptor set
2782 std::array<VkWriteDescriptorSet, 4> writes{};
2783
2784 writes[0] = {
2785 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2786 .dstSet = hiZSPDDescriptorSets[frameIndex],
2787 .dstBinding = 0,
2788 .dstArrayElement = 0,
2789 .descriptorCount = 1,
2790 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
2791 .pImageInfo = &depthMSInfo
2792 };
2793
2794 writes[1] = {
2795 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2796 .dstSet = hiZSPDDescriptorSets[frameIndex],
2797 .dstBinding = 1,
2798 .dstArrayElement = 0,
2799 .descriptorCount = 12,
2800 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2801 .pImageInfo = dstMipInfos.data()
2802 };
2803
2804 writes[2] = {
2805 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2806 .dstSet = hiZSPDDescriptorSets[frameIndex],
2807 .dstBinding = 2,
2808 .dstArrayElement = 0,
2809 .descriptorCount = 1,
2810 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2811 .pImageInfo = &dst5Info
2812 };
2813
2814 writes[3] = {
2815 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2816 .dstSet = hiZSPDDescriptorSets[frameIndex],
2817 .dstBinding = 3,
2818 .dstArrayElement = 0,
2819 .descriptorCount = 1,
2820 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
2821 .pBufferInfo = &atomicBufferInfo
2822 };
2823
2824 vkUpdateDescriptorSets(context->getVkDevice(), static_cast<uint32_t>(writes.size()), writes.data(), 0, nullptr);
2825 }
2826
// Body of the single-dispatch (SPD) Hi-Z pyramid recording routine. The signature
// line (listing line 2827) is not present in this export.
// Recorded into the current RenderProcess's graphics command buffer, in order:
//   1. memory barrier: late-fragment-test depth writes -> compute shader reads,
//   2. Hi-Z pyramid layout transition UNDEFINED -> GENERAL (all mips, 2 layers),
//   3. pipeline + descriptor set bind, push constants, one vkCmdDispatch,
//   4. Hi-Z pyramid layout transition GENERAL -> SHADER_READ_ONLY_OPTIMAL.
2828 {
// Nothing is recorded unless the SPD pass exists and the SPD path is enabled.
2829 if (!hiZSPDComputePass.has_value() || !useSPDHiZ_)
2830 return;
2831
2832 RenderProcess* currentRenderProcess = getCurrentRenderProcess();
2833 VkCommandBuffer commandBuffer = currentRenderProcess->getGraphicsCommandBuffer();
2834
2835 TRACY_VK_ZONE_C(getCurrentTracyVkContext(), commandBuffer, "Hi-Z Generation SPD", Colors::Orange)
2836
2837 const uint32_t mipLevels = currentRenderProcess->getHiZMipLevels();
2838 const VkExtent2D hiZExtent = currentRenderProcess->getHiZExtent();
2839 VkImage hiZImage = currentRenderProcess->getHiZPyramidImage();
2840
2841 // Execution barrier: ensure render pass depth writes complete before compute reads
2842 {
2843 VkMemoryBarrier2 depthBarrier{
2844 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
2845 .srcStageMask = VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
2846 .srcAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
2847 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
2848 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
2849 };
2850 VkDependencyInfo depInfo{VK_STRUCTURE_TYPE_DEPENDENCY_INFO};
2851 depInfo.memoryBarrierCount = 1;
2852 depInfo.pMemoryBarriers = &depthBarrier;
2853 vkCmdPipelineBarrier2(commandBuffer, &depInfo);
2854 }
2855
2856 // Transition Hi-Z pyramid to GENERAL for compute writes
// oldLayout is UNDEFINED and the source stage/access are NONE, so this transition
// does not preserve whatever the pyramid held before.
2857 {
2858 VkImageMemoryBarrier2 hiZBarrier{
2859 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
2860 .srcStageMask = VK_PIPELINE_STAGE_2_NONE,
2861 .srcAccessMask = VK_ACCESS_2_NONE,
2862 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
2863 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
2864 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2865 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
2866 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
2867 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
2868 .image = hiZImage,
2869 .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, mipLevels, 0, 2}
2870 };
2871 VkDependencyInfo depInfo{VK_STRUCTURE_TYPE_DEPENDENCY_INFO};
2872 depInfo.imageMemoryBarrierCount = 1;
2873 depInfo.pImageMemoryBarriers = &hiZBarrier;
2874 vkCmdPipelineBarrier2(commandBuffer, &depInfo);
2875 }
2876
2877 // Bind SPD pipeline and descriptor set
2878 hiZSPDComputePass->getComputePipeline()->bind(commandBuffer);
2879 vkCmdBindDescriptorSets(
2880 commandBuffer,
2881 VK_PIPELINE_BIND_POINT_COMPUTE,
2882 hiZSPDComputePass->getPipelineLayout(),
2883 0, 1,
// NOTE(review): listing line 2884 (the pDescriptorSets argument) is missing from this
// export; presumably it passes the current frame's Hi-Z SPD descriptor set - confirm
// against the real source.
2885 0, nullptr
2886 );
2887
2888 // Calculate dispatch parameters
// Each dimension is the Hi-Z extent divided by 64, rounded up.
2889 uint32_t dispatchX = (hiZExtent.width + 63) / 64;
2890 uint32_t dispatchY = (hiZExtent.height + 63) / 64;
2891 uint32_t numWorkGroups = dispatchX * dispatchY;
2892
2893 // Push constants
// NOTE(review): field order and types presumably mirror the shader's push-constant
// block - verify against the SPD compute shader.
2894 struct SPDPushConstants {
2895 uint32_t mips;
2896 uint32_t numWorkGroups;
2897 int32_t workGroupOffsetX;
2898 int32_t workGroupOffsetY;
2899 uint32_t sampleCount;
2900 } pushConstants;
2901
2902 pushConstants.mips = mipLevels;
2903 pushConstants.numWorkGroups = numWorkGroups;
2904 pushConstants.workGroupOffsetX = 0;
2905 pushConstants.workGroupOffsetY = 0;
2906 pushConstants.sampleCount = context->getMultisampleCount();
2907
2908 vkCmdPushConstants(
2909 commandBuffer,
2910 hiZSPDComputePass->getPipelineLayout(),
2911 VK_SHADER_STAGE_COMPUTE_BIT,
2912 0,
2913 sizeof(SPDPushConstants),
// NOTE(review): listing line 2914 (the pValues argument, presumably &pushConstants)
// is missing from this export.
2915 );
2916
2917 // Single dispatch for ALL mip levels!
2918 vkCmdDispatch(commandBuffer, dispatchX, dispatchY, 2); // 2 slices for stereo
2919
2920 // Transition Hi-Z pyramid to SHADER_READ_ONLY for culling
// Waits on the compute storage writes above and makes them available to later
// compute-stage shader reads.
2921 {
2922 VkImageMemoryBarrier2 finalBarrier{
2923 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
2924 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
2925 .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
2926 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
2927 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT,
2928 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
2929 .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2930 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
2931 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
2932 .image = hiZImage,
2933 .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, mipLevels, 0, 2}
2934 };
2935 VkDependencyInfo depInfo{VK_STRUCTURE_TYPE_DEPENDENCY_INFO};
2936 depInfo.imageMemoryBarrierCount = 1;
2937 depInfo.pImageMemoryBarriers = &finalBarrier;
2938 vkCmdPipelineBarrier2(commandBuffer, &depInfo);
2939 }
2940 }
2941
// Body of the Hi-Z pyramid generation entry point. The signature line (listing
// line 2942) is not present in this export.
// Control flow visible here:
//   - returns immediately while freezeCulling_ is set,
//   - takes the SPD branch when useSPDHiZ_ is set and the SPD pass exists,
//   - otherwise runs the legacy path: one compute dispatch per mip level with a
//     barrier between levels, then a final transition to SHADER_READ_ONLY_OPTIMAL.
2943 {
2944 // Skip Hi-Z generation when culling is frozen - keep using the frozen pyramid
2945 if (freezeCulling_)
2946 {
2947 return;
2948 }
2949
2950 // Use SPD (Single Pass Downsampler) by default for much faster Hi-Z generation
2951 // Falls back to multi-pass if SPD is not available
2952 if (useSPDHiZ_ && hiZSPDComputePass.has_value())
2953 {
// NOTE(review): listing line 2954 is missing from this export; presumably it calls
// the SPD recording routine before returning - confirm against the real source.
2955 return;
2956 }
2957
2958 // Legacy multi-pass Hi-Z generation:
2959 // This function records the Hi-Z pyramid generation after the render pass.
2960 // It needs to:
2961 // 1. Transition depth buffer from SHADER_READ_ONLY to be readable
2962 // 2. For each mip level:
2963 // a. Dispatch compute shader to generate mip
2964 // b. Barrier before next mip
2965 // 3. Transition Hi-Z pyramid to SHADER_READ_ONLY for culling
2966
2967 if (!hiZGenerationComputePass.has_value())
2968 {
2969 PLOGW << "Hi-Z generation compute pass not created, skipping Hi-Z generation";
2970 return;
2971 }
2972
2973 RenderProcess* currentRenderProcess = getCurrentRenderProcess();
2974 VkCommandBuffer commandBuffer = currentRenderProcess->getGraphicsCommandBuffer();
2975 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Hi-Z Generation (Legacy)", Colors::Orange )
2976
2977 // Get Hi-Z pyramid info from current frame's RenderProcess (per-frame pyramid)
2978 const uint32_t mipLevels = currentRenderProcess->getHiZMipLevels();
2979 const VkExtent2D hiZExtent = currentRenderProcess->getHiZExtent();
2980 VkImage hiZImage = currentRenderProcess->getHiZPyramidImage();
2981
2982 // NOTE: The depth buffer is already in SHADER_READ_ONLY_OPTIMAL layout.
2983 // recordRenderPass() transitions it after vkCmdEndRendering() completes.
2984 // Hi-Z generation reads the depth buffer via combined image sampler.
2985
2986 // Transition Hi-Z pyramid to GENERAL for compute writes
2987 // Each frame has its own Hi-Z pyramid, so we can safely discard previous contents
2988 {
2989 VkImageMemoryBarrier2 hiZBarrier{
2990 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
2991 .srcStageMask = VK_PIPELINE_STAGE_2_NONE,
2992 .srcAccessMask = VK_ACCESS_2_NONE,
2993 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
2994 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
2995 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2996 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
2997 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
2998 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
2999 .image = hiZImage,
3000 .subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, mipLevels, 0, 2 }
3001 };
3002
3003 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3004 depInfo.imageMemoryBarrierCount = 1;
3005 depInfo.pImageMemoryBarriers = &hiZBarrier;
3006 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3007 }
3008
3009 // Bind Hi-Z generation pipeline
3010 hiZGenerationComputePass->getComputePipeline()->bind( commandBuffer );
3011
3012 // Generate each mip level
3013 // Track source dimensions (what we read from) and destination dimensions (what we write to)
3014 // Mip 0: src = MSAA depth (full res), dst = mip 0 (full res)
3015 // Mip N: src = mip N-1, dst = mip N (half of src)
3016 uint32_t srcWidth = hiZExtent.width;
3017 uint32_t srcHeight = hiZExtent.height;
3018
3019 for (uint32_t mip = 0; mip < mipLevels; mip++)
3020 {
3021 // Calculate destination dimensions
3022 // Mip 0: same as source (1:1 copy from MSAA depth)
3023 // Mip N: half of source (2:1 reduction)
// The std::max(1u, ...) clamp keeps a dimension from reaching zero once it has
// been halved down to a single texel.
3024 uint32_t dstWidth = (mip == 0) ? srcWidth : std::max(1u, srcWidth >> 1);
3025 uint32_t dstHeight = (mip == 0) ? srcHeight : std::max(1u, srcHeight >> 1);
3026
3027 // Push constants: mipLevel, srcWidth, srcHeight, sampleCount
3028 struct HiZPushConstants {
3029 uint32_t mipLevel;
3030 uint32_t srcWidth;
3031 uint32_t srcHeight;
3032 uint32_t sampleCount;
3033 } pushConstants;
3034
3035 pushConstants.mipLevel = mip;
3036 pushConstants.srcWidth = srcWidth;
3037 pushConstants.srcHeight = srcHeight;
// Only mip 0 receives the multisample count; every later mip is pushed a
// sample count of 1.
3038 pushConstants.sampleCount = (mip == 0) ? context->getMultisampleCount() : 1;
3039
3040 vkCmdPushConstants(
3041 commandBuffer,
3042 hiZGenerationComputePass->getPipelineLayout(),
3043 VK_SHADER_STAGE_COMPUTE_BIT,
3044 0,
3045 sizeof(HiZPushConstants),
// NOTE(review): listing line 3046 (the pValues argument, presumably &pushConstants)
// is missing from this export.
3047 );
3048
3049 // Bind descriptor set for this mip level
3050 vkCmdBindDescriptorSets(
3051 commandBuffer,
3052 VK_PIPELINE_BIND_POINT_COMPUTE,
3053 hiZGenerationComputePass->getPipelineLayout(),
3054 0, 1,
// NOTE(review): listing line 3055 (the pDescriptorSets argument) is missing from
// this export; presumably a per-mip descriptor set - confirm against the real source.
3056 0, nullptr
3057 );
3058
3059 // Dispatch compute shader to cover destination mip
// Group counts are the destination size divided by 8, rounded up.
// NOTE(review): assumes the shader's local size is 8x8 - TODO confirm.
3060 uint32_t groupsX = (dstWidth + 7) / 8;
3061 uint32_t groupsY = (dstHeight + 7) / 8;
3062 uint32_t groupsZ = 2; // 2 layers for stereo
3063
3064 vkCmdDispatch( commandBuffer, groupsX, groupsY, groupsZ );
3065
3066 // Barrier between mip levels (read-after-write)
// Covers only the mip just written. It is skipped after the last mip because the
// final barrier below spans every mip. mipLevels - 1 cannot wrap here: the loop
// body only runs when mipLevels >= 1.
3067 if (mip < mipLevels - 1)
3068 {
3069 VkImageMemoryBarrier2 mipBarrier{
3070 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
3071 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3072 .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
3073 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3074 .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
3075 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
3076 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
3077 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
3078 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
3079 .image = hiZImage,
3080 .subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, mip, 1, 0, 2 }
3081 };
3082
3083 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3084 depInfo.imageMemoryBarrierCount = 1;
3085 depInfo.pImageMemoryBarriers = &mipBarrier;
3086 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3087 }
3088
3089 // Source for next iteration is this iteration's destination
3090 srcWidth = dstWidth;
3091 srcHeight = dstHeight;
3092 }
3093
3094 // Final barrier: transition Hi-Z pyramid to SHADER_READ_ONLY for culling in next frame
3095 {
3096 VkImageMemoryBarrier2 finalBarrier{
3097 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
3098 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3099 .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
3100 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3101 .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
3102 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
3103 .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
3104 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
3105 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
3106 .image = hiZImage,
3107 .subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, mipLevels, 0, 2 }
3108 };
3109
3110 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3111 depInfo.imageMemoryBarrierCount = 1;
3112 depInfo.pImageMemoryBarriers = &finalBarrier;
3113 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3114 }
3115
3116 }
3117
3118 //==============================================================================
3119 // Vertex Shader Path for Single-Meshlet Geometry
3120 //==============================================================================
3121
// Body of the vertex-shader-path pipeline creation routine. The signature line
// (listing line 3122) is not present in this export.
// Builds two graphics pipelines for dynamic rendering (renderPass is VK_NULL_HANDLE,
// a VkPipelineRenderingCreateInfo is chained through pNext):
//   1. a color + depth pipeline using triangle_movable_normals.frag.spv,
//   2. a depth-only pipeline using depth_only.frag.spv with zero color attachments.
// Both use Placeholder.vert.spv, the same fixed-function state objects and
// graphicsPipelineLayout. If the first pipeline fails to create, useVertexShaderPath_
// is cleared; a failure of the depth-only pipeline is only logged.
3123 {
3124 TRACY_ZONE_SCOPED_NAMED("Create Vertex Shader Path Resources");
3125
3126 PLOGI << "[VS_PATH] Creating vertex shader path pipeline resources";
3127
3128 // Vertex input binding description - matches Vertex struct (64 bytes)
3129 // vec4 position (16) + vec4 normal (16) + vec4 tangent (16) + vec2 texCoord (8) + vec2 lightmapUV (8)
// NOTE(review): the stride and the attribute offsets below are hard-coded literals;
// a static_assert against sizeof(Vertex) / offsetof would catch layout drift.
3130 VkVertexInputBindingDescription vertexBindingDesc{
3131 .binding = 0,
3132 .stride = 64, // sizeof(Vertex)
3133 .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
3134 };
3135
3136 // Vertex input attribute descriptions
3137 std::array<VkVertexInputAttributeDescription, 5> vertexAttrDescs = {{
3138 {.location = 0, .binding = 0, .format = VK_FORMAT_R32G32B32A32_SFLOAT, .offset = 0}, // position (vec4)
3139 {.location = 1, .binding = 0, .format = VK_FORMAT_R32G32B32A32_SFLOAT, .offset = 16}, // normal (vec4)
3140 {.location = 2, .binding = 0, .format = VK_FORMAT_R32G32B32A32_SFLOAT, .offset = 32}, // tangent (vec4)
3141 {.location = 3, .binding = 0, .format = VK_FORMAT_R32G32_SFLOAT, .offset = 48}, // texCoord (vec2)
3142 {.location = 4, .binding = 0, .format = VK_FORMAT_R32G32_SFLOAT, .offset = 56} // lightmapUV (vec2)
3143 }};
3144
3145 VkPipelineVertexInputStateCreateInfo vertexInputState{
3146 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
3147 .vertexBindingDescriptionCount = 1,
3148 .pVertexBindingDescriptions = &vertexBindingDesc,
3149 .vertexAttributeDescriptionCount = static_cast<uint32_t>(vertexAttrDescs.size()),
3150 .pVertexAttributeDescriptions = vertexAttrDescs.data()
3151 };
3152
3153 VkPipelineInputAssemblyStateCreateInfo inputAssemblyState{
3154 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
3155 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
3156 .primitiveRestartEnable = VK_FALSE
3157 };
3158
3159 // Load vertex and fragment shaders
3160 // Use Placeholder.vert which reads from vs_instance_ids[] for the VS instanced drawing pipeline
3161 const auto shaderDir = Path::engineShaders() / "Output";
3162 VertexShader shader(context->getVkDevice(), shaderDir / "Placeholder.vert.spv", shaderDir / "triangle_movable_normals.frag.spv");
3163
3164 std::array<VkPipelineShaderStageCreateInfo, 2> shaderStages = {{
3165 {
3166 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3167 .stage = VK_SHADER_STAGE_VERTEX_BIT,
3168 .module = shader.getVertexShader(),
3169 .pName = "main"
3170 },
3171 {
3172 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3173 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3174 .module = shader.getFragmentShader(),
3175 .pName = "main"
3176 }
3177 }};
3178
// Only the counts are set here; viewport and scissor are dynamic state (see
// dynamicStates below), so their values are supplied at record time.
3179 VkPipelineViewportStateCreateInfo viewportState{
3180 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
3181 .viewportCount = 1,
3182 .scissorCount = 1
3183 };
3184
3185 VkPipelineRasterizationStateCreateInfo rasterizationState{
3186 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
3187 .polygonMode = VK_POLYGON_MODE_FILL,
3188 .cullMode = VK_CULL_MODE_NONE, // Match mesh shader pipelines
3189 .frontFace = VK_FRONT_FACE_CLOCKWISE, // Match mesh shader pipelines
3190 .lineWidth = 1.0f
3191 };
3192
3193 VkPipelineMultisampleStateCreateInfo multisampleState{
3194 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
3195 .rasterizationSamples = context->getMultisampleCount()
3196 };
3197
3198 VkPipelineDepthStencilStateCreateInfo depthStencilState{
3199 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
3200 .depthTestEnable = VK_TRUE,
3201 .depthWriteEnable = VK_TRUE,
3202 .depthCompareOp = VK_COMPARE_OP_GREATER // Reverse-Z
3203 };
3204
3205 VkPipelineColorBlendAttachmentState colorBlendAttachment{
3206 .blendEnable = VK_FALSE,
3207 .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
3208 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT
3209 };
3210
3211 VkPipelineColorBlendStateCreateInfo colorBlendState{
3212 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
3213 .attachmentCount = 1,
3214 .pAttachments = &colorBlendAttachment
3215 };
3216
3217 std::array<VkDynamicState, 2> dynamicStates = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR};
3218 VkPipelineDynamicStateCreateInfo dynamicState{
3219 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
3220 .dynamicStateCount = static_cast<uint32_t>(dynamicStates.size()),
3221 .pDynamicStates = dynamicStates.data()
3222 };
3223
3224 // Use dynamic rendering (same as recordRenderPass which now uses vkCmdBeginRendering)
// NOTE(review): the attachment formats are compile-time constants here; presumably
// they must match the formats used at vkCmdBeginRendering time - confirm.
3225 constexpr VkFormat colorFormat = VK_FORMAT_R8G8B8A8_SRGB;
3226 constexpr VkFormat depthFormat = VK_FORMAT_D32_SFLOAT;
3227 VkPipelineRenderingCreateInfo pipelineRenderingInfo{
3228 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
3229 .viewMask = 0b11, // Stereo: render to both layers
3230 .colorAttachmentCount = 1,
3231 .pColorAttachmentFormats = &colorFormat,
3232 .depthAttachmentFormat = depthFormat,
3233 };
3234
3235 VkGraphicsPipelineCreateInfo pipelineCreateInfo{
3236 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3237 .pNext = &pipelineRenderingInfo, // Dynamic rendering
3238 .stageCount = static_cast<uint32_t>(shaderStages.size()),
3239 .pStages = shaderStages.data(),
3240 .pVertexInputState = &vertexInputState,
3241 .pInputAssemblyState = &inputAssemblyState,
3242 .pViewportState = &viewportState,
3243 .pRasterizationState = &rasterizationState,
3244 .pMultisampleState = &multisampleState,
3245 .pDepthStencilState = &depthStencilState,
3246 .pColorBlendState = &colorBlendState,
3247 .pDynamicState = &dynamicState,
3248 .layout = graphicsPipelineLayout, // Same layout as mesh shader pipeline
3249 .renderPass = VK_NULL_HANDLE // Use dynamic rendering
3250 };
3251
3252 VkResult result = vkCreateGraphicsPipelines(
3253 context->getVkDevice(),
3254 VK_NULL_HANDLE,
3255 1,
3256 &pipelineCreateInfo,
3257 nullptr,
// NOTE(review): listing line 3258 (the output pipeline pointer, presumably
// &vsGraphicsPipeline_) is missing from this export.
3259 );
3260
3261 if (result != VK_SUCCESS) {
3262 PLOGE << "[VS_PATH] Failed to create vertex shader pipeline: " << result;
3263 useVertexShaderPath_ = false;
3264 } else {
3265 VulkanHelper::setObjectName(context->getVkDevice(), vsGraphicsPipeline_, "VS Single-Meshlet Pipeline");
3266 PLOGI << "[VS_PATH] Vertex shader pipeline created successfully";
3267 }
3268
// The shader modules are released whether or not pipeline creation succeeded.
3269 shader.destroyShaders(context->getVkDevice());
3270
3271 // Create depth-only VS pipeline for Z-prepass
3272 // Uses the same vertex shader but with depth-only fragment shader and no color attachments
// NOTE(review): this block still runs when the first pipeline failed above; there is
// no early return on that failure.
3273 {
// NOTE(review): this shaderDir shadows the outer variable of the same name and is
// initialized with the same expression.
3274 const auto shaderDir = Path::engineShaders() / "Output";
3275 VertexShader depthOnlyShader(context->getVkDevice(), shaderDir / "Placeholder.vert.spv", shaderDir / "depth_only.frag.spv");
3276
3277 std::array<VkPipelineShaderStageCreateInfo, 2> depthOnlyShaderStages = {{
3278 {
3279 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3280 .stage = VK_SHADER_STAGE_VERTEX_BIT,
3281 .module = depthOnlyShader.getVertexShader(),
3282 .pName = "main"
3283 },
3284 {
3285 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3286 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3287 .module = depthOnlyShader.getFragmentShader(),
3288 .pName = "main"
3289 }
3290 }};
3291
3292 // Depth-only: no color attachments
3293 VkPipelineColorBlendStateCreateInfo depthOnlyColorBlendState{
3294 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
3295 .attachmentCount = 0,
3296 .pAttachments = nullptr
3297 };
3298
3299 VkPipelineRenderingCreateInfo depthOnlyRenderingInfo{
3300 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
3301 .viewMask = 0b11, // Stereo: render to both layers
3302 .colorAttachmentCount = 0,
3303 .pColorAttachmentFormats = nullptr,
3304 .depthAttachmentFormat = depthFormat,
3305 };
3306
// Identical to pipelineCreateInfo above except for the shader stages, the color
// blend state and the chained rendering info.
3307 VkGraphicsPipelineCreateInfo depthOnlyPipelineCreateInfo{
3308 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3309 .pNext = &depthOnlyRenderingInfo,
3310 .stageCount = static_cast<uint32_t>(depthOnlyShaderStages.size()),
3311 .pStages = depthOnlyShaderStages.data(),
3312 .pVertexInputState = &vertexInputState,
3313 .pInputAssemblyState = &inputAssemblyState,
3314 .pViewportState = &viewportState,
3315 .pRasterizationState = &rasterizationState,
3316 .pMultisampleState = &multisampleState,
3317 .pDepthStencilState = &depthStencilState,
3318 .pColorBlendState = &depthOnlyColorBlendState,
3319 .pDynamicState = &dynamicState,
3320 .layout = graphicsPipelineLayout,
3321 .renderPass = VK_NULL_HANDLE
3322 };
3323
3324 VkResult depthOnlyResult = vkCreateGraphicsPipelines(
3325 context->getVkDevice(),
3326 VK_NULL_HANDLE,
3327 1,
3328 &depthOnlyPipelineCreateInfo,
3329 nullptr,
// NOTE(review): listing line 3330 (the output pipeline pointer, presumably
// &vsDepthOnlyPipeline_) is missing from this export.
3331 );
3332
3333 if (depthOnlyResult != VK_SUCCESS) {
3334 PLOGE << "[VS_PATH] Failed to create depth-only VS pipeline: " << depthOnlyResult;
3335 } else {
3336 VulkanHelper::setObjectName(context->getVkDevice(), vsDepthOnlyPipeline_, "VS Depth-Only Pipeline");
3337 PLOGI << "[VS_PATH] Depth-only VS pipeline created successfully";
3338 }
3339
3340 depthOnlyShader.destroyShaders(context->getVkDevice());
3341 }
3342 }
3343
// Body of the routine that creates the three compute passes of the VS instanced
// drawing chain. The signature line (listing line 3344) is not present in this export.
// Each stage follows the same recipe: build descriptor-set-layout bindings, fill a
// VkPipelineLayoutCreateInfo, allocate specialization data, emplace the ComputePass
// member and hand everything to its initialization call together with the shader path.
// Stages created, in order: "VS Binning Allocator", "VS Instance Unpacking",
// "VS Prepare Draw". Only the third declares a push-constant range.
// NOTE(review): several lines of this function are missing from the export (the
// disable check, the binding builder chains and the head of each init call); the
// notes below mark the gaps.
3345 {
3346 TRACY_ZONE_SCOPED_NAMED("Create VS Instanced Drawing Resources");
3347
3348 // Skip if VS path is disabled
// NOTE(review): listing line 3349 (the if-condition opening this block) is missing.
3350 PLOGI << "[VS_INSTANCED] VS instanced drawing disabled, skipping compute pipeline creation";
3351 return;
3352 }
3353
3354 PLOGI << "[VS_INSTANCED] Creating VS instanced drawing compute pipelines";
3355
3356 // Stage 1: VSBinningAllocator - Count instances per geometry type
3357 {
3358 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
// NOTE(review): the builder expression and its per-binding calls (listing lines 3359
// and 3361-3366) are missing; the calls inside the #if are compiled out under HEADLESS.
3360 #if !defined(HEADLESS)
3367 #endif
3368 .build();
3369
3370 VkDescriptorSetLayoutCreateInfo computeLayoutCreateInfo = {
3371 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
3372 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
3373 .pBindings = computeLayoutBindings.data()
3374 };
3375
// NOTE(review): setLayoutCount is 1 while pSetLayouts is nullptr; presumably the
// ComputePass init call creates the set layout and patches this pointer - confirm.
3376 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
3377 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
3378 .pNext = nullptr,
3379 .flags = 0,
3380 .setLayoutCount = 1u,
3381 .pSetLayouts = nullptr,
3382 .pushConstantRangeCount = 0,
3383 .pPushConstantRanges = nullptr,
3384 };
3385
// NOTE(review): raw new/delete pair; if anything between here and the delete throws,
// the object leaks. An automatic variable or std::unique_ptr would avoid that.
3386 auto* specialization = new ComputePipelineSpecializationData(32);
3387 // Note: MAX_GEOMETRY_TYPES (constant_id=2) uses default value of 256 in shader
3388 const auto shaderPath = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/VSBinningAllocator.comp.spv");
3389 vsBinningAllocatorComputePass_.emplace( ComputePass( "VS Binning Allocator" ) );
// NOTE(review): listing line 3390 (the head of the init call taking these arguments)
// is missing.
3391 context->getVkDevice(),
3392 &pipelineLayoutCreateInfo,
3393 &computeLayoutCreateInfo,
3394 shaderPath.string(),
3395 specialization
3396 );
3397 delete specialization;
3398 PLOGI << "[VS_INSTANCED] Created VS Binning Allocator compute pass";
3399 }
3400
3401 // Stage 2: VSInstanceUnpacking - Write instance IDs to per-geometry bins
3402 {
3403 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
// NOTE(review): builder expression and binding calls (listing lines 3404 and
// 3406-3408) are missing.
3405 #if !defined(HEADLESS)
3409 #endif
3410 .build();
3411
3412 VkDescriptorSetLayoutCreateInfo computeLayoutCreateInfo = {
3413 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
3414 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
3415 .pBindings = computeLayoutBindings.data()
3416 };
3417
3418 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
3419 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
3420 .pNext = nullptr,
3421 .flags = 0,
3422 .setLayoutCount = 1u,
3423 .pSetLayouts = nullptr,
3424 .pushConstantRangeCount = 0,
3425 .pPushConstantRanges = nullptr,
3426 };
3427
3428 auto* specialization = new ComputePipelineSpecializationData(32);
3429 // Note: MAX_VS_INSTANCES_PER_GEO (constant_id=2) uses default value of 16384 in shader
3430 const auto shaderPath = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/VSInstanceUnpacking.comp.spv");
3431 vsInstanceUnpackingComputePass_.emplace( ComputePass( "VS Instance Unpacking" ) );
// NOTE(review): listing line 3432 (the head of the init call) is missing.
3433 context->getVkDevice(),
3434 &pipelineLayoutCreateInfo,
3435 &computeLayoutCreateInfo,
3436 shaderPath.string(),
3437 specialization
3438 );
3439 delete specialization;
3440 PLOGI << "[VS_INSTANCED] Created VS Instance Unpacking compute pass";
3441 }
3442
3443 // Stage 3: VSPrepareDraw - Generate one draw command per geometry type
3444 {
3445 std::vector<VkDescriptorSetLayoutBinding> computeLayoutBindings =
// NOTE(review): builder expression and binding calls (listing lines 3446 and
// 3448-3451) are missing.
3447 #if !defined(HEADLESS)
3452 #endif
3453 .build();
3454
3455 VkDescriptorSetLayoutCreateInfo computeLayoutCreateInfo = {
3456 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
3457 .bindingCount = static_cast<uint32_t>( computeLayoutBindings.size() ),
3458 .pBindings = computeLayoutBindings.data()
3459 };
3460
3461 // Push constants: uniqueGeometryCount (uint32_t)
3462 VkPushConstantRange pushConstantRange{
3463 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
3464 .offset = 0,
3465 .size = sizeof( uint32_t )
3466 };
3467
3468 VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
3469 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
3470 .pNext = nullptr,
3471 .flags = 0,
3472 .setLayoutCount = 1u,
3473 .pSetLayouts = nullptr,
3474 .pushConstantRangeCount = 1,
3475 .pPushConstantRanges = &pushConstantRange,
3476 };
3477
3478 auto* specialization = new ComputePipelineSpecializationData(32);
3479 // Note: MAX_VS_INSTANCES_PER_GEO (constant_id=2) uses default value of 16384 in shader
3480 const auto shaderPath = Path::engineShaders() / COMPUTE_SHADER_NAME("Output/VSPrepareDraw.comp.spv");
3481 vsPrepareDrawComputePass_.emplace( ComputePass( "VS Prepare Draw" ) );
// NOTE(review): listing line 3482 (the head of the init call) is missing.
3483 context->getVkDevice(),
3484 &pipelineLayoutCreateInfo,
3485 &computeLayoutCreateInfo,
3486 shaderPath.string(),
3487 specialization
3488 );
3489 delete specialization;
3490 PLOGI << "[VS_INSTANCED] Created VS Prepare Draw compute pass";
3491 }
3492
3493 PLOGI << "[VS_INSTANCED] All VS instanced drawing compute passes created successfully";
3494 }
3495
// Body of the routine that records the VS instanced-drawing compute chain into the
// current RenderProcess's graphics command buffer. The signature line (listing line
// 3496) is not present in this export.
// Recorded sequence:
//   1. zero-fill the VS geometry-counters and VS draw-count buffers,
//      then a transfer -> compute barrier,
//   2. VS Binning Allocator dispatch, compute -> compute barrier,
//   3. VS Instance Unpacking dispatch, compute -> compute + vertex-shader barrier,
//   4. VS Prepare Draw dispatch (push constant: uniqueGeoCount),
//      then a compute -> draw-indirect barrier.
// Returns without recording when no vertex-shader-path geometry exists or when either
// primitiveCount or uniqueGeoCount is zero.
3497 {
// NOTE(review): listing line 3498 is missing from this export.
3499
3500 // Skip if VS instanced drawing is disabled
// NOTE(review): listing line 3501 (the if-condition opening this block) is missing.
3502 return;
3503 }
3504
3505 // Skip if no single-meshlet geometry exists
3506 if (!renderingDataManager->hasVertexShaderPath()) {
3507 return;
3508 }
3509
3510 RenderProcess* currentRenderProcess = getCurrentRenderProcess();
3511 VkCommandBuffer commandBuffer = currentRenderProcess->getGraphicsCommandBuffer();
3512
3513 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "VS Instanced Drawing Pipeline", Colors::Cyan );
3514
3515 const uint32_t primitiveCount = renderingDataManager->getPrimitiveCount();
3516 // Use single-meshlet geometry count, NOT total unique geometry count
3517 // The VS path only handles single-meshlet geometries
3518 const uint32_t uniqueGeoCount = renderingDataManager->getSingleMeshletGeometryCount();
3519
3520 if (primitiveCount == 0 || uniqueGeoCount == 0) {
3521 return;
3522 }
3523
3524 // Reset VS instanced drawing buffers (NOT vs_visible_count - that's written by PrimitiveCulling)
3525 {
3526 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Reset VS Instanced Buffers", Colors::Cyan );
3527 // Note: vs_visible_count is populated by PrimitiveCulling.comp - do NOT reset it here!
3528 vkCmdFillBuffer( commandBuffer, currentRenderProcess->getVSGeometryCountersBuffer().getBuffer(), 0, VK_WHOLE_SIZE, 0 );
3529 vkCmdFillBuffer( commandBuffer, currentRenderProcess->getVSDrawCountBuffer().getBuffer(), 0, VK_WHOLE_SIZE, 0 );
3530
// The fills are transfer writes; this barrier makes them visible to compute shader
// reads and writes before the first dispatch.
3531 VkMemoryBarrier2 fillBarrier{
3532 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3533 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
3534 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
3535 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3536 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT
3537 };
3538 VkDependencyInfo fillDep{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3539 fillDep.memoryBarrierCount = 1;
3540 fillDep.pMemoryBarriers = &fillBarrier;
3541 vkCmdPipelineBarrier2( commandBuffer, &fillDep );
3542 }
3543
3544 // Stage 1: VSBinningAllocator
3545 {
3546 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "VS Binning Allocator", Colors::Cyan );
3547 const uint32_t threadCount = getVSBinningAllocatorComputePass().getThreadCount();
3548
// NOTE(review): listing line 3549 is missing (presumably the pipeline bind - confirm).
3550 getVSBinningAllocatorComputePass().bindDescriptorSets( commandBuffer, currentRenderProcess->getEyeIndex() );
3551
3552 // Dispatch based on max visible count (primitiveCount upper bound)
// Group count is primitiveCount divided by threadCount, rounded up.
3553 vkCmdDispatch( commandBuffer, ( primitiveCount + threadCount - 1 ) / threadCount, 1, 1 );
3554 }
3555
3556 // Barrier: VSBinningAllocator -> VSInstanceUnpacking
3557 {
3558 VkMemoryBarrier2 barrier{
3559 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3560 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3561 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
3562 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3563 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
3564 };
3565 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3566 depInfo.memoryBarrierCount = 1;
3567 depInfo.pMemoryBarriers = &barrier;
3568 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3569 }
3570
3571 // Stage 2: VSInstanceUnpacking
3572 {
3573 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "VS Instance Unpacking", Colors::Cyan );
3574 const uint32_t threadCount = getVSInstanceUnpackingComputePass().getThreadCount();
3575
// NOTE(review): listing line 3576 is missing (presumably the pipeline bind - confirm).
3577 getVSInstanceUnpackingComputePass().bindDescriptorSets( commandBuffer, currentRenderProcess->getEyeIndex() );
3578
3579 vkCmdDispatch( commandBuffer, ( primitiveCount + threadCount - 1 ) / threadCount, 1, 1 );
3580 }
3581
3582 // Barrier: VSInstanceUnpacking -> VSPrepareDraw + Vertex Shader read
// The destination stage mask includes the vertex shader stage as well as compute,
// so later vertex shader reads are covered by this same barrier.
3583 {
3584 VkMemoryBarrier2 barrier{
3585 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3586 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3587 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
3588 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT,
3589 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
3590 };
3591 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3592 depInfo.memoryBarrierCount = 1;
3593 depInfo.pMemoryBarriers = &barrier;
3594 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3595 }
3596
3597 // Stage 3: VSPrepareDraw
3598 {
3599 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "VS Prepare Draw", Colors::Cyan );
3600 const uint32_t threadCount = getVSPrepareDrawComputePass().getThreadCount();
3601
// NOTE(review): listing line 3602 is missing (presumably the pipeline bind - confirm).
3603 getVSPrepareDrawComputePass().bindDescriptorSets( commandBuffer, currentRenderProcess->getEyeIndex() );
3604
3605 // Push constant: uniqueGeometryCount
3606 VkPushConstantsInfo pushConstantsInfo = getVSPrepareDrawComputePass().createPushConstantsInfo( sizeof( uint32_t ), &uniqueGeoCount );
3607 vkCmdPushConstants2( commandBuffer, &pushConstantsInfo );
3608
// Unlike stages 1 and 2, this dispatch is sized by uniqueGeoCount rather than
// primitiveCount.
3609 vkCmdDispatch( commandBuffer, ( uniqueGeoCount + threadCount - 1 ) / threadCount, 1, 1 );
3610 }
3611
3612 // Barrier: VSPrepareDraw -> Indirect Draw
3613 {
3614 VkMemoryBarrier2 barrier{
3615 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3616 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3617 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
3618 .dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
3619 .dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT
3620 };
3621 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3622 depInfo.memoryBarrierCount = 1;
3623 depInfo.pMemoryBarriers = &barrier;
3624 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3625 }
3626
// NOTE(review): this info-level log runs on every successful recording; if this
// routine is called once per frame it will flood the log - consider a lower
// severity or rate limiting.
3627 PLOGI << "[VS_INSTANCED] Dispatched VS instanced drawing pipeline for " << uniqueGeoCount << " geometry types";
3628 }
3629
// Records the vertex-shader ("VS") path draws into the current render process's
// graphics command buffer.
//
// Returns without recording anything when:
//   - useVertexShaderPath_ is false or vsGraphicsPipeline_ is VK_NULL_HANDLE, or
//   - renderingDataManager->hasVertexShaderPath() is false.
// Otherwise binds vsGraphicsPipeline_, the shared vertex buffer, the VS index buffer
// (VK_INDEX_TYPE_UINT32) and the graphics descriptor set, then issues a single
// vkCmdDrawIndexedIndirectCount whose maxDrawCount comes from the RenderingDataManager.
//
// swapChainImageIndex is not referenced anywhere in the visible body.
//
// NOTE(review): the PLOGI statements run unconditionally on every call; presumably this
// function is recorded every frame, so the info-level logging may be costly - confirm and
// consider a lower severity.
// NOTE(review): this listing jumps over original lines 3682 and 3702. Line 3682 is presumably
// the pipeline-layout argument of vkCmdBindDescriptorSets and 3702 the `if` header that the
// `} else {` on 3727 pairs with - restore both from the real source before changing code here.
3630 void Renderer::recordVertexShaderDraws(size_t swapChainImageIndex) const
3631 {
3632 PLOGI << "[VS_DIAG] recordVertexShaderDraws: hasVSPath="
3633 << renderingDataManager->hasVertexShaderPath()
3634 << ", count=" << renderingDataManager->getSingleMeshletGeometryCount();
3635
3636 // Skip if VS path is disabled or pipeline wasn't created
3637 if (!useVertexShaderPath_ || vsGraphicsPipeline_ == VK_NULL_HANDLE) {
3638 PLOGI << "[VS_DIAG] EARLY EXIT: useVertexShaderPath_=" << useVertexShaderPath_
3639 << ", vsGraphicsPipeline_=" << (vsGraphicsPipeline_ != VK_NULL_HANDLE ? "valid" : "NULL");
3640 return;
3641 }
3642
3643 // Skip if no single-meshlet geometry exists
3644 if (!renderingDataManager->hasVertexShaderPath()) {
3645 PLOGI << "[VS_DIAG] EARLY EXIT: no single-meshlet geometry";
3646 return;
3647 }
3648
3649 // Note: Even with no textures loaded, we can still render using shaders that
3650 // don't require texture sampling (e.g., normals visualization shader)
3651
3652 PLOGI << "[VS_DIAG] Passed all checks, proceeding with VS draw recording";
3653
3654 TRACY_ZONE_SCOPED_NAMED("VS Path Draw Recording");
3655
3656 RenderProcess* currentRenderProcess = getCurrentRenderProcess();
3657 VkCommandBuffer commandBuffer = currentRenderProcess->getGraphicsCommandBuffer();
3658
3659 TRACY_VK_ZONE_C(getCurrentTracyVkContext(), commandBuffer, "Vertex Shader Draws", Colors::Cyan);
3660
3661 // Bind VS graphics pipeline
3662 vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, vsGraphicsPipeline_);
3663
3664 // Bind vertex buffer - use the shared vertex buffer from RenderingDataManager
3665 VkBuffer vertexBuffers[] = { renderingDataManager->getVertexBuffer().getBuffer() };
3666 VkDeviceSize offsets[] = { 0 };
3667 PLOGI << "[VS_DIAG] Binding vertex buffer: handle=" << (void*)vertexBuffers[0]
3668 << ", size=" << renderingDataManager->getVertexBuffer().getBufferSize();
3669 vkCmdBindVertexBuffers(commandBuffer, 0, 1, vertexBuffers, offsets);
3670
3671 // Bind index buffer - use the VS-specific index buffer (contiguous uint32_t indices)
3672 VkBuffer indexBuffer = renderingDataManager->getVSIndexBuffer().getBuffer();
3673 PLOGI << "[VS_DIAG] Binding index buffer: handle=" << (void*)indexBuffer
3674 << ", size=" << renderingDataManager->getVSIndexBuffer().getBufferSize();
3675 vkCmdBindIndexBuffer(commandBuffer, indexBuffer, 0, VK_INDEX_TYPE_UINT32);
3676
3677 // Bind descriptor sets - same as mesh shader path
// (one descriptor set, bound at set index 0, no dynamic offsets)
3678 VkDescriptorSet descriptorSet = currentRenderProcess->getGraphicsDescriptorSet();
3679 vkCmdBindDescriptorSets(
3680 commandBuffer,
3681 VK_PIPELINE_BIND_POINT_GRAPHICS,
3683 0,
3684 1,
3685 &descriptorSet,
3686 0,
3687 nullptr
3688 );
3689
3690 // Note: VS path does not use push constants - the shader uses gl_InstanceIndex directly
3691 // to look up per-object data from the SSBO.
3692
3693 // Draw using indirect count - reads draw commands and count from GPU buffers
3694 // With VS instanced drawing pipeline:
3695 // - VSPrepareDraw generates one draw command per unique geometry type
3696 // - Each command has instanceCount = number of visible instances of that geometry
3697 // - maxDrawCount = unique geometry count (typically 1 for a scene of cubes)
3698 // Legacy path:
3699 // - PrimitiveCulling generates one draw command per visible primitive
3700 // - maxDrawCount = primitiveCount
3701
3703 // Use new instanced drawing buffers
3704 // maxDrawCount = single-meshlet geometry count (VS path only handles these)
3705 const uint32_t maxDrawCount = renderingDataManager->getSingleMeshletGeometryCount();
3706 PLOGI << "[VS_DIAG] VS Instanced path: maxDrawCount=" << maxDrawCount;
// Nothing is recorded when the upper bound is zero.
3707 if (maxDrawCount > 0) {
3708 VkBuffer drawCmdBuffer = currentRenderProcess->getVSDrawCommandsBuffer().getBuffer();
3709 VkBuffer drawCountBuffer = currentRenderProcess->getVSDrawCountBuffer().getBuffer();
3710 PLOGI << "[VS_BUFFER_DEBUG] Eye " << currentRenderProcess->getEyeIndex() << " Draw call using:"
3711 << " vsDrawCountBuffer=" << (void*)drawCountBuffer
3712 << " vsDrawCommandsBuffer=" << (void*)drawCmdBuffer;
3713 PLOGI << "[VS_DIAG] CALLING vkCmdDrawIndexedIndirectCount (instanced path)"
3714 << " drawCmdBuffer=" << (void*)drawCmdBuffer
3715 << " drawCountBuffer=" << (void*)drawCountBuffer
3716 << " stride=" << sizeof(VkDrawIndexedIndirectCommand);
3717 vkCmdDrawIndexedIndirectCount(
3718 commandBuffer,
3719 drawCmdBuffer,
3720 0, // offset
3721 drawCountBuffer,
3722 0, // countBufferOffset
3723 maxDrawCount,
3724 sizeof(VkDrawIndexedIndirectCommand)
3725 );
3726 }
3727 } else {
3728 // Legacy path: one draw command per visible primitive
3729 const uint32_t maxDrawCount = renderingDataManager->getPrimitiveCount();
3730 PLOGI << "[VS_DIAG] VS Legacy path: maxDrawCount=" << maxDrawCount;
3731 if (maxDrawCount > 0) {
3732 PLOGI << "[VS_DIAG] CALLING vkCmdDrawIndexedIndirectCount (legacy path)";
3733 vkCmdDrawIndexedIndirectCount(
3734 commandBuffer,
3735 currentRenderProcess->getVSIndirectDrawBuffer().getBuffer(),
3736 0, // offset
3737 currentRenderProcess->getVSIndirectDrawCountBuffer().getBuffer(),
3738 0, // countBufferOffset
3739 maxDrawCount,
3740 sizeof(VkDrawIndexedIndirectCommand)
3741 );
3742 }
3743 }
3744 }
3745
// Depth-only counterpart of recordVertexShaderDraws: records the same buffer/descriptor
// bindings and the same vkCmdDrawIndexedIndirectCount calls, but binds
// vsDepthOnlyPipeline_ and emits no log output.
//
// Returns without recording anything when useVertexShaderPath_ is false,
// vsDepthOnlyPipeline_ is VK_NULL_HANDLE, or
// renderingDataManager->hasVertexShaderPath() is false.
//
// swapChainImageIndex is not referenced anywhere in the visible body.
//
// NOTE(review): this listing jumps over original lines 3782 and 3791. Line 3782 is presumably
// the pipeline-layout argument of vkCmdBindDescriptorSets and 3791 the `if` header that the
// `} else {` on 3806 pairs with - restore both from the real source before changing code here.
3746 void Renderer::recordVertexShaderDrawsDepthOnly(size_t swapChainImageIndex) const
3747 {
3748 // Skip if VS path is disabled or depth-only pipeline wasn't created
3749 if (!useVertexShaderPath_ || vsDepthOnlyPipeline_ == VK_NULL_HANDLE) {
3750 return;
3751 }
3752
3753 // Skip if no single-meshlet geometry exists
3754 if (!renderingDataManager->hasVertexShaderPath()) {
3755 return;
3756 }
3757
3758 TRACY_ZONE_SCOPED_NAMED("VS Path Depth-Only Draw Recording");
3759
3760 RenderProcess* currentRenderProcess = getCurrentRenderProcess();
3761 VkCommandBuffer commandBuffer = currentRenderProcess->getGraphicsCommandBuffer();
3762
3763 TRACY_VK_ZONE_C(getCurrentTracyVkContext(), commandBuffer, "Vertex Shader Draws (Depth-Only)", Colors::Cyan);
3764
3765 // Bind VS DEPTH-ONLY graphics pipeline
3766 vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, vsDepthOnlyPipeline_);
3767
3768 // Bind vertex buffer
// (shared vertex buffer owned by the RenderingDataManager, binding 0, offset 0)
3769 VkBuffer vertexBuffers[] = { renderingDataManager->getVertexBuffer().getBuffer() };
3770 VkDeviceSize offsets[] = { 0 };
3771 vkCmdBindVertexBuffers(commandBuffer, 0, 1, vertexBuffers, offsets);
3772
3773 // Bind index buffer
// (VS-specific index buffer, 32-bit indices)
3774 VkBuffer indexBuffer = renderingDataManager->getVSIndexBuffer().getBuffer();
3775 vkCmdBindIndexBuffer(commandBuffer, indexBuffer, 0, VK_INDEX_TYPE_UINT32);
3776
3777 // Bind descriptor sets
// (one descriptor set, bound at set index 0, no dynamic offsets)
3778 VkDescriptorSet descriptorSet = currentRenderProcess->getGraphicsDescriptorSet();
3779 vkCmdBindDescriptorSets(
3780 commandBuffer,
3781 VK_PIPELINE_BIND_POINT_GRAPHICS,
3783 0,
3784 1,
3785 &descriptorSet,
3786 0,
3787 nullptr
3788 );
3789
3790 // Draw using indirect count
// First branch: upper bound is the single-meshlet geometry count and the
// VS draw-commands / draw-count buffers of the current render process are used.
3792 const uint32_t maxDrawCount = renderingDataManager->getSingleMeshletGeometryCount();
3793 if (maxDrawCount > 0) {
3794 VkBuffer drawCmdBuffer = currentRenderProcess->getVSDrawCommandsBuffer().getBuffer();
3795 VkBuffer drawCountBuffer = currentRenderProcess->getVSDrawCountBuffer().getBuffer();
3796 vkCmdDrawIndexedIndirectCount(
3797 commandBuffer,
3798 drawCmdBuffer,
3799 0,
3800 drawCountBuffer,
3801 0,
3802 maxDrawCount,
3803 sizeof(VkDrawIndexedIndirectCommand)
3804 );
3805 }
3806 } else {
// Second branch: upper bound is the primitive count and the
// VS indirect-draw / indirect-draw-count buffers are used.
3807 const uint32_t maxDrawCount = renderingDataManager->getPrimitiveCount();
3808 if (maxDrawCount > 0) {
3809 vkCmdDrawIndexedIndirectCount(
3810 commandBuffer,
3811 currentRenderProcess->getVSIndirectDrawBuffer().getBuffer(),
3812 0,
3813 currentRenderProcess->getVSIndirectDrawCountBuffer().getBuffer(),
3814 0,
3815 maxDrawCount,
3816 sizeof(VkDrawIndexedIndirectCommand)
3817 );
3818 }
3819 }
3820 }
3821
// Records the second pass of the two-pass occlusion culling into the current render
// process's graphics command buffer, then calls recordPass2RenderPass().
//
// Recorded sequence (each step separated by a pipeline barrier):
//   1. barrier so earlier compute-shader writes are visible to compute-shader reads
//   2. zero-fill of the pipeline counters and pipeline bin-offsets buffers
//   3. object culling dispatch with push constants { primitiveCount, cullingPass = 1 }
//   4. binning allocator dispatch
//   5. meshlet unpacking dispatcher (1x1x1) followed by the indirect meshlet unpacking dispatch
//   6. draw preparation dispatch (1x1x1)
//   7. buffer barriers towards the mesh-shader and indirect-draw stages
//   8. recordPass2RenderPass( swapChainImageIndex )
//
// @param swapChainImageIndex only forwarded to recordPass2RenderPass().
//
// NOTE(review): this listing jumps over original lines 3915, 3951, 3977, 3992 and 4015, each
// directly before a bindDescriptorSets() call - presumably the matching pipeline binds.
// Restore them from the real source before changing code here.
3822 void Renderer::recordPass2Culling( const size_t swapChainImageIndex )
3823 {
3824 TRACY_ZONE_SCOPED_NAMED( "Two-Pass Occlusion - Pass 2" );
3825
3826 RenderProcess* currentRenderProcess = getCurrentRenderProcess();
3827 VkCommandBuffer commandBuffer = currentRenderProcess->getGraphicsCommandBuffer();
3828
3829 // =====================================================================
3830 // PASS 2: Re-test failed objects (intended: against CURRENT frame's Hi-Z)
3831 // =====================================================================
3832 // At this point:
3833 // - Pass 1 survivors have been rendered
3834 // - Hi-Z pyramid has been generated from Pass 1 depth
3835 // - culling_failed buffer contains object IDs that failed Pass 1 Hi-Z
3836 // - culling_failed_count contains the number of failed objects
3837 //
3838 // Pass 2 culling will:
3839 // - Read object IDs from culling_failed
3840 // - Test them against the Hi-Z (intended: the NEW one; see the NOTE below for the current state)
3841 // - Append survivors to culling_survivors (via atomicAdd to cull_count)
3842 // =====================================================================
3843
3844 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Pass 2 Occlusion Culling", Colors::Orange );
3845
3846 // NOTE: Pass 2 currently uses the SAME Hi-Z as Pass 1 (previous frame's).
3847 // This is suboptimal but avoids descriptor update issues.
3848 // TODO: Properly bind both Hi-Z pyramids at frame start and use push constants to select,
3849 // or use VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT to allow mid-frame updates.
3850 // For now, Pass 2 re-tests failed objects but may still incorrectly cull some disoccluded objects.
3851
3852 // Barrier: ensure Hi-Z writes are complete before reading
// (global memory barrier: compute-shader writes -> compute-shader reads)
3853 {
3854 VkMemoryBarrier2 hiZReadBarrier{
3855 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3856 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3857 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
3858 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3859 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
3860 };
3861 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3862 depInfo.memoryBarrierCount = 1;
3863 depInfo.pMemoryBarriers = &hiZReadBarrier;
3864 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3865 }
3866
3867 // Reset pipeline counters and bin offsets for Pass 2
3868 // (cull_count is NOT reset - Pass 2 survivors will append to existing count)
3869 {
3870 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Reset Pass 2 Buffers", Colors::Red );
3871
3872 // Barrier: Ensure Pass 1's compute writes complete before transfer writes (vkCmdFillBuffer)
3873 // Without this, there's a WRITE_AFTER_WRITE hazard: BinningAllocator (compute) → FillBuffer (transfer)
3874 VkMemoryBarrier2 computeToTransferBarrier{
3875 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3876 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3877 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
3878 .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
3879 .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT
3880 };
3881 VkDependencyInfo preFillDep{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3882 preFillDep.memoryBarrierCount = 1;
3883 preFillDep.pMemoryBarriers = &computeToTransferBarrier;
3884 vkCmdPipelineBarrier2( commandBuffer, &preFillDep );
3885
// Zero both buffers over their whole size.
3886 vkCmdFillBuffer( commandBuffer, currentRenderProcess->getPipelineCountersBuffer().getBuffer(), 0, VK_WHOLE_SIZE, 0 );
3887 vkCmdFillBuffer( commandBuffer, currentRenderProcess->getPipelineBinOffsetsBuffer().getBuffer(), 0, VK_WHOLE_SIZE, 0 );
3888
// Make the zero-fill visible to the compute dispatches that follow (read and write access).
3889 VkMemoryBarrier2 fillBarrier{
3890 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3891 .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
3892 .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
3893 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3894 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT
3895 };
3896 VkDependencyInfo fillDep{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3897 fillDep.memoryBarrierCount = 1;
3898 fillDep.pMemoryBarriers = &fillBarrier;
3899 vkCmdPipelineBarrier2( commandBuffer, &fillDep );
3900 }
3901
3902 // Pass 2 Primitive Culling
3903 const uint32_t primitiveCount = renderingDataManager->getPrimitiveCount();
3904 {
3905 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Primitive Culling Pass 2", Colors::Red );
3906 const uint32_t threadCount = getObjectCullingComputePass().getThreadCount();
3907
3908 // Push constants: {primitiveCount, cullingPass}
3909 // Note: primitiveCount is max dispatch size, shader uses culling_failed_count for actual work
3910 struct CullingPushConstants {
3911 uint32_t primitiveCount;
3912 uint32_t cullingPass; // 1 = Pass 2
3913 } pushData = { primitiveCount, 1 };
3914
3916 getObjectCullingComputePass().bindDescriptorSets( commandBuffer, currentRenderProcess->getEyeIndex() );
3917 VkPushConstantsInfo pushConstantsInfo = getObjectCullingComputePass().createPushConstantsInfo( sizeof( CullingPushConstants ), &pushData );
3918 vkCmdPushConstants2( commandBuffer, &pushConstantsInfo );
3919
3920 // Dispatch: we dispatch primitiveCount threads, but Pass 2 shader early-exits
3921 // for threads beyond culling_failed_count (GPU-driven)
// The guard avoids recording a dispatch with a group count of zero.
3922 if ( primitiveCount > 0 )
3923 {
3924 vkCmdDispatch( commandBuffer, ( primitiveCount + threadCount - 1 ) / threadCount, 1, 1 );
3925 }
3926 }
3927
3928 // Barrier: Pass 2 culling writes → downstream pipeline reads
3929 {
3930 VkMemoryBarrier2 cullBarrier{
3931 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3932 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3933 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
3934 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3935 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
3936 };
3937 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3938 depInfo.memoryBarrierCount = 1;
3939 depInfo.pMemoryBarriers = &cullBarrier;
3940 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3941 }
3942
3943 // Re-run downstream pipeline for ALL survivors (Pass 1 + Pass 2)
3944 // Note: The survivors buffer now contains both Pass 1 and Pass 2 survivors,
3945 // and cull_count reflects the total. The pipeline will process all of them.
3946
3947 // Stage 1: Binning Allocator (Pass 2)
3948 {
3949 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Binning Allocator Pass 2", Colors::Red );
3950 const uint32_t threadCount = getBinningAllocatorComputePass().getThreadCount();
3952 getBinningAllocatorComputePass().bindDescriptorSets( commandBuffer, currentRenderProcess->getEyeIndex() );
3953 if ( primitiveCount > 0 )
3954 {
3955 vkCmdDispatch( commandBuffer, ( primitiveCount + threadCount - 1 ) / threadCount, 1, 1 );
3956 }
3957 }
3958
3959 // Barrier: Binning → Unpacking
3960 {
3961 VkMemoryBarrier2 binningBarrier{
3962 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
3963 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3964 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
3965 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
3966 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
3967 };
3968 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
3969 depInfo.memoryBarrierCount = 1;
3970 depInfo.pMemoryBarriers = &binningBarrier;
3971 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
3972 }
3973
3974 // Meshlet Unpacking Dispatcher (Pass 2)
// Single workgroup; presumably it writes the indirect dispatch arguments consumed by
// vkCmdDispatchIndirect further down - confirm against the shader.
3975 {
3976 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Meshlet Unpacking Dispatcher Pass 2", Colors::Red );
3978 getMeshletUnpackingDispatchComputePass().bindDescriptorSets( commandBuffer, currentRenderProcess->getEyeIndex() );
3979 vkCmdDispatch( commandBuffer, 1, 1, 1 );
3980 }
3981
3982 // Barrier: Dispatcher → Unpacking
// (barrier set supplied by getMeshletUnpackingDispatchToMeshletUnpackingBarriers; `bundle`
// is passed in by reference and must stay alive until the barrier has been recorded)
3983 {
3984 Vulkan::BarrierBundle bundle;
3985 VkDependencyInfo dependencyInfo = getMeshletUnpackingDispatchToMeshletUnpackingBarriers( bundle );
3986 vkCmdPipelineBarrier2( commandBuffer, &dependencyInfo );
3987 }
3988
3989 // Stage 2: Meshlet Unpacking (Pass 2)
// Group counts are read on the GPU from the dispatch buffer at offset 0.
3990 {
3991 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Meshlet Unpacking Pass 2", Colors::Red );
3993 getMeshletUnpackingComputePass().bindDescriptorSets( commandBuffer, currentRenderProcess->getEyeIndex() );
3994 vkCmdDispatchIndirect( commandBuffer, getDispatchBuffer().getBuffer(), 0 );
3995 }
3996
3997 // Barrier: Unpacking → Prepare Draw
3998 {
3999 VkMemoryBarrier2 memBarrier{
4000 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
4001 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4002 .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT,
4003 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4004 .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT
4005 };
4006 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4007 depInfo.memoryBarrierCount = 1;
4008 depInfo.pMemoryBarriers = &memBarrier;
4009 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
4010 }
4011
4012 // Prepare Draw (Pass 2)
4013 {
4014 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Prepare Draw Pass 2", Colors::Red );
4016 getDrawPreparationComputePass().bindDescriptorSets( commandBuffer, currentRenderProcess->getEyeIndex() );
4017 vkCmdDispatch( commandBuffer, 1, 1, 1 );
4018 }
4019
4020 // Barrier: Compute → Graphics
// Two buffer barriers: the binned visible meshlet index buffer becomes readable by the
// mesh shader stage, and the indirect draw buffer becomes readable as indirect commands.
4021 {
4022 VkBufferMemoryBarrier2 visibleBarrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
4023 visibleBarrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
4024 visibleBarrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT;
4025 visibleBarrier.dstStageMask = VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT;
4026 visibleBarrier.dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT;
4027 visibleBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4028 visibleBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4029 visibleBarrier.buffer = currentRenderProcess->getBinnedVisibleMeshletIndexBuffer().getBuffer();
4030 visibleBarrier.offset = 0;
4031 visibleBarrier.size = VK_WHOLE_SIZE;
4032
4033 VkBufferMemoryBarrier2 indirectBarrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
4034 indirectBarrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
4035 indirectBarrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT;
4036 indirectBarrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
4037 indirectBarrier.dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT;
4038 indirectBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4039 indirectBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4040 indirectBarrier.buffer = currentRenderProcess->getIndirectDrawBuffer().getBuffer();
4041 indirectBarrier.offset = 0;
4042 indirectBarrier.size = VK_WHOLE_SIZE;
4043
4044 VkBufferMemoryBarrier2 barriers[] = { visibleBarrier, indirectBarrier };
4045 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4046 depInfo.bufferMemoryBarrierCount = 2;
4047 depInfo.pBufferMemoryBarriers = barriers;
4048 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
4049 }
4050
4051 // =====================================================================
4052 // PASS 2 RENDERING
4053 // =====================================================================
4054 // recordPass2RenderPass() uses dynamic rendering with VK_ATTACHMENT_LOAD_OP_CLEAR
4055 // and re-renders ALL survivors (Pass 1 + Pass 2) rather than loading Pass 1 results.
4056 // =====================================================================
4057 recordPass2RenderPass( swapChainImageIndex );
4058 }
4059
// Records the Pass 2 colour + depth rendering using dynamic rendering
// (vkCmdBeginRendering / vkCmdEndRendering) into the current render process's
// graphics command buffer.
//
// Steps:
//   1. image barriers for the MSAA colour buffer, the depth buffer and the swapchain image
//   2. begin rendering with both attachments cleared; the colour attachment resolves
//      (VK_RESOLVE_MODE_AVERAGE_BIT) into the swapchain image view
//   3. viewport / scissor covering the extent of eye 0
//   4. vertex-shader path draws via recordVertexShaderDraws()
//   5. one indirect draw per entry of graphicsPipelines
//
// @param swapChainImageIndex selects the render target obtained from the headset and is
//        forwarded to recordVertexShaderDraws().
//
// NOTE(review): this listing jumps over original lines 4221, 4235 and 4245 - presumably the
// names of the begin-conditional-rendering, mesh-task indirect draw and end-conditional-rendering
// calls. Restore them from the real source before changing code here.
4060 void Renderer::recordPass2RenderPass( const size_t swapChainImageIndex )
4061 {
4062 TRACY_ZONE_SCOPED_NAMED( "Pass 2 Render" );
4063
4064 RenderProcess* currentRenderProcess = getCurrentRenderProcess();
4065 VkCommandBuffer commandBuffer = currentRenderProcess->getGraphicsCommandBuffer();
4066 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Pass 2 Render", Colors::Green );
4067
4068 // Get render target info
4069 const RenderTarget* renderTarget = headset->getRenderTarget( swapChainImageIndex );
4070 const VkExtent2D extent = headset->getEyeResolution( 0u );
4071
4072 // Transition images for Pass 2 rendering:
4073 // - MSAA color: UNDEFINED → COLOR_ATTACHMENT_OPTIMAL (Pass 1 was depth-only, no color write)
4074 // - Depth: SHADER_READ_ONLY (after Hi-Z) → DEPTH_STENCIL_ATTACHMENT
4075 // - Swapchain: stays COLOR_ATTACHMENT_OPTIMAL (synchronization only, no layout change)
4076
4077 VkImageMemoryBarrier2 colorBarrier{
4078 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
4079 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, // No prior color work this frame
4080 .srcAccessMask = VK_ACCESS_2_NONE,
4081 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
4082 .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
4083 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // Pass 1 was depth-only
4084 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4085 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4086 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4087 .image = headset->getColorBufferImage(),
4088 .subresourceRange = {
4089 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
4090 .baseMipLevel = 0,
4091 .levelCount = 1,
4092 .baseArrayLayer = 0,
4093 .layerCount = 2 // Stereo
4094 }
4095 };
4096
// Depth buffer goes from the read-only layout (waiting on compute-shader reads) back to an attachment.
4097 VkImageMemoryBarrier2 depthBarrier{
4098 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
4099 .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4100 .srcAccessMask = VK_ACCESS_2_SHADER_READ_BIT,
4101 .dstStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
4102 .dstAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
4103 .oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
4104 .newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
4105 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4106 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4107 .image = headset->getDepthBufferImage(),
4108 .subresourceRange = {
4109 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
4110 .baseMipLevel = 0,
4111 .levelCount = 1,
4112 .baseArrayLayer = 0,
4113 .layerCount = 2 // Stereo
4114 }
4115 };
4116
4117 // Swapchain: Already transitioned in recordXrSwapchainImageWritableBarrier,
4118 // just need sync barrier (no layout change needed)
4119 VkImageMemoryBarrier2 swapchainBarrier{
4120 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
4121 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, // No prior color work (Pass 1 was depth-only)
4122 .srcAccessMask = VK_ACCESS_2_NONE,
4123 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
4124 .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
4125 .oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // Set in recordXrSwapchainImageWritableBarrier
4126 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4127 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4128 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4129 .image = renderTarget->image,
4130 .subresourceRange = {
4131 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
4132 .baseMipLevel = 0,
4133 .levelCount = 1,
4134 .baseArrayLayer = 0,
4135 .layerCount = 2 // Stereo
4136 }
4137 };
4138
// All three image barriers are submitted in a single vkCmdPipelineBarrier2 call.
4139 std::array<VkImageMemoryBarrier2, 3> barriers = { colorBarrier, depthBarrier, swapchainBarrier };
4140 VkDependencyInfo dependencyInfo{
4141 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
4142 .imageMemoryBarrierCount = static_cast<uint32_t>( barriers.size() ),
4143 .pImageMemoryBarriers = barriers.data()
4144 };
4145 vkCmdPipelineBarrier2( commandBuffer, &dependencyInfo );
4146
4147 // Use MSAA color buffer (same sample count as depth)
4148 // NOTE: Using CLEAR instead of LOAD because Pass 2 re-runs the full pipeline
4149 // on ALL survivors (Pass 1 + Pass 2 combined). We re-render everything and
4150 // the resolve will overwrite the swapchain with the complete final image.
4151 VkClearValue clearColor = { .color = { .float32 = { 0.0f, 0.0f, 0.0f, 1.0f } } };
4152 VkRenderingAttachmentInfo colorAttachment{
4153 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
4154 .imageView = headset->getColorBufferView(), // MSAA color buffer
4155 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4156 .resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT,
4157 .resolveImageView = renderTarget->imageView, // Resolve to swapchain
4158 .resolveImageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4159 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, // Clear - we re-render everything
4160 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
4161 .clearValue = clearColor,
4162 };
4163
4164 VkClearValue clearDepth = { .depthStencil = { .depth = 0.0f, .stencil = 0 } }; // Reverse-Z: 0 = far
4165 VkRenderingAttachmentInfo depthAttachment{
4166 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
4167 .imageView = headset->getDepthBufferView(), // MSAA depth buffer
4168 .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
4169 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, // Clear - we re-render everything
4170 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
4171 .clearValue = clearDepth,
4172 };
4173
// Per the VkRenderingInfo specification layerCount only applies when viewMask is 0;
// with the non-zero view mask below the rendered layers are selected by the mask.
4174 VkRenderingInfo renderingInfo{
4175 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
4176 .renderArea = { .offset = {0, 0}, .extent = extent },
4177 .layerCount = 1,
4178 .viewMask = 0b11, // Stereo: render to both layers
4179 .colorAttachmentCount = 1,
4180 .pColorAttachments = &colorAttachment,
4181 .pDepthAttachment = &depthAttachment,
4182 };
4183
4184 vkCmdBeginRendering( commandBuffer, &renderingInfo );
4185
4186 // Set viewport and scissor
4187 VkViewport viewport{
4188 .x = 0.0f, .y = 0.0f,
4189 .width = static_cast<float>( extent.width ),
4190 .height = static_cast<float>( extent.height ),
4191 .minDepth = 0.0f, .maxDepth = 1.0f
4192 };
4193 vkCmdSetViewport( commandBuffer, 0, 1, &viewport );
4194
4195 VkRect2D scissor{ .offset = {0, 0}, .extent = extent };
4196 vkCmdSetScissor( commandBuffer, 0, 1, &scissor );
4197
4198 // Bind descriptor set
4199 VkDescriptorSet descriptorSet = currentRenderProcess->getGraphicsDescriptorSet();
4200 vkCmdBindDescriptorSets(
4201 commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
4202 graphicsPipelineLayout, 0, 1, &descriptorSet, 0, nullptr
4203 );
4204
4205 // Draw single-meshlet geometry using vertex shader path (same as Pass 1)
4206 // This must be called since we CLEAR the framebuffer and re-render everything
4207 recordVertexShaderDraws(swapChainImageIndex);
4208
4209 // Draw Pass 2 survivors (same draw calls as Pass 1, but with updated indirect buffer)
4210 for ( uint32_t pipelineIndex = 0; pipelineIndex < graphicsPipelines.size(); pipelineIndex++ )
4211 {
// Every pipeline except index 0 is wrapped in a conditional-rendering scope whose
// predicate is read from the groupCountX field of that pipeline's indirect command.
4212 if ( pipelineIndex != 0 )
4213 {
4214 VkConditionalRenderingBeginInfoEXT conditionalRenderingBeginInfo = {
4215 .sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT,
4216 .buffer = currentRenderProcess->getIndirectDrawBuffer().getBuffer(),
4217 .offset = pipelineIndex * sizeof( VkDrawMeshTasksIndirectCommandEXT ) +
4218 offsetof( VkDrawMeshTasksIndirectCommandEXT, groupCountX ),
4219 .flags = 0
4220 };
4222 commandBuffer, &conditionalRenderingBeginInfo
4223 );
4224 }
4225
4226 graphicsPipelines[pipelineIndex]->bind( commandBuffer );
4227
// The push constant is constructed from pipelineIndex * MAX_MESHLETS_PER_BIN; presumably
// the start offset of this pipeline's bin - confirm against TaskConstants.
4228 TaskConstants taskConstants( pipelineIndex * MAX_MESHLETS_PER_BIN );
4229 vkCmdPushConstants(
4230 commandBuffer, graphicsPipelineLayout,
4231 VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT,
4232 0, sizeof( TaskConstants ), &taskConstants
4233 );
4234
// One indirect command per pipeline, located at pipelineIndex * sizeof(command) in the indirect draw buffer.
4236 commandBuffer,
4237 currentRenderProcess->getIndirectDrawBuffer().getBuffer(),
4238 pipelineIndex * sizeof( VkDrawMeshTasksIndirectCommandEXT ),
4239 1,
4240 sizeof( VkDrawMeshTasksIndirectCommandEXT )
4241 );
4242
4243 if ( pipelineIndex != 0 )
4244 {
4246 }
4247 }
4248
4249 vkCmdEndRendering( commandBuffer );
4250 }
4251
// NOTE(review): the signature of this function (original line 4252) and the body of the
// view-matrix task (original line 4285) are absent from this listing; `time` used below is
// presumably a parameter of the missing signature - confirm against the real source.
//
// Visible behaviour: clears syncCopyObjects, runs five update tasks on updateThreadPool,
// waits for all of them, then refills syncCopyObjects with the current render process's
// uploads followed by the RenderingDataManager's pending sync objects and pending
// transform sync objects (both pending lists are cleared afterwards).
4253 {
4254 // NOTE: processPendingDeletions() moved to submitTransfer() after fence wait
4255 // to ensure GPU work is complete before deleting buffers
4256
4257 syncCopyObjects.clear();
4258 // Update the uniform buffer data
4259 {
4260 TRACY_ZONE_SCOPED_NAMED( "Update uniform buffers" );
4261
4262 RenderProcess * currentRenderProcess = getCurrentRenderProcess();
4263 Renderer * thisRenderer = this;
4264
// The lambdas below capture these two locals by reference. That is safe here because every
// future is waited on with get() before this scope ends.
4265 std::future<void> futureUpdatePerObjectSSBO =
4266 updateThreadPool.submit_task(
4267 [&currentRenderProcess, &thisRenderer]() -> void
4268 {
4269 currentRenderProcess->updateSSBO( thisRenderer );
4270 }
4271 );
4272
4273 std::future<void> futureMeshData =
4274 updateThreadPool.submit_task(
4275 [&currentRenderProcess]() -> void
4276 {
4277 currentRenderProcess->updateMeshRenderingData();
4278 }
4279 );
4280
4281 std::future<void> futureUpdateViewMatrix =
4282 updateThreadPool.submit_task(
4283 [this]() -> void
4284 {
4286 }
4287 );
4288
4289 std::future<void> futureUpdateFragmentTime =
4290 updateThreadPool.submit_task(
4291 [&currentRenderProcess, time]() -> void
4292 {
4293 currentRenderProcess->updateFragmentTime( time );
4294 }
4295 );
4296
4297 std::future<void> futureUpdateObjectMeshletData =
4298 updateThreadPool.submit_task(
4299 [&currentRenderProcess]() -> void
4300 {
4301 currentRenderProcess->updateMeshletDataBuffer();
4302 }
4303 );
4304
// Block until all five tasks have finished.
4305 {
4306 TRACY_ZONE_SCOPED_NAMED( "Waiting for buffer creation to finish" );
4307 futureUpdatePerObjectSSBO.get();
4308 futureUpdateObjectMeshletData.get();
4309 futureMeshData.get();
4310 futureUpdateViewMatrix.get();
4311 futureUpdateFragmentTime.get();
4312 }
4313
4314 // uploads data to staging buffers to be transferred later on
4315 syncCopyObjects.push_back( getCurrentRenderProcess()->uploadFrustumUBO() );
4316 syncCopyObjects.push_back( getCurrentRenderProcess()->uploadHiZViewProjectionUBO() ); // Per-frame Hi-Z data
4317 syncCopyObjects.push_back( getCurrentRenderProcess()->uploadUniformBufferData() );
4318
4319 // Add RenderingDataManager staged buffer uploads (structural updates)
// The pending list is copied into syncCopyObjects before it is cleared.
4320 auto& rdmSyncObjects = renderingDataManager->getPendingSyncObjects();
4321 syncCopyObjects.insert( syncCopyObjects.end(), rdmSyncObjects.begin(), rdmSyncObjects.end() );
4322 renderingDataManager->clearPendingSyncObjects();
4323
4324 // Add RenderingDataManager transform updates (per-frame dynamic data)
4325 auto& transformSyncObjects = renderingDataManager->getPendingTransformSyncObjects();
4326 syncCopyObjects.insert( syncCopyObjects.end(), transformSyncObjects.begin(), transformSyncObjects.end() );
4327 renderingDataManager->clearPendingTransformSyncObjects();
4328 }
4329 }
4330
// NOTE(review): the signature of this function (original line 4331) is absent from this listing.
//
// Visible behaviour: resets the current render process's graphics command buffer (flags 0)
// and begins recording into it with a default VkCommandBufferBeginInfo (only sType set).
// Both Vulkan calls are wrapped in PLOG_RETURN_FN_VK; presumably that macro logs and returns
// from this function when the call fails - confirm against the macro definition.
4332 {
4333 TRACY_ZONE_SCOPED_NAMED( "Command buffer setup" );
4334 PLOG_RETURN_FN_VK( vkResetCommandBuffer( getCurrentRenderProcess()->getGraphicsCommandBuffer(), 0u ) )
4335
4336 VkCommandBufferBeginInfo commandBufferBeginInfo{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
4337 PLOG_RETURN_FN_VK( vkBeginCommandBuffer(
4338 getCurrentRenderProcess()->getGraphicsCommandBuffer(), &commandBufferBeginInfo
4339 ) )
4340 }
4341
// Records an image memory barrier that moves the given swapchain image into
// VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL on the current render process's
// graphics command buffer.
//
// @param swapChainImageIndex index passed to headset->getSwapchainImage() to pick the image.
//
// NOTE(review): this listing jumps over original lines 4345 and 4349 - presumably the opening
// of the Tracy GPU zone macro and its final (colour) argument. Restore them from the real
// source before changing code here.
4342 void Renderer::recordXrSwapchainImageWritableBarrier( const uint32_t swapChainImageIndex ) const
4343 {
4344 TRACY_ZONE_SCOPED_NAMED( "Swapchain image writable barrier" );
4346 tracyVkContext[currentFrame],
4347 getCurrentRenderProcess()->getGraphicsCommandBuffer(),
4348 "Swapchain Image Barrier",
4350 )
4351
4352 VkImageMemoryBarrier2 imageBarrierToAttachment = {};
4353 imageBarrierToAttachment.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2;
4354
4355 // Transition from unknown layout to color attachment for rendering.
4356 // Wait for any previous render pass to complete before starting new one.
4357 // Use UNDEFINED as oldLayout since we clear the image (contents don't matter).
4358 imageBarrierToAttachment.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
4359 imageBarrierToAttachment.srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
4360 imageBarrierToAttachment.dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
4361 imageBarrierToAttachment.dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
4362 imageBarrierToAttachment.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
4363 imageBarrierToAttachment.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
4364 imageBarrierToAttachment.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4365 imageBarrierToAttachment.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4366 imageBarrierToAttachment.image = headset->getSwapchainImage( swapChainImageIndex );
// Subresource range: colour aspect, mip level 0 only, array layers 0-1.
4367 imageBarrierToAttachment.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 2 };
4368
4369 VkDependencyInfo dependencyInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4370 dependencyInfo.imageMemoryBarrierCount = 1;
4371 dependencyInfo.pImageMemoryBarriers = &imageBarrierToAttachment;
4372 vkCmdPipelineBarrier2( getCurrentRenderProcess()->getGraphicsCommandBuffer(), &dependencyInfo );
4373 }
4374
// Records "Pass 1": a depth-only pre-pass into the headset depth buffer using
// dynamic rendering with a two-view multiview mask.
//
// Sequence recorded on the current graphics command buffer:
//   1. Barrier: depth image UNDEFINED -> DEPTH_STENCIL_ATTACHMENT_OPTIMAL (both layers).
//   2. vkCmdBeginRendering with only a depth attachment (cleared to 0.0).
//   3. Viewport/scissor covering the resolution of eye 0, graphics descriptor set bound.
//   4. recordVertexShaderDrawsDepthOnly(), then one indirect draw per entry of
//      depthOnlyGraphicsPipelines.
//   5. vkCmdEndRendering, then barrier: depth image -> SHADER_READ_ONLY_OPTIMAL for
//      compute-shader sampling.
//
// swapChainImageIndex is forwarded to headset->getRenderTarget() and
// recordVertexShaderDrawsDepthOnly().
//
// NOTE(review): listing lines 4377, 4478, 4492 and 4502 are missing from this extract.
// They presumably hold a guard macro, vkCmdBeginConditionalRenderingEXT(,
// vkCmdDrawMeshTasksIndirectEXT( and vkCmdEndConditionalRenderingEXT(...) - confirm
// against the real source file.
4375 void Renderer::recordRenderPass( const size_t swapChainImageIndex )
4376 {
4378 TRACY_ZONE_SCOPED_NAMED( "Pass 1 Render" );
4379
4380 RenderProcess* currentRenderProcess = getCurrentRenderProcess();
4381 VkCommandBuffer commandBuffer = currentRenderProcess->getGraphicsCommandBuffer();
4382 TRACY_VK_ZONE_C( getCurrentTracyVkContext(), commandBuffer, "Pass 1 Render", Colors::Green );
4383
4384 // Get render target info
// NOTE(review): renderTarget is not referenced again in the lines visible here.
4385 const RenderTarget* renderTarget = headset->getRenderTarget( swapChainImageIndex );
4386 const VkExtent2D extent = headset->getEyeResolution( 0u );
4387
4388 // Transition images for Pass 1 (DEPTH-ONLY Z-PREPASS):
4389 // - Depth: UNDEFINED → DEPTH_STENCIL_ATTACHMENT (cleared)
4390 // - NO color transition needed - Pass 1 is depth-only for "Double-Speed Z"
4391 // - Swapchain: Already transitioned in recordXrSwapchainImageWritableBarrier
4392
4393 VkImageMemoryBarrier2 depthBarrier{
4394 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
4395 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
4396 .srcAccessMask = VK_ACCESS_2_NONE,
4397 .dstStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
4398 .dstAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
4399 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
4400 .newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
4401 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4402 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4403 .image = headset->getDepthBufferImage(),
4404 .subresourceRange = {
4405 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
4406 .baseMipLevel = 0,
4407 .levelCount = 1,
4408 .baseArrayLayer = 0,
4409 .layerCount = 2 // Stereo
4410 }
4411 };
4412
4413 VkDependencyInfo dependencyInfo{
4414 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
4415 .imageMemoryBarrierCount = 1,
4416 .pImageMemoryBarriers = &depthBarrier
4417 };
4418 vkCmdPipelineBarrier2( commandBuffer, &dependencyInfo );
4419
4420 // Set up dynamic rendering attachments - DEPTH ONLY (no color attachment)
4421 // This enables "Double-Speed Z" optimization on NVIDIA/AMD GPUs
4422 VkClearValue clearDepth = { .depthStencil = { .depth = 0.0f, .stencil = 0 } }; // Reverse-Z: 0 = far
4423 VkRenderingAttachmentInfo depthAttachment{
4424 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
4425 .imageView = headset->getDepthBufferView(), // MSAA depth buffer
4426 .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
4427 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
4428 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
4429 .clearValue = clearDepth,
4430 };
4431
// The Vulkan spec describes layerCount as the layer count used when viewMask is 0;
// with viewMask = 0b11 the two views are selected by the mask instead.
4432 VkRenderingInfo renderingInfo{
4433 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
4434 .renderArea = { .offset = {0, 0}, .extent = extent },
4435 .layerCount = 1,
4436 .viewMask = 0b11, // Stereo: render to both layers
4437 .colorAttachmentCount = 0, // NO COLOR - depth-only prepass
4438 .pColorAttachments = nullptr, // NO COLOR - depth-only prepass
4439 .pDepthAttachment = &depthAttachment,
4440 };
4441
4442 vkCmdBeginRendering( commandBuffer, &renderingInfo );
4443
4444 // Set viewport and scissor
4445 VkViewport viewport{
4446 .x = 0.0f, .y = 0.0f,
4447 .width = static_cast<float>( extent.width ),
4448 .height = static_cast<float>( extent.height ),
4449 .minDepth = 0.0f, .maxDepth = 1.0f
4450 };
4451 vkCmdSetViewport( commandBuffer, 0, 1, &viewport );
4452
4453 VkRect2D scissor{ .offset = {0, 0}, .extent = extent };
4454 vkCmdSetScissor( commandBuffer, 0, 1, &scissor );
4455
4456 // Bind descriptor set
4457 VkDescriptorSet descriptorSet = currentRenderProcess->getGraphicsDescriptorSet();
4458 vkCmdBindDescriptorSets(
4459 commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
4460 graphicsPipelineLayout, 0, 1, &descriptorSet, 0, nullptr
4461 );
4462
4463 // Draw single-meshlet geometry using vertex shader path (DEPTH-ONLY pipeline)
4464 recordVertexShaderDrawsDepthOnly(swapChainImageIndex);
4465
4466 // Loop through all DEPTH-ONLY pipelines (mesh shader path for multi-meshlet geometry)
4467 for ( uint32_t pipelineIndex = 0; pipelineIndex < depthOnlyGraphicsPipelines.size(); pipelineIndex++ )
4468 {
// Pipeline 0 is always recorded unconditionally. Every other pipeline is wrapped in a
// conditional-rendering scope whose predicate is the 32-bit groupCountX field of that
// pipeline's own indirect command, so a zero group count skips the draw.
4469 if ( pipelineIndex != 0 )
4470 {
4471 VkConditionalRenderingBeginInfoEXT conditionalRenderingBeginInfo = {
4472 .sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT,
4473 .buffer = currentRenderProcess->getIndirectDrawBuffer().getBuffer(),
4474 .offset = pipelineIndex * sizeof( VkDrawMeshTasksIndirectCommandEXT ) +
4475 offsetof( VkDrawMeshTasksIndirectCommandEXT, groupCountX ),
4476 .flags = 0
4477 };
4479 commandBuffer, &conditionalRenderingBeginInfo
4480 );
4481 }
4482
4483 depthOnlyGraphicsPipelines[pipelineIndex]->bind( commandBuffer );
4484
// The push constant is built from pipelineIndex * MAX_MESHLETS_PER_BIN.
// NOTE(review): presumably the start offset of this pipeline's meshlet bin - confirm in TaskConstants.
4485 TaskConstants taskConstants( pipelineIndex * MAX_MESHLETS_PER_BIN );
4486 vkCmdPushConstants(
4487 commandBuffer, graphicsPipelineLayout,
4488 VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT,
4489 0, sizeof( TaskConstants ), &taskConstants
4490 );
4491
// One indirect command per pipeline, read from this pipeline's slot in the indirect draw buffer.
4493 commandBuffer,
4494 currentRenderProcess->getIndirectDrawBuffer().getBuffer(),
4495 pipelineIndex * sizeof( VkDrawMeshTasksIndirectCommandEXT ),
4496 1,
4497 sizeof( VkDrawMeshTasksIndirectCommandEXT )
4498 );
4499
4500 if ( pipelineIndex != 0 )
4501 {
4503 }
4504 }
4505
4506 vkCmdEndRendering( commandBuffer );
4507
4508 // Transition depth buffer to SHADER_READ_ONLY for Hi-Z generation
4509 // NOTE: With dynamic rendering (vkCmdBeginRendering), there's no automatic layout
4510 // transition at vkCmdEndRendering - we must do it manually.
4511 {
4512 VkImageMemoryBarrier2 depthToReadBarrier{
4513 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
4514 .srcStageMask = VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
4515 .srcAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
4516 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4517 .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
4518 .oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
4519 .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
4520 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4521 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
4522 .image = headset->getDepthBufferImage(),
4523 .subresourceRange = {
4524 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
4525 .baseMipLevel = 0,
4526 .levelCount = 1,
4527 .baseArrayLayer = 0,
4528 .layerCount = 2 // Stereo
4529 }
4530 };
4531
4532 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4533 depInfo.imageMemoryBarrierCount = 1;
4534 depInfo.pImageMemoryBarriers = &depthToReadBarrier;
4535 vkCmdPipelineBarrier2( commandBuffer, &depInfo );
4536 }
4537 }
4538
// Records a barrier on the current rendering command buffer that makes colour
// attachment writes to the XR swapchain image at swapChainImageIndex visible to
// later blit/copy (transfer read) operations.
//
// The image layout is deliberately left unchanged (COLOR_ATTACHMENT_OPTIMAL on both
// sides), so this is a pure execution/memory dependency over mip 0, array layers 0-1.
4539 void Renderer::recordXrSwapchainImageFinishedWritingBarrier( const uint32_t swapChainImageIndex ) const
4540 {
4541 TRACY_ZONE_SCOPED_NAMED( "Swapchain image finished writing barrier" );
4542
4543 // This barrier synchronizes the swapchain image for external consumers:
4544 // 1. MirrorView's blit operation (TRANSFER_READ at BLIT_BIT)
4545 // 2. SteamVR compositor's copy operation (TRANSFER_READ at COPY_BIT)
4546 //
4547 // IMPORTANT: Per OpenXR spec, we must NOT change the layout - it must remain
4548 // in COLOR_ATTACHMENT_OPTIMAL when xrReleaseSwapchainImage is called.
4549 // The runtime is responsible for any layout transitions it needs for compositing.
4550 //
4551 // The validation errors about SteamVR expecting TRANSFER_SRC_OPTIMAL are a
4552 // documented SteamVR spec violation (GitHub issue #1822), not our bug.
4553
4554 VkImageMemoryBarrier2 barrier{ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2 };
4555 barrier.pNext = nullptr;
4556 // Wait for render pass color attachment writes to complete
4557 barrier.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
4558 barrier.srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
4559 // Make data available for transfer operations (blit/copy by MirrorView and SteamVR compositor)
4560 barrier.dstStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_COPY_BIT;
4561 barrier.dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT;
4562 // NO layout transition - keep in COLOR_ATTACHMENT_OPTIMAL per OpenXR spec
4563 barrier.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
4564 barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
4565 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4566 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4567 barrier.image = headset->getSwapchainImage( swapChainImageIndex );
4568 // Both array layers (for multiview stereo rendering)
4569 barrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 2 };
4570
// Only the single image barrier is used; the other members are set to zero/null explicitly.
4571 VkDependencyInfo dependencyInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4572 dependencyInfo.pNext = nullptr;
4573 dependencyInfo.dependencyFlags = 0;
4574 dependencyInfo.memoryBarrierCount = 0;
4575 dependencyInfo.pMemoryBarriers = nullptr;
4576 dependencyInfo.bufferMemoryBarrierCount = 0;
4577 dependencyInfo.pBufferMemoryBarriers = nullptr;
4578 dependencyInfo.imageMemoryBarrierCount = 1;
4579 dependencyInfo.pImageMemoryBarriers = &barrier;
4580
4581 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &dependencyInfo );
4582 }
4583
// Returns a const reference to the owning unique_ptr of the rendering data manager.
// Ownership stays with the Renderer; callers only borrow the pointer.
4584 const std::unique_ptr<RenderingDataManager> & Renderer::getRenderingDataManager() const
4585 {
4586 return renderingDataManager;
4587 }
4588
// NOTE(review): the signature line (listing line 4589) is missing from this extract;
// presumably Renderer::createDispatchBuffer().
// Creates the device-local dispatch buffer, sized for exactly one
// VkDrawMeshTasksIndirectCommandEXT and usable as both storage and indirect buffer.
4590 dispatchBuffer.emplace(context);
4591 constexpr VkDeviceSize size = sizeof( VkDrawMeshTasksIndirectCommandEXT );
4592 dispatchBuffer->create( size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT );
4593 dispatchBuffer->setDebugName("Dispatch Buffer");
4594 }
4595
// NOTE(review): the signature line (listing line 4596) is missing from this extract;
// presumably Renderer::createPlaceholderBuffer().
// Creates a device-local storage buffer holding a single 32-bit integer.
4597 {
4598 placeholderBuffer.emplace(context);
4599 constexpr VkDeviceSize size = sizeof(uint32_t);
4600 placeholderBuffer->create( size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT );
4601 placeholderBuffer->setDebugName( "Placeholder Buffer" );
4602 }
4603
// NOTE(review): listing lines 4604 (signature) and 4606 are missing from this extract;
// presumably Renderer::createPlaceholderUniformBuffer() and the
// placeholderUniformBuffer.emplace(context) call.
// Creates a device-local uniform buffer of sizeof(uint32_t) bytes.
4605 {
4607 constexpr VkDeviceSize size = sizeof(uint32_t);
4608 placeholderUniformBuffer->create( size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT );
4609 placeholderUniformBuffer->setDebugName( "Placeholder Uniform Buffer" );
4610 }
4611
// NOTE(review): the signature line (listing line 4612) is missing from this extract;
// presumably Renderer::createCounterBuffer().
// Creates a device-local buffer holding one uint32_t counter. TRANSFER_DST usage is
// requested in addition to STORAGE so the buffer can be the target of transfer commands.
4613 counterBuffer.emplace(context);
4614 constexpr VkDeviceSize size = sizeof(uint32_t);
4615 counterBuffer->create(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT );
4616 counterBuffer->setDebugName( "Counter Buffer" );
4617 }
4618
// NOTE(review): the signature line (listing line 4619) is missing from this extract;
// presumably Renderer::createObjectIDsBuffer().
// Creates the device-local object ID buffer with an initial capacity of 4096 uint32_t IDs.
4620 objectIDsBuffer.emplace(context);
4621 // Support up to 4096 primitives (4 bytes per uint ID = 16KB)
4622 constexpr VkDeviceSize size = 4096 * sizeof(uint32_t);
4623 objectIDsBuffer->create(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
4624 objectIDsBuffer->setDebugName("Object IDs Buffer");
4625 }
4626
// NOTE(review): listing lines 4627-4628 (signature and, presumably, the
// objectCullingDataBuffer.emplace(context) call) are missing from this extract.
// Creates the device-local storage buffer for per-primitive culling data.
// The literal 32 is the assumed byte size of one ObjectCullingData entry.
4629 // Support up to 4096 primitives (32 bytes per ObjectCullingData = 128KB)
4630 constexpr VkDeviceSize size = 4096 * 32;
4631 objectCullingDataBuffer->create(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
4632 objectCullingDataBuffer->setDebugName("Object Culling Data Buffer");
4633 }
4634
// NOTE(review): listing lines 4635-4636 (signature and, presumably, the
// objectMeshletDataBuffer.emplace(context) call) are missing from this extract.
// Creates the device-local storage buffer for per-primitive meshlet metadata,
// using a hard-coded entry size of 16 bytes.
4637 // Support up to 4096 primitives (16 bytes per entry = 64KB)
4638 constexpr VkDeviceSize size = 4096 * 16;
4639 objectMeshletDataBuffer->create(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
4640 objectMeshletDataBuffer->setDebugName("Object Meshlet Data Buffer");
4641 }
4642
// NOTE(review): listing lines 4643-4644 (signature and, presumably, the
// meshUnpackingDataBuffer.emplace(context) call) are missing from this extract.
// Creates the device-local storage buffer that holds one uint32_t per surviving primitive.
4645 // Support up to 4096 primitives (4 bytes per uint survivor ID = 16KB)
4646 constexpr VkDeviceSize size = 4096 * sizeof(uint32_t);
4647 meshUnpackingDataBuffer->create(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
4648 meshUnpackingDataBuffer->setDebugName("Mesh Unpacking Data Buffer");
4649 }
4650
// NOTE(review): signature line 4651 is missing; presumably Renderer::getObjectCullingDataBuffer() const.
// Returns the contained buffer; if the member is a std::optional, value() throws
// std::bad_optional_access when the buffer has not been created yet.
4652 {
4653 return objectCullingDataBuffer.value();
4654 }
4655
// NOTE(review): signature line 4656 is missing; presumably Renderer::getObjectMeshletDataBuffer() const.
// Returns the contained buffer; if the member is a std::optional, value() throws
// std::bad_optional_access when the buffer has not been created yet.
4657 {
4658 return objectMeshletDataBuffer.value();
4659 }
4660
// NOTE(review): signature line 4661 is missing; presumably a const getter for objectIDsBuffer.
// Returns the contained buffer; if the member is a std::optional, value() throws
// std::bad_optional_access when the buffer has not been created yet.
4662 {
4663 return objectIDsBuffer.value();
4664 }
4665
// NOTE(review): signature line 4666 is missing; presumably Renderer::getCounterBuffer() const.
// Returns the contained buffer; if the member is a std::optional, value() throws
// std::bad_optional_access when the buffer has not been created yet.
4667 {
4668 return counterBuffer.value();
4669 }
4670
// NOTE(review): listing line 4671 (signature and opening brace) is missing; presumably a
// const getter for dispatchBuffer.
// Returns the contained buffer; if the member is a std::optional, value() throws
// std::bad_optional_access when the buffer has not been created yet.
4672 return dispatchBuffer.value();
4673 }
4674
// NOTE(review): signature line 4675 is missing; presumably a const getter for meshUnpackingDataBuffer.
// Returns the contained buffer; value() on the std::optional throws
// std::bad_optional_access when the buffer has not been created yet.
4676 {
4677 return meshUnpackingDataBuffer.value();
4678 }
4679
// NOTE(review): signature line 4680 is missing; presumably a const getter for placeholderBuffer.
// Returns the contained buffer; if the member is a std::optional, value() throws
// std::bad_optional_access when the buffer has not been created yet.
4681 {
4682 return placeholderBuffer.value();
4683 }
4684
// NOTE(review): signature line 4685 is missing; presumably a const getter for placeholderUniformBuffer.
// Returns the contained buffer; if the member is a std::optional, value() throws
// std::bad_optional_access when the buffer has not been created yet.
4686 {
4687 return placeholderUniformBuffer.value();
4688 }
4689
// Grows the primitive-count-dependent output buffers so they can hold data for
// primitiveCount primitives, and forwards the same request to every render process.
//
// @param primitiveCount number of primitives the buffers must be able to hold
// @return true if at least one ensureSize()/ensureBufferSizes() call reported true
//         (presumably meaning a buffer was recreated), false otherwise
//
// All byte sizes are computed in VkDeviceSize (64-bit): the count is widened before the
// multiplication so that large primitive counts cannot wrap around in 32-bit arithmetic
// and silently request a buffer that is too small.
// The buffers are accessed through operator-> and are expected to have been created already.
4690 bool Renderer::ensureOutputBufferSizes(uint32_t primitiveCount)
4691 {
4692 bool anyRecreated = false;
4693 constexpr VkMemoryPropertyFlags deviceLocal = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
4694 constexpr VkBufferUsageFlags storageUsage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
4695 constexpr VkBufferUsageFlags storageTransferUsage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4696
4697 // Calculate required sizes based on primitive count
4698 // meshUnpackingDataBuffer stores uint per surviving primitive (culling output)
4699 const VkDeviceSize meshUnpackingSize = static_cast<VkDeviceSize>(primitiveCount) * sizeof(uint32_t);
4700 anyRecreated |= meshUnpackingDataBuffer->ensureSize(meshUnpackingSize, storageUsage, deviceLocal);
4701
4702 // objectIDsBuffer stores uint per primitive
4703 const VkDeviceSize objectIDsSize = static_cast<VkDeviceSize>(primitiveCount) * sizeof(uint32_t);
4704 anyRecreated |= objectIDsBuffer->ensureSize(objectIDsSize, storageTransferUsage, deviceLocal);
4705
4706 // objectCullingDataBuffer stores ObjectCullingData (32 bytes) per primitive
4707 const VkDeviceSize cullingDataSize = static_cast<VkDeviceSize>(primitiveCount) * 32;
4708 anyRecreated |= objectCullingDataBuffer->ensureSize(cullingDataSize, storageUsage, deviceLocal);
4709
4710 // objectMeshletDataBuffer stores meshlet metadata (16 bytes) per primitive
4711 const VkDeviceSize meshletDataSize = static_cast<VkDeviceSize>(primitiveCount) * 16;
4712 anyRecreated |= objectMeshletDataBuffer->ensureSize(meshletDataSize, storageUsage, deviceLocal);
4713
4714 // Resize the buffers owned by every render process (null entries are skipped)
4715 for (RenderProcess* renderProcess : renderProcesses) {
4716 if (renderProcess) {
4717 anyRecreated |= renderProcess->ensureBufferSizes(primitiveCount);
4718 }
4719 }
4720
4721 if (anyRecreated) {
4722 PLOGI << "Renderer: Output buffers resized for " << primitiveCount << " primitives";
4723 }
4724
4725 return anyRecreated;
4726 }
4727
// NOTE(review): listing lines 4728 (signature) and 4730 (start of the call whose arguments
// follow) are missing from this extract. The function name is a guess based on the
// cross-reference index; `bundle` is presumably a caller-supplied Vulkan::BarrierBundle.
//
// Describes a compute-write -> compute-read dependency on counterBuffer and returns the
// bundle's VkDependencyInfo.
// NOTE(review): the returned struct presumably points into `bundle`, so it is only valid
// while that bundle is alive and unchanged - confirm in BarrierBundle.
4729 {
4731 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4732 VK_ACCESS_2_SHADER_WRITE_BIT,
4733 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4734 VK_ACCESS_2_SHADER_READ_BIT,
4735 counterBuffer.value()
4736 );
4737
4738 return bundle.getDependencyInfo();
4739 }
4740
// NOTE(review): listing lines 4741-4742 (signature and start of the first call) and 4750
// (start of the second call) are missing from this extract, so the function name is unknown.
//
// Adds two dependencies on dispatchBuffer to `bundle`:
//   1. compute-shader write -> compute-shader read
//   2. compute-shader write -> indirect command read (DRAW_INDIRECT stage)
// and returns the bundle's VkDependencyInfo.
4743 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4744 VK_ACCESS_2_SHADER_WRITE_BIT,
4745 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4746 VK_ACCESS_2_SHADER_READ_BIT,
4747 dispatchBuffer.value()
4748 );
4749
4751 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4752 VK_ACCESS_2_SHADER_WRITE_BIT,
4753 VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
4754 VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT,
4755 dispatchBuffer.value()
4756 );
4757
4758 return bundle.getDependencyInfo();
4759 }
4760
// NOTE(review): listing lines 4761 (signature) and 4765 (start of the call whose arguments
// follow) are missing from this extract, so the function name is unknown.
//
// Describes a compute-write -> compute-read dependency on the current render process's
// meshlet counter buffer and returns the bundle's VkDependencyInfo.
4762 {
4763 // Meshlet Unpacking writes to meshletCounterBuffer (counts per pipeline)
4764 // Meshlet Culling Dispatch reads meshletCounterBuffer to compute dispatch size
4766 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4767 VK_ACCESS_2_SHADER_WRITE_BIT,
4768 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4769 VK_ACCESS_2_SHADER_READ_BIT,
4770 getCurrentRenderProcess()->getMeshletCounterBuffer()
4771 );
4772
4773 return bundle.getDependencyInfo();
4774 }
4775
// NOTE(review): listing lines 4776 (signature) and 4783 (start of the call whose arguments
// follow) are missing from this extract; presumably
// Renderer::getMeshletCullingDispatchToMeshletCullingBarriers().
//
// Builds a compute-write -> indirect-command-read dependency on dispatchBuffer.
// The bundle is a function-local static that is cleared on every call.
// NOTE(review): the returned VkDependencyInfo presumably refers to storage inside that
// static bundle; if so it is invalidated by the next call and the function is not safe to
// call from multiple threads concurrently - confirm in BarrierBundle.
4777 {
4778 // Meshlet Culling Dispatch writes to dispatchBuffer
4779 // Meshlet Culling reads dispatchBuffer via vkCmdDispatchIndirect
4780 static Vulkan::BarrierBundle bundle;
4781 bundle.clear();
4782
4784 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4785 VK_ACCESS_2_SHADER_WRITE_BIT,
4786 VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
4787 VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT,
4788 dispatchBuffer.value()
4789 );
4790
4791 return bundle.getDependencyInfo();
4792 }
4793
// NOTE(review): listing lines 4794 (signature) and 4801 (start of the call whose arguments
// follow) are missing from this extract; presumably
// Renderer::getMeshletCullingToPrepareDrawBarriers().
//
// Builds a compute-write -> compute-read dependency on the current render process's
// meshlet counter buffer. The bundle is a function-local static that is cleared on every call.
// NOTE(review): the returned VkDependencyInfo presumably refers to storage inside that
// static bundle; if so it is invalidated by the next call and the function is not safe to
// call from multiple threads concurrently - confirm in BarrierBundle.
4795 {
4796 // Meshlet Culling writes to meshletCounterBuffer (visible meshlet counts)
4797 // Prepare Draw reads meshletCounterBuffer
4798 static Vulkan::BarrierBundle bundle;
4799 bundle.clear();
4800
4802 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4803 VK_ACCESS_2_SHADER_WRITE_BIT,
4804 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
4805 VK_ACCESS_2_SHADER_READ_BIT,
4806 getCurrentRenderProcess()->getMeshletCounterBuffer()
4807 );
4808
4809 return bundle.getDependencyInfo();
4810 }
4811
// NOTE(review): the signature line (listing line 4812) is missing from this extract, so the
// function name is unknown; it resets counterBuffer.
//
// Records a buffer barrier followed by a fill that zeroes the first sizeof(uint32_t)
// bytes of counterBuffer on the current rendering command buffer.
4813 {
4814 // Barrier: wait for previous frame's fill AND compute reads before new fill
4815 // Source scope is CLEAR | COMPUTE_SHADER (not ALL_COMMANDS); the buffer is reportedly shared across frames
4816 VkBufferMemoryBarrier2 barrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
4817 barrier.srcStageMask = VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
4818 barrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT | VK_ACCESS_2_SHADER_STORAGE_READ_BIT;
4819 barrier.dstStageMask = VK_PIPELINE_STAGE_2_CLEAR_BIT;
4820 barrier.dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT;
4821 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4822 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4823 barrier.buffer = counterBuffer->getBuffer();
4824 barrier.offset = 0;
4825 barrier.size = VK_WHOLE_SIZE;
4826
4827 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4828 depInfo.bufferMemoryBarrierCount = 1;
4829 depInfo.pBufferMemoryBarriers = &barrier;
4830 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &depInfo );
4831
// Zero the single uint32_t counter.
4832 vkCmdFillBuffer(getCurrentRenderingCommandBuffer(), counterBuffer->getBuffer(), 0, sizeof(uint32_t), 0);
4833 }
4834
// NOTE(review): listing lines 4835 (signature), 4848 (presumably the `barrier.buffer = ...`
// assignment) and 4858 (presumably the command-buffer argument of vkCmdFillBuffer) are
// missing from this extract, so the function name is unknown.
//
// Records a buffer barrier followed by a fill that zeroes the whole meshlet counter
// buffer of the current render process (offset 0, VK_WHOLE_SIZE).
4836 {
4837 // Reset the meshlet counter buffer before meshlet culling writes to it
4838 // This is needed because meshlet culling uses atomic adds to count visible meshlets
4839
4840 // Barrier: wait for previous frame's fill AND compute reads before new fill
4841 VkBufferMemoryBarrier2 barrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
4842 barrier.srcStageMask = VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
4843 barrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT | VK_ACCESS_2_SHADER_STORAGE_READ_BIT;
4844 barrier.dstStageMask = VK_PIPELINE_STAGE_2_CLEAR_BIT;
4845 barrier.dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT;
4846 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4847 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
4849 barrier.offset = 0;
4850 barrier.size = VK_WHOLE_SIZE;
4851
4852 VkDependencyInfo depInfo{ VK_STRUCTURE_TYPE_DEPENDENCY_INFO };
4853 depInfo.bufferMemoryBarrierCount = 1;
4854 depInfo.pBufferMemoryBarriers = &barrier;
4855 vkCmdPipelineBarrier2( getCurrentRenderingCommandBuffer(), &depInfo );
4856
4857 vkCmdFillBuffer(
4859 getCurrentRenderProcess()->getMeshletCounterBuffer().getBuffer(),
4860 0,
4861 VK_WHOLE_SIZE,
4862 0
4863 );
4864 }
4865
// Looks up the index registered for a graphics pipeline pointer.
//
// @param pipeline       key to search for in pipelineIndices
// @param pipelineIndex  out-parameter; written only when the pipeline is found
// @return true if the pipeline is registered, false otherwise (pipelineIndex untouched)
4866 bool Renderer::getPipelineIndex( GraphicsPipeline * pipeline, uint32_t & pipelineIndex ) const
4867 {
4868 const auto it = pipelineIndices.find( pipeline );
4869 if ( it != pipelineIndices.end() )
4870 {
4871 pipelineIndex = it->second;
4872 return true;
4873 }
4874 return false;
4875 }
4876
// Looks up the index of a pipeline by its PipelineNames value.
// Resolves the name to a GraphicsPipeline* via pipelinesByName, then delegates to the
// pointer-based overload.
//
// @param pipelineName   name to resolve
// @param pipelineIndex  out-parameter; written only when both lookups succeed
// @return true if the name maps to a pipeline that has a registered index, false otherwise
4877 bool Renderer::getPipelineIndex( PipelineNames pipelineName, uint32_t & pipelineIndex ) const
4878 {
4879 const auto it = pipelinesByName.find( pipelineName );
4880 if ( it != pipelinesByName.end() )
4881 {
4882 return getPipelineIndex( it->second, pipelineIndex );
4883 }
4884 return false;
4885 }
4886
// NOTE(review): listing lines 4887 (signature) and 4906 (a `case` label) are missing from
// this extract; the function name used here is a guess. It takes a PipelineNames value
// called pipelineName and returns a PipelineConfig.
//
// Maps a pipeline name to its shader file pair. Every configuration uses the same mesh
// shader ("triangle.mesh.spv") and differs only in the fragment shader. All paths are
// resolved relative to Path::engineShaders() / "Output".
// Unknown values log a warning and fall back to the NORMALS_SHADER configuration.
4888 {
4889 using namespace std::filesystem;
4890 const path shaderDir = Path::engineShaders() / "Output";
4891
4892 switch ( pipelineName )
4893 {
4894 case DIFFUSE_FLAT_COLOR:
4895 return PipelineConfig{
4896 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4897 .fragmentShaderPath = shaderDir / "triangle_flat.frag.spv"
4898 };
4899
4900 case DIFFUSE_SHADER:
4901 return PipelineConfig{
4902 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4903 .fragmentShaderPath = shaderDir / "triangle.frag.spv"
4904 };
4905
// NOTE(review): the `case` label for the following configuration is missing from this extract.
4907 return PipelineConfig{
4908 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4909 .fragmentShaderPath = shaderDir / "triangle_movable_full.frag.spv"
4910 };
4911
4912 case NORMALS_SHADER:
4913 return PipelineConfig{
4914 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4915 .fragmentShaderPath = shaderDir / "triangle_movable_normals.frag.spv"
4916 };
4917
4918 case L0_SHADER:
4919 return PipelineConfig{
4920 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4921 .fragmentShaderPath = shaderDir / "triangle_movable_l0.frag.spv"
4922 };
4923
4924 case L1_SHADER:
4925 return PipelineConfig{
4926 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4927 .fragmentShaderPath = shaderDir / "triangle_movable_l1.frag.spv"
4928 };
4929
4930 case L2_SHADER:
4931 return PipelineConfig{
4932 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4933 .fragmentShaderPath = shaderDir / "triangle_movable_l2.frag.spv"
4934 };
4935
4936 case DYNAMIC_TEXTURES:
4937 return PipelineConfig{
4938 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4939 .fragmentShaderPath = shaderDir / "triangle_movable.frag.spv"
4940 };
4941
4942 case STATIC_LIGHTMAP:
4943 return PipelineConfig{
4944 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4945 .fragmentShaderPath = shaderDir / "triangle_static_lightmap.frag.spv"
4946 };
4947
// Fallback: same shader pair as NORMALS_SHADER.
4948 default:
4949 PLOGW << "Unknown PipelineNames enum: " << static_cast<int>(pipelineName) << ", returning NORMALS_SHADER config";
4950 return PipelineConfig{
4951 .meshShaderPath = shaderDir / "triangle.mesh.spv",
4952 .fragmentShaderPath = shaderDir / "triangle_movable_normals.frag.spv"
4953 };
4954 }
4955 }
4956
4957#ifdef ENABLE_TRACY
// Returns the Tracy Vulkan profiling context stored at index currentFrame.
// Only compiled when ENABLE_TRACY is defined. The index is not bounds-checked here.
4958 TracyVkCtx Renderer::getCurrentTracyVkContext() const
4959 {
4960 return tracyVkContext[currentFrame];
4961 }
4962#endif
4963} // namespace EngineCore
::EngineCore::LogCategory LogLOD("LogLOD", ::EngineCore::LogVerbosity::Info, ::EngineCore::LogVerbosity::Verbose)
#define VS_LOG_ONCE(Category, Verbosity, Format,...)
Log once - only logs the first time this line is executed.
Definition LogMacros.h:168
#define VS_LOG(Category, Verbosity, Format,...)
Log a message with category and verbosity.
Definition LogMacros.h:122
#define COMPUTE_SHADER_NAME(name)
Mode-specific macros for compute shader loading and execution guards.
Definition Renderer.cpp:62
#define MESH_STAGE
#define GRAPHICS_GUARD()
Definition Renderer.cpp:64
#define COMPUTE_GUARD()
Definition Renderer.cpp:63
constexpr uint32_t MAX_PIPELINES
Definition Settings.h:39
constexpr uint32_t MAX_MESHLETS_PER_BIN
Definition Settings.h:37
constexpr int MAX_FRAMES_IN_FLIGHT
Definition Settings.h:26
constexpr uint32_t MAX_TEXTURE_COUNT
Definition Settings.h:35
#define TRACY_ZONE_SCOPED_NAMED_D(stream_expr)
#define TRACY_ZONE_SCOPED_FUNCTION
#define TRACY_VK_ZONE_C(vk_context, cmd_buffer, name, color)
#define TRACY_VK_COLLECT(vk_context, cmd_buffer)
#define TRACY_ZONE_SCOPED_NAMED(name)
The application context is the core class which stores the basic openxr and vulkan objects.
The compute pass stores all resources which belong to a compute pipeline.
Definition ComputePass.h:26
uint32_t getThreadCount() const
Gets the number of threads this compute shader runs with on the GPU.
void bindDescriptorSets(VkCommandBuffer commandBuffer, uint32_t frameInFlightIndex)
Binds the descriptor set of a frame in flight.
VkPushConstantsInfo createPushConstantsInfo(uint32_t size, const void *pValues) const
Used to create preconfigured push constants where you only pass in the size of the push constant and ...
ComputePipeline * getComputePipeline()
Getter for the raw compute pipeline.
Wrapper for shader specialization data for compute pipelines. This usually only stores the thread cou...
void bind(VkCommandBuffer commandBuffer)
Binds this pipeline to an arbitrary command buffer.
std::vector< VkDescriptorSetLayoutBinding > build()
DescriptorSetLayoutBuilder & addStorageBuffer(uint32_t binding, VkShaderStageFlags stageFlags)
DescriptorSetLayoutBuilder & addCombinedImageSampler(uint32_t binding, VkShaderStageFlags stageFlags)
DescriptorSetLayoutBuilder & addUniformBuffer(uint32_t binding, VkShaderStageFlags stageFlags)
DescriptorSetLayoutBuilder & addStorageImageArray(uint32_t binding, uint32_t count, VkShaderStageFlags stageFlags)
DescriptorSetLayoutBuilder & addStorageImage(uint32_t binding, VkShaderStageFlags stageFlags)
The Descriptor set updater is used to create a list of descriptor set writes which are executed with ...
void update(VkDevice device) const
Updates the bindings on a descriptor sets.
DescriptorSetUpdater & addCombinedImageSampler(uint32_t binding, const VkDescriptorImageInfo *pImageInfo)
Adds a combined image sampler to the chain of updates.
DescriptorSetUpdater & addStorageImage(uint32_t binding, const VkDescriptorImageInfo *pImageInfo)
Adds a storage image to the chain of updates.
Specialization data for mesh shaders - provides screen resolution for small triangle culling.
static constexpr ConstexprPath engineShaders()
Path to engine shaders @lsp_source_path Engine/shaders.
Definition Path.h:53
Fluent builder for queue submissions with timeline and binary semaphores.
VkResult submit(VkQueue queue, VkFence fence=VK_NULL_HANDLE)
Builds and submits to the specified queue.
QueueSubmitBuilder & waitForResourceReuse(PipelineStage stage, VkPipelineStageFlags2 waitStages)
Convenience: wait for resource reuse (frames-in-flight pattern)
QueueSubmitBuilder & withCommandBuffer(VkCommandBuffer cmdBuffer)
Adds a command buffer to be executed.
QueueSubmitBuilder & signalBinary(VkSemaphore semaphore)
Signals a binary semaphore at completion (for present)
QueueSubmitBuilder & waitForBinary(VkSemaphore semaphore, VkPipelineStageFlags2 waitStages)
Adds a binary semaphore wait (for swapchain acquire, etc.)
QueueSubmitBuilder & waitForPrevious(PipelineStage stage, VkPipelineStageFlags2 waitStages)
Convenience: wait for previous frame's stage.
QueueSubmitBuilder & signalStage(PipelineStage stage)
Signals the timeline semaphore at a specific stage.
QueueSubmitBuilder & waitForCurrent(PipelineStage stage, VkPipelineStageFlags2 waitStages)
Convenience: wait for same frame's earlier stage.
The render process class consolidates all the resources that need to be duplicated for each frame th...
const VulkanBuffer & getMeshletCounterBuffer() const
VkCommandBuffer & getGraphicsCommandBuffer()
const VulkanBuffer & getIndirectDrawBuffer() const
void updateComputeObjectCullingDescriptorSets(VkImageView previousFrameHiZView=VK_NULL_HANDLE) const
Updates object culling descriptor sets.
const VulkanBuffer & getPipelineCountersBuffer() const
void updateFrustumUBOData(const Headset *headset)
void updateEyeViewProjectionMatrices(const Headset *headset)
Writes a view-projection matrix to an eye.
const VulkanBuffer & getVSDrawCommandsBuffer() const
VkImageView getHiZPyramidFullView() const
uint32_t getHiZMipLevels() const
const VulkanBuffer & getCullingFailedCounterBuffer() const
VulkanStagedBufferSyncObjects uploadLodConfigBuffer()
Upload LOD config buffer to GPU.
const VulkanBuffer & getVSIndirectDrawCountBuffer() const
const VulkanBuffer & getLodClusterSurvivorCountBuffer() const
VkImageView getHiZPyramidMipView(uint32_t mipLevel) const
void updateLodConfig(const glm::vec3 &cameraPosition, const glm::mat4 &projMatrix, float nearPlane, float screenWidth, float screenHeight, float errorThresholdPixels=1.0f, bool ditherEnabled=true)
Update LOD configuration with camera and screen info. Call this once per frame before dispatching prim...
void updateHiZViewProjectionData(const Headset *headset)
Updates Hi-Z view-projection data for occlusion culling. Must be called after updateEyeViewProjectionM...
bool getTransferFenceSubmitted() const
VulkanBuffer & getBinnedVisibleMeshletIndexBuffer()
void setGraphicsFenceSubmitted(bool isSubmitted)
const VulkanBuffer & getVSGeometryCountersBuffer() const
VkDescriptorSet getGraphicsDescriptorSet() const
VkExtent2D getHiZExtent() const
void updateSSBO(const Renderer *renderer)
const VulkanBuffer & getVSVisibleCountBuffer() const
VkImage getHiZPyramidImage() const
const VulkanBuffer & getVSDrawCountBuffer() const
VkCommandBuffer & getTransferCommandBuffer()
void updateFragmentTime(float time)
Sets the time value for time based fragment shader effects.
void setTransferFenceSubmitted(bool isSubmitted)
VkSemaphore getMirrorViewImageSemaphore() const
const VulkanBuffer & getPipelineBinOffsetsBuffer() const
const VulkanBuffer & getVSIndirectDrawBuffer() const
Legacy VS path buffer getters (backwards compatibility)
std::optional< VulkanBuffer > meshUnpackingDataBuffer
Definition Renderer.h:219
DispatcherComputePass & getMeshletUnpackingDispatchComputePass()
void restartRenderCommandBuffers() const
Resets the command buffers of the active frame and begins a new write.
ComputePass & getMeshletUnpackingComputePass()
void createMeshletUnpackingResources()
std::vector< RenderProcess * > renderProcesses
The render processes.
Definition Renderer.h:748
void createHiZMipDescriptorSets()
uint32_t currentFrame
Definition Renderer.h:764
std::optional< ComputePass > hiZGenerationComputePass
Definition Renderer.h:639
VkDependencyInfo getObjectCullingToMeshletUnpackingDispatchBarriers(Vulkan::BarrierBundle &bundle) const
ComputePass & getObjectCullingComputePass()
VkCommandPool getTransferCommandPool() const
Gets the command pool used for recording data transfer at the beginning of a frame.
int64_t getFrameElapsedMs() const
Gets the elapsed time since markFrameStart() was called. Used to check if the current frame has taken...
void updateHiZSPDDescriptorSets(uint32_t frameIndex)
std::optional< ComputePass > vsPrepareDrawComputePass_
Definition Renderer.h:674
void createPlaceholderBuffer()
Creates a buffer which can be used anywhere and holds one single 32 bit integer.
std::optional< ComputePass > hiZSPDComputePass
Definition Renderer.h:648
std::unordered_map< PipelineNames, GraphicsPipeline * > pipelinesByName
Definition Renderer.h:579
void initializeXrSwapchainFormats()
bool freezeCulling_
When true, culling data (frustum planes, Hi-Z VP) is frozen for debugging.
Definition Renderer.h:681
const Engine * engine
Definition Renderer.h:581
bool ensureOutputBufferSizes(uint32_t primitiveCount)
Ensures output buffers are sized to handle the given primitive count. Called by RenderingDataManager ...
void createObjectMeshletDataBuffer()
static constexpr std::chrono::milliseconds STALL_THRESHOLD_MS
Threshold for considering a frame as "stalled" (1 second)
Definition Renderer.h:569
std::optional< ComputePass > binningAllocatorComputePass
Stage 1: Binning allocator.
Definition Renderer.h:626
VkPipeline vsGraphicsPipeline_
Definition Renderer.h:660
VkDependencyInfo getMeshletCullingToPrepareDrawBarriers()
const VulkanBuffer & getObjectCullingDataBuffer() const
VkDescriptorSetLayout graphicsDescriptorSetLayout
Definition Renderer.h:707
RenderProcess * getCurrentRenderProcess() const
Gets the render process of the current frame.
std::unique_ptr< TimelineSynchronizer > timelineSynchronizer_
Definition Renderer.h:551
const VulkanBuffer & getObjectMeshletDataBuffer() const
uint64_t getTimelineSemaphoreValue() const
void recordVertexShaderDraws(size_t swapChainImageIndex) const
void setFreezeCulling(bool freeze)
Freezes culling data (frustum planes, Hi-Z view-projection)
Definition Renderer.cpp:713
void markFrameStart()
Marks the start of a new frame for stall detection. Call this at the beginning of each render frame.
std::optional< ComputePass > drawPreparationComputePass
Definition Renderer.h:631
std::vector< VulkanStagedBufferSyncObjects > syncCopyObjects
Definition Renderer.h:571
std::unordered_map< GraphicsPipeline *, uint32_t > pipelineIndices
Definition Renderer.h:576
const VulkanBuffer & getCounterBuffer() const
void updateViewMatrix()
Requests an update of the current render process's view matrix. The matrix is pulled from the headset...
Definition Renderer.cpp:698
void recordXrSwapchainImageFinishedWritingBarrier(uint32_t swapChainImageIndex) const
VkDependencyInfo getMeshletCullingDispatchToMeshletCullingBarriers()
VkPipelineLayout graphicsPipelineLayout
Definition Renderer.h:743
void renderToXr(size_t swapChainImageIndex, float time)
Definition Renderer.cpp:849
void resetMeshletCullingDispatchBuffers()
std::optional< VulkanBuffer > hiZSPDAtomicBuffer
Definition Renderer.h:649
VkPipelineLayout computeObjectCullingPipelineLayout
The pipeline layout.
Definition Renderer.h:740
void initializeGpuBuffers() const
Definition Renderer.cpp:557
ComputePass & getMeshletCullingComputePass()
const VulkanBuffer & getPlaceholderUniformBuffer() const
const TimelineSynchronizer & getTimelineSynchronizer() const
Gets the timeline synchronizer for managing frame synchronization.
BS_tracy::tracy_thread_pool< BS_tracy::tp::none > updateThreadPool
Definition Renderer.h:573
void createVertexShaderPathResources()
VkCommandBuffer getCurrentRenderingCommandBuffer() const
DispatcherComputePass & getMeshletCullingDispatchComputePass()
const std::vector< GraphicsPipeline * > & getGraphicsPipelines() const
Gets the list of all graphics pipelines.
void createHiZGenerationResources()
std::optional< VulkanBuffer > objectMeshletDataBuffer
Definition Renderer.h:217
void recordTransfer(float time)
Definition Renderer.cpp:725
void submitGraphics(uint32_t swapChainImageIndex, VkSemaphore mirrorAcquireSemaphore=VK_NULL_HANDLE)
void cleanup()
Cleans up all resources of the renderer.
void createPlaceholderUniformBuffer()
VkCommandPool vkGraphicsCommandPool
The graphics and present command pool.
Definition Renderer.h:604
void recordPass2RenderPass(size_t swapChainImageIndex)
Records the Pass 2 render pass using dynamic rendering. Uses VK_ATTACHMENT_LOAD_OP_LOAD to preserve Pa...
std::vector< VkPushConstantRange > pushConstants
Definition Renderer.h:735
void recordVSInstancedDrawingPipeline()
std::vector< GraphicsPipeline * > graphicsPipelines
The pipelines.
Definition Renderer.h:757
VkSemaphore getTimelineSemaphore() const
Getter for the main timeline renderer semaphore.
void recordVertexShaderDrawsDepthOnly(size_t swapChainImageIndex) const
std::optional< VulkanBuffer > objectCullingDataBuffer
Definition Renderer.h:216
VkCommandPool vkTransferCommandPool
The transfer command pool for all commands submitted to the transfer queue.
Definition Renderer.h:612
void createHiZSPDDescriptorSets()
std::unique_ptr< RenderingDataManager > renderingDataManager
Definition Renderer.h:214
VkCommandBuffer getCurrentTransferCommandBuffer() const
Gets the current frame in flight and retrieves the transfer command buffer.
const VulkanBuffer & getDispatchBuffer() const
std::chrono::steady_clock::time_point lastFrameStartTime_
Time when the last frame started, for stall detection.
Definition Renderer.h:566
~Renderer()
Cleans up the object.
Definition Renderer.cpp:202
VkPipeline vsDepthOnlyPipeline_
Definition Renderer.h:762
VkDependencyInfo getMeshletUnpackingDispatchToMeshletUnpackingBarriers(Vulkan::BarrierBundle &bundle) const
Renderer(ApplicationContext *context=nullptr, Headset *headset=nullptr, const Engine *engine=nullptr)
Constructor.
Definition Renderer.cpp:67
std::optional< ComputePass > objectCullingComputePass
The object culling compute pass.
Definition Renderer.h:625
void resetMeshletUnpackingDispatchBuffers()
std::optional< VulkanBuffer > counterBuffer
Definition Renderer.h:221
void updateHiZMipDescriptorSets(uint32_t frameIndex)
void createPrepareDrawResources()
VkCommandPool getGraphicsCommandPool() const
Gets the graphics command pool.
void uploadFrameData(float time)
Definition Renderer.cpp:719
void prepareTransferSubmission(uint32_t frameIndex) const
Definition Renderer.cpp:582
VkSemaphore getCurrentPresentableSemaphore(uint32_t swapchainImageIndex) const
void createObjectCullingDataBuffer()
void createVSInstancedDrawingResources()
const std::unique_ptr< RenderingDataManager > & getRenderingDataManager() const
Getter for the rendering data manager.
std::optional< ComputePass > vsBinningAllocatorComputePass_
Definition Renderer.h:672
void advanceFrameIndices()
Advances the frame indices for the next frame in flight.
void createMeshletCullingDispatcherResources()
Creates the compute pipeline for the meshlet culling stage. This should only be called after the mesh...
VkPipelineLayout prepareDrawsComputePipelineLayout
Definition Renderer.h:742
VkDescriptorPool hiZDescriptorPool
Definition Renderer.h:645
uint64_t renderedFrameCounter
Definition Renderer.h:556
void allocateDescriptors()
Allocates the first batch of mesh data to the shaders.
Definition Renderer.cpp:209
std::optional< VulkanBuffer > objectIDsBuffer
Definition Renderer.h:218
ComputePass & getDrawPreparationComputePass()
int findExistingPipeline(const std::string &meshShader, const std::string &fragShader, const PipelineMaterialPayload &pipelineData) const
Searches for the first existing pipeline which has the same mesh and fragment shader.
void createPrimitiveCullingResources()
const VulkanBuffer & getObjectIDsBuffer() const
const VulkanBuffer & getMeshUnpackingDataBuffer() const
std::array< VkDescriptorSet, MAX_FRAMES_IN_FLIGHT > hiZSPDDescriptorSets
Definition Renderer.h:650
bool shouldSkipMirrorView()
Checks if mirror view should be skipped this frame.
const VulkanBuffer & getPlaceholderBuffer() const
std::vector< GraphicsPipeline * > depthOnlyGraphicsPipelines
Definition Renderer.h:761
ComputePass & getVSPrepareDrawComputePass()
static constexpr int SKIP_FRAMES_AFTER_STALL
Number of frames to skip after detecting a stall (covers all swapchain images)
Definition Renderer.h:563
VkDescriptorPool descriptorPool
The descriptor pool for all descriptors of the renderer.
Definition Renderer.h:620
bool getPipelineIndex(GraphicsPipeline *pipeline, uint32_t &pipelineIndex) const
Gets the index of a pipeline.
void getCurrentTracyVkContext() const
Definition Renderer.h:548
std::optional< VulkanBuffer > placeholderBuffer
Definition Renderer.h:224
std::optional< DispatcherComputePass > meshletCullingDispatchComputePass
Definition Renderer.h:629
std::optional< VulkanBuffer > dispatchBuffer
Definition Renderer.h:222
void createMeshUnpackingDataBuffer()
std::vector< VkSemaphore > renderFinishedSemaphores
Definition Renderer.h:771
void createBinningAllocatorResources()
void syncTimelineAfterPause()
Synchronizes the frame counter with the timeline semaphore after pausing.
std::optional< DispatcherComputePass > meshletUnpackingDispatchComputePass
Definition Renderer.h:627
ComputePass & getVSInstanceUnpackingComputePass()
void recordPass2Culling(size_t swapChainImageIndex)
Records Pass 2 of two-pass occlusion culling. Re-tests objects that failed Pass 1 Hi-Z against the new...
std::optional< VulkanBuffer > placeholderUniformBuffer
Definition Renderer.h:225
std::optional< ComputePass > vsInstanceUnpackingComputePass_
Definition Renderer.h:673
VkSemaphore getCurrentMirrorViewSemaphore() const
std::array< std::array< VkDescriptorSet, MAX_HIZ_MIP_LEVELS >, MAX_FRAMES_IN_FLIGHT > hiZMipDescriptorSets
Definition Renderer.h:644
ComputePass & getVSBinningAllocatorComputePass()
void createMeshletUnpackingDispatcherResources()
Creates the complete pipeline for the meshlet unpacker. This should only be called after the buffer h...
ComputePass & getBinningAllocatorComputePass()
void recordXrSwapchainImageWritableBarrier(uint32_t swapChainImageIndex) const
bool isInStallRecovery() const
Checks if we're currently in stall recovery mode. During stall recovery, extra GPU synchronization sh...
ApplicationContext * context
Definition Renderer.h:766
static PipelineConfig getPipelineConfig(PipelineNames pipelineName)
Gets the static configuration for a given pipeline type. Returns shader paths and pipeline data for th...
void updateCpuRenderResources(float time)
Updates all data for gpu frame buffers.
std::vector< VkCommandBuffer > getGraphicsCommandBuffers() const
Gets all graphics command buffers from all owned EngineCore::RenderProcess.
VkDependencyInfo getMeshletUnpackingToMeshletCullingDispatchBarriers(Vulkan::BarrierBundle &bundle)
std::optional< ComputePass > meshletUnpackingComputePass
Definition Renderer.h:628
std::optional< ComputePass > meshletCullingComputePass
Definition Renderer.h:630
void recordRenderPass(size_t swapChainImageIndex)
void createMeshletCullingResources()
void recordHiZGeneration()
Records the Hi-Z pyramid generation compute pass. Should be called after the render pass completes.
Centralized timeline semaphore management for the rendering pipeline.
RAII wrapper for Vulkan buffer and device memory.
VkBuffer getBuffer() const
void destroyShaders(VkDevice device) override
Definition Shader.cpp:90
VkShaderModule getFragmentShader() const
Definition Shader.cpp:85
VkShaderModule getVertexShader() const
Definition Shader.cpp:80
static VkResult createPipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout, const std::string &name)
static void endSingleTimeCommands(VkDevice device, VkQueue graphicsQueue, VkCommandPool commandPool, VkCommandBuffer commandBuffer)
static std::string strIsValid(void *object)
static void setObjectName(VkDevice device, VulkanObjectType objectHandle, const std::string &name)
static VkDescriptorBufferInfo fullBufferInfo(VkBuffer buffer)
static VkResult createDescriptorPool(VkDevice device, const VkDescriptorPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool, const std::string &name)
static const VulkanFunctions & getFunctions()
static VkResult createDescriptorSetLayout(VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDescriptorSetLayout *pSetLayout, const std::string &name)
static VkCommandBuffer beginSingleTimeCommands(VkDevice device, VkCommandPool commandPool)
VkDependencyInfo getDependencyInfo() const
void addComputeBufferBarrier(VkPipelineStageFlags2 srcStageMask, VkAccessFlags2 srcAccessMask, VkPipelineStageFlags2 dstStageMask, VkAccessFlags2 dstAccessMask, const EngineCore::VulkanBuffer &buffer)
constexpr uint32_t Gray
Definition Colors.h:9
constexpr uint32_t Blue
Definition Colors.h:12
constexpr uint32_t Cyan
Definition Colors.h:8
constexpr uint32_t Turquoise
Definition Colors.h:14
constexpr uint32_t Red
Definition Colors.h:13
constexpr uint32_t Orange
Definition Colors.h:11
constexpr uint32_t Green
Definition Colors.h:10
Log category system implementation.
constexpr uint32_t PIPELINE_STAGE_COUNT
@ Warning
Potential issue.
Definition LogCategory.h:31
@ MOVABLE_DIFFUSE_SHADER
STL namespace.
Static configuration for a graphics pipeline. Defines which shaders to use for a given PipelineNames e...
Definition Renderer.h:53
std::filesystem::path fragmentShaderPath
Definition Renderer.h:55
PipelineMaterialPayload pipelineData
Definition Renderer.h:56
std::filesystem::path meshShaderPath
Definition Renderer.h:54
A render target which contains all resources to access the rendered image.
Definition Headset.h:26
VkImageView imageView
Definition Headset.h:28
Used to tell the pipeline which objects are shaded with which shader.
Definition Renderer.h:80
The fundamental building block of all meshes in this engine.
Definition Vertex.h:15
PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT
PFN_vkCmdDrawMeshTasksIndirectEXT vkCmdDrawMeshTasksIndirectEXT
PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT