use std::sync::Arc;

use ash::vk;

use crate::*;

/// Timeout to wait for GPU execution in the drop function.
static DROP_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30 * 60);

/// GPU execution context.
/// It records commands and runs them on the GPU.
/// It keeps track of the resources used by the recorded commands.
///
/// Warnings!
/// Context is not thread safe.
/// Execution order is not guaranteed. Don't rely on it.
/// If you need to run commands in a specific order, use the `wait_finish` method
/// and start the next command after the previous one has finished.
pub struct Context {
    // Which device to execute on.
    device: Arc<Device>,

    // GPU execution handler.
    vk_queue: vk::Queue,

    // Command buffers are created using this command pool.
    vk_command_pool: vk::CommandPool,

    // Command buffer is used to record commands to execute.
    vk_command_buffer: vk::CommandBuffer,

    // Synchronization fence to wait for GPU execution.
    vk_fence: vk::Fence,

    // Resources used in the context. Kept alive while commands may still run.
    resources: Vec<Arc<dyn Resource>>,
}

impl Context {
    pub fn new(device: Arc<Device>) -> GpuResult<Self> {
        // Get the GPU execution queue from the device.
        let queue = device.compute_queue();

        // Create the command pool.
        let command_pool_create_info = vk::CommandPoolCreateInfo::default()
            .queue_family_index(queue.vk_queue_family_index as u32)
            .flags(vk::CommandPoolCreateFlags::default());
        let vk_command_pool = unsafe {
            device
                .vk_device()
                .create_command_pool(&command_pool_create_info, device.cpu_allocation_callbacks())?
        };

        // Create a fence to wait for GPU execution.
        // The fence is created in the signaled state because we reset it before each run.
        let fence_create_info =
            vk::FenceCreateInfo::default().flags(vk::FenceCreateFlags::SIGNALED);
        let vk_fence = unsafe {
            device
                .vk_device()
                .create_fence(&fence_create_info, device.cpu_allocation_callbacks())
        };
        let vk_fence = match vk_fence {
            Ok(fence) => fence,
            Err(e) => {
                // If fence creation failed, destroy the created command pool and return the error.
                unsafe {
                    device
                        .vk_device()
                        .destroy_command_pool(vk_command_pool, device.cpu_allocation_callbacks());
                }
                return Err(GpuError::from(e));
            }
        };

        let mut context = Self {
            vk_queue: queue.vk_queue,
            device,
            vk_command_pool,
            vk_command_buffer: vk::CommandBuffer::null(),
            vk_fence,
            resources: Vec::new(),
        };
        context.init_command_buffer()?;
        Ok(context)
    }

    pub fn dispatch(&mut self, x: usize, y: usize, z: usize) -> GpuResult<()> {
        if self.vk_command_buffer == vk::CommandBuffer::null() {
            self.init_command_buffer()?;
        }

        let max_compute_work_group_count = self.device.max_compute_work_group_count();
        if x > max_compute_work_group_count[0]
            || y > max_compute_work_group_count[1]
            || z > max_compute_work_group_count[2]
        {
            return Err(GpuError::OutOfBounds(
                "Dispatch work group size is out of bounds".to_string(),
            ));
        }

        unsafe {
            self.device.vk_device().cmd_dispatch(
                self.vk_command_buffer,
                x as u32,
                y as u32,
                z as u32,
            );
        }
        Ok(())
    }

    /// Bind a pipeline to the context.
    /// It defines which shader, and which resources bound to that shader, will be used.
    /// It records a command that runs on the GPU after the `run` call.
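    ///
    /// A minimal usage sketch (the `device`, `pipeline`, and `descriptor_set`
    /// values are assumed to be created elsewhere; the workgroup count and
    /// timeout are placeholders):
    ///
    /// ```ignore
    /// let mut context = Context::new(device.clone())?;
    /// context.bind_pipeline(pipeline, &[descriptor_set])?;
    /// context.dispatch(64, 1, 1)?;
    /// context.run()?;
    /// context.wait_finish(std::time::Duration::from_secs(10))?;
    /// ```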
    pub fn bind_pipeline(
        &mut self,
        pipeline: Arc<Pipeline>,
        descriptor_sets: &[Arc<DescriptorSet>],
    ) -> GpuResult<()> {
        if self.vk_command_buffer == vk::CommandBuffer::null() {
            self.init_command_buffer()?;
        }

        unsafe {
            self.device.vk_device().cmd_bind_pipeline(
                self.vk_command_buffer,
                vk::PipelineBindPoint::COMPUTE,
                pipeline.vk_pipeline(),
            );
        }

        unsafe {
            if !descriptor_sets.is_empty() {
                let vk_descriptor_sets: Vec<_> = descriptor_sets
                    .iter()
                    .map(|set| set.as_ref().vk_descriptor_set())
                    .collect();
                self.device.vk_device().cmd_bind_descriptor_sets(
                    self.vk_command_buffer,
                    vk::PipelineBindPoint::COMPUTE,
                    pipeline.vk_pipeline_layout(),
                    0,
                    &vk_descriptor_sets,
                    &[],
                );
            }
        }

        // Add resources to the list to keep them alive.
        self.resources.extend(
            descriptor_sets
                .iter()
                .map(|r| r.clone() as Arc<dyn Resource>),
        );
        self.resources.push(pipeline);
        Ok(())
    }

    /// Copy data from one buffer to another.
    /// It records a command that runs on the GPU after the `run` call.
    pub fn copy_gpu_buffer(
        &mut self,
        src: Arc<Buffer>,
        dst: Arc<Buffer>,
        src_offset: usize,
        dst_offset: usize,
        size: usize,
    ) -> GpuResult<()> {
        if self.vk_command_buffer == vk::CommandBuffer::null() {
            self.init_command_buffer()?;
        }

        if src.size() < src_offset + size || dst.size() < dst_offset + size {
            return Err(GpuError::OutOfBounds(
                "Buffer copy out of bounds".to_string(),
            ));
        }

        let buffer_copy = vk::BufferCopy::default()
            .src_offset(src_offset as vk::DeviceSize)
            .dst_offset(dst_offset as vk::DeviceSize)
            .size(size as vk::DeviceSize);
        unsafe {
            self.device.vk_device().cmd_copy_buffer(
                self.vk_command_buffer,
                src.vk_buffer(),
                dst.vk_buffer(),
                &[buffer_copy],
            );
        }

        // Add resources to the list to keep them alive.
        self.resources.push(src);
        self.resources.push(dst);
        Ok(())
    }

    /// Record a command to clear the buffer with zeros.
    /// It runs on the GPU after the `run` call.
    pub fn clear_buffer(&mut self, buffer: Arc<Buffer>) -> GpuResult<()> {
        if buffer.size() % std::mem::size_of::<u32>() != 0 {
            return Err(GpuError::OutOfBounds(
                "Buffer size must be a multiple of `uint32` size to clear it".to_string(),
            ));
        }

        if self.vk_command_buffer == vk::CommandBuffer::null() {
            self.init_command_buffer()?;
        }

        unsafe {
            self.device.vk_device().cmd_fill_buffer(
                self.vk_command_buffer,
                buffer.vk_buffer(),
                0,
                buffer.size() as vk::DeviceSize,
                0,
            );
        }

        // Add the resource to the list to keep it alive.
        self.resources.push(buffer);
        Ok(())
    }

    /// Run the recorded commands on the GPU.
    /// Warning: the order of recorded commands is not guaranteed. Don't rely on it.
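    ///
    /// A sketch of enforcing ordering between two dependent commands
    /// (hypothetical buffers `a`, `b`, `c`; `timeout` is a placeholder):
    /// run and wait for the first command before recording the next one.
    ///
    /// ```ignore
    /// context.copy_gpu_buffer(a, b.clone(), 0, 0, size)?;
    /// context.run()?;
    /// context.wait_finish(timeout)?;
    /// // `b` now holds the data, so the next copy can safely read it.
    /// context.copy_gpu_buffer(b, c, 0, 0, size)?;
    /// context.run()?;
    /// context.wait_finish(timeout)?;
    /// ```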
    pub fn run(&mut self) -> GpuResult<()> {
        if self.vk_command_buffer == vk::CommandBuffer::null() {
            // Nothing to run.
            return Ok(());
        }

        // Finish recording the command buffer.
        let end_record_result = unsafe {
            self.device
                .vk_device()
                .end_command_buffer(self.vk_command_buffer)
        };

        // If finishing the recording failed, destroy the command buffer and return the error.
        if let Err(e) = end_record_result {
            self.destroy_command_buffer();
            return Err(GpuError::from(e));
        }

        // Reset the fence to the unsignaled state.
        let fence_reset_result = unsafe { self.device.vk_device().reset_fences(&[self.vk_fence]) };
        if let Err(e) = fence_reset_result {
            self.destroy_command_buffer();
            return Err(GpuError::from(e));
        }

        // Start execution of the recorded commands.
        let submit_buffers = [self.vk_command_buffer];
        let submit_info = vec![vk::SubmitInfo::default().command_buffers(&submit_buffers)];
        let submit_result = unsafe {
            self.device
                .vk_device()
                .queue_submit(self.vk_queue, &submit_info, self.vk_fence)
        };
        if let Err(e) = submit_result {
            // If the submit failed, destroy the command buffer and return the error.
            // This is important to avoid waiting on the fence for a command buffer
            // that never started.
            self.destroy_command_buffer();
            return Err(GpuError::from(e));
        }

        Ok(())
    }

    /// Wait for GPU execution to finish.
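    ///
    /// A sketch of handling the timeout case (placeholder durations). On
    /// `GpuError::Timeout` the command buffer is kept alive, so calling
    /// `wait_finish` again is valid:
    ///
    /// ```ignore
    /// match context.wait_finish(std::time::Duration::from_secs(1)) {
    ///     Err(GpuError::Timeout) => {
    ///         // Still running; wait once more with a longer timeout.
    ///         context.wait_finish(std::time::Duration::from_secs(60))?;
    ///     }
    ///     other => other?,
    /// }
    /// ```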
    pub fn wait_finish(&mut self, timeout: std::time::Duration) -> GpuResult<()> {
        if self.vk_command_buffer == vk::CommandBuffer::null() {
            // Nothing to wait for.
            return Ok(());
        }

        // Get the current status of the fence.
        let fence_status = unsafe {
            self.device
                .vk_device()
                .get_fence_status(self.vk_fence)
                .map_err(GpuError::from)
        };

        match fence_status {
            Ok(true) => {
                // GPU execution has already finished; clear the command buffer and return.
                self.destroy_command_buffer();
                Ok(())
            }
            Ok(false) => {
                // The GPU is still processing. Wait for the signal with a timeout.
                let wait_result = unsafe {
                    self.device
                        .vk_device()
                        .wait_for_fences(&[self.vk_fence], true, timeout.as_nanos() as u64)
                        .map_err(GpuError::from)
                };
                if matches!(wait_result, Err(GpuError::Timeout)) {
                    // On timeout, don't clear the command buffer, just return a timeout error.
                    Err(GpuError::Timeout)
                } else {
                    // For success or any non-timeout error, clear the command buffer.
                    self.destroy_command_buffer();
                    wait_result
                }
            }
            Err(e) => {
                // By the Vulkan specification, an error while getting the fence status
                // may happen in special cases like a lost hardware device.
                // In these cases we don't care about the status of the GPU execution
                // and just clear the resources.
                self.destroy_command_buffer();
                Err(e)
            }
        }
    }

    fn init_command_buffer(&mut self) -> GpuResult<()> {
        if self.vk_command_buffer != vk::CommandBuffer::null() {
            return Err(GpuError::Other(
                "Vulkan command buffer was already created".to_string(),
            ));
        }

        // Allocate a new command buffer from the pool.
        let command_buffer_allocate_info = vk::CommandBufferAllocateInfo::default()
            .command_pool(self.vk_command_pool)
            .level(vk::CommandBufferLevel::PRIMARY)
            .command_buffer_count(1);
        self.vk_command_buffer = unsafe {
            self.device
                .vk_device()
                .allocate_command_buffers(&command_buffer_allocate_info)?[0]
        };

        let command_buffer_begin_info =
            vk::CommandBufferBeginInfo::default().flags(vk::CommandBufferUsageFlags::default());
        //.inheritance_info(..);
        let begin_result = unsafe {
            self.device
                .vk_device()
                .begin_command_buffer(self.vk_command_buffer, &command_buffer_begin_info)
        };

        // If starting the recording failed, destroy the created command buffer and return the error.
        if let Err(e) = begin_result {
            self.destroy_command_buffer();
            return Err(GpuError::from(e));
        }

        Ok(())
    }

    fn destroy_command_buffer(&mut self) {
        if self.vk_command_buffer != vk::CommandBuffer::null() {
            unsafe {
                self.device
                    .vk_device()
                    .free_command_buffers(self.vk_command_pool, &[self.vk_command_buffer]);
            }
            self.vk_command_buffer = vk::CommandBuffer::null();
        }
        self.resources.clear();
    }
}

impl Drop for Context {
    fn drop(&mut self) {
        let wait_result = self.wait_finish(DROP_TIMEOUT);
        match wait_result {
            Err(GpuError::Timeout) => {
                // The timeout was reached, so the resources are still in use.
                // The Vulkan API cannot stop GPU execution.
                // This situation may appear if a shader has an infinite loop, etc.
                // There is no good way to handle this error,
                // so just log it and skip resource deallocation.
                // This approach may leak memory and leave GPU kernels running,
                // but it's better than a potential segfault.
                log::error!("Failed to wait for GPU context to finish");

                // The error was logged; leak the resources intentionally to keep
                // the still-running GPU work safe.
                let resources = self.resources.clone();
                self.resources.clear();
                for resource in resources.into_iter() {
                    std::mem::forget(resource);
                }
            }
            // If there was no timeout, we can safely deallocate resources.
            wait_result => {
                wait_result.unwrap_or_else(|e| {
                    // We cannot return an error from the Drop trait, so log it instead.
                    log::error!("Error while clearing GPU context: {:?}", e)
                });

                // If the command buffer was not destroyed yet, destroy it.
                // This situation may appear if `wait_finish` returned an error.
                self.destroy_command_buffer();

                // Destroy the fence.
                if self.vk_fence != vk::Fence::null() {
                    unsafe {
                        self.device
                            .vk_device()
                            .destroy_fence(self.vk_fence, self.device.cpu_allocation_callbacks());
                    }
                    self.vk_fence = vk::Fence::null();
                }

                // Destroy the command pool.
                if self.vk_command_pool != vk::CommandPool::null() {
                    unsafe {
                        self.device.vk_device().destroy_command_pool(
                            self.vk_command_pool,
                            self.device.cpu_allocation_callbacks(),
                        );
                    }
                    self.vk_command_pool = vk::CommandPool::null();
                }
            }
        }
    }
}
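
// A minimal, self-contained sketch (not part of the original module) of the
// intentional-leak pattern used in `Drop` above: `std::mem::forget` skips the
// `Arc` destructor, so the refcount never reaches zero and the backing
// allocation stays alive for the process lifetime.
#[cfg(test)]
mod drop_leak_sketch {
    use std::sync::Arc;

    #[test]
    fn forget_keeps_allocation_alive() {
        // Stand-in for a GPU resource: any `Arc`-backed allocation.
        let resource = Arc::new(vec![0u8; 16]);
        let alias = Arc::clone(&resource);

        // Skip the destructor: the strong count stays at 2 forever,
        // so the allocation is never freed while the process runs.
        std::mem::forget(resource);
        assert_eq!(Arc::strong_count(&alias), 2);
    }
}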