use std::sync::Arc;
use ash::vk;
use crate::*;
/// Timeout to wait for GPU execution in drop function.
static DROP_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30 * 60);
/// GPU execution context.
/// It records commands and runs them on the GPU.
/// It keeps track of the resources used by the recorded commands.
/// Warnings!
/// Context is not thread safe.
/// Execution order is not guaranteed. Don't rely on it.
/// If you need to run commands in a specific order, use the `wait_finish` method
/// and start the next command after the previous one has finished.
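/// A minimal usage sketch (assumes `device`, `pipeline` and `descriptor_set` are
/// created elsewhere; the variable names are illustrative):
/// ```ignore
/// let mut context = Context::new(device.clone())?;
/// context.bind_pipeline(pipeline.clone(), &[descriptor_set.clone()])?;
/// context.dispatch(work_groups_x, 1, 1)?;
/// context.run()?;
/// context.wait_finish(std::time::Duration::from_secs(60))?;
/// ```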
pub struct Context {
// Which device to execute on.
device: Arc<Device>,
// GPU queue to submit recorded commands to.
vk_queue: vk::Queue,
// Command pool used to allocate the command buffer.
vk_command_pool: vk::CommandPool,
// Command buffer is used to record commands to execute.
vk_command_buffer: vk::CommandBuffer,
// Synchronization fence to wait for GPU execution.
vk_fence: vk::Fence,
// Resources referenced by recorded commands, kept alive until execution finishes.
resources: Vec<Arc<dyn Resource>>,
}
impl Context {
pub fn new(device: Arc<Device>) -> GpuResult<Self> {
// Get GPU execution queue from device.
let queue = device.compute_queue();
// Create command pool.
let command_pool_create_info = vk::CommandPoolCreateInfo::default()
.queue_family_index(queue.vk_queue_family_index as u32)
.flags(vk::CommandPoolCreateFlags::default());
let vk_command_pool = unsafe {
device
.vk_device()
.create_command_pool(&command_pool_create_info, device.cpu_allocation_callbacks())?
};
// Create a fence to wait for GPU execution.
// The fence is created as signaled because it is reset before each submission.
let fence_create_info =
vk::FenceCreateInfo::default().flags(vk::FenceCreateFlags::SIGNALED);
let vk_fence = unsafe {
device
.vk_device()
.create_fence(&fence_create_info, device.cpu_allocation_callbacks())
};
let vk_fence = match vk_fence {
Ok(fence) => fence,
Err(e) => {
// If fence creation failed, destroy created command pool and return error.
unsafe {
device
.vk_device()
.destroy_command_pool(vk_command_pool, device.cpu_allocation_callbacks());
}
return Err(GpuError::from(e));
}
};
let mut context = Self {
vk_queue: queue.vk_queue,
device,
vk_command_pool,
vk_command_buffer: vk::CommandBuffer::null(),
vk_fence,
resources: Vec::new(),
};
context.init_command_buffer()?;
Ok(context)
}
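/// Record a dispatch of `x * y * z` compute work groups.
/// The command is executed on the GPU after the `run` call.
/// Returns an error if any dimension exceeds the device's maximal work group count.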
pub fn dispatch(&mut self, x: usize, y: usize, z: usize) -> GpuResult<()> {
if self.vk_command_buffer == vk::CommandBuffer::null() {
self.init_command_buffer()?;
}
let max_compute_work_group_count = self.device.max_compute_work_group_count();
if x > max_compute_work_group_count[0]
|| y > max_compute_work_group_count[1]
|| z > max_compute_work_group_count[2]
{
return Err(GpuError::OutOfBounds(
"Dispatch work group size is out of bounds".to_string(),
));
}
unsafe {
self.device.vk_device().cmd_dispatch(
self.vk_command_buffer,
x as u32,
y as u32,
z as u32,
);
}
Ok(())
}
/// Bind pipeline to the context.
/// This selects which shader will run and which resources are bound to it.
/// It records a command that is executed on the GPU after the `run` call.
pub fn bind_pipeline(
&mut self,
pipeline: Arc<Pipeline>,
descriptor_sets: &[Arc<DescriptorSet>],
) -> GpuResult<()> {
if self.vk_command_buffer == vk::CommandBuffer::null() {
self.init_command_buffer()?;
}
unsafe {
self.device.vk_device().cmd_bind_pipeline(
self.vk_command_buffer,
vk::PipelineBindPoint::COMPUTE,
pipeline.vk_pipeline(),
);
}
unsafe {
if !descriptor_sets.is_empty() {
let vk_descriptor_sets: Vec<_> = descriptor_sets
.iter()
.map(|set| set.as_ref().vk_descriptor_set())
.collect();
self.device.vk_device().cmd_bind_descriptor_sets(
self.vk_command_buffer,
vk::PipelineBindPoint::COMPUTE,
pipeline.vk_pipeline_layout(),
0,
&vk_descriptor_sets,
&[],
);
}
}
// Add resources to the list to keep them alive.
self.resources.extend(
descriptor_sets
.iter()
.map(|r| r.clone() as Arc<dyn Resource>),
);
self.resources.push(pipeline);
Ok(())
}
/// Copy data from one buffer to another. It records a command that is executed on the GPU after the `run` call.
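/// A minimal sketch (assumes `src`, `dst`, `size` and `timeout` are defined by the caller):
/// ```ignore
/// // Copy `size` bytes from the start of `src` to the start of `dst`.
/// context.copy_gpu_buffer(src.clone(), dst.clone(), 0, 0, size)?;
/// context.run()?;
/// context.wait_finish(timeout)?;
/// ```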
pub fn copy_gpu_buffer(
&mut self,
src: Arc<Buffer>,
dst: Arc<Buffer>,
src_offset: usize,
dst_offset: usize,
size: usize,
) -> GpuResult<()> {
if self.vk_command_buffer == vk::CommandBuffer::null() {
self.init_command_buffer()?;
}
if src.size() < src_offset + size || dst.size() < dst_offset + size {
return Err(GpuError::OutOfBounds(
"Buffer copy out of bounds".to_string(),
));
}
let buffer_copy = vk::BufferCopy::default()
.src_offset(src_offset as vk::DeviceSize)
.dst_offset(dst_offset as vk::DeviceSize)
.size(size as vk::DeviceSize);
unsafe {
self.device.vk_device().cmd_copy_buffer(
self.vk_command_buffer,
src.vk_buffer(),
dst.vk_buffer(),
&[buffer_copy],
);
}
// Add resources to the list to keep them alive.
self.resources.push(src);
self.resources.push(dst);
Ok(())
}
/// Clear the buffer with zeros. It records a command that is executed on the GPU after the `run` call.
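/// A minimal sketch (the buffer size must be a multiple of 4 bytes; `timeout` is illustrative):
/// ```ignore
/// context.clear_buffer(buffer.clone())?;
/// context.run()?;
/// context.wait_finish(timeout)?;
/// ```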
pub fn clear_buffer(&mut self, buffer: Arc<Buffer>) -> GpuResult<()> {
if buffer.size() % std::mem::size_of::<u32>() != 0 {
return Err(GpuError::OutOfBounds(
"Buffer size must be a multiple of `uint32` size to clear it".to_string(),
));
}
if self.vk_command_buffer == vk::CommandBuffer::null() {
self.init_command_buffer()?;
}
unsafe {
self.device.vk_device().cmd_fill_buffer(
self.vk_command_buffer,
buffer.vk_buffer(),
0,
buffer.size() as vk::DeviceSize,
0,
);
}
// Add resources to the list to keep them alive.
self.resources.push(buffer);
Ok(())
}
/// Run the recorded commands on GPU.
/// Warning: the execution order of the recorded commands is not guaranteed. Don't rely on it.
pub fn run(&mut self) -> GpuResult<()> {
if self.vk_command_buffer == vk::CommandBuffer::null() {
// Nothing to run.
return Ok(());
}
// Finish recording of command buffer.
let end_record_result = unsafe {
self.device
.vk_device()
.end_command_buffer(self.vk_command_buffer)
};
// If command buffer recording failed, destroy created command buffer and return error.
if let Err(e) = end_record_result {
self.destroy_command_buffer();
return Err(GpuError::from(e));
}
// Reset fence to unsignaled state.
let fence_reset_result = unsafe { self.device.vk_device().reset_fences(&[self.vk_fence]) };
if let Err(e) = fence_reset_result {
self.destroy_command_buffer();
return Err(GpuError::from(e));
}
// Start execution of recorded commands.
let submit_buffers = [self.vk_command_buffer];
let submit_info = vec![vk::SubmitInfo::default().command_buffers(&submit_buffers)];
let submit_result = unsafe {
self.device
.vk_device()
.queue_submit(self.vk_queue, &submit_info, self.vk_fence)
};
if let Err(e) = submit_result {
// If submit failed, destroy the created command buffer and return the error.
// This is important to avoid waiting on the fence of a command buffer that was never submitted.
self.destroy_command_buffer();
return Err(GpuError::from(e));
}
Ok(())
}
/// Wait for GPU execution to finish.
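/// Because the execution order of recorded commands is not guaranteed, dependent
/// commands can be sequenced by waiting between submissions
/// (a sketch; `a`, `b`, `c`, `size` and `timeout` are illustrative):
/// ```ignore
/// context.copy_gpu_buffer(a.clone(), b.clone(), 0, 0, size)?;
/// context.run()?;
/// context.wait_finish(timeout)?;
/// // `b` now holds the data, so it is safe to use it as a source.
/// context.copy_gpu_buffer(b.clone(), c.clone(), 0, 0, size)?;
/// context.run()?;
/// context.wait_finish(timeout)?;
/// ```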
pub fn wait_finish(&mut self, timeout: std::time::Duration) -> GpuResult<()> {
if self.vk_command_buffer == vk::CommandBuffer::null() {
// Nothing to wait for.
return Ok(());
}
// Get the current status of fence.
let fence_status = unsafe {
self.device
.vk_device()
.get_fence_status(self.vk_fence)
.map_err(GpuError::from)
};
match fence_status {
Ok(true) => {
// GPU execution finished already, clear command buffer and return.
self.destroy_command_buffer();
Ok(())
}
Ok(false) => {
// GPU is processing. Wait for signal with timeout.
let wait_result = unsafe {
self.device
.vk_device()
.wait_for_fences(&[self.vk_fence], true, timeout.as_nanos() as u64)
.map_err(GpuError::from)
};
if matches!(wait_result, Err(GpuError::Timeout)) {
// If we detect timeout, don't clear command buffer, just return a timeout error.
Err(GpuError::Timeout)
} else {
// Otherwise (finished, or a non-timeout error), clear the command buffer and return the result.
self.destroy_command_buffer();
wait_result
}
}
Err(e) => {
// Per the Vulkan specification, an error while getting the fence status
// may only happen in special cases like hardware device loss.
// In these cases we don't care about the status of the GPU execution and just clear resources.
self.destroy_command_buffer();
Err(e)
}
}
}
fn init_command_buffer(&mut self) -> GpuResult<()> {
if self.vk_command_buffer != vk::CommandBuffer::null() {
return Err(GpuError::Other(
"Vulkan command buffer was already created".to_string(),
));
}
// Create new command buffer from pool.
let command_buffer_allocate_info = vk::CommandBufferAllocateInfo::default()
.command_pool(self.vk_command_pool)
.level(vk::CommandBufferLevel::PRIMARY)
.command_buffer_count(1);
self.vk_command_buffer = unsafe {
self.device
.vk_device()
.allocate_command_buffers(&command_buffer_allocate_info)?[0]
};
let command_buffer_begin_info =
vk::CommandBufferBeginInfo::default().flags(vk::CommandBufferUsageFlags::default());
//.inheritance_info(..);
let begin_result = unsafe {
self.device
.vk_device()
.begin_command_buffer(self.vk_command_buffer, &command_buffer_begin_info)
};
// If starting command buffer recording failed, destroy the created command buffer and return the error.
if let Err(e) = begin_result {
self.destroy_command_buffer();
return Err(GpuError::from(e));
}
Ok(())
}
fn destroy_command_buffer(&mut self) {
if self.vk_command_buffer != vk::CommandBuffer::null() {
unsafe {
self.device
.vk_device()
.free_command_buffers(self.vk_command_pool, &[self.vk_command_buffer]);
}
self.vk_command_buffer = vk::CommandBuffer::null();
}
self.resources.clear();
}
}
impl Drop for Context {
fn drop(&mut self) {
let wait_result = self.wait_finish(DROP_TIMEOUT);
match wait_result {
Err(GpuError::Timeout) => {
// Timeout reached, resources are still in use.
// The Vulkan API cannot stop GPU execution.
// This situation may appear if a shader has an infinite loop, etc.
// There is no good way to handle this error.
// So just log it and skip resource deallocation.
// This approach may leak memory and leave GPU kernels running,
// but it's better than a potential segfault.
log::error!("Failed to wait for GPU context to finish");
// The error was logged; intentionally leak the resources while the GPU keeps running.
let resources = self.resources.clone();
self.resources.clear();
for resource in resources.into_iter() {
// Intentionally leak the Arc so its destructor never runs.
std::mem::forget(resource);
}
}
// If there is no timeout, we can safely deallocate resources.
wait_result => {
wait_result.unwrap_or_else(|e|
// Cannot return an error from the Drop trait.
// Log it instead.
log::error!("Error while clearing GPU context: {:?}", e));
// If command buffer was not destroyed, destroy it.
// This situation may appear if `wait_finish` returned an error.
self.destroy_command_buffer();
// Destroy fence.
if self.vk_fence != vk::Fence::null() {
unsafe {
self.device
.vk_device()
.destroy_fence(self.vk_fence, self.device.cpu_allocation_callbacks());
}
self.vk_fence = vk::Fence::null();
}
// Destroy command pool.
if self.vk_command_pool != vk::CommandPool::null() {
unsafe {
self.device.vk_device().destroy_command_pool(
self.vk_command_pool,
self.device.cpu_allocation_callbacks(),
);
}
self.vk_command_pool = vk::CommandPool::null();
}
}
}
}
}