35#ifndef AcceleratorBuffer_h
36#define AcceleratorBuffer_h
101 using value_type = T;
102 using iterator_category = std::forward_iterator_tag;
103 using difference_type = std::ptrdiff_t;
105 using reference = T&;
113 {
return m_buf[m_index]; }
115 size_t GetIndex()
const
122 return (m_index != it.m_index);
138{
return a.GetIndex() - b.GetIndex(); }
177 MEM_ATTRIB_CPU_SIDE = 0x1,
178 MEM_ATTRIB_GPU_SIDE = 0x2,
181 MEM_ATTRIB_CPU_REACHABLE = 0x4,
182 MEM_ATTRIB_GPU_REACHABLE = 0x8,
185 MEM_ATTRIB_CPU_FAST = 0x10,
186 MEM_ATTRIB_GPU_FAST = 0x20
199 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE,
203 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST,
208 MEM_TYPE_CPU_DMA_CAPABLE =
209 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST | MEM_ATTRIB_GPU_REACHABLE,
213 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST,
218 MEM_TYPE_GPU_DMA_CAPABLE =
219 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST | MEM_ATTRIB_CPU_REACHABLE
228 {
return (mt & MEM_ATTRIB_CPU_REACHABLE) != 0; }
234 {
return (mt & MEM_ATTRIB_GPU_REACHABLE) != 0; }
240 {
return (mt & MEM_ATTRIB_CPU_FAST) != 0; }
246 {
return (mt & MEM_ATTRIB_GPU_FAST) != 0; }
448 return vk::DescriptorBufferInfo(
514 if(!std::is_trivially_copyable<T>::value)
516 for(
size_t i=0; i<
m_size; i++)
533 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
549 void Reallocate(
size_t size)
565 size = (size | 3) + 1;
590 AllocateCpuBuffer(size);
597 if(!std::is_trivially_copyable<T>::value)
599 for(
size_t i=0; i<
m_size; i++)
608 #pragma GCC diagnostic push
609 #pragma GCC diagnostic ignored "-Wclass-memaccess"
613 #pragma GCC diagnostic pop
621 FreeCpuPointer(pOld, pOldPin, type,
m_capacity);
627 AllocateCpuBuffer(size);
656 if(AllocateGpuBuffer(size))
662 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
685 AllocateCpuBuffer(size);
697 if(AllocateGpuBuffer(size))
712 AllocateCpuBuffer(size);
743 const T& operator[](
size_t i)
const
746 T& operator[](
size_t i)
793 if(!std::is_trivially_copyable<T>::value)
795 for(
size_t i=0; i<cursize; i++)
828 if(!std::is_trivially_copyable<T>::value)
830 for(
size_t i=0; i<
m_size-1; i++)
864 if(reallocateImmediately && (
m_size != 0))
877 if(!std::is_trivially_copyable<T>::value)
882 if(reallocateImmediately && (
m_size != 0))
1003 assert(std::is_trivially_copyable<T>::value);
1009 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
1024 assert(std::is_trivially_copyable<T>::value);
1030 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
1048 assert(std::is_trivially_copyable<T>::value);
1051 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
1055 cmdBuf.pipelineBarrier(
1056 vk::PipelineStageFlagBits::eTransfer,
1057 vk::PipelineStageFlagBits::eComputeShader,
1060 vk::AccessFlagBits::eTransferWrite,
1061 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1073 cmdBuf.pipelineBarrier(
1074 vk::PipelineStageFlagBits::eTransfer,
1075 vk::PipelineStageFlagBits::eComputeShader,
1078 vk::AccessFlagBits::eTransferWrite,
1079 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1140 LogWarning(
"Freeing a GPU buffer without any CPU backing, may cause data loss\n");
1161 void AllocateCpuBuffer(
size_t size)
1164 LogFatal(
"AllocateCpuBuffer with size zero (invalid)\n");
1170 vk::BufferCreateInfo bufinfo(
1173 vk::BufferUsageFlagBits::eTransferSrc |
1174 vk::BufferUsageFlagBits::eTransferDst |
1175 vk::BufferUsageFlagBits::eStorageBuffer);
1222 char fname[] =
"/tmp/glscopeclient-tmpXXXXXX";
1226 LogError(
"Failed to create temporary file %s\n", fname);
1231 size_t bytesize = size *
sizeof(T);
1234 LogError(
"Failed to resize temporary file %s\n", fname);
1239 m_cpuPtr =
reinterpret_cast<T*
>(mmap(
1242 PROT_READ | PROT_WRITE,
1248 LogError(
"Failed to map temporary file %s\n", fname);
1249 perror(
"mmap failed: ");
1255 if(0 != unlink(fname))
1256 LogWarning(
"Failed to unlink temporary file %s, file will remain after application terminates\n", fname);
1263 if(!std::is_trivially_copyable<T>::value)
1265 for(
size_t i=0; i<size; i++)
1278 void FreeCpuPointer(T* ptr,
MemoryType type,
size_t size)
1281 if(!std::is_trivially_copyable<T>::value)
1283 for(
size_t i=0; i<size; i++)
1293 case MEM_TYPE_CPU_DMA_CAPABLE:
1294 LogFatal(
"FreeCpuPointer for MEM_TYPE_CPU_DMA_CAPABLE requires the vk::raii::DeviceMemory\n");
1297 case MEM_TYPE_CPU_PAGED:
1299 munmap(ptr, size *
sizeof(T));
1305 case MEM_TYPE_CPU_ONLY:
1310 LogFatal(
"FreeCpuPointer: invalid type %x\n", type);
1322 void FreeCpuPointer(T* ptr, std::unique_ptr<vk::raii::DeviceMemory>& buf,
MemoryType type,
size_t size)
1326 case MEM_TYPE_CPU_DMA_CAPABLE:
1331 FreeCpuPointer(ptr, type, size);
1341 bool AllocateGpuBuffer(
size_t size)
1343 assert(std::is_trivially_copyable<T>::value);
1346 vk::BufferCreateInfo bufinfo(
1349 vk::BufferUsageFlagBits::eTransferSrc |
1350 vk::BufferUsageFlagBits::eTransferDst |
1351 vk::BufferUsageFlagBits::eStorageBuffer);
1367 catch(vk::OutOfDeviceMemoryError& ex)
1382 catch(vk::OutOfDeviceMemoryError& ex2)
1384 LogDebug(
"Allocation failed again\n");
1393 LogDebug(
"Final retry\n");
1399 catch(vk::OutOfDeviceMemoryError& ex2)
1401 LogDebug(
"Allocation failed again\n");
1410 "Failed to allocate %s of GPU memory despite our best efforts to reclaim space, falling back to CPU-side pinned allocation\n",
1411 Unit(Unit::UNIT_BYTES).PrettyPrint(req.size, 4).c_str());
1437 void UpdateGpuNames()
1439 std::string sname = m_name;
1442 std::string prefix = std::string(
"AcceleratorBuffer.") + sname +
".";
1444 std::string gpuBufName = prefix +
"m_gpuBuffer";
1445 std::string gpuPhysName = prefix +
"m_gpuPhysMem";
1448 vk::DebugUtilsObjectNameInfoEXT(
1449 vk::ObjectType::eBuffer,
1450 reinterpret_cast<uint64_t
>(
static_cast<VkBuffer
>(**
m_gpuBuffer)),
1451 gpuBufName.c_str()));
1454 vk::DebugUtilsObjectNameInfoEXT(
1455 vk::ObjectType::eDeviceMemory,
1456 reinterpret_cast<uint64_t
>(
static_cast<VkDeviceMemory
>(**
m_gpuPhysMem)),
1457 gpuPhysName.c_str()));
1464 void UpdateCpuNames()
1466 std::string sname = m_name;
1469 std::string prefix = std::string(
"AcceleratorBuffer.") + sname +
".";
1471 std::string cpuBufName = prefix +
"m_cpuBuffer";
1472 std::string cpuPhysName = prefix +
"m_cpuPhysMem";
1475 vk::DebugUtilsObjectNameInfoEXT(
1476 vk::ObjectType::eBuffer,
1477 reinterpret_cast<uint64_t
>(
static_cast<VkBuffer
>(**
m_cpuBuffer)),
1478 cpuBufName.c_str()));
1481 vk::DebugUtilsObjectNameInfoEXT(
1482 vk::ObjectType::eDeviceMemory,
1483 reinterpret_cast<uint64_t
>(
static_cast<VkDeviceMemory
>(**
m_cpuPhysMem)),
1484 cpuPhysName.c_str()));
bool(* MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Memory pressure handler type, called when free memory reaches a warning level or a Vulkan allocation ...
Definition: AcceleratorBuffer.h:93
MemoryPressureLevel
Levels of memory pressure.
Definition: AcceleratorBuffer.h:67
@ Hard
A memory allocation has failed and we need to free memory immediately to continue execution.
@ Soft
Free memory has reached a warning threshold.
bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Called when we run low on memory.
Definition: scopehal.cpp:1038
MemoryPressureType
Types of memory pressure.
Definition: AcceleratorBuffer.h:84
@ Host
Pinned CPU-side memory.
std::set< MemoryPressureHandler > g_memoryPressureHandlers
List of handlers for low memory registered by various subsystems.
Definition: scopehal.cpp:147
Declaration of AlignedAllocator.
Declaration of QueueManager and QueueHandle.
Definition: AcceleratorBuffer.h:99
A buffer of memory which may be used by GPU acceleration.
Definition: AcceleratorBuffer.h:158
void CopyToGpu()
Copies the buffer contents from CPU to GPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:1022
std::unique_ptr< vk::raii::Buffer > m_gpuBuffer
Buffer object for GPU-side memory.
Definition: AcceleratorBuffer.h:270
bool empty() const
Returns true if the container is empty.
Definition: AcceleratorBuffer.h:391
T * m_cpuPtr
CPU-side mapped pointer.
Definition: AcceleratorBuffer.h:258
void pop_front()
Removes the first item in the container.
Definition: AcceleratorBuffer.h:814
__attribute__((noinline)) void CopyFrom(const AcceleratorBuffer< T > &rhs)
Copies our content from another AcceleratorBuffer.
Definition: AcceleratorBuffer.h:502
void push_back(const T &value)
Adds a new element to the end of the container, allocating space if needed.
Definition: AcceleratorBuffer.h:752
bool IsCpuBufferStale() const
Returns true if the CPU-side buffer is stale.
Definition: AcceleratorBuffer.h:397
void push_back_nomarkmod(const T &value)
Adds a new element to the end of the container, allocating space if needed but without calling MarkMo...
Definition: AcceleratorBuffer.h:764
vk::DescriptorBufferInfo GetBufferInfo()
Returns a vk::DescriptorBufferInfo suitable for binding this object.
Definition: AcceleratorBuffer.h:446
std::unique_ptr< vk::raii::DeviceMemory > m_cpuPhysMem
CPU-side physical memory.
Definition: AcceleratorBuffer.h:261
void MarkModifiedFromGpu()
Marks the GPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:905
size_t m_size
Size of the memory actually being used.
Definition: AcceleratorBuffer.h:296
bool IsReachableFromCpu(MemoryType mt)
Returns true if the given buffer type can be reached from the CPU.
Definition: AcceleratorBuffer.h:227
bool IsSingleSharedBuffer() const
Returns true if the object contains only a single buffer.
Definition: AcceleratorBuffer.h:421
bool HasCpuBuffer() const
Returns true if there is currently a CPU-side buffer.
Definition: AcceleratorBuffer.h:409
void MarkModifiedFromCpu()
Marks the CPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:894
std::unique_ptr< vk::raii::DeviceMemory > m_gpuPhysMem
GPU-side physical memory.
Definition: AcceleratorBuffer.h:264
void PrepareForCpuAccess()
Prepares the buffer to be accessed from the CPU.
Definition: AcceleratorBuffer.h:919
size_t size() const
Returns the actual size of the container (may be smaller than what was allocated)
Definition: AcceleratorBuffer.h:357
void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the GPU in the future.
Definition: AcceleratorBuffer.h:874
static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer &cmdBuf)
Adds a memory barrier for transferring data from host to device.
Definition: AcceleratorBuffer.h:1071
bool m_gpuPhysMemIsStale
True if m_gpuPhysMem contains stale data (m_cpuPtr has been modified and they point to different memo...
Definition: AcceleratorBuffer.h:279
bool IsReachableFromGpu(MemoryType mt)
Returns true if the given buffer type can be reached from the GPU.
Definition: AcceleratorBuffer.h:233
vk::Buffer GetBuffer()
Returns the preferred buffer for GPU-side access.
Definition: AcceleratorBuffer.h:429
void pop_back()
Removes the last item in the container.
Definition: AcceleratorBuffer.h:774
size_t GetCpuMemoryBytes() const
Returns the total reserved CPU memory, in bytes.
Definition: AcceleratorBuffer.h:369
void resize(size_t size)
Change the usable size of the container.
Definition: AcceleratorBuffer.h:457
size_t capacity() const
Returns the allocated size of the container.
Definition: AcceleratorBuffer.h:363
bool m_buffersAreSame
True if we have only one piece of physical memory accessible from both sides.
Definition: AcceleratorBuffer.h:273
void reserve(size_t size)
Reallocates buffers so that at least size elements of storage are available.
Definition: AcceleratorBuffer.h:484
UsageHint m_gpuAccessHint
Hint about how likely future GPU access is.
Definition: AcceleratorBuffer.h:313
MemoryType m_cpuMemoryType
Type of the CPU-side buffer.
Definition: AcceleratorBuffer.h:249
bool IsFastFromGpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the GPU.
Definition: AcceleratorBuffer.h:245
void PrepareForGpuAccess(bool outputOnly=false)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:941
std::unique_ptr< vk::raii::Buffer > m_cpuBuffer
Buffer object for CPU-side memory.
Definition: AcceleratorBuffer.h:267
MemoryType m_gpuMemoryType
Type of the GPU-side buffer.
Definition: AcceleratorBuffer.h:252
__attribute__((noinline)) void UpdateGpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1436
void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the CPU in the future.
Definition: AcceleratorBuffer.h:860
void FreeCpuBuffer()
Free the CPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1093
void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer &cmdBuf)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:971
__attribute__((noinline)) void UpdateCpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1463
bool HasGpuBuffer() const
Returns true if there is currently a GPU-side buffer.
Definition: AcceleratorBuffer.h:415
void push_front(const T &value)
Inserts a new item at the beginning of the container. This is inefficient due to copying.
Definition: AcceleratorBuffer.h:785
void CopyToGpuNonblocking(vk::raii::CommandBuffer &cmdBuf)
Copy the buffer contents from CPU to GPU without blocking on the CPU.
Definition: AcceleratorBuffer.h:1046
size_t GetGpuMemoryBytes() const
Returns the total reserved GPU memory, in bytes.
Definition: AcceleratorBuffer.h:380
int m_tempFileHandle
File handle used for MEM_TYPE_CPU_PAGED.
Definition: AcceleratorBuffer.h:283
void shrink_to_fit()
Frees unused memory so that m_size == m_capacity.
Definition: AcceleratorBuffer.h:493
size_t m_capacity
Size of the allocated memory space (may be larger than m_size)
Definition: AcceleratorBuffer.h:293
void FreeGpuBuffer(bool dataLossOK=false)
Free the GPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1131
bool IsFastFromCpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the CPU.
Definition: AcceleratorBuffer.h:239
void CopyToCpu()
Copies the buffer contents from GPU to CPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:1001
__attribute__((noinline)) void FreeCpuPointer(T *ptr
Frees a CPU-side buffer.
AcceleratorBuffer(const std::string &name="")
Creates a new AcceleratorBuffer with no content.
Definition: AcceleratorBuffer.h:322
MemoryAttributes
Attributes that a memory buffer can have.
Definition: AcceleratorBuffer.h:175
bool m_cpuPhysMemIsStale
True if m_cpuPtr contains stale data (m_gpuPhysMem has been modified and they point to different memo...
Definition: AcceleratorBuffer.h:276
__attribute__((noinline)) void AllocateCpuBuffer(size_t size)
Allocates a buffer for CPU access.
Definition: AcceleratorBuffer.h:1160
__attribute__((noinline)) void Reallocate(size_t size)
Reallocates the buffer so that it contains exactly size elements.
Definition: AcceleratorBuffer.h:548
UsageHint m_cpuAccessHint
Hint about how likely future CPU access is.
Definition: AcceleratorBuffer.h:310
MemoryType
Types of memory buffer.
Definition: AcceleratorBuffer.h:193
T * GetCpuPointer()
Gets a pointer to the CPU-side buffer.
Definition: AcceleratorBuffer.h:440
void SetName(std::string name)
Sets the debug name for this buffer.
Definition: AcceleratorBuffer.h:1497
void clear()
Resize the container to be empty (but don't free memory)
Definition: AcceleratorBuffer.h:478
bool IsGpuBufferStale() const
Returns true if the GPU-side buffer is stale.
Definition: AcceleratorBuffer.h:403
void deallocate(T *const p, const size_t unused) const
Free a block of memory.
Definition: AlignedAllocator.h:194
T * allocate(size_t n) const
Allocate a block of memory.
Definition: AlignedAllocator.h:159
A unit of measurement, plus conversion to pretty-printed output.
Definition: Unit.h:57
uint32_t g_vkPinnedMemoryType
Vulkan memory type for CPU-based memory that is also GPU-readable.
Definition: VulkanInit.cpp:118
std::mutex g_vkTransferMutex
Mutex for interlocking access to g_vkTransferCommandBuffer and g_vkTransferCommandPool.
Definition: VulkanInit.cpp:112
bool g_hasDebugUtils
Indicates whether the VK_EXT_debug_utils extension is available.
Definition: VulkanInit.cpp:195
std::shared_ptr< vk::raii::Device > g_vkComputeDevice
The Vulkan device selected for compute operations (may or may not be the same device as rendering)
Definition: VulkanInit.cpp:71
std::unique_ptr< vk::raii::CommandBuffer > g_vkTransferCommandBuffer
Command buffer for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:89
std::shared_ptr< QueueHandle > g_vkTransferQueue
Queue for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:98
uint32_t g_vkLocalMemoryType
Vulkan memory type for GPU-based memory (generally not CPU-readable, except on unified memory systems...
Definition: VulkanInit.cpp:124
bool g_vulkanDeviceHasUnifiedMemory
Indicates whether the Vulkan device is unified memory.
Definition: VulkanInit.cpp:220