35#ifndef AcceleratorBuffer_h
36#define AcceleratorBuffer_h
101 using value_type = T;
102 using iterator_category = std::forward_iterator_tag;
103 using difference_type = std::ptrdiff_t;
105 using reference = T&;
113 {
return m_buf[m_index]; }
115 size_t GetIndex()
const
122 return (m_index != it.m_index);
138{
return a.GetIndex() - b.GetIndex(); }
177 MEM_ATTRIB_CPU_SIDE = 0x1,
178 MEM_ATTRIB_GPU_SIDE = 0x2,
181 MEM_ATTRIB_CPU_REACHABLE = 0x4,
182 MEM_ATTRIB_GPU_REACHABLE = 0x8,
185 MEM_ATTRIB_CPU_FAST = 0x10,
186 MEM_ATTRIB_GPU_FAST = 0x20
199 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE,
203 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST,
208 MEM_TYPE_CPU_DMA_CAPABLE =
209 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST | MEM_ATTRIB_GPU_REACHABLE,
213 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST,
218 MEM_TYPE_GPU_DMA_CAPABLE =
219 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST | MEM_ATTRIB_CPU_REACHABLE
228 {
return (mt & MEM_ATTRIB_CPU_REACHABLE) != 0; }
234 {
return (mt & MEM_ATTRIB_GPU_REACHABLE) != 0; }
240 {
return (mt & MEM_ATTRIB_CPU_FAST) != 0; }
246 {
return (mt & MEM_ATTRIB_GPU_FAST) != 0; }
448 return vk::DescriptorBufferInfo(
514 if(!std::is_trivially_copyable<T>::value)
516 for(
size_t i=0; i<
m_size; i++)
533 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
549 void Reallocate(
size_t size)
565 size = (size | 3) + 1;
590 AllocateCpuBuffer(size);
597 if(!std::is_trivially_copyable<T>::value)
599 for(
size_t i=0; i<
m_size; i++)
608 #pragma GCC diagnostic push
609 #pragma GCC diagnostic ignored "-Wclass-memaccess"
613 #pragma GCC diagnostic pop
621 FreeCpuPointer(pOld, pOldPin, type,
m_capacity);
627 AllocateCpuBuffer(size);
655 AllocateGpuBuffer(size);
661 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
675 AllocateGpuBuffer(size);
711 const T& operator[](
size_t i)
const
714 T& operator[](
size_t i)
751 if(!std::is_trivially_copyable<T>::value)
753 for(
size_t i=0; i<cursize; i++)
786 if(!std::is_trivially_copyable<T>::value)
788 for(
size_t i=0; i<
m_size-1; i++)
822 if(reallocateImmediately && (
m_size != 0))
835 if(!std::is_trivially_copyable<T>::value)
840 if(reallocateImmediately && (
m_size != 0))
955 assert(std::is_trivially_copyable<T>::value);
961 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
976 assert(std::is_trivially_copyable<T>::value);
982 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
1000 assert(std::is_trivially_copyable<T>::value);
1003 vk::BufferCopy region(0, 0,
m_size *
sizeof(T));
1007 cmdBuf.pipelineBarrier(
1008 vk::PipelineStageFlagBits::eTransfer,
1009 vk::PipelineStageFlagBits::eComputeShader,
1012 vk::AccessFlagBits::eTransferWrite,
1013 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1025 cmdBuf.pipelineBarrier(
1026 vk::PipelineStageFlagBits::eTransfer,
1027 vk::PipelineStageFlagBits::eComputeShader,
1030 vk::AccessFlagBits::eTransferWrite,
1031 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1092 LogWarning(
"Freeing a GPU buffer without any CPU backing, may cause data loss\n");
1113 void AllocateCpuBuffer(
size_t size)
1116 LogFatal(
"AllocateCpuBuffer with size zero (invalid)\n");
1122 vk::BufferCreateInfo bufinfo(
1125 vk::BufferUsageFlagBits::eTransferSrc |
1126 vk::BufferUsageFlagBits::eTransferDst |
1127 vk::BufferUsageFlagBits::eStorageBuffer);
1174 char fname[] =
"/tmp/glscopeclient-tmpXXXXXX";
1178 LogError(
"Failed to create temporary file %s\n", fname);
1183 size_t bytesize = size *
sizeof(T);
1186 LogError(
"Failed to resize temporary file %s\n", fname);
1191 m_cpuPtr =
reinterpret_cast<T*
>(mmap(
1194 PROT_READ | PROT_WRITE,
1200 LogError(
"Failed to map temporary file %s\n", fname);
1201 perror(
"mmap failed: ");
1207 if(0 != unlink(fname))
1208 LogWarning(
"Failed to unlink temporary file %s, file will remain after application terminates\n", fname);
1215 if(!std::is_trivially_copyable<T>::value)
1217 for(
size_t i=0; i<size; i++)
1230 void FreeCpuPointer(T* ptr,
MemoryType type,
size_t size)
1233 if(!std::is_trivially_copyable<T>::value)
1235 for(
size_t i=0; i<size; i++)
1245 case MEM_TYPE_CPU_DMA_CAPABLE:
1246 LogFatal(
"FreeCpuPointer for MEM_TYPE_CPU_DMA_CAPABLE requires the vk::raii::DeviceMemory\n");
1249 case MEM_TYPE_CPU_PAGED:
1251 munmap(ptr, size *
sizeof(T));
1257 case MEM_TYPE_CPU_ONLY:
1262 LogFatal(
"FreeCpuPointer: invalid type %x\n", type);
1274 void FreeCpuPointer(T* ptr, std::unique_ptr<vk::raii::DeviceMemory>& buf,
MemoryType type,
size_t size)
1278 case MEM_TYPE_CPU_DMA_CAPABLE:
1283 FreeCpuPointer(ptr, type, size);
1291 void AllocateGpuBuffer(
size_t size)
1293 assert(std::is_trivially_copyable<T>::value);
1296 vk::BufferCreateInfo bufinfo(
1299 vk::BufferUsageFlagBits::eTransferSrc |
1300 vk::BufferUsageFlagBits::eTransferDst |
1301 vk::BufferUsageFlagBits::eStorageBuffer);
1317 catch(vk::OutOfDeviceMemoryError& ex)
1332 catch(vk::OutOfDeviceMemoryError& ex2)
1334 LogDebug(
"Allocation failed again\n");
1343 LogDebug(
"Final retry\n");
1349 catch(vk::OutOfDeviceMemoryError& ex2)
1351 LogDebug(
"Allocation failed again\n");
1360 "Failed to allocate %s of GPU memory despite our best efforts to reclaim space\n"
1361 "This is unrecoverable (for now).\n",
1362 Unit(Unit::UNIT_BYTES).PrettyPrint(req.size, 4).c_str());
1384 void UpdateGpuNames()
1386 std::string sname = m_name;
1389 std::string prefix = std::string(
"AcceleratorBuffer.") + sname +
".";
1391 std::string gpuBufName = prefix +
"m_gpuBuffer";
1392 std::string gpuPhysName = prefix +
"m_gpuPhysMem";
1395 vk::DebugUtilsObjectNameInfoEXT(
1396 vk::ObjectType::eBuffer,
1397 reinterpret_cast<uint64_t
>(
static_cast<VkBuffer
>(**
m_gpuBuffer)),
1398 gpuBufName.c_str()));
1401 vk::DebugUtilsObjectNameInfoEXT(
1402 vk::ObjectType::eDeviceMemory,
1403 reinterpret_cast<uint64_t
>(
static_cast<VkDeviceMemory
>(**
m_gpuPhysMem)),
1404 gpuPhysName.c_str()));
1411 void UpdateCpuNames()
1413 std::string sname = m_name;
1416 std::string prefix = std::string(
"AcceleratorBuffer.") + sname +
".";
1418 std::string cpuBufName = prefix +
"m_cpuBuffer";
1419 std::string cpuPhysName = prefix +
"m_cpuPhysMem";
1422 vk::DebugUtilsObjectNameInfoEXT(
1423 vk::ObjectType::eBuffer,
1424 reinterpret_cast<uint64_t
>(
static_cast<VkBuffer
>(**
m_cpuBuffer)),
1425 cpuBufName.c_str()));
1428 vk::DebugUtilsObjectNameInfoEXT(
1429 vk::ObjectType::eDeviceMemory,
1430 reinterpret_cast<uint64_t
>(
static_cast<VkDeviceMemory
>(**
m_cpuPhysMem)),
1431 cpuPhysName.c_str()));
bool(* MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Memory pressure handler type, called when free memory reaches a warning level or a Vulkan allocation fails.
Definition: AcceleratorBuffer.h:93
MemoryPressureLevel
Levels of memory pressure.
Definition: AcceleratorBuffer.h:67
@ Hard
A memory allocation has failed and we need to free memory immediately to continue execution.
@ Soft
Free memory has reached a warning threshold.
bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Called when we run low on memory.
Definition: scopehal.cpp:1023
MemoryPressureType
Types of memory pressure.
Definition: AcceleratorBuffer.h:84
@ Host
Pinned CPU-side memory.
std::set< MemoryPressureHandler > g_memoryPressureHandlers
List of handlers for low memory registered by various subsystems.
Definition: scopehal.cpp:143
Declaration of AlignedAllocator.
Declaration of QueueManager and QueueHandle.
Definition: AcceleratorBuffer.h:99
A buffer of memory which may be used by GPU acceleration.
Definition: AcceleratorBuffer.h:158
void CopyToGpu()
Copy the buffer contents from CPU to GPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:974
std::unique_ptr< vk::raii::Buffer > m_gpuBuffer
Buffer object for GPU-side memory.
Definition: AcceleratorBuffer.h:270
bool empty() const
Returns true if the container is empty.
Definition: AcceleratorBuffer.h:391
T * m_cpuPtr
CPU-side mapped pointer.
Definition: AcceleratorBuffer.h:258
void pop_front()
Removes the first item in the container.
Definition: AcceleratorBuffer.h:772
__attribute__((noinline)) void CopyFrom(const AcceleratorBuffer< T > &rhs)
Copies our content from another AcceleratorBuffer.
Definition: AcceleratorBuffer.h:502
void push_back(const T &value)
Adds a new element to the end of the container, allocating space if needed.
Definition: AcceleratorBuffer.h:720
bool IsCpuBufferStale() const
Returns true if the CPU-side buffer is stale.
Definition: AcceleratorBuffer.h:397
vk::DescriptorBufferInfo GetBufferInfo()
Returns a vk::DescriptorBufferInfo suitable for binding this object to.
Definition: AcceleratorBuffer.h:446
std::unique_ptr< vk::raii::DeviceMemory > m_cpuPhysMem
CPU-side physical memory.
Definition: AcceleratorBuffer.h:261
void MarkModifiedFromGpu()
Marks the GPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:863
size_t m_size
Size of the memory actually being used.
Definition: AcceleratorBuffer.h:296
bool IsReachableFromCpu(MemoryType mt)
Returns true if the given buffer type can be reached from the CPU.
Definition: AcceleratorBuffer.h:227
bool IsSingleSharedBuffer() const
Returns true if the object contains only a single buffer.
Definition: AcceleratorBuffer.h:421
bool HasCpuBuffer() const
Returns true if there is currently a CPU-side buffer.
Definition: AcceleratorBuffer.h:409
void MarkModifiedFromCpu()
Marks the CPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:852
std::unique_ptr< vk::raii::DeviceMemory > m_gpuPhysMem
GPU-side physical memory.
Definition: AcceleratorBuffer.h:264
void PrepareForCpuAccess()
Prepares the buffer to be accessed from the CPU.
Definition: AcceleratorBuffer.h:877
size_t size() const
Returns the actual size of the container (may be smaller than what was allocated)
Definition: AcceleratorBuffer.h:357
void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the GPU in the future.
Definition: AcceleratorBuffer.h:832
static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer &cmdBuf)
Adds a memory barrier for transferring data from host to device.
Definition: AcceleratorBuffer.h:1023
bool m_gpuPhysMemIsStale
True if m_gpuPhysMem contains stale data (m_cpuPtr has been modified and they point to different memory)
Definition: AcceleratorBuffer.h:279
bool IsReachableFromGpu(MemoryType mt)
Returns true if the given buffer type can be reached from the GPU.
Definition: AcceleratorBuffer.h:233
vk::Buffer GetBuffer()
Returns the preferred buffer for GPU-side access.
Definition: AcceleratorBuffer.h:429
void pop_back()
Removes the last item in the container.
Definition: AcceleratorBuffer.h:732
size_t GetCpuMemoryBytes() const
Returns the total reserved CPU memory, in bytes.
Definition: AcceleratorBuffer.h:369
void resize(size_t size)
Change the usable size of the container.
Definition: AcceleratorBuffer.h:457
size_t capacity() const
Returns the allocated size of the container.
Definition: AcceleratorBuffer.h:363
bool m_buffersAreSame
True if we have only one piece of physical memory accessible from both sides.
Definition: AcceleratorBuffer.h:273
void reserve(size_t size)
Reallocates buffers so that at least size elements of storage are available.
Definition: AcceleratorBuffer.h:484
UsageHint m_gpuAccessHint
Hint about how likely future GPU access is.
Definition: AcceleratorBuffer.h:313
MemoryType m_cpuMemoryType
Type of the CPU-side buffer.
Definition: AcceleratorBuffer.h:249
bool IsFastFromGpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the GPU.
Definition: AcceleratorBuffer.h:245
void PrepareForGpuAccess(bool outputOnly=false)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:899
std::unique_ptr< vk::raii::Buffer > m_cpuBuffer
Buffer object for CPU-side memory.
Definition: AcceleratorBuffer.h:267
MemoryType m_gpuMemoryType
Type of the GPU-side buffer.
Definition: AcceleratorBuffer.h:252
__attribute__((noinline)) void UpdateGpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1383
void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the CPU in the future.
Definition: AcceleratorBuffer.h:818
void FreeCpuBuffer()
Free the CPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1045
void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer &cmdBuf)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:926
__attribute__((noinline)) void UpdateCpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1410
bool HasGpuBuffer() const
Returns true if there is currently a GPU-side buffer.
Definition: AcceleratorBuffer.h:415
void push_front(const T &value)
Inserts a new item at the beginning of the container. This is inefficient due to copying.
Definition: AcceleratorBuffer.h:743
void CopyToGpuNonblocking(vk::raii::CommandBuffer &cmdBuf)
Copy the buffer contents from CPU to GPU without blocking on the CPU.
Definition: AcceleratorBuffer.h:998
size_t GetGpuMemoryBytes() const
Returns the total reserved GPU memory, in bytes.
Definition: AcceleratorBuffer.h:380
int m_tempFileHandle
File handle used for MEM_TYPE_CPU_PAGED.
Definition: AcceleratorBuffer.h:283
void shrink_to_fit()
Frees unused memory so that m_size == m_capacity.
Definition: AcceleratorBuffer.h:493
size_t m_capacity
Size of the allocated memory space (may be larger than m_size)
Definition: AcceleratorBuffer.h:293
void FreeGpuBuffer(bool dataLossOK=false)
Free the GPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1083
bool IsFastFromCpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the CPU.
Definition: AcceleratorBuffer.h:239
void CopyToCpu()
Copy the buffer contents from GPU to CPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:953
__attribute__((noinline)) void FreeCpuPointer(T *ptr
Frees a CPU-side buffer.
AcceleratorBuffer(const std::string &name="")
Creates a new AcceleratorBuffer with no content.
Definition: AcceleratorBuffer.h:322
MemoryAttributes
Attributes that a memory buffer can have.
Definition: AcceleratorBuffer.h:175
bool m_cpuPhysMemIsStale
True if m_cpuPtr contains stale data (m_gpuPhysMem has been modified and they point to different memory)
Definition: AcceleratorBuffer.h:276
__attribute__((noinline)) void AllocateCpuBuffer(size_t size)
Allocates a buffer for CPU access.
Definition: AcceleratorBuffer.h:1112
__attribute__((noinline)) void Reallocate(size_t size)
Reallocates the buffer so that it contains exactly size elements.
Definition: AcceleratorBuffer.h:548
UsageHint m_cpuAccessHint
Hint about how likely future CPU access is.
Definition: AcceleratorBuffer.h:310
MemoryType
Types of memory buffer.
Definition: AcceleratorBuffer.h:193
T * GetCpuPointer()
Gets a pointer to the CPU-side buffer.
Definition: AcceleratorBuffer.h:440
void SetName(std::string name)
Sets the debug name for this buffer.
Definition: AcceleratorBuffer.h:1444
void clear()
Resize the container to be empty (but don't free memory)
Definition: AcceleratorBuffer.h:478
bool IsGpuBufferStale() const
Returns true if the GPU-side buffer is stale.
Definition: AcceleratorBuffer.h:403
void deallocate(T *const p, const size_t unused) const
Free a block of memory.
Definition: AlignedAllocator.h:194
T * allocate(size_t n) const
Allocate a block of memory.
Definition: AlignedAllocator.h:159
A unit of measurement, plus conversion to pretty-printed output.
Definition: Unit.h:57
uint32_t g_vkPinnedMemoryType
Vulkan memory type for CPU-based memory that is also GPU-readable.
Definition: VulkanInit.cpp:118
std::mutex g_vkTransferMutex
Mutex for interlocking access to g_vkTransferCommandBuffer and g_vkTransferCommandPool.
Definition: VulkanInit.cpp:112
bool g_hasDebugUtils
Indicates whether the VK_EXT_debug_utils extension is available.
Definition: VulkanInit.cpp:195
std::shared_ptr< vk::raii::Device > g_vkComputeDevice
The Vulkan device selected for compute operations (may or may not be same device as rendering)
Definition: VulkanInit.cpp:71
std::unique_ptr< vk::raii::CommandBuffer > g_vkTransferCommandBuffer
Command buffer for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:89
std::shared_ptr< QueueHandle > g_vkTransferQueue
Queue for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:98
uint32_t g_vkLocalMemoryType
Vulkan memory type for GPU-based memory (generally not CPU-readable, except on unified memory systems)
Definition: VulkanInit.cpp:124
bool g_vulkanDeviceHasUnifiedMemory
Indicates whether the Vulkan device is unified memory.
Definition: VulkanInit.cpp:220