ngscopeclient 0.1-dev+51fbda87c
AcceleratorBuffer.h
Go to the documentation of this file.
1/***********************************************************************************************************************
2* *
3* libscopehal *
4* *
5* Copyright (c) 2012-2024 Andrew D. Zonenberg and contributors *
6* All rights reserved. *
7* *
8* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the *
9* following conditions are met: *
10* *
11* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the *
12* following disclaimer. *
13* *
14* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the *
15* following disclaimer in the documentation and/or other materials provided with the distribution. *
16* *
17* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products *
18* derived from this software without specific prior written permission. *
19* *
20* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
21* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL *
22* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
23* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *
24* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
25* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *
26* POSSIBILITY OF SUCH DAMAGE. *
27* *
28***********************************************************************************************************************/
29
35#ifndef AcceleratorBuffer_h
36#define AcceleratorBuffer_h
37
38#include "AlignedAllocator.h"
39#include "QueueManager.h"
40
41#ifdef _WIN32
42#undef MemoryBarrier
43#endif
44
45#ifndef _WIN32
46#include <sys/mman.h>
47#include <unistd.h>
48#endif
49
50#include <type_traits>
51
52extern uint32_t g_vkPinnedMemoryType;
53extern uint32_t g_vkLocalMemoryType;
54extern std::shared_ptr<vk::raii::Device> g_vkComputeDevice;
55extern std::unique_ptr<vk::raii::CommandBuffer> g_vkTransferCommandBuffer;
56extern std::shared_ptr<QueueHandle> g_vkTransferQueue;
57extern std::mutex g_vkTransferMutex;
58
59extern bool g_hasDebugUtils;
61
62template<class T>
64
67{
69 Hard,
70
79 Soft
80};
81
84{
86 Host,
87
89 Device
90};
91
93typedef bool (*MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize);
94
95bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize);
96
97template<class T>
99{
100public:
101 using value_type = T;
102 using iterator_category = std::forward_iterator_tag;
103 using difference_type = std::ptrdiff_t;
104 using pointer = T*;
105 using reference = T&;
106
108 : m_index(i)
109 , m_buf(buf)
110 {}
111
112 T& operator*()
113 { return m_buf[m_index]; }
114
115 size_t GetIndex() const
116 { return m_index; }
117
118 bool operator!=(AcceleratorBufferIterator<T>& it)
119 {
120 //TODO: should we check m_buf equality too?
121 //Will slow things down, but be more semantically correct. Does anything care?
122 return (m_index != it.m_index);
123 }
124
125 AcceleratorBufferIterator<T>& operator++()
126 {
127 m_index ++;
128 return *this;
129 }
130
131protected:
132 size_t m_index;
134};
135
136template<class T>
137std::ptrdiff_t operator-(const AcceleratorBufferIterator<T>& a, const AcceleratorBufferIterator<T>& b)
138{ return a.GetIndex() - b.GetIndex(); }
139
156template<class T>
158{
159protected:
160
162 // Allocator for CPU-only memory
163
164 AlignedAllocator<T, 32> m_cpuAllocator;
165
166public:
167
169 // Buffer types
170
175 {
176 //Location of the memory
177 MEM_ATTRIB_CPU_SIDE = 0x1,
178 MEM_ATTRIB_GPU_SIDE = 0x2,
179
180 //Reachability
181 MEM_ATTRIB_CPU_REACHABLE = 0x4,
182 MEM_ATTRIB_GPU_REACHABLE = 0x8,
183
184 //Speed
185 MEM_ATTRIB_CPU_FAST = 0x10,
186 MEM_ATTRIB_GPU_FAST = 0x20
187 };
188
193 {
194 //Pointer is invalid
195 MEM_TYPE_NULL = 0,
196
197 //Memory is located on the CPU but backed by a file and may get paged out
198 MEM_TYPE_CPU_PAGED =
199 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE,
200
201 //Memory is located on the CPU but not pinned, or otherwise accessible to the GPU
202 MEM_TYPE_CPU_ONLY =
203 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST,
204
205 //Memory is located on the CPU, but can be accessed by the GPU.
206 //Fast to access from the CPU, but accesses from the GPU require PCIe DMA and are slow
207 //(unless platform uses unified memory, in which case g_vulkanDeviceHasUnifiedMemory will be true)
208 MEM_TYPE_CPU_DMA_CAPABLE =
209 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST | MEM_ATTRIB_GPU_REACHABLE,
210
211 //Memory is located on the GPU and cannot be directly accessed by the CPU
212 MEM_TYPE_GPU_ONLY =
213 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST,
214
215 //Memory is located on the GPU, but can be accessed by the CPU.
216 //Fast to access from the GPU, but accesses from the CPU require PCIe DMA and are slow
217 //(should not be used if platform uses unified memory, in which case g_vulkanDeviceHasUnifiedMemory will be true)
218 MEM_TYPE_GPU_DMA_CAPABLE =
219 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST | MEM_ATTRIB_CPU_REACHABLE
220 };
221
222protected:
223
228 { return (mt & MEM_ATTRIB_CPU_REACHABLE) != 0; }
229
234 { return (mt & MEM_ATTRIB_GPU_REACHABLE) != 0; }
235
240 { return (mt & MEM_ATTRIB_CPU_FAST) != 0; }
241
246 { return (mt & MEM_ATTRIB_GPU_FAST) != 0; }
247
250
253
255 // The actual memory buffers
256
259
261 std::unique_ptr<vk::raii::DeviceMemory> m_cpuPhysMem;
262
264 std::unique_ptr<vk::raii::DeviceMemory> m_gpuPhysMem;
265
267 std::unique_ptr<vk::raii::Buffer> m_cpuBuffer;
268
270 std::unique_ptr<vk::raii::Buffer> m_gpuBuffer;
271
274
277
280
282#ifndef _WIN32
284#endif
285
287 // Iteration
288
290 // Sizes of buffers
291
294
296 size_t m_size;
297
299 // Hint configuration
300public:
/// How likely the buffer is to be accessed from one side (CPU or GPU); used to choose where memory is allocated
enum UsageHint
{
	HINT_NEVER,		///< No access from this side is anticipated
	HINT_UNLIKELY,	///< Access from this side is possible but expected to be infrequent
	HINT_LIKELY		///< Frequent access from this side is expected
};
307
308protected:
311
314
316 // Construction / destruction
317public:
318
322 AcceleratorBuffer(const std::string& name = "")
323 : m_cpuMemoryType(MEM_TYPE_NULL)
324 , m_gpuMemoryType(MEM_TYPE_NULL)
325 , m_cpuPtr(nullptr)
326 , m_gpuPhysMem(nullptr)
327 , m_buffersAreSame(false)
328 , m_cpuPhysMemIsStale(false)
329 , m_gpuPhysMemIsStale(false)
330 #ifndef _WIN32
332 #endif
333 , m_capacity(0)
334 , m_size(0)
335 , m_cpuAccessHint(HINT_LIKELY) //default access hint: CPU-side pinned memory
336 , m_gpuAccessHint(HINT_UNLIKELY)
337 , m_name(name)
338 {
339 //non-trivially-copyable types can't be copied to GPU except on unified memory platforms
340 if(!std::is_trivially_copyable<T>::value && !g_vulkanDeviceHasUnifiedMemory)
341 m_gpuAccessHint = HINT_NEVER;
342 }
343
345 {
347 FreeGpuBuffer(true);
348 }
349
351 // General accessors
352public:
353
357 size_t size() const
358 { return m_size; }
359
363 size_t capacity() const
364 { return m_capacity; }
365
369 size_t GetCpuMemoryBytes() const
370 {
371 if(m_cpuMemoryType == MEM_TYPE_NULL)
372 return 0;
373 else
374 return m_capacity * sizeof(T);
375 }
376
380 size_t GetGpuMemoryBytes() const
381 {
382 if(m_gpuMemoryType == MEM_TYPE_NULL)
383 return 0;
384 else
385 return m_capacity * sizeof(T);
386 }
387
391 bool empty() const
392 { return (m_size == 0); }
393
397 bool IsCpuBufferStale() const
398 { return m_cpuPhysMemIsStale; }
399
403 bool IsGpuBufferStale() const
404 { return m_gpuPhysMemIsStale; }
405
409 bool HasCpuBuffer() const
410 { return (m_cpuPtr != nullptr); }
411
415 bool HasGpuBuffer() const
416 { return (m_gpuPhysMem != nullptr); }
417
422 { return m_buffersAreSame; }
423
429 vk::Buffer GetBuffer()
430 {
431 if(m_gpuBuffer != nullptr)
432 return **m_gpuBuffer;
433 else
434 return **m_cpuBuffer;
435 }
436
441 { return m_cpuPtr; }
442
446 vk::DescriptorBufferInfo GetBufferInfo()
447 {
448 return vk::DescriptorBufferInfo(
449 GetBuffer(),
450 0,
451 m_size * sizeof(T));
452 }
453
457 void resize(size_t size)
458 {
459 //Need to grow?
460 if(size > m_capacity)
461 {
462 //Default to doubling in size each time to avoid excessive copying.
463 if(m_capacity == 0)
464 reserve(size);
465 else if(size > m_capacity*2)
466 reserve(size);
467 else
468 reserve(m_capacity * 2);
469 }
470
471 //Update our size
472 m_size = size;
473 }
474
478 void clear()
479 { resize(0); }
480
484 void reserve(size_t size)
485 {
486 if(size >= m_capacity)
487 Reallocate(size);
488 }
489
494 {
495 if(m_size != m_capacity)
496 Reallocate(m_size);
497 }
498
502 __attribute__((noinline))
503 void CopyFrom(const AcceleratorBuffer<T>& rhs)
504 {
505 //Copy placement hints from the other instance, then resize to match
508 resize(rhs.m_size);
509
510 //Valid data CPU side? Copy it to here
511 if(rhs.HasCpuBuffer() && !rhs.m_cpuPhysMemIsStale)
512 {
513 //non-trivially-copyable types have to be copied one at a time
514 if(!std::is_trivially_copyable<T>::value)
515 {
516 for(size_t i=0; i<m_size; i++)
517 m_cpuPtr[i] = rhs.m_cpuPtr[i];
518 }
519
520 //Trivially copyable types can be done more efficiently in a block
521 else
522 memcpy(m_cpuPtr, rhs.m_cpuPtr, m_size * sizeof(T));
523 }
525
526 //Valid data GPU side? Copy it to here
527 if(rhs.HasGpuBuffer() && !rhs.m_gpuPhysMemIsStale)
528 {
529 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
530
531 //Make the transfer request
532 g_vkTransferCommandBuffer->begin({});
533 vk::BufferCopy region(0, 0, m_size * sizeof(T));
534 g_vkTransferCommandBuffer->copyBuffer(**rhs.m_gpuBuffer, **m_gpuBuffer, {region});
536
537 //Submit the request and block until it completes
539 }
541 }
542
543protected:
544
548 __attribute__((noinline))
549 void Reallocate(size_t size)
550 {
551 if(size == 0)
552 return;
553
554 /*
555 If we are a bool[] or similar one-byte type, we are likely going to be accessed from the GPU via a uint32
556 descriptor for at least some shaders (such as rendering).
557
558 Round our actual allocated size to the next multiple of 4 bytes. The padding values are unimportant as the
559 bytes are never written, and the data read from the high bytes in the uint32 is discarded by the GPU.
560 We just need to ensure the memory is allocated so the 32-bit read is legal to perform.
561 */
562 if( (sizeof(T) == 1) && (m_gpuAccessHint != HINT_NEVER) )
563 {
564 if(size & 3)
565 size = (size | 3) + 1;
566 }
567
568 //If we do not anticipate using the data on the CPU, we shouldn't waste RAM.
569 //Allocate a GPU-local buffer, copy data to it, then free the CPU-side buffer
570 //Don't do this if the platform has unified memory
571 if( (m_cpuAccessHint == HINT_NEVER) && !g_vulkanDeviceHasUnifiedMemory)
572 {
575 }
576
577 else
578 {
579 //Resize CPU memory
580 //TODO: optimization, when expanding a MEM_TYPE_CPU_PAGED we can just enlarge the file
581 //and not have to make a new temp file and copy the content
582 if(m_cpuPtr != nullptr)
583 {
584 //Save the old pointer
585 auto pOld = m_cpuPtr;
586 auto pOldPin = std::move(m_cpuPhysMem);
587 auto type = m_cpuMemoryType;
588
589 //Allocate the new buffer
590 AllocateCpuBuffer(size);
591
592 //If CPU-side data is valid, copy existing data over.
593 //New pointer is still valid in this case.
595 {
596 //non-trivially-copyable types have to be copied one at a time
597 if(!std::is_trivially_copyable<T>::value)
598 {
599 for(size_t i=0; i<m_size; i++)
600 m_cpuPtr[i] = std::move(pOld[i]);
601 }
602
603 //Trivially copyable types can be done more efficiently in a block
604 //gcc warns about this even though we only call this code if the type is trivially copyable,
605 //so disable the warning.
606 else
607 {
608 #pragma GCC diagnostic push
609 #pragma GCC diagnostic ignored "-Wclass-memaccess"
610
611 memcpy(m_cpuPtr, pOld, m_size * sizeof(T));
612
613 #pragma GCC diagnostic pop
614 }
615 }
616
617 //If CPU-side data is stale, just allocate the new buffer but leave it as stale
618 //(don't do a potentially unnecessary copy from the GPU)
619
620 //Now we're done with the old pointer so get rid of it
621 FreeCpuPointer(pOld, pOldPin, type, m_capacity);
622 }
623
624 //Allocate new CPU memory, replacing our current (null) pointer
625 else
626 {
627 AllocateCpuBuffer(size);
628
629 //If we already had GPU-side memory containing data, then the new CPU-side buffer is stale
630 //until we copy stuff over to it
631 if(m_gpuPhysMem != nullptr)
632 m_cpuPhysMemIsStale = true;
633 }
634 }
635
636 //We're expecting to use data on the GPU, so prepare to do stuff with it
637 if(m_gpuAccessHint != HINT_NEVER)
638 {
639 //If GPU access is unlikely, we probably want to just use pinned memory.
640 //If available, mark buffers as the same, and free any existing GPU buffer we might have
641 //Always use pinned memory if the platform has unified memory
642 if( ((m_gpuAccessHint == HINT_UNLIKELY) && (m_cpuMemoryType == MEM_TYPE_CPU_DMA_CAPABLE)) || g_vulkanDeviceHasUnifiedMemory )
644
645 //Nope, we need to allocate dedicated GPU memory
646 else
647 {
648 //If we have an existing buffer with valid content, save it and copy content over
649 if( (m_gpuPhysMem != nullptr) && !m_gpuPhysMemIsStale && (m_size != 0))
650 {
651 auto pOld = std::move(m_gpuPhysMem);
652 //auto type = m_gpuMemoryType;
653 auto bOld = std::move(m_gpuBuffer);
654
655 AllocateGpuBuffer(size);
656
657 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
658
659 //Make the transfer request
660 g_vkTransferCommandBuffer->begin({});
661 vk::BufferCopy region(0, 0, m_size * sizeof(T));
662 g_vkTransferCommandBuffer->copyBuffer(**bOld, **m_gpuBuffer, {region});
664
665 //Submit the request and block until it completes
667
668 //make sure buffer is freed before underlying physical memory (pOld) goes out of scope
669 bOld = nullptr;
670 }
671
672 //Nope, just allocate a new buffer
673 else
674 {
675 AllocateGpuBuffer(size);
676
677 //If we already had CPU-side memory containing data, then the new GPU-side buffer is stale
678 //until we copy stuff over to it.
679 //Special case: if m_size is 0 (newly allocated buffer) we're not stale yet
680 if( (m_cpuPhysMem != nullptr) && (m_size != 0) )
681 m_gpuPhysMemIsStale = true;
682 }
683 }
684 }
685
686 //Existing GPU buffer we never expect to use again - needs to be freed
687 else if(m_gpuPhysMem != nullptr)
689
690 //We are never going to use the buffer on the GPU, but don't have any existing GPU memory
691 //so no action required
692 else
693 {
694 }
695
696 //Update our capacity
697 m_capacity = size;
698
699 //If we have a pinned buffer and nothing on the other side, there's a single shared physical memory region
701 ( (m_cpuMemoryType == MEM_TYPE_CPU_DMA_CAPABLE) && (m_gpuMemoryType == MEM_TYPE_NULL) ) ||
702 ( (m_cpuMemoryType == MEM_TYPE_NULL) && (m_gpuMemoryType == MEM_TYPE_GPU_DMA_CAPABLE) );
703 }
704
706 // CPU-side STL-esque container API
707
708 //PrepareForCpuAccess() *must* be called prior to calling any of these methods.
709public:
710
711 const T& operator[](size_t i) const
712 { return m_cpuPtr[i]; }
713
714 T& operator[](size_t i)
715 { return m_cpuPtr[i]; }
716
720 void push_back(const T& value)
721 {
722 size_t cursize = m_size;
723 resize(m_size + 1);
724 m_cpuPtr[cursize] = value;
725
727 }
728
732 void pop_back()
733 {
734 if(!empty())
735 resize(m_size - 1);
736 }
737
743 void push_front(const T& value)
744 {
745 size_t cursize = m_size;
746 resize(m_size + 1);
747
749
750 //non-trivially-copyable types have to be copied one at a time
751 if(!std::is_trivially_copyable<T>::value)
752 {
753 for(size_t i=0; i<cursize; i++)
754 m_cpuPtr[i+1] = std::move(m_cpuPtr[i]);
755 }
756
757 //Trivially copyable types can be done more efficiently in a block
758 else
759 memmove(m_cpuPtr+1, m_cpuPtr, sizeof(T) * (cursize));
760
761 //Insert the new first element
762 m_cpuPtr[0] = value;
763
765 }
766
773 {
774 //No need to move data if popping last element
775 if(m_size == 1)
776 {
777 clear();
778 return;
779 }
780
781 //Don't touch GPU side buffer
782
784
785 //non-trivially-copyable types have to be copied one at a time
786 if(!std::is_trivially_copyable<T>::value)
787 {
788 for(size_t i=0; i<m_size-1; i++)
789 m_cpuPtr[i] = std::move(m_cpuPtr[i+1]);
790 }
791
792 //Trivially copyable types can be done more efficiently in a block
793 else
794 memmove(m_cpuPtr, m_cpuPtr+1, sizeof(T) * (m_size-1));
795
796 resize(m_size - 1);
797
799 }
800
802 { return AcceleratorBufferIterator<T>(*this, 0); }
803
805 { return AcceleratorBufferIterator<T>(*this, m_size); }
806
808 // Hints about near-future usage patterns
809
810public:
811
818 void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately = false)
819 {
820 m_cpuAccessHint = hint;
821
822 if(reallocateImmediately && (m_size != 0))
823 Reallocate(m_size);
824 }
825
832 void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately = false)
833 {
834 //Only trivially copyable datatypes are allowed on the GPU
835 if(!std::is_trivially_copyable<T>::value)
836 hint = HINT_NEVER;
837
838 m_gpuAccessHint = hint;
839
840 if(reallocateImmediately && (m_size != 0))
841 Reallocate(m_size);
842 }
843
845 // Cache invalidation
846
853 {
855 m_gpuPhysMemIsStale = true;
856 }
857
864 {
866 m_cpuPhysMemIsStale = true;
867 }
868
870 // Preparation for access
871
878 {
879 //Early out if no content
880 if(m_size == 0)
881 return;
882
883 //If there's no buffer at all on the CPU, allocate one
884 if(!HasCpuBuffer() && (m_gpuMemoryType != MEM_TYPE_GPU_DMA_CAPABLE))
885 AllocateCpuBuffer(m_capacity);
886
888 CopyToCpu();
889 }
890
899 void PrepareForGpuAccess(bool outputOnly = false)
900 {
901 //Early out if no content or if unified memory
903 return;
904
905 //If our current hint has no GPU access at all, update to say "unlikely" and reallocate
906 if(m_gpuAccessHint == HINT_NEVER)
907 SetGpuAccessHint(HINT_UNLIKELY, true);
908
909 //If we don't have a buffer, allocate one unless our CPU buffer is pinned and GPU-readable
910 if(!HasGpuBuffer() && (m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE) )
911 AllocateGpuBuffer(m_capacity);
912
913 //Make sure the GPU-side buffer is up to date
914 if(m_gpuPhysMemIsStale && !outputOnly)
915 CopyToGpu();
916 }
917
926 void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer& cmdBuf)
927 {
928 //Early out if no content or if unified memory
930 return;
931
932 //If our current hint has no GPU access at all, update to say "unlikely" and reallocate
933 if(m_gpuAccessHint == HINT_NEVER)
934 SetGpuAccessHint(HINT_UNLIKELY, true);
935
936 //If we don't have a buffer, allocate one unless our CPU buffer is pinned and GPU-readable
937 if(!HasGpuBuffer() && (m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE) )
938 AllocateGpuBuffer(m_capacity);
939
940 //Make sure the GPU-side buffer is up to date
941 if(m_gpuPhysMemIsStale && !outputOnly)
942 CopyToGpuNonblocking(cmdBuf);
943 }
944
945protected:
946
948 // Copying of buffer content
949
954 {
955 assert(std::is_trivially_copyable<T>::value);
956
957 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
958
959 //Make the transfer request
960 g_vkTransferCommandBuffer->begin({});
961 vk::BufferCopy region(0, 0, m_size * sizeof(T));
962 g_vkTransferCommandBuffer->copyBuffer(**m_gpuBuffer, **m_cpuBuffer, {region});
964
965 //Submit the request and block until it completes
967
968 m_cpuPhysMemIsStale = false;
969 }
970
975 {
976 assert(std::is_trivially_copyable<T>::value);
977
978 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
979
980 //Make the transfer request
981 g_vkTransferCommandBuffer->begin({});
982 vk::BufferCopy region(0, 0, m_size * sizeof(T));
983 g_vkTransferCommandBuffer->copyBuffer(**m_cpuBuffer, **m_gpuBuffer, {region});
985
986 //Submit the request and block until it completes
988
989 m_gpuPhysMemIsStale = false;
990 }
991
992
998 void CopyToGpuNonblocking(vk::raii::CommandBuffer& cmdBuf)
999 {
1000 assert(std::is_trivially_copyable<T>::value);
1001
1002 //Make the transfer request
1003 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1004 cmdBuf.copyBuffer(**m_cpuBuffer, **m_gpuBuffer, {region});
1005
1006 //Add the barrier
1007 cmdBuf.pipelineBarrier(
1008 vk::PipelineStageFlagBits::eTransfer,
1009 vk::PipelineStageFlagBits::eComputeShader,
1010 {},
1011 vk::MemoryBarrier(
1012 vk::AccessFlagBits::eTransferWrite,
1013 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1014 {},
1015 {});
1016
1017 m_gpuPhysMemIsStale = false;
1018 }
1019public:
1023 static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer& cmdBuf)
1024 {
1025 cmdBuf.pipelineBarrier(
1026 vk::PipelineStageFlagBits::eTransfer,
1027 vk::PipelineStageFlagBits::eComputeShader,
1028 {},
1029 vk::MemoryBarrier(
1030 vk::AccessFlagBits::eTransferWrite,
1031 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1032 {},
1033 {});
1034 }
1035
1036
1037protected:
1038
1040 // Cleanup
1041
1046 {
1047 //Early out if buffer is already null
1048 if(m_cpuPtr == nullptr)
1049 return;
1050
1051 //We have a buffer on the GPU.
1052 //If it's stale, need to push our updated content there before freeing the CPU-side copy
1053 if( (m_gpuMemoryType != MEM_TYPE_NULL) && m_gpuPhysMemIsStale && !empty())
1054 CopyToGpu();
1055
1056 //Free the Vulkan buffer object
1057 m_cpuBuffer = nullptr;
1058
1059 //Free the buffer and unmap any memory
1061
1062 //Mark CPU-side buffer as empty
1063 m_cpuPtr = nullptr;
1064 m_cpuPhysMem = nullptr;
1065 m_cpuMemoryType = MEM_TYPE_NULL;
1066 m_buffersAreSame = false;
1067
1068 //If we have no GPU-side buffer either, we're empty
1069 if(m_gpuMemoryType == MEM_TYPE_NULL)
1070 {
1071 m_size = 0;
1072 m_capacity = 0;
1073 }
1074 }
1075
1076public:
1083 void FreeGpuBuffer(bool dataLossOK = false)
1084 {
1085 //Early out if buffer is already null
1086 if(m_gpuPhysMem == nullptr)
1087 return;
1088
1089 //If we do NOT have a CPU-side buffer, we're deleting all of our data! Warn for now
1090 if( (m_cpuMemoryType == MEM_TYPE_NULL) && m_gpuPhysMemIsStale && !empty() && !dataLossOK)
1091 {
1092 LogWarning("Freeing a GPU buffer without any CPU backing, may cause data loss\n");
1093 }
1094
1095 //If we have a CPU-side buffer, and it's stale, move our about-to-be-deleted content over before we free it
1096 if( (m_cpuMemoryType != MEM_TYPE_NULL) && m_cpuPhysMemIsStale && !empty() )
1097 CopyToCpu();
1098
1099 m_gpuBuffer = nullptr;
1100 m_gpuPhysMem = nullptr;
1101 m_gpuMemoryType = MEM_TYPE_NULL;
1102 }
1103
1104protected:
1105
1107 // Allocation
1108
1112 __attribute__((noinline))
1113 void AllocateCpuBuffer(size_t size)
1114 {
1115 if(size == 0)
1116 LogFatal("AllocateCpuBuffer with size zero (invalid)\n");
1117
1118 //If any GPU access is expected, use pinned memory so we don't have to move things around
1119 if(m_gpuAccessHint != HINT_NEVER)
1120 {
1121 //Make a Vulkan buffer first
1122 vk::BufferCreateInfo bufinfo(
1123 {},
1124 size * sizeof(T),
1125 vk::BufferUsageFlagBits::eTransferSrc |
1126 vk::BufferUsageFlagBits::eTransferDst |
1127 vk::BufferUsageFlagBits::eStorageBuffer);
1128 m_cpuBuffer = std::make_unique<vk::raii::Buffer>(*g_vkComputeDevice, bufinfo);
1129
1130 //Figure out actual memory requirements of the buffer
1131 //(may be rounded up from what we asked for)
1132 auto req = m_cpuBuffer->getMemoryRequirements();
1133
1134 //Allocate the physical memory to back the buffer
1135 vk::MemoryAllocateInfo info(req.size, g_vkPinnedMemoryType);
1136 m_cpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1137
1138 //Map it and bind to the buffer
1139 m_cpuPtr = reinterpret_cast<T*>(m_cpuPhysMem->mapMemory(0, req.size));
1140 m_cpuBuffer->bindMemory(**m_cpuPhysMem, 0);
1141
1142 //We now have pinned memory
1143 m_cpuMemoryType = MEM_TYPE_CPU_DMA_CAPABLE;
1144
1145 if(g_hasDebugUtils)
1146 UpdateCpuNames();
1147 }
1148
1149 //If frequent CPU access is expected, use normal host memory
1150 else if(m_cpuAccessHint == HINT_LIKELY)
1151 {
1152 m_cpuBuffer = nullptr;
1153 m_cpuMemoryType = MEM_TYPE_CPU_ONLY;
1154 m_cpuPtr = m_cpuAllocator.allocate(size);
1155 }
1156
1157 //If infrequent CPU access is expected, use a memory mapped temporary file so it can be paged out to disk
1158 else
1159 {
1160 #ifdef _WIN32
1161
1162 //On Windows, use normal memory for now
1163 //until we figure out how to do this there
1164 m_cpuBuffer = nullptr;
1165 m_cpuMemoryType = MEM_TYPE_CPU_ONLY;
1166 m_cpuPtr = m_cpuAllocator.allocate(size);
1167
1168 #else
1169
1170 m_cpuBuffer = nullptr;
1171 m_cpuMemoryType = MEM_TYPE_CPU_PAGED;
1172
1173 //Make the temp file
1174 char fname[] = "/tmp/glscopeclient-tmpXXXXXX";
1175 m_tempFileHandle = mkstemp(fname);
1176 if(m_tempFileHandle < 0)
1177 {
1178 LogError("Failed to create temporary file %s\n", fname);
1179 abort();
1180 }
1181
1182 //Resize it to our desired file size
1183 size_t bytesize = size * sizeof(T);
1184 if(0 != ftruncate(m_tempFileHandle, bytesize))
1185 {
1186 LogError("Failed to resize temporary file %s\n", fname);
1187 abort();
1188 }
1189
1190 //Map it
1191 m_cpuPtr = reinterpret_cast<T*>(mmap(
1192 nullptr,
1193 bytesize,
1194 PROT_READ | PROT_WRITE,
1195 MAP_SHARED/* | MAP_UNINITIALIZED*/,
1197 0));
1198 if(m_cpuPtr == MAP_FAILED)
1199 {
1200 LogError("Failed to map temporary file %s\n", fname);
1201 perror("mmap failed: ");
1202 abort();
1203 }
1204 m_cpuMemoryType = MEM_TYPE_CPU_PAGED;
1205
1206 //Delete it (file will be removed by the OS after our active handle is closed)
1207 if(0 != unlink(fname))
1208 LogWarning("Failed to unlink temporary file %s, file will remain after application terminates\n", fname);
1209
1210 #endif
1211 }
1212
1213 //Memory has been allocated. Call constructors iff type is not trivially copyable
1214 //(This is not exactly 1:1 with having a constructor, but hopefully good enough?)
1215 if(!std::is_trivially_copyable<T>::value)
1216 {
1217 for(size_t i=0; i<size; i++)
1218 new(m_cpuPtr +i) T;
1219 }
1220 }
1221
1229 __attribute__((noinline))
1230 void FreeCpuPointer(T* ptr, MemoryType type, size_t size)
1231 {
1232 //Call destructors iff type is not trivially copyable
1233 if(!std::is_trivially_copyable<T>::value)
1234 {
1235 for(size_t i=0; i<size; i++)
1236 ptr[i].~T();
1237 }
1238
1239 switch(type)
1240 {
1241 case MEM_TYPE_NULL:
1242 //legal no-op
1243 break;
1244
1245 case MEM_TYPE_CPU_DMA_CAPABLE:
1246 LogFatal("FreeCpuPointer for MEM_TYPE_CPU_DMA_CAPABLE requires the vk::raii::DeviceMemory\n");
1247 break;
1248
1249 case MEM_TYPE_CPU_PAGED:
1250 #ifndef _WIN32
1251 munmap(ptr, size * sizeof(T));
1252 close(m_tempFileHandle);
1253 m_tempFileHandle = -1;
1254 #endif
1255 break;
1256
1257 case MEM_TYPE_CPU_ONLY:
1258 m_cpuAllocator.deallocate(ptr, size);
1259 break;
1260
1261 default:
1262 LogFatal("FreeCpuPointer: invalid type %x\n", type);
1263 }
1264 }
1265
1273 __attribute__((noinline))
1274 void FreeCpuPointer(T* ptr, std::unique_ptr<vk::raii::DeviceMemory>& buf, MemoryType type, size_t size)
1275 {
1276 switch(type)
1277 {
1278 case MEM_TYPE_CPU_DMA_CAPABLE:
1279 buf->unmapMemory();
1280 break;
1281
1282 default:
1283 FreeCpuPointer(ptr, type, size);
1284 }
1285 }
1286
1290 __attribute__((noinline))
1291 void AllocateGpuBuffer(size_t size)
1292 {
1293 assert(std::is_trivially_copyable<T>::value);
1294
1295 //Make a Vulkan buffer first
1296 vk::BufferCreateInfo bufinfo(
1297 {},
1298 size * sizeof(T),
1299 vk::BufferUsageFlagBits::eTransferSrc |
1300 vk::BufferUsageFlagBits::eTransferDst |
1301 vk::BufferUsageFlagBits::eStorageBuffer);
1302 m_gpuBuffer = std::make_unique<vk::raii::Buffer>(*g_vkComputeDevice, bufinfo);
1303
1304 //Figure out actual memory requirements of the buffer
1305 //(may be rounded up from what we asked for)
1306 auto req = m_gpuBuffer->getMemoryRequirements();
1307
1308 //Try to allocate the memory
1309 vk::MemoryAllocateInfo info(req.size, g_vkLocalMemoryType);
1310 try
1311 {
1312 //For now, always use local memory
1313 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1314 }
1315
1316 //Fallback path in case of low memory
1317 catch(vk::OutOfDeviceMemoryError& ex)
1318 {
1319 bool ok = false;
1320 while(!ok)
1321 {
1322 //Attempt to free memory and stop if we couldn't free more
1324 break;
1325
1327 try
1328 {
1329 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1330 ok = true;
1331 }
1332 catch(vk::OutOfDeviceMemoryError& ex2)
1333 {
1334 LogDebug("Allocation failed again\n");
1335 }
1336 }
1337
1338 //Retry one more time.
1339 //If we OOM simultaneously in two threads, it's possible to have the second OnMemoryPressure call
1340 //return false because the first one already freed all it could. But we might have enough free to continue.
1341 if(!ok)
1342 {
1343 LogDebug("Final retry\n");
1344 try
1345 {
1346 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1347 ok = true;
1348 }
1349 catch(vk::OutOfDeviceMemoryError& ex2)
1350 {
1351 LogDebug("Allocation failed again\n");
1352 }
1353 }
1354
1355 //If we get here, we couldn't allocate no matter what
1356 //TODO: Fall back to a CPU-side allocation
1357 if(!ok)
1358 {
1359 LogError(
1360 "Failed to allocate %s of GPU memory despite our best efforts to reclaim space\n"
1361 "This is unrecoverable (for now).\n",
1362 Unit(Unit::UNIT_BYTES).PrettyPrint(req.size, 4).c_str());
1363
1364 std::abort();
1365 }
1366 }
1367 m_gpuMemoryType = MEM_TYPE_GPU_ONLY;
1368
1369 m_gpuBuffer->bindMemory(**m_gpuPhysMem, 0);
1370
1371 if(g_hasDebugUtils)
1372 UpdateGpuNames();
1373 }
1374
1375protected:
1376
1378 std::string m_name;
1379
1383 __attribute__((noinline))
1384 void UpdateGpuNames()
1385 {
1386 std::string sname = m_name;
1387 if(sname.empty())
1388 sname = "unnamed";
1389 std::string prefix = std::string("AcceleratorBuffer.") + sname + ".";
1390
1391 std::string gpuBufName = prefix + "m_gpuBuffer";
1392 std::string gpuPhysName = prefix + "m_gpuPhysMem";
1393
1394 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1395 vk::DebugUtilsObjectNameInfoEXT(
1396 vk::ObjectType::eBuffer,
1397 reinterpret_cast<uint64_t>(static_cast<VkBuffer>(**m_gpuBuffer)),
1398 gpuBufName.c_str()));
1399
1400 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1401 vk::DebugUtilsObjectNameInfoEXT(
1402 vk::ObjectType::eDeviceMemory,
1403 reinterpret_cast<uint64_t>(static_cast<VkDeviceMemory>(**m_gpuPhysMem)),
1404 gpuPhysName.c_str()));
1405 }
1406
1410 __attribute__((noinline))
1411 void UpdateCpuNames()
1412 {
1413 std::string sname = m_name;
1414 if(sname.empty())
1415 sname = "unnamed";
1416 std::string prefix = std::string("AcceleratorBuffer.") + sname + ".";
1417
1418 std::string cpuBufName = prefix + "m_cpuBuffer";
1419 std::string cpuPhysName = prefix + "m_cpuPhysMem";
1420
1421 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1422 vk::DebugUtilsObjectNameInfoEXT(
1423 vk::ObjectType::eBuffer,
1424 reinterpret_cast<uint64_t>(static_cast<VkBuffer>(**m_cpuBuffer)),
1425 cpuBufName.c_str()));
1426
1427 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1428 vk::DebugUtilsObjectNameInfoEXT(
1429 vk::ObjectType::eDeviceMemory,
1430 reinterpret_cast<uint64_t>(static_cast<VkDeviceMemory>(**m_cpuPhysMem)),
1431 cpuPhysName.c_str()));
1432 }
1433
1434public:
1435
1444 void SetName(std::string name)
1445 {
1446 m_name = name;
1447 if(g_hasDebugUtils)
1448 {
1449 if(m_gpuBuffer != nullptr)
1450 UpdateGpuNames();
1451 if(m_cpuBuffer != nullptr)
1452 UpdateCpuNames();
1453 }
1454 }
1455
1456};
1457
1458extern std::set<MemoryPressureHandler> g_memoryPressureHandlers;
1459
1460#endif
bool(* MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Memory pressure handler type, called when free memory reaches a warning level or a Vulkan allocation ...
Definition: AcceleratorBuffer.h:93
MemoryPressureLevel
Levels of memory pressure.
Definition: AcceleratorBuffer.h:67
@ Hard
A memory allocation has failed and we need to free memory immediately to continue execution.
@ Soft
Free memory has reached a warning threshold.
bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Called when we run low on memory.
Definition: scopehal.cpp:1023
MemoryPressureType
Types of memory pressure.
Definition: AcceleratorBuffer.h:84
@ Host
Pinned CPU-side memory.
@ Device
GPU-side memory.
std::set< MemoryPressureHandler > g_memoryPressureHandlers
List of handlers for low memory registered by various subsystems.
Definition: scopehal.cpp:143
Declaration of AlignedAllocator.
Declaration of QueueManager and QueueHandle.
Definition: AcceleratorBuffer.h:99
A buffer of memory which may be used by GPU acceleration.
Definition: AcceleratorBuffer.h:158
void CopyToGpu()
Copies the buffer contents from CPU to GPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:974
std::unique_ptr< vk::raii::Buffer > m_gpuBuffer
Buffer object for GPU-side memory.
Definition: AcceleratorBuffer.h:270
bool empty() const
Returns true if the container is empty.
Definition: AcceleratorBuffer.h:391
T * m_cpuPtr
CPU-side mapped pointer.
Definition: AcceleratorBuffer.h:258
void pop_front()
Removes the first item in the container.
Definition: AcceleratorBuffer.h:772
__attribute__((noinline)) void CopyFrom(const AcceleratorBuffer< T > &rhs)
Copies our content from another AcceleratorBuffer.
Definition: AcceleratorBuffer.h:502
void push_back(const T &value)
Adds a new element to the end of the container, allocating space if needed.
Definition: AcceleratorBuffer.h:720
bool IsCpuBufferStale() const
Returns true if the CPU-side buffer is stale.
Definition: AcceleratorBuffer.h:397
vk::DescriptorBufferInfo GetBufferInfo()
Returns a vk::DescriptorBufferInfo suitable for binding this object to.
Definition: AcceleratorBuffer.h:446
std::unique_ptr< vk::raii::DeviceMemory > m_cpuPhysMem
CPU-side physical memory.
Definition: AcceleratorBuffer.h:261
void MarkModifiedFromGpu()
Marks the GPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:863
size_t m_size
Size of the memory actually being used.
Definition: AcceleratorBuffer.h:296
bool IsReachableFromCpu(MemoryType mt)
Returns true if the given buffer type can be reached from the CPU.
Definition: AcceleratorBuffer.h:227
bool IsSingleSharedBuffer() const
Returns true if the object contains only a single buffer.
Definition: AcceleratorBuffer.h:421
bool HasCpuBuffer() const
Returns true if there is currently a CPU-side buffer.
Definition: AcceleratorBuffer.h:409
void MarkModifiedFromCpu()
Marks the CPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:852
std::unique_ptr< vk::raii::DeviceMemory > m_gpuPhysMem
GPU-side physical memory.
Definition: AcceleratorBuffer.h:264
void PrepareForCpuAccess()
Prepares the buffer to be accessed from the CPU.
Definition: AcceleratorBuffer.h:877
size_t size() const
Returns the actual size of the container (may be smaller than what was allocated)
Definition: AcceleratorBuffer.h:357
void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the GPU in the future.
Definition: AcceleratorBuffer.h:832
static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer &cmdBuf)
Adds a memory barrier for transferring data from host to device.
Definition: AcceleratorBuffer.h:1023
bool m_gpuPhysMemIsStale
True if m_gpuPhysMem contains stale data (m_cpuPtr has been modified and they point to different memory).
Definition: AcceleratorBuffer.h:279
bool IsReachableFromGpu(MemoryType mt)
Returns true if the given buffer type can be reached from the GPU.
Definition: AcceleratorBuffer.h:233
vk::Buffer GetBuffer()
Returns the preferred buffer for GPU-side access.
Definition: AcceleratorBuffer.h:429
void pop_back()
Removes the last item in the container.
Definition: AcceleratorBuffer.h:732
size_t GetCpuMemoryBytes() const
Returns the total reserved CPU memory, in bytes.
Definition: AcceleratorBuffer.h:369
void resize(size_t size)
Change the usable size of the container.
Definition: AcceleratorBuffer.h:457
size_t capacity() const
Returns the allocated size of the container.
Definition: AcceleratorBuffer.h:363
bool m_buffersAreSame
True if we have only one piece of physical memory accessible from both sides.
Definition: AcceleratorBuffer.h:273
void reserve(size_t size)
Reallocates buffers so that at least size elements of storage are available.
Definition: AcceleratorBuffer.h:484
UsageHint m_gpuAccessHint
Hint about how likely future GPU access is.
Definition: AcceleratorBuffer.h:313
MemoryType m_cpuMemoryType
Type of the CPU-side buffer.
Definition: AcceleratorBuffer.h:249
bool IsFastFromGpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the GPU.
Definition: AcceleratorBuffer.h:245
void PrepareForGpuAccess(bool outputOnly=false)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:899
std::unique_ptr< vk::raii::Buffer > m_cpuBuffer
Buffer object for CPU-side memory.
Definition: AcceleratorBuffer.h:267
MemoryType m_gpuMemoryType
Type of the GPU-side buffer.
Definition: AcceleratorBuffer.h:252
__attribute__((noinline)) void UpdateGpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1383
void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the CPU in the future.
Definition: AcceleratorBuffer.h:818
void FreeCpuBuffer()
Free the CPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1045
void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer &cmdBuf)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:926
__attribute__((noinline)) void UpdateCpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1410
bool HasGpuBuffer() const
Returns true if there is currently a GPU-side buffer.
Definition: AcceleratorBuffer.h:415
void push_front(const T &value)
Inserts a new item at the beginning of the container. This is inefficient due to copying.
Definition: AcceleratorBuffer.h:743
void CopyToGpuNonblocking(vk::raii::CommandBuffer &cmdBuf)
Copy the buffer contents from CPU to GPU without blocking on the CPU.
Definition: AcceleratorBuffer.h:998
size_t GetGpuMemoryBytes() const
Returns the total reserved GPU memory, in bytes.
Definition: AcceleratorBuffer.h:380
int m_tempFileHandle
File handle used for MEM_TYPE_CPU_PAGED.
Definition: AcceleratorBuffer.h:283
void shrink_to_fit()
Frees unused memory so that m_size == m_capacity.
Definition: AcceleratorBuffer.h:493
size_t m_capacity
Size of the allocated memory space (may be larger than m_size)
Definition: AcceleratorBuffer.h:293
void FreeGpuBuffer(bool dataLossOK=false)
Free the GPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1083
bool IsFastFromCpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the CPU.
Definition: AcceleratorBuffer.h:239
void CopyToCpu()
Copies the buffer contents from GPU to CPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:953
__attribute__((noinline)) void FreeCpuPointer(T *ptr
Frees a CPU-side buffer.
AcceleratorBuffer(const std::string &name="")
Creates a new AcceleratorBuffer with no content.
Definition: AcceleratorBuffer.h:322
MemoryAttributes
Attributes that a memory buffer can have.
Definition: AcceleratorBuffer.h:175
bool m_cpuPhysMemIsStale
True if m_cpuPtr contains stale data (m_gpuPhysMem has been modified and they point to different memory).
Definition: AcceleratorBuffer.h:276
__attribute__((noinline)) void AllocateCpuBuffer(size_t size)
Allocates a buffer for CPU access.
Definition: AcceleratorBuffer.h:1112
__attribute__((noinline)) void Reallocate(size_t size)
Reallocates the buffer so that it contains exactly size elements.
Definition: AcceleratorBuffer.h:548
UsageHint m_cpuAccessHint
Hint about how likely future CPU access is.
Definition: AcceleratorBuffer.h:310
MemoryType
Types of memory buffer.
Definition: AcceleratorBuffer.h:193
T * GetCpuPointer()
Gets a pointer to the CPU-side buffer.
Definition: AcceleratorBuffer.h:440
void SetName(std::string name)
Sets the debug name for this buffer.
Definition: AcceleratorBuffer.h:1444
void clear()
Resize the container to be empty (but don't free memory)
Definition: AcceleratorBuffer.h:478
bool IsGpuBufferStale() const
Returns true if the GPU-side buffer is stale.
Definition: AcceleratorBuffer.h:403
void deallocate(T *const p, const size_t unused) const
Free a block of memory.
Definition: AlignedAllocator.h:194
T * allocate(size_t n) const
Allocate a block of memory.
Definition: AlignedAllocator.h:159
A unit of measurement, plus conversion to pretty-printed output.
Definition: Unit.h:57
uint32_t g_vkPinnedMemoryType
Vulkan memory type for CPU-based memory that is also GPU-readable.
Definition: VulkanInit.cpp:118
std::mutex g_vkTransferMutex
Mutex for interlocking access to g_vkTransferCommandBuffer and g_vkTransferCommandPool.
Definition: VulkanInit.cpp:112
bool g_hasDebugUtils
Indicates whether the VK_EXT_debug_utils extension is available.
Definition: VulkanInit.cpp:195
std::shared_ptr< vk::raii::Device > g_vkComputeDevice
The Vulkan device selected for compute operations (may or may not be same device as rendering)
Definition: VulkanInit.cpp:71
std::unique_ptr< vk::raii::CommandBuffer > g_vkTransferCommandBuffer
Command buffer for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:89
std::shared_ptr< QueueHandle > g_vkTransferQueue
Queue for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:98
uint32_t g_vkLocalMemoryType
Vulkan memory type for GPU-based memory (generally not CPU-readable, except on unified memory systems).
Definition: VulkanInit.cpp:124
bool g_vulkanDeviceHasUnifiedMemory
Indicates whether the Vulkan device is unified memory.
Definition: VulkanInit.cpp:220