ngscopeclient v0.1
AcceleratorBuffer.h
Go to the documentation of this file.
1/***********************************************************************************************************************
2* *
3* libscopehal *
4* *
5* Copyright (c) 2012-2025 Andrew D. Zonenberg and contributors *
6* All rights reserved. *
7* *
8* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the *
9* following conditions are met: *
10* *
11* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the *
12* following disclaimer. *
13* *
14* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the *
15* following disclaimer in the documentation and/or other materials provided with the distribution. *
16* *
17* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products *
18* derived from this software without specific prior written permission. *
19* *
20* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
21* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL *
22* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
23* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *
24* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
25* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *
26* POSSIBILITY OF SUCH DAMAGE. *
27* *
28***********************************************************************************************************************/
29
35#ifndef AcceleratorBuffer_h
36#define AcceleratorBuffer_h
37
38#include "AlignedAllocator.h"
39#include "QueueManager.h"
40
41#ifdef _WIN32
42#undef MemoryBarrier
43#endif
44
45#ifndef _WIN32
46#include <sys/mman.h>
47#include <unistd.h>
48#endif
49
50#include <type_traits>
51
52extern uint32_t g_vkPinnedMemoryType;
53extern uint32_t g_vkLocalMemoryType;
54extern std::shared_ptr<vk::raii::Device> g_vkComputeDevice;
55extern std::unique_ptr<vk::raii::CommandBuffer> g_vkTransferCommandBuffer;
56extern std::shared_ptr<QueueHandle> g_vkTransferQueue;
57extern std::mutex g_vkTransferMutex;
58
59extern bool g_hasDebugUtils;
61
62template<class T>
64
67{
69 Hard,
70
79 Soft
80};
81
84{
86 Host,
87
89 Device
90};
91
93typedef bool (*MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize);
94
95bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize);
96
97template<class T>
99{
100public:
101 using value_type = T;
102 using iterator_category = std::forward_iterator_tag;
103 using difference_type = std::ptrdiff_t;
104 using pointer = T*;
105 using reference = T&;
106
108 : m_index(i)
109 , m_buf(buf)
110 {}
111
//Dereferences the iterator, returning a reference to the element at the current index of the parent buffer.
//The parent buffer's CPU-side data must be valid (see PrepareForCpuAccess on AcceleratorBuffer).
112 T& operator*()
113 { return m_buf[m_index]; }
114
//Returns the element index this iterator currently points to within the parent buffer
115 size_t GetIndex() const
116 { return m_index; }
117
118 bool operator!=(AcceleratorBufferIterator<T>& it)
119 {
120 //TODO: should we check m_buf equality too?
121 //Will slow things down, but be more semantically correct. Does anything care?
122 return (m_index != it.m_index);
123 }
124
125 AcceleratorBufferIterator<T>& operator++()
126 {
127 m_index ++;
128 return *this;
129 }
130
131protected:
132 size_t m_index;
134};
135
136template<class T>
137std::ptrdiff_t operator-(const AcceleratorBufferIterator<T>& a, const AcceleratorBufferIterator<T>& b)
138{ return a.GetIndex() - b.GetIndex(); }
139
156template<class T>
158{
159protected:
160
162 // Allocator for CPU-only memory
163
164 AlignedAllocator<T, 32> m_cpuAllocator;
165
166public:
167
169 // Buffer types
170
175 {
176 //Location of the memory
177 MEM_ATTRIB_CPU_SIDE = 0x1,
178 MEM_ATTRIB_GPU_SIDE = 0x2,
179
180 //Reachability
181 MEM_ATTRIB_CPU_REACHABLE = 0x4,
182 MEM_ATTRIB_GPU_REACHABLE = 0x8,
183
184 //Speed
185 MEM_ATTRIB_CPU_FAST = 0x10,
186 MEM_ATTRIB_GPU_FAST = 0x20
187 };
188
193 {
194 //Pointer is invalid
195 MEM_TYPE_NULL = 0,
196
197 //Memory is located on the CPU but backed by a file and may get paged out
198 MEM_TYPE_CPU_PAGED =
199 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE,
200
201 //Memory is located on the CPU but not pinned, or otherwise accessible to the GPU
202 MEM_TYPE_CPU_ONLY =
203 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST,
204
205 //Memory is located on the CPU, but can be accessed by the GPU.
206 //Fast to access from the CPU, but accesses from the GPU require PCIe DMA and is slow
207 //(unless platform uses unified memory, in which case g_vulkanDeviceHasUnifiedMemory will be true)
208 MEM_TYPE_CPU_DMA_CAPABLE =
209 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST | MEM_ATTRIB_GPU_REACHABLE,
210
211 //Memory is located on the GPU and cannot be directly accessed by the CPU
212 MEM_TYPE_GPU_ONLY =
213 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST,
214
215 //Memory is located on the GPU, but can be accessed by the CPU.
216 //Fast to access from the GPU, but accesses from the CPU require PCIe DMA and is slow
217 //(should not be used if platform uses unified memory, in which case g_vulkanDeviceHasUnifiedMemory will be true)
218 MEM_TYPE_GPU_DMA_CAPABLE =
219 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST | MEM_ATTRIB_CPU_REACHABLE
220 };
221
222protected:
223
228 { return (mt & MEM_ATTRIB_CPU_REACHABLE) != 0; }
229
234 { return (mt & MEM_ATTRIB_GPU_REACHABLE) != 0; }
235
240 { return (mt & MEM_ATTRIB_CPU_FAST) != 0; }
241
246 { return (mt & MEM_ATTRIB_GPU_FAST) != 0; }
247
250
253
255 // The actual memory buffers
256
259
261 std::unique_ptr<vk::raii::DeviceMemory> m_cpuPhysMem;
262
264 std::unique_ptr<vk::raii::DeviceMemory> m_gpuPhysMem;
265
267 std::unique_ptr<vk::raii::Buffer> m_cpuBuffer;
268
270 std::unique_ptr<vk::raii::Buffer> m_gpuBuffer;
271
274
277
280
282#ifndef _WIN32
284#endif
285
287 // Iteration
288
290 // Sizes of buffers
291
294
296 size_t m_size;
297
299 // Hint configuration
300public:
//Expected access frequency from one side (CPU or GPU); used when (re)allocating
//to choose between paged, pinned, and device-local memory
301 enum UsageHint
302 {
//No access expected from this side
303 HINT_NEVER,
//Access possible but expected to be infrequent
304 HINT_UNLIKELY,
//Access expected to be frequent
305 HINT_LIKELY
306 };
307
308protected:
311
314
316 // Construction / destruction
317public:
318
322 AcceleratorBuffer(const std::string& name = "")
323 : m_cpuMemoryType(MEM_TYPE_NULL)
324 , m_gpuMemoryType(MEM_TYPE_NULL)
325 , m_cpuPtr(nullptr)
326 , m_gpuPhysMem(nullptr)
327 , m_buffersAreSame(false)
328 , m_cpuPhysMemIsStale(false)
329 , m_gpuPhysMemIsStale(false)
330 #ifndef _WIN32
332 #endif
333 , m_capacity(0)
334 , m_size(0)
335 , m_cpuAccessHint(HINT_LIKELY) //default access hint: CPU-side pinned memory
336 , m_gpuAccessHint(HINT_UNLIKELY)
337 , m_name(name)
338 {
339 //non-trivially-copyable types can't be copied to GPU except on unified memory platforms
340 if(!std::is_trivially_copyable<T>::value && !g_vulkanDeviceHasUnifiedMemory)
341 m_gpuAccessHint = HINT_NEVER;
342 }
343
345 {
347 FreeGpuBuffer(true);
348 }
349
351 // General accessors
352public:
353
//Returns the number of elements currently in the container (not the allocated capacity)
357 size_t size() const
358 { return m_size; }
359
//Returns the number of elements of storage currently allocated (always >= size())
363 size_t capacity() const
364 { return m_capacity; }
365
369 size_t GetCpuMemoryBytes() const
370 {
371 if(m_cpuMemoryType == MEM_TYPE_NULL)
372 return 0;
373 else
374 return m_capacity * sizeof(T);
375 }
376
380 size_t GetGpuMemoryBytes() const
381 {
382 if(m_gpuMemoryType == MEM_TYPE_NULL)
383 return 0;
384 else
385 return m_capacity * sizeof(T);
386 }
387
//Returns true if the container holds no elements
391 bool empty() const
392 { return (m_size == 0); }
393
//Returns true if the CPU-side copy of the data is stale
//(i.e. the authoritative content is currently GPU-side and has not been copied back)
397 bool IsCpuBufferStale() const
398 { return m_cpuPhysMemIsStale; }
399
//Returns true if the GPU-side copy of the data is stale
//(i.e. the authoritative content is currently CPU-side and has not been pushed to the GPU)
403 bool IsGpuBufferStale() const
404 { return m_gpuPhysMemIsStale; }
405
//Returns true if a CPU-side buffer is currently allocated (mapped pointer is non-null)
409 bool HasCpuBuffer() const
410 { return (m_cpuPtr != nullptr); }
411
//Returns true if a GPU-side physical memory allocation currently exists
415 bool HasGpuBuffer() const
416 { return (m_gpuPhysMem != nullptr); }
417
422 { return m_buffersAreSame; }
423
429 vk::Buffer GetBuffer()
430 {
431 if(m_gpuBuffer != nullptr)
432 return **m_gpuBuffer;
433 else
434 return **m_cpuBuffer;
435 }
436
441 { return m_cpuPtr; }
442
446 vk::DescriptorBufferInfo GetBufferInfo()
447 {
448 return vk::DescriptorBufferInfo(
449 GetBuffer(),
450 0,
451 m_size * sizeof(T));
452 }
453
457 void resize(size_t size)
458 {
459 //Need to grow?
460 if(size > m_capacity)
461 {
462 //Default to doubling in size each time to avoid excessive copying.
463 if(m_capacity == 0)
464 reserve(size);
465 else if(size > m_capacity*2)
466 reserve(size);
467 else
468 reserve(m_capacity * 2);
469 }
470
471 //Update our size
472 m_size = size;
473 }
474
//Removes all elements (size becomes zero). Does not free or shrink the underlying buffers.
478 void clear()
479 { resize(0); }
480
484 void reserve(size_t size)
485 {
486 if(size > m_capacity)
487 Reallocate(size);
488 }
489
494 {
495 if(m_size != m_capacity)
496 Reallocate(m_size);
497 }
498
502 __attribute__((noinline))
503 void CopyFrom(const AcceleratorBuffer<T>& rhs)
504 {
505 //Copy placement hints from the other instance, then resize to match
508 resize(rhs.m_size);
509
510 //Valid data CPU side? Copy it to here
511 if(rhs.HasCpuBuffer() && !rhs.m_cpuPhysMemIsStale)
512 {
513 //non-trivially-copyable types have to be copied one at a time
514 if(!std::is_trivially_copyable<T>::value)
515 {
516 for(size_t i=0; i<m_size; i++)
517 m_cpuPtr[i] = rhs.m_cpuPtr[i];
518 }
519
520 //Trivially copyable types can be done more efficiently in a block
521 else
522 memcpy(m_cpuPtr, rhs.m_cpuPtr, m_size * sizeof(T));
523 }
525
526 //Valid data GPU side? Copy it to here
527 if(rhs.HasGpuBuffer() && !rhs.m_gpuPhysMemIsStale)
528 {
529 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
530
531 //Make the transfer request
532 g_vkTransferCommandBuffer->begin({});
533 vk::BufferCopy region(0, 0, m_size * sizeof(T));
534 g_vkTransferCommandBuffer->copyBuffer(**rhs.m_gpuBuffer, **m_gpuBuffer, {region});
536
537 //Submit the request and block until it completes
539 }
541 }
542
543protected:
544
548 __attribute__((noinline))
549 void Reallocate(size_t size)
550 {
551 if(size == 0)
552 return;
553
554 /*
555 If we are a bool[] or similar one-byte type, we are likely going to be accessed from the GPU via a uint32
556 descriptor for at least some shaders (such as rendering).
557
558 Round our actual allocated size to the next multiple of 4 bytes. The padding values are unimportant as the
559 bytes are never written, and the data read from the high bytes in the uint32 is discarded by the GPU.
560 We just need to ensure the memory is allocated so the 32-bit read is legal to perform.
561 */
562 if( (sizeof(T) == 1) && (m_gpuAccessHint != HINT_NEVER) )
563 {
564 if(size & 3)
565 size = (size | 3) + 1;
566 }
567
568 //If we do not anticipate using the data on the CPU, we shouldn't waste RAM.
569 //Allocate a GPU-local buffer, copy data to it, then free the CPU-side buffer
570 //Don't do this if the platform has unified memory
571 if( (m_cpuAccessHint == HINT_NEVER) && !g_vulkanDeviceHasUnifiedMemory)
572 {
575 }
576
577 else
578 {
579 //Resize CPU memory
580 //TODO: optimization, when expanding a MEM_TYPE_CPU_PAGED we can just enlarge the file
581 //and not have to make a new temp file and copy the content
582 if(m_cpuPtr != nullptr)
583 {
584 //Save the old pointer
585 auto pOld = m_cpuPtr;
586 auto pOldPin = std::move(m_cpuPhysMem);
587 auto type = m_cpuMemoryType;
588
589 //Allocate the new buffer
590 AllocateCpuBuffer(size);
591
592 //If CPU-side data is valid, copy existing data over.
593 //New pointer is still valid in this case.
595 {
596 //non-trivially-copyable types have to be copied one at a time
597 if(!std::is_trivially_copyable<T>::value)
598 {
599 for(size_t i=0; i<m_size; i++)
600 m_cpuPtr[i] = std::move(pOld[i]);
601 }
602
603 //Trivially copyable types can be done more efficiently in a block
604 //gcc warns about this even though we only call this code if the type is trivially copyable,
605 //so disable the warning.
606 else
607 {
608 #pragma GCC diagnostic push
609 #pragma GCC diagnostic ignored "-Wclass-memaccess"
610
611 memcpy(m_cpuPtr, pOld, m_size * sizeof(T));
612
613 #pragma GCC diagnostic pop
614 }
615 }
616
617 //If CPU-side data is stale, just allocate the new buffer but leave it as stale
618 //(don't do a potentially unnecessary copy from the GPU)
619
620 //Now we're done with the old pointer so get rid of it
621 FreeCpuPointer(pOld, pOldPin, type, m_capacity);
622 }
623
624 //Allocate new CPU memory, replacing our current (null) pointer
625 else
626 {
627 AllocateCpuBuffer(size);
628
629 //If we already had GPU-side memory containing data, then the new CPU-side buffer is stale
630 //until we copy stuff over to it
631 if(m_gpuPhysMem != nullptr)
632 m_cpuPhysMemIsStale = true;
633 }
634 }
635
636 //We're expecting to use data on the GPU, so prepare to do stuff with it
637 if(m_gpuAccessHint != HINT_NEVER)
638 {
639 //If GPU access is unlikely, we probably want to just use pinned memory.
640 //If available, mark buffers as the same, and free any existing GPU buffer we might have
641 //Always use pinned memory if the platform has unified memory
642 if( ((m_gpuAccessHint == HINT_UNLIKELY) && (m_cpuMemoryType == MEM_TYPE_CPU_DMA_CAPABLE)) || g_vulkanDeviceHasUnifiedMemory )
644
645 //Nope, we need to allocate dedicated GPU memory
646 else
647 {
648 //If we have an existing buffer with valid content, save it and copy content over
649 if( (m_gpuPhysMem != nullptr) && !m_gpuPhysMemIsStale && (m_size != 0))
650 {
651 auto pOld = std::move(m_gpuPhysMem);
652 //auto type = m_gpuMemoryType;
653 auto bOld = std::move(m_gpuBuffer);
654
655 //Allocation successful!
656 if(AllocateGpuBuffer(size))
657 {
658 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
659
660 //Make the transfer request
661 g_vkTransferCommandBuffer->begin({});
662 vk::BufferCopy region(0, 0, m_size * sizeof(T));
663 g_vkTransferCommandBuffer->copyBuffer(**bOld, **m_gpuBuffer, {region});
665
666 //Submit the request and block until it completes
668
669 //make sure buffer is freed before underlying physical memory (pOld) goes out of scope
670 bOld = nullptr;
671 }
672
673 //Allocation failed!
674 else
675 {
676 //Revert to the old buffer. We're now in a consistent state again
677 m_gpuPhysMem = std::move(pOld);
678 m_gpuBuffer = std::move(bOld);
679
680 //Make sure we have a CPU side buffer that's DMA capable
681 if(m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE)
682 {
683 SetCpuAccessHint(HINT_LIKELY);
684 SetGpuAccessHint(HINT_LIKELY);
685 AllocateCpuBuffer(size);
686 }
687
688 //Free the GPU buffer, moving its contents to the CPU
690 }
691 }
692
693 //Nope, just allocate a new buffer
694 else
695 {
696 //Allocation successful? We now have the buffer
697 if(AllocateGpuBuffer(size))
698 {
699 //If we already had CPU-side memory containing data, then the new GPU-side buffer is stale
700 //until we copy stuff over to it.
701 //Special case: if m_size is 0 (newly allocated buffer) we're not stale yet
702 if( (m_cpuPhysMem != nullptr) && (m_size != 0) )
703 m_gpuPhysMemIsStale = true;
704 }
705
706 //Allocation failed? No change, we already had the CPU buffer and don't have to touch anything
707 //But did the CPU buffer exist? if not, allocate *something*
708 else if(m_cpuPhysMem == nullptr)
709 {
710 SetCpuAccessHint(HINT_LIKELY);
711 SetGpuAccessHint(HINT_LIKELY);
712 AllocateCpuBuffer(size);
713 }
714 }
715 }
716 }
717
718 //Existing GPU buffer we never expect to use again - needs to be freed
719 else if(m_gpuPhysMem != nullptr)
721
722 //We are never going to use the buffer on the GPU, but don't have any existing GPU memory
723 //so no action required
724 else
725 {
726 }
727
728 //Update our capacity
729 m_capacity = size;
730
731 //If we have a pinned buffer and nothing on the other side, there's a single shared physical memory region
733 ( (m_cpuMemoryType == MEM_TYPE_CPU_DMA_CAPABLE) && (m_gpuMemoryType == MEM_TYPE_NULL) ) ||
734 ( (m_cpuMemoryType == MEM_TYPE_NULL) && (m_gpuMemoryType == MEM_TYPE_GPU_DMA_CAPABLE) );
735 }
736
738 // CPU-side STL-esque container API
739
740 //PrepareForCpuAccess() *must* be called prior to calling any of these methods.
741public:
742
//Read-only element access. CPU-side data must be valid (call PrepareForCpuAccess first).
743 const T& operator[](size_t i) const
744 { return m_cpuPtr[i]; }
745
//Mutable element access. CPU-side data must be valid (call PrepareForCpuAccess first).
//Does not perform any modification tracking; caller is responsible for that when writing.
746 T& operator[](size_t i)
747 { return m_cpuPtr[i]; }
748
752 void push_back(const T& value)
753 {
754 size_t cursize = m_size;
755 resize(m_size + 1);
756 m_cpuPtr[cursize] = value;
757
759 }
760
764 void push_back_nomarkmod(const T& value)
765 {
766 size_t cursize = m_size;
767 resize(m_size + 1);
768 m_cpuPtr[cursize] = value;
769 }
770
774 void pop_back()
775 {
776 if(!empty())
777 resize(m_size - 1);
778 }
779
785 void push_front(const T& value)
786 {
787 size_t cursize = m_size;
788 resize(m_size + 1);
789
791
792 //non-trivially-copyable types have to be copied one at a time
793 if(!std::is_trivially_copyable<T>::value)
794 {
795 for(size_t i=0; i<cursize; i++)
796 m_cpuPtr[i+1] = std::move(m_cpuPtr[i]);
797 }
798
799 //Trivially copyable types can be done more efficiently in a block
800 else
801 memmove(m_cpuPtr+1, m_cpuPtr, sizeof(T) * (cursize));
802
803 //Insert the new first element
804 m_cpuPtr[0] = value;
805
807 }
808
815 {
816 //No need to move data if popping last element
817 if(m_size == 1)
818 {
819 clear();
820 return;
821 }
822
823 //Don't touch GPU side buffer
824
826
827 //non-trivially-copyable types have to be copied one at a time
828 if(!std::is_trivially_copyable<T>::value)
829 {
830 for(size_t i=0; i<m_size-1; i++)
831 m_cpuPtr[i] = std::move(m_cpuPtr[i+1]);
832 }
833
834 //Trivially copyable types can be done more efficiently in a block
835 else
836 memmove(m_cpuPtr, m_cpuPtr+1, sizeof(T) * (m_size-1));
837
838 resize(m_size - 1);
839
841 }
842
844 { return AcceleratorBufferIterator<T>(*this, 0); }
845
847 { return AcceleratorBufferIterator<T>(*this, m_size); }
848
850 // Hints about near-future usage patterns
851
852public:
853
860 void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately = false)
861 {
862 m_cpuAccessHint = hint;
863
864 if(reallocateImmediately && (m_size != 0))
865 Reallocate(m_size);
866 }
867
874 void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately = false)
875 {
876 //Only trivially copyable datatypes are allowed on the GPU
877 if(!std::is_trivially_copyable<T>::value)
878 hint = HINT_NEVER;
879
880 m_gpuAccessHint = hint;
881
882 if(reallocateImmediately && (m_size != 0))
883 Reallocate(m_size);
884 }
885
887 // Cache invalidation
888
895 {
897 m_gpuPhysMemIsStale = true;
898 }
899
906 {
908 m_cpuPhysMemIsStale = true;
909 }
910
912 // Preparation for access
913
920 {
921 //Early out if no content
922 if(m_size == 0)
923 return;
924
925 //If there's no buffer at all on the CPU, allocate one
926 if(!HasCpuBuffer() && (m_gpuMemoryType != MEM_TYPE_GPU_DMA_CAPABLE))
927 AllocateCpuBuffer(m_capacity);
928
930 CopyToCpu();
931 }
932
941 void PrepareForGpuAccess(bool outputOnly = false)
942 {
943 //Early out if no content or if unified memory
945 return;
946
947 //If our current hint has no GPU access at all, update to say "unlikely" and reallocate
948 if(m_gpuAccessHint == HINT_NEVER)
949 SetGpuAccessHint(HINT_UNLIKELY, true);
950
951 //If we don't have a buffer, allocate one unless our CPU buffer is pinned and GPU-readable
952 if(!HasGpuBuffer() && (m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE) )
953 {
954 if(!AllocateGpuBuffer(m_capacity))
955 return;
956 }
957
958 //Make sure the GPU-side buffer is up to date
959 if(m_gpuPhysMemIsStale && !outputOnly)
960 CopyToGpu();
961 }
962
971 void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer& cmdBuf)
972 {
973 //Early out if no content or if unified memory
975 return;
976
977 //If our current hint has no GPU access at all, update to say "unlikely" and reallocate
978 if(m_gpuAccessHint == HINT_NEVER)
979 SetGpuAccessHint(HINT_UNLIKELY, true);
980
981 //If we don't have a buffer, allocate one unless our CPU buffer is pinned and GPU-readable
982 if(!HasGpuBuffer() && (m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE) )
983 {
984 if(!AllocateGpuBuffer(m_capacity))
985 return;
986 }
987
988 //Make sure the GPU-side buffer is up to date
989 if(m_gpuPhysMemIsStale && !outputOnly)
990 CopyToGpuNonblocking(cmdBuf);
991 }
992
993protected:
994
996 // Copying of buffer content
997
1002 {
1003 assert(std::is_trivially_copyable<T>::value);
1004
1005 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
1006
1007 //Make the transfer request
1008 g_vkTransferCommandBuffer->begin({});
1009 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1010 g_vkTransferCommandBuffer->copyBuffer(**m_gpuBuffer, **m_cpuBuffer, {region});
1012
1013 //Submit the request and block until it completes
1015
1016 m_cpuPhysMemIsStale = false;
1017 }
1018
1023 {
1024 assert(std::is_trivially_copyable<T>::value);
1025
1026 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
1027
1028 //Make the transfer request
1029 g_vkTransferCommandBuffer->begin({});
1030 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1031 g_vkTransferCommandBuffer->copyBuffer(**m_cpuBuffer, **m_gpuBuffer, {region});
1033
1034 //Submit the request and block until it completes
1036
1037 m_gpuPhysMemIsStale = false;
1038 }
1039
1040
//Records a CPU-to-GPU copy of the buffer contents into the supplied command buffer
//without blocking; the copy actually executes when cmdBuf is later submitted.
//Only legal for trivially copyable T (asserted below).
//@param cmdBuf Command buffer (must already be in the recording state) to append the transfer to
1046 void CopyToGpuNonblocking(vk::raii::CommandBuffer& cmdBuf)
1047 {
1048 assert(std::is_trivially_copyable<T>::value);
1049
1050 //Make the transfer request
1051 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1052 cmdBuf.copyBuffer(**m_cpuBuffer, **m_gpuBuffer, {region});
1053
//Barrier so that compute-shader reads/writes recorded later in this command buffer
//observe the results of the transfer
1054 //Add the barrier
1055 cmdBuf.pipelineBarrier(
1056 vk::PipelineStageFlagBits::eTransfer,
1057 vk::PipelineStageFlagBits::eComputeShader,
1058 {},
1059 vk::MemoryBarrier(
1060 vk::AccessFlagBits::eTransferWrite,
1061 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1062 {},
1063 {});
1064
//Mark the GPU copy as current now; NOTE(review): contents only become valid once cmdBuf
//actually executes, so callers must submit cmdBuf before any other GPU work reads the buffer
1065 m_gpuPhysMemIsStale = false;
1066 }
1067public:
//Records a memory barrier into cmdBuf making host-to-device transfer writes visible to
//subsequent compute-shader reads and writes in the same command buffer.
//@param cmdBuf Command buffer (in the recording state) to append the barrier to
1071 static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer& cmdBuf)
1072 {
1073 cmdBuf.pipelineBarrier(
1074 vk::PipelineStageFlagBits::eTransfer,
1075 vk::PipelineStageFlagBits::eComputeShader,
1076 {},
1077 vk::MemoryBarrier(
1078 vk::AccessFlagBits::eTransferWrite,
1079 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1080 {},
1081 {});
1082 }
1083
1084
1085protected:
1086
1088 // Cleanup
1089
1094 {
1095 //Early out if buffer is already null
1096 if(m_cpuPtr == nullptr)
1097 return;
1098
1099 //We have a buffer on the GPU.
1100 //If it's stale, need to push our updated content there before freeing the CPU-side copy
1101 if( (m_gpuMemoryType != MEM_TYPE_NULL) && m_gpuPhysMemIsStale && !empty())
1102 CopyToGpu();
1103
1104 //Free the Vulkan buffer object
1105 m_cpuBuffer = nullptr;
1106
1107 //Free the buffer and unmap any memory
1109
1110 //Mark CPU-side buffer as empty
1111 m_cpuPtr = nullptr;
1112 m_cpuPhysMem = nullptr;
1113 m_cpuMemoryType = MEM_TYPE_NULL;
1114 m_buffersAreSame = false;
1115
1116 //If we have no GPU-side buffer either, we're empty
1117 if(m_gpuMemoryType == MEM_TYPE_NULL)
1118 {
1119 m_size = 0;
1120 m_capacity = 0;
1121 }
1122 }
1123
1124public:
//Frees the GPU-side copy of the buffer, first copying content back to the CPU side
//if the CPU copy is stale, so no data is lost when a CPU buffer exists.
//@param dataLossOK Suppress the warning emitted when freeing GPU data with no CPU backing copy
1131 void FreeGpuBuffer(bool dataLossOK = false)
1132 {
1133 //Early out if buffer is already null
1134 if(m_gpuPhysMem == nullptr)
1135 return;
1136
//NOTE(review): this warning fires when the GPU copy is *stale* and there is no CPU buffer;
//in that state it is unclear where valid data lives — verify whether the intended condition
//is !m_gpuPhysMemIsStale (i.e. GPU holds the only valid copy)
1137 //If we do NOT have a CPU-side buffer, we're deleting all of our data! Warn for now
1138 if( (m_cpuMemoryType == MEM_TYPE_NULL) && m_gpuPhysMemIsStale && !empty() && !dataLossOK)
1139 {
1140 LogWarning("Freeing a GPU buffer without any CPU backing, may cause data loss\n");
1141 }
1142
1143 //If we have a CPU-side buffer, and it's stale, move our about-to-be-deleted content over before we free it
1144 if( (m_cpuMemoryType != MEM_TYPE_NULL) && m_cpuPhysMemIsStale && !empty() )
1145 CopyToCpu();
1146
//Release the buffer object before the device memory backing it
1147 m_gpuBuffer = nullptr;
1148 m_gpuPhysMem = nullptr;
1149 m_gpuMemoryType = MEM_TYPE_NULL;
1150 }
1151
1152protected:
1153
1155 // Allocation
1156
1160 __attribute__((noinline))
1161 void AllocateCpuBuffer(size_t size)
1162 {
1163 if(size == 0)
1164 LogFatal("AllocateCpuBuffer with size zero (invalid)\n");
1165
1166 //If any GPU access is expected, use pinned memory so we don't have to move things around
1167 if(m_gpuAccessHint != HINT_NEVER)
1168 {
1169 //Make a Vulkan buffer first
1170 vk::BufferCreateInfo bufinfo(
1171 {},
1172 size * sizeof(T),
1173 vk::BufferUsageFlagBits::eTransferSrc |
1174 vk::BufferUsageFlagBits::eTransferDst |
1175 vk::BufferUsageFlagBits::eStorageBuffer);
1176 m_cpuBuffer = std::make_unique<vk::raii::Buffer>(*g_vkComputeDevice, bufinfo);
1177
1178 //Figure out actual memory requirements of the buffer
1179 //(may be rounded up from what we asked for)
1180 auto req = m_cpuBuffer->getMemoryRequirements();
1181
1182 //Allocate the physical memory to back the buffer
1183 vk::MemoryAllocateInfo info(req.size, g_vkPinnedMemoryType);
1184 m_cpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1185
1186 //Map it and bind to the buffer
1187 m_cpuPtr = reinterpret_cast<T*>(m_cpuPhysMem->mapMemory(0, req.size));
1188 m_cpuBuffer->bindMemory(**m_cpuPhysMem, 0);
1189
1190 //We now have pinned memory
1191 m_cpuMemoryType = MEM_TYPE_CPU_DMA_CAPABLE;
1192
1193 if(g_hasDebugUtils)
1194 UpdateCpuNames();
1195 }
1196
1197 //If frequent CPU access is expected, use normal host memory
1198 else if(m_cpuAccessHint == HINT_LIKELY)
1199 {
1200 m_cpuBuffer = nullptr;
1201 m_cpuMemoryType = MEM_TYPE_CPU_ONLY;
1202 m_cpuPtr = m_cpuAllocator.allocate(size);
1203 }
1204
1205 //If infrequent CPU access is expected, use a memory mapped temporary file so it can be paged out to disk
1206 else
1207 {
1208 #ifdef _WIN32
1209
1210 //On Windows, use normal memory for now
1211 //until we figure out how to do this there
1212 m_cpuBuffer = nullptr;
1213 m_cpuMemoryType = MEM_TYPE_CPU_ONLY;
1214 m_cpuPtr = m_cpuAllocator.allocate(size);
1215
1216 #else
1217
1218 m_cpuBuffer = nullptr;
1219 m_cpuMemoryType = MEM_TYPE_CPU_PAGED;
1220
1221 //Make the temp file
1222 char fname[] = "/tmp/glscopeclient-tmpXXXXXX";
1223 m_tempFileHandle = mkstemp(fname);
1224 if(m_tempFileHandle < 0)
1225 {
1226 LogError("Failed to create temporary file %s\n", fname);
1227 abort();
1228 }
1229
1230 //Resize it to our desired file size
1231 size_t bytesize = size * sizeof(T);
1232 if(0 != ftruncate(m_tempFileHandle, bytesize))
1233 {
1234 LogError("Failed to resize temporary file %s\n", fname);
1235 abort();
1236 }
1237
1238 //Map it
1239 m_cpuPtr = reinterpret_cast<T*>(mmap(
1240 nullptr,
1241 bytesize,
1242 PROT_READ | PROT_WRITE,
1243 MAP_SHARED/* | MAP_UNINITIALIZED*/,
1245 0));
1246 if(m_cpuPtr == MAP_FAILED)
1247 {
1248 LogError("Failed to map temporary file %s\n", fname);
1249 perror("mmap failed: ");
1250 abort();
1251 }
1252 m_cpuMemoryType = MEM_TYPE_CPU_PAGED;
1253
1254 //Delete it (file will be removed by the OS after our active handle is closed)
1255 if(0 != unlink(fname))
1256 LogWarning("Failed to unlink temporary file %s, file will remain after application terminates\n", fname);
1257
1258 #endif
1259 }
1260
1261 //Memory has been allocated. Call constructors iff type is not trivially copyable
1262 //(This is not exactly 1:1 with having a constructor, but hopefully good enough?)
1263 if(!std::is_trivially_copyable<T>::value)
1264 {
1265 for(size_t i=0; i<size; i++)
1266 new(m_cpuPtr +i) T;
1267 }
1268 }
1269
//Frees a CPU-side allocation, running element destructors first for
//non-trivially-copyable types. The free path is selected by the memory type:
//paged memory is unmapped and its temp file handle closed, plain CPU memory goes
//back to the aligned allocator, and DMA-capable (pinned) memory must instead be
//freed via the overload that takes the owning vk::raii::DeviceMemory.
//@param ptr  Pointer to free
//@param type Memory type the pointer was allocated as
//@param size Number of elements in the allocation
1277 __attribute__((noinline))
1278 void FreeCpuPointer(T* ptr, MemoryType type, size_t size)
1279 {
1280 //Call destructors iff type is not trivially copyable
1281 if(!std::is_trivially_copyable<T>::value)
1282 {
1283 for(size_t i=0; i<size; i++)
1284 ptr[i].~T();
1285 }
1286
1287 switch(type)
1288 {
1289 case MEM_TYPE_NULL:
1290 //legal no-op
1291 break;
1292
//Pinned memory is owned by a vk::raii::DeviceMemory; this overload cannot release it
1293 case MEM_TYPE_CPU_DMA_CAPABLE:
1294 LogFatal("FreeCpuPointer for MEM_TYPE_CPU_DMA_CAPABLE requires the vk::raii::DeviceMemory\n");
1295 break;
1296
//Paged memory is an mmap'd temp file: unmap, then close (the file was unlinked at creation)
1297 case MEM_TYPE_CPU_PAGED:
1298 #ifndef _WIN32
1299 munmap(ptr, size * sizeof(T));
1300 close(m_tempFileHandle);
1301 m_tempFileHandle = -1;
1302 #endif
1303 break;
1304
1305 case MEM_TYPE_CPU_ONLY:
1306 m_cpuAllocator.deallocate(ptr, size);
1307 break;
1308
1309 default:
1310 LogFatal("FreeCpuPointer: invalid type %x\n", type);
1311 }
1312 }
1313
1321 __attribute__((noinline))
1322 void FreeCpuPointer(T* ptr, std::unique_ptr<vk::raii::DeviceMemory>& buf, MemoryType type, size_t size)
1323 {
1324 switch(type)
1325 {
1326 case MEM_TYPE_CPU_DMA_CAPABLE:
1327 buf->unmapMemory();
1328 break;
1329
1330 default:
1331 FreeCpuPointer(ptr, type, size);
1332 }
1333 }
1334
1340 __attribute__((noinline))
1341 bool AllocateGpuBuffer(size_t size)
1342 {
1343 assert(std::is_trivially_copyable<T>::value);
1344
1345 //Make a Vulkan buffer first
1346 vk::BufferCreateInfo bufinfo(
1347 {},
1348 size * sizeof(T),
1349 vk::BufferUsageFlagBits::eTransferSrc |
1350 vk::BufferUsageFlagBits::eTransferDst |
1351 vk::BufferUsageFlagBits::eStorageBuffer);
1352 m_gpuBuffer = std::make_unique<vk::raii::Buffer>(*g_vkComputeDevice, bufinfo);
1353
1354 //Figure out actual memory requirements of the buffer
1355 //(may be rounded up from what we asked for)
1356 auto req = m_gpuBuffer->getMemoryRequirements();
1357
1358 //Try to allocate the memory
1359 vk::MemoryAllocateInfo info(req.size, g_vkLocalMemoryType);
1360 try
1361 {
1362 //For now, always use local memory
1363 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1364 }
1365
1366 //Fallback path in case of low memory
1367 catch(vk::OutOfDeviceMemoryError& ex)
1368 {
1369 bool ok = false;
1370 while(!ok)
1371 {
1372 //Attempt to free memory and stop if we couldn't free more
1374 break;
1375
1377 try
1378 {
1379 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1380 ok = true;
1381 }
1382 catch(vk::OutOfDeviceMemoryError& ex2)
1383 {
1384 LogDebug("Allocation failed again\n");
1385 }
1386 }
1387
1388 //Retry one more time.
1389 //If we OOM simultaneously in two threads, it's possible to have the second OnMemoryPressure call
1390 //return false because the first one already freed all it could. But we might have enough free to continue.
1391 if(!ok)
1392 {
1393 LogDebug("Final retry\n");
1394 try
1395 {
1396 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1397 ok = true;
1398 }
1399 catch(vk::OutOfDeviceMemoryError& ex2)
1400 {
1401 LogDebug("Allocation failed again\n");
1402 }
1403 }
1404
1405 //If we get here, we couldn't allocate no matter what
1406 //Fall back to a CPU-side allocation
1407 if(!ok)
1408 {
1409 LogError(
1410 "Failed to allocate %s of GPU memory despite our best efforts to reclaim space, falling back to CPU-side pinned allocation\n",
1411 Unit(Unit::UNIT_BYTES).PrettyPrint(req.size, 4).c_str());
1412 m_gpuMemoryType = MEM_TYPE_NULL;
1413 m_gpuPhysMem = nullptr;
1414 m_gpuBuffer = nullptr;
1415 return false;
1416 }
1417 }
1418 m_gpuMemoryType = MEM_TYPE_GPU_ONLY;
1419
1420 m_gpuBuffer->bindMemory(**m_gpuPhysMem, 0);
1421
1422 if(g_hasDebugUtils)
1423 UpdateGpuNames();
1424
1425 return true;
1426 }
1427
1428protected:
1429
1431 std::string m_name;
1432
1436 __attribute__((noinline))
1437 void UpdateGpuNames()
1438 {
1439 std::string sname = m_name;
1440 if(sname.empty())
1441 sname = "unnamed";
1442 std::string prefix = std::string("AcceleratorBuffer.") + sname + ".";
1443
1444 std::string gpuBufName = prefix + "m_gpuBuffer";
1445 std::string gpuPhysName = prefix + "m_gpuPhysMem";
1446
1447 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1448 vk::DebugUtilsObjectNameInfoEXT(
1449 vk::ObjectType::eBuffer,
1450 reinterpret_cast<uint64_t>(static_cast<VkBuffer>(**m_gpuBuffer)),
1451 gpuBufName.c_str()));
1452
1453 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1454 vk::DebugUtilsObjectNameInfoEXT(
1455 vk::ObjectType::eDeviceMemory,
1456 reinterpret_cast<uint64_t>(static_cast<VkDeviceMemory>(**m_gpuPhysMem)),
1457 gpuPhysName.c_str()));
1458 }
1459
1463 __attribute__((noinline))
1464 void UpdateCpuNames()
1465 {
1466 std::string sname = m_name;
1467 if(sname.empty())
1468 sname = "unnamed";
1469 std::string prefix = std::string("AcceleratorBuffer.") + sname + ".";
1470
1471 std::string cpuBufName = prefix + "m_cpuBuffer";
1472 std::string cpuPhysName = prefix + "m_cpuPhysMem";
1473
1474 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1475 vk::DebugUtilsObjectNameInfoEXT(
1476 vk::ObjectType::eBuffer,
1477 reinterpret_cast<uint64_t>(static_cast<VkBuffer>(**m_cpuBuffer)),
1478 cpuBufName.c_str()));
1479
1480 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1481 vk::DebugUtilsObjectNameInfoEXT(
1482 vk::ObjectType::eDeviceMemory,
1483 reinterpret_cast<uint64_t>(static_cast<VkDeviceMemory>(**m_cpuPhysMem)),
1484 cpuPhysName.c_str()));
1485 }
1486
1487public:
1488
1497 void SetName(std::string name)
1498 {
1499 m_name = name;
1500 if(g_hasDebugUtils)
1501 {
1502 if(m_gpuBuffer != nullptr)
1503 UpdateGpuNames();
1504 if(m_cpuBuffer != nullptr)
1505 UpdateCpuNames();
1506 }
1507 }
1508
1509};
1510
1511extern std::set<MemoryPressureHandler> g_memoryPressureHandlers;
1512
1513#endif
bool(* MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Memory pressure handler type, called when free memory reaches a warning level or a Vulkan allocation fails, so subsystems can release memory.
Definition: AcceleratorBuffer.h:93
MemoryPressureLevel
Levels of memory pressure.
Definition: AcceleratorBuffer.h:67
@ Hard
A memory allocation has failed and we need to free memory immediately to continue execution.
@ Soft
Free memory has reached a warning threshold.
bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Called when we run low on memory.
Definition: scopehal.cpp:1038
MemoryPressureType
Types of memory pressure.
Definition: AcceleratorBuffer.h:84
@ Host
Pinned CPU-side memory.
@ Device
GPU-side memory.
std::set< MemoryPressureHandler > g_memoryPressureHandlers
List of handlers for low memory registered by various subsystems.
Definition: scopehal.cpp:147
Declaration of AlignedAllocator.
Declaration of QueueManager and QueueHandle.
Definition: AcceleratorBuffer.h:99
A buffer of memory which may be used by GPU acceleration.
Definition: AcceleratorBuffer.h:158
void CopyToGpu()
Copy the buffer contents from CPU to GPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:1022
std::unique_ptr< vk::raii::Buffer > m_gpuBuffer
Buffer object for GPU-side memory.
Definition: AcceleratorBuffer.h:270
bool empty() const
Returns true if the container is empty.
Definition: AcceleratorBuffer.h:391
T * m_cpuPtr
CPU-side mapped pointer.
Definition: AcceleratorBuffer.h:258
void pop_front()
Removes the first item in the container.
Definition: AcceleratorBuffer.h:814
__attribute__((noinline)) void CopyFrom(const AcceleratorBuffer< T > &rhs)
Copies our content from another AcceleratorBuffer.
Definition: AcceleratorBuffer.h:502
void push_back(const T &value)
Adds a new element to the end of the container, allocating space if needed.
Definition: AcceleratorBuffer.h:752
bool IsCpuBufferStale() const
Returns true if the CPU-side buffer is stale.
Definition: AcceleratorBuffer.h:397
void push_back_nomarkmod(const T &value)
Adds a new element to the end of the container, allocating space if needed but without calling MarkMo...
Definition: AcceleratorBuffer.h:764
vk::DescriptorBufferInfo GetBufferInfo()
Returns a vk::DescriptorBufferInfo suitable for binding this object to.
Definition: AcceleratorBuffer.h:446
std::unique_ptr< vk::raii::DeviceMemory > m_cpuPhysMem
CPU-side physical memory.
Definition: AcceleratorBuffer.h:261
void MarkModifiedFromGpu()
Marks the GPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:905
size_t m_size
Size of the memory actually being used.
Definition: AcceleratorBuffer.h:296
bool IsReachableFromCpu(MemoryType mt)
Returns true if the given buffer type can be reached from the CPU.
Definition: AcceleratorBuffer.h:227
bool IsSingleSharedBuffer() const
Returns true if the object contains only a single buffer.
Definition: AcceleratorBuffer.h:421
bool HasCpuBuffer() const
Returns true if there is currently a CPU-side buffer.
Definition: AcceleratorBuffer.h:409
void MarkModifiedFromCpu()
Marks the CPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:894
std::unique_ptr< vk::raii::DeviceMemory > m_gpuPhysMem
GPU-side physical memory.
Definition: AcceleratorBuffer.h:264
void PrepareForCpuAccess()
Prepares the buffer to be accessed from the CPU.
Definition: AcceleratorBuffer.h:919
size_t size() const
Returns the actual size of the container (may be smaller than what was allocated)
Definition: AcceleratorBuffer.h:357
void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the GPU in the future.
Definition: AcceleratorBuffer.h:874
static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer &cmdBuf)
Adds a memory barrier for transferring data from host to device.
Definition: AcceleratorBuffer.h:1071
bool m_gpuPhysMemIsStale
True if m_gpuPhysMem contains stale data (m_cpuPtr has been modified and they point to different memo...
Definition: AcceleratorBuffer.h:279
bool IsReachableFromGpu(MemoryType mt)
Returns true if the given buffer type can be reached from the GPU.
Definition: AcceleratorBuffer.h:233
vk::Buffer GetBuffer()
Returns the preferred buffer for GPU-side access.
Definition: AcceleratorBuffer.h:429
void pop_back()
Removes the last item in the container.
Definition: AcceleratorBuffer.h:774
size_t GetCpuMemoryBytes() const
Returns the total reserved CPU memory, in bytes.
Definition: AcceleratorBuffer.h:369
void resize(size_t size)
Change the usable size of the container.
Definition: AcceleratorBuffer.h:457
size_t capacity() const
Returns the allocated size of the container.
Definition: AcceleratorBuffer.h:363
bool m_buffersAreSame
True if we have only one piece of physical memory accessible from both sides.
Definition: AcceleratorBuffer.h:273
void reserve(size_t size)
Reallocates buffers so that at least size elements of storage are available.
Definition: AcceleratorBuffer.h:484
UsageHint m_gpuAccessHint
Hint about how likely future GPU access is.
Definition: AcceleratorBuffer.h:313
MemoryType m_cpuMemoryType
Type of the CPU-side buffer.
Definition: AcceleratorBuffer.h:249
bool IsFastFromGpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the GPU.
Definition: AcceleratorBuffer.h:245
void PrepareForGpuAccess(bool outputOnly=false)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:941
std::unique_ptr< vk::raii::Buffer > m_cpuBuffer
Buffer object for CPU-side memory.
Definition: AcceleratorBuffer.h:267
MemoryType m_gpuMemoryType
Type of the GPU-side buffer.
Definition: AcceleratorBuffer.h:252
__attribute__((noinline)) void UpdateGpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1436
void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the CPU in the future.
Definition: AcceleratorBuffer.h:860
void FreeCpuBuffer()
Free the CPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1093
void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer &cmdBuf)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:971
__attribute__((noinline)) void UpdateCpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1463
bool HasGpuBuffer() const
Returns true if there is currently a GPU-side buffer.
Definition: AcceleratorBuffer.h:415
void push_front(const T &value)
Inserts a new item at the beginning of the container. This is inefficient due to copying.
Definition: AcceleratorBuffer.h:785
void CopyToGpuNonblocking(vk::raii::CommandBuffer &cmdBuf)
Copy the buffer contents from CPU to GPU without blocking on the CPU.
Definition: AcceleratorBuffer.h:1046
size_t GetGpuMemoryBytes() const
Returns the total reserved GPU memory, in bytes.
Definition: AcceleratorBuffer.h:380
int m_tempFileHandle
File handle used for MEM_TYPE_CPU_PAGED.
Definition: AcceleratorBuffer.h:283
void shrink_to_fit()
Frees unused memory so that m_size == m_capacity.
Definition: AcceleratorBuffer.h:493
size_t m_capacity
Size of the allocated memory space (may be larger than m_size)
Definition: AcceleratorBuffer.h:293
void FreeGpuBuffer(bool dataLossOK=false)
Free the GPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1131
bool IsFastFromCpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the CPU.
Definition: AcceleratorBuffer.h:239
void CopyToCpu()
Copy the buffer contents from GPU to CPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:1001
__attribute__((noinline)) void FreeCpuPointer(T *ptr
Frees a CPU-side buffer.
AcceleratorBuffer(const std::string &name="")
Creates a new AcceleratorBuffer with no content.
Definition: AcceleratorBuffer.h:322
MemoryAttributes
Attributes that a memory buffer can have.
Definition: AcceleratorBuffer.h:175
bool m_cpuPhysMemIsStale
True if m_cpuPtr contains stale data (m_gpuPhysMem has been modified and they point to different memo...
Definition: AcceleratorBuffer.h:276
__attribute__((noinline)) void AllocateCpuBuffer(size_t size)
Allocates a buffer for CPU access.
Definition: AcceleratorBuffer.h:1160
__attribute__((noinline)) void Reallocate(size_t size)
Reallocates the buffer so that it contains exactly size elements.
Definition: AcceleratorBuffer.h:548
UsageHint m_cpuAccessHint
Hint about how likely future CPU access is.
Definition: AcceleratorBuffer.h:310
MemoryType
Types of memory buffer.
Definition: AcceleratorBuffer.h:193
T * GetCpuPointer()
Gets a pointer to the CPU-side buffer.
Definition: AcceleratorBuffer.h:440
void SetName(std::string name)
Sets the debug name for this buffer.
Definition: AcceleratorBuffer.h:1497
void clear()
Resize the container to be empty (but don't free memory)
Definition: AcceleratorBuffer.h:478
bool IsGpuBufferStale() const
Returns true if the GPU-side buffer is stale.
Definition: AcceleratorBuffer.h:403
void deallocate(T *const p, const size_t unused) const
Free a block of memory.
Definition: AlignedAllocator.h:194
T * allocate(size_t n) const
Allocate a block of memory.
Definition: AlignedAllocator.h:159
A unit of measurement, plus conversion to pretty-printed output.
Definition: Unit.h:57
uint32_t g_vkPinnedMemoryType
Vulkan memory type for CPU-based memory that is also GPU-readable.
Definition: VulkanInit.cpp:118
std::mutex g_vkTransferMutex
Mutex for interlocking access to g_vkTransferCommandBuffer and g_vkTransferCommandPool.
Definition: VulkanInit.cpp:112
bool g_hasDebugUtils
Indicates whether the VK_EXT_debug_utils extension is available.
Definition: VulkanInit.cpp:195
std::shared_ptr< vk::raii::Device > g_vkComputeDevice
The Vulkan device selected for compute operations (may or may not be same device as rendering)
Definition: VulkanInit.cpp:71
std::unique_ptr< vk::raii::CommandBuffer > g_vkTransferCommandBuffer
Command buffer for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:89
std::shared_ptr< QueueHandle > g_vkTransferQueue
Queue for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:98
uint32_t g_vkLocalMemoryType
Vulkan memory type for GPU-based memory (generally not CPU-readable, except on unified memory systems...
Definition: VulkanInit.cpp:124
bool g_vulkanDeviceHasUnifiedMemory
Indicates whether the Vulkan device is unified memory.
Definition: VulkanInit.cpp:220