ngscopeclient v0.1-rc1
AcceleratorBuffer.h
Go to the documentation of this file.
1/***********************************************************************************************************************
2* *
3* libscopehal *
4* *
5* Copyright (c) 2012-2025 Andrew D. Zonenberg and contributors *
6* All rights reserved. *
7* *
8* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the *
9* following conditions are met: *
10* *
11* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the *
12* following disclaimer. *
13* *
14* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the *
15* following disclaimer in the documentation and/or other materials provided with the distribution. *
16* *
17* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products *
18* derived from this software without specific prior written permission. *
19* *
20* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
21* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL *
22* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
23* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *
24* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
25* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *
26* POSSIBILITY OF SUCH DAMAGE. *
27* *
28***********************************************************************************************************************/
29
35#ifndef AcceleratorBuffer_h
36#define AcceleratorBuffer_h
37
38#include "AlignedAllocator.h"
39#include "QueueManager.h"
40
41#ifdef _WIN32
42#undef MemoryBarrier
43#endif
44
45#ifndef _WIN32
46#include <sys/mman.h>
47#include <unistd.h>
48#endif
49
50#include <type_traits>
51
//Global Vulkan state shared by all AcceleratorBuffer instances (defined elsewhere in the project)
// - memory type indices for pinned (host-visible) and device-local allocations
52extern uint32_t g_vkPinnedMemoryType;
53extern uint32_t g_vkLocalMemoryType;
// - the compute device and the shared transfer queue/command buffer, guarded by g_vkTransferMutex
54extern std::shared_ptr<vk::raii::Device> g_vkComputeDevice;
55extern std::unique_ptr<vk::raii::CommandBuffer> g_vkTransferCommandBuffer;
56extern std::shared_ptr<QueueHandle> g_vkTransferQueue;
57extern std::mutex g_vkTransferMutex;
58
// - true if VK_EXT_debug_utils is available (enables object naming in UpdateCpuNames/UpdateGpuNames)
59extern bool g_hasDebugUtils;
61
// NOTE(review): this region is a Doxygen extraction; the `class AcceleratorBuffer;` forward
// declaration and the `enum class MemoryPressureLevel` / `enum class MemoryPressureType`
// header lines were dropped and must be restored from the upstream header before compiling.
62template<class T>
64
// Levels of memory pressure (MemoryPressureLevel per the generated index)
67{
// An allocation has already failed; memory must be freed immediately to continue execution
69 Hard,
70
// Free memory has reached a warning threshold
79 Soft
80};
81
// Types of memory pressure (MemoryPressureType per the generated index)
84{
// Pinned CPU-side memory
86 Host,
87
// GPU-side memory
89 Device
90};
91
// Handler callback registered in g_memoryPressureHandlers; returns true if it freed memory
93typedef bool (*MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize);
94
// Called when we run low on memory (implemented in scopehal.cpp per the generated index)
95bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize);
96
template<class T>
class AcceleratorBuffer;

/**
	@brief Forward iterator over an AcceleratorBuffer

	Dereferencing indexes into the parent buffer's CPU-side storage, so the parent's
	CPU buffer must be valid (PrepareForCpuAccess) before use.

	NOTE(review): the Doxygen extraction dropped the class declaration line, the
	constructor signature and the m_buf member; they are reconstructed here from the
	constructor init list, operator*, and the begin()/end() call sites.
 */
template<class T>
class AcceleratorBufferIterator
{
public:
	using value_type = T;
	using iterator_category = std::forward_iterator_tag;
	using difference_type = std::ptrdiff_t;
	using pointer = T*;
	using reference = T&;

	AcceleratorBufferIterator(AcceleratorBuffer<T>& buf, size_t i)
	: m_index(i)
	, m_buf(buf)
	{}

	///@brief Dereferences the iterator (CPU-side buffer of the parent must be valid)
	T& operator*()
	{ return m_buf[m_index]; }

	///@brief Returns the index of the iterator within the parent buffer
	size_t GetIndex() const
	{ return m_index; }

	bool operator!=(AcceleratorBufferIterator<T>& it)
	{
		//TODO: should we check m_buf equality too?
		//Will slow things down, but be more semantically correct. Does anything care?
		return (m_index != it.m_index);
	}

	///@brief Pre-increment: advances to the next element
	AcceleratorBufferIterator<T>& operator++()
	{
		m_index ++;
		return *this;
	}

protected:
	///@brief Current position within the parent buffer
	size_t m_index;

	///@brief The buffer being iterated over
	AcceleratorBuffer<T>& m_buf;
};
135
136template<class T>
137std::ptrdiff_t operator-(const AcceleratorBufferIterator<T>& a, const AcceleratorBufferIterator<T>& b)
138{ return a.GetIndex() - b.GetIndex(); }
139
// NOTE(review): the `class AcceleratorBuffer` declaration line was dropped by the
// Doxygen extraction; this is the start of the class body.
156template<class T>
158
159protected:
160
// 32-byte aligned allocator used when the buffer lives in plain host memory (MEM_TYPE_CPU_ONLY)
162 // Allocator for CPU-only memory
163
164 AlignedAllocator<T, 32> m_cpuAllocator;
165
166public:
167
169 // Buffer types
170
// NOTE(review): the enum header lines were dropped by the extraction. The first enum is the
// attribute bitmask; the second is MemoryType (the name is established by the
// FreeCpuPointer(T*, MemoryType, size_t) signature below), whose values are ORs of the attributes.
175 {
176 //Location of the memory
177 MEM_ATTRIB_CPU_SIDE = 0x1,
178 MEM_ATTRIB_GPU_SIDE = 0x2,
179
180 //Reachability
181 MEM_ATTRIB_CPU_REACHABLE = 0x4,
182 MEM_ATTRIB_GPU_REACHABLE = 0x8,
183
184 //Speed
185 MEM_ATTRIB_CPU_FAST = 0x10,
186 MEM_ATTRIB_GPU_FAST = 0x20
187 };
188
193 {
194 //Pointer is invalid
195 MEM_TYPE_NULL = 0,
196
197 //Memory is located on the CPU but backed by a file and may get paged out
198 MEM_TYPE_CPU_PAGED =
199 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE,
200
201 //Memory is located on the CPU but not pinned, or otherwise accessible to the GPU
202 MEM_TYPE_CPU_ONLY =
203 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST,
204
205 //Memory is located on the CPU, but can be accessed by the GPU.
206 //Fast to access from the CPU, but accesses from the GPU require PCIe DMA and is slow
207 //(unless platform uses unified memory, in which case g_vulkanDeviceHasUnifiedMemory will be true)
208 MEM_TYPE_CPU_DMA_CAPABLE =
209 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST | MEM_ATTRIB_GPU_REACHABLE,
210
211 //Memory is located on the GPU and cannot be directly accessed by the CPU
212 MEM_TYPE_GPU_ONLY =
213 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST,
214
215 //Memory is located on the GPU, but can be accessed by the CPU.
216 //Fast to access from the GPU, but accesses from the CPU require PCIe DMA and is slow
217 //(should not be used if platform uses unified memory, in which case g_vulkanDeviceHasUnifiedMemory will be true)
218 MEM_TYPE_GPU_DMA_CAPABLE =
219 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST | MEM_ATTRIB_CPU_REACHABLE
220 };
221
222protected:
223
// Static predicates testing the attribute bits of a MemoryType.
// NOTE(review): the four signature lines (IsReachableFromCpu / IsReachableFromGpu /
// IsFastFromCpu / IsFastFromGpu per the bit each one tests) were dropped by the extraction.
228 { return (mt & MEM_ATTRIB_CPU_REACHABLE) != 0; }
229
234 { return (mt & MEM_ATTRIB_GPU_REACHABLE) != 0; }
235
240 { return (mt & MEM_ATTRIB_CPU_FAST) != 0; }
241
246 { return (mt & MEM_ATTRIB_GPU_FAST) != 0; }
247
// NOTE(review): the extraction dropped several member declarations in this section
// (per the constructor init list: m_cpuMemoryType, m_gpuMemoryType, m_cpuPtr,
// m_buffersAreSame, m_cpuPhysMemIsStale, m_gpuPhysMemIsStale, the POSIX-only
// m_tempFileHandle, m_capacity, and the two access hints). Restore from upstream.
250
253
255 // The actual memory buffers
256
259
// Physical memory backing the pinned CPU-side buffer (null unless MEM_TYPE_CPU_DMA_CAPABLE)
261 std::unique_ptr<vk::raii::DeviceMemory> m_cpuPhysMem;
262
// Physical memory backing the GPU-side buffer
264 std::unique_ptr<vk::raii::DeviceMemory> m_gpuPhysMem;
265
// Vulkan buffer object wrapping the CPU-side memory
267 std::unique_ptr<vk::raii::Buffer> m_cpuBuffer;
268
// Vulkan buffer object wrapping the GPU-side memory
270 std::unique_ptr<vk::raii::Buffer> m_gpuBuffer;
271
274
277
280
282#ifndef _WIN32
284#endif
285
287 // Iteration
288
290 // Sizes of buffers
291
294
// Number of elements currently in use (as opposed to allocated capacity)
296 size_t m_size;
297
299 // Hint configuration
300public:
// How likely the CPU/GPU is to touch the buffer in the near future; drives allocation strategy
301 enum UsageHint
302 {
303 HINT_NEVER,
304 HINT_UNLIKELY,
305 HINT_LIKELY
306 };
307
308protected:
311
314
316 // Construction / destruction
317public:
318
// Constructs an empty buffer with no storage allocated; defaults favor CPU access.
// @param name  Optional friendly name used for Vulkan debug object naming
322 AcceleratorBuffer(const std::string& name = "")
323 : m_cpuMemoryType(MEM_TYPE_NULL)
324 , m_gpuMemoryType(MEM_TYPE_NULL)
325 , m_cpuPtr(nullptr)
326 , m_gpuPhysMem(nullptr)
327 , m_buffersAreSame(false)
328 , m_cpuPhysMemIsStale(false)
329 , m_gpuPhysMemIsStale(false)
330 #ifndef _WIN32
// NOTE(review): the init-list line inside this #ifndef (presumably m_tempFileHandle(-1))
// was dropped by the extraction -- confirm against upstream.
332 #endif
333 , m_capacity(0)
334 , m_size(0)
335 , m_cpuAccessHint(HINT_LIKELY) //default access hint: CPU-side pinned memory
336 , m_gpuAccessHint(HINT_UNLIKELY)
337 , m_name(name)
338 {
339 //non-trivially-copyable types can't be copied to GPU except on unified memory platforms
340 if(!std::is_trivially_copyable<T>::value && !g_vulkanDeviceHasUnifiedMemory)
341 m_gpuAccessHint = HINT_NEVER;
342 }
343
// Destructor body; the ~AcceleratorBuffer() signature line was dropped by the extraction.
// Frees the GPU buffer without attempting a copy-back (dataLossOK = true).
345 {
347 FreeGpuBuffer(true);
348 }
349
351 // General accessors
352public:
353
/// Returns the number of elements in use
357 size_t size() const
358 { return m_size; }
359
/// Returns the number of elements allocated (>= size())
363 size_t capacity() const
364 { return m_capacity; }
365
/// Returns the number of bytes of CPU-side storage (0 if no CPU buffer exists)
369 size_t GetCpuMemoryBytes() const
370 {
371 if(m_cpuMemoryType == MEM_TYPE_NULL)
372 return 0;
373 else
374 return m_capacity * sizeof(T);
375 }
376
/// Returns the number of bytes of GPU-side storage (0 if no GPU buffer exists)
380 size_t GetGpuMemoryBytes() const
381 {
382 if(m_gpuMemoryType == MEM_TYPE_NULL)
383 return 0;
384 else
385 return m_capacity * sizeof(T);
386 }
387
/// Returns true if the container holds no elements
391 bool empty() const
392 { return (m_size == 0); }
393
/// Returns true if the CPU-side copy of the data is out of date
397 bool IsCpuBufferStale() const
398 { return m_cpuPhysMemIsStale; }
399
/// Returns true if the GPU-side copy of the data is out of date
403 bool IsGpuBufferStale() const
404 { return m_gpuPhysMemIsStale; }
405
/// Returns true if a CPU-side buffer exists
409 bool HasCpuBuffer() const
410 { return (m_cpuPtr != nullptr); }
411
/// Returns true if a GPU-side buffer exists
415 bool HasGpuBuffer() const
416 { return (m_gpuPhysMem != nullptr); }
417
// NOTE(review): signature dropped by the extraction -- this accessor reports whether CPU and
// GPU views share one physical buffer (m_buffersAreSame).
422 { return m_buffersAreSame; }
423
/// Returns the Vulkan buffer to bind for GPU work: the GPU-side buffer if present, else the pinned CPU-side one
429 vk::Buffer GetBuffer()
430 {
431 if(m_gpuBuffer != nullptr)
432 return **m_gpuBuffer;
433 else
434 return **m_cpuBuffer;
435 }
436
// NOTE(review): signature dropped by the extraction -- returns the raw CPU-side pointer (m_cpuPtr).
441 { return m_cpuPtr; }
442
446 vk::DescriptorBufferInfo GetBufferInfo()
447 {
448 return vk::DescriptorBufferInfo(
449 GetBuffer(),
450 0,
451 m_size * sizeof(T));
452 }
453
457 void resize(size_t size)
458 {
459 //Need to grow?
460 if(size > m_capacity)
461 {
462 //Default to doubling in size each time to avoid excessive copying.
463 if(m_capacity == 0)
464 reserve(size);
465 else if(size > m_capacity*2)
466 reserve(size);
467 else
468 reserve(m_capacity * 2);
469 }
470
471 //Update our size
472 m_size = size;
473 }
474
478 void clear()
479 { resize(0); }
480
484 void reserve(size_t size)
485 {
486 if(size >= m_capacity)
487 Reallocate(size);
488 }
489
// Releases any excess capacity by reallocating down to the in-use size.
// NOTE(review): the function signature line (presumably `void shrink_to_fit()`)
// was dropped by the extraction.
494 {
495 if(m_size != m_capacity)
496 Reallocate(m_size);
497 }
498
// Copies our content from another AcceleratorBuffer (both CPU and GPU sides as available).
// NOTE(review): the extraction dropped the hint-copying statements after the first comment,
// and the command-buffer end()/submit lines after the copyBuffer call -- restore from upstream.
502 __attribute__((noinline))
503 void CopyFrom(const AcceleratorBuffer<T>& rhs)
504 {
505 //Copy placement hints from the other instance, then resize to match
508 resize(rhs.m_size);
509
510 //Valid data CPU side? Copy it to here
511 if(rhs.HasCpuBuffer() && !rhs.m_cpuPhysMemIsStale)
512 {
513 //non-trivially-copyable types have to be copied one at a time
514 if(!std::is_trivially_copyable<T>::value)
515 {
516 for(size_t i=0; i<m_size; i++)
517 m_cpuPtr[i] = rhs.m_cpuPtr[i];
518 }
519
520 //Trivially copyable types can be done more efficiently in a block
521 else
522 memcpy(m_cpuPtr, rhs.m_cpuPtr, m_size * sizeof(T));
523 }
525
526 //Valid data GPU side? Copy it to here
527 if(rhs.HasGpuBuffer() && !rhs.m_gpuPhysMemIsStale)
528 {
// Hold the transfer lock for the whole record/submit sequence
529 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
530
531 //Make the transfer request
532 g_vkTransferCommandBuffer->begin({});
533 vk::BufferCopy region(0, 0, m_size * sizeof(T));
534 g_vkTransferCommandBuffer->copyBuffer(**rhs.m_gpuBuffer, **m_gpuBuffer, {region});
536
537 //Submit the request and block until it completes
539 }
541 }
542
543protected:
544
// Reallocates internal storage to hold `size` elements, preserving existing content and
// honoring the current CPU/GPU usage hints. Data may move between CPU and GPU memory.
// NOTE(review): the Doxygen extraction dropped several statements in this function
// (the HINT_NEVER CPU branch body, the staleness check before the CPU copy, the
// m_buffersAreSame/FreeGpuBuffer statements after two if()s, the command-buffer
// end/submit lines, and the start of the m_buffersAreSame assignment near the end).
// Restore from upstream before compiling.
548 __attribute__((noinline))
549 void Reallocate(size_t size)
550 {
551 if(size == 0)
552 return;
553
554 /*
555 If we are a bool[] or similar one-byte type, we are likely going to be accessed from the GPU via a uint32
556 descriptor for at least some shaders (such as rendering).
557
558 Round our actual allocated size to the next multiple of 4 bytes. The padding values are unimportant as the
559 bytes are never written, and the data read from the high bytes in the uint32 is discarded by the GPU.
560 We just need to ensure the memory is allocated so the 32-bit read is legal to perform.
561 */
562 if( (sizeof(T) == 1) && (m_gpuAccessHint != HINT_NEVER) )
563 {
564 if(size & 3)
565 size = (size | 3) + 1;
566 }
567
568 //If we do not anticipate using the data on the CPU, we shouldn't waste RAM.
569 //Allocate a GPU-local buffer, copy data to it, then free the CPU-side buffer
570 //Don't do this if the platform has unified memory
571 if( (m_cpuAccessHint == HINT_NEVER) && !g_vulkanDeviceHasUnifiedMemory)
572 {
575 }
576
577 else
578 {
579 //Resize CPU memory
580 //TODO: optimization, when expanding a MEM_TYPE_CPU_PAGED we can just enlarge the file
581 //and not have to make a new temp file and copy the content
582 if(m_cpuPtr != nullptr)
583 {
584 //Save the old pointer
585 auto pOld = m_cpuPtr;
586 auto pOldPin = std::move(m_cpuPhysMem);
587 auto type = m_cpuMemoryType;
588
589 //Allocate the new buffer
590 AllocateCpuBuffer(size);
591
592 //If CPU-side data is valid, copy existing data over.
593 //New pointer is still valid in this case.
595 {
596 //non-trivially-copyable types have to be copied one at a time
597 if(!std::is_trivially_copyable<T>::value)
598 {
599 for(size_t i=0; i<m_size; i++)
600 m_cpuPtr[i] = std::move(pOld[i]);
601 }
602
603 //Trivially copyable types can be done more efficiently in a block
604 //gcc warns about this even though we only call this code if the type is trivially copyable,
605 //so disable the warning.
606 else
607 {
608 #pragma GCC diagnostic push
609 #pragma GCC diagnostic ignored "-Wclass-memaccess"
610
611 memcpy(m_cpuPtr, pOld, m_size * sizeof(T));
612
613 #pragma GCC diagnostic pop
614 }
615 }
616
617 //If CPU-side data is stale, just allocate the new buffer but leave it as stale
618 //(don't do a potentially unnecessary copy from the GPU)
619
620 //Now we're done with the old pointer so get rid of it
621 FreeCpuPointer(pOld, pOldPin, type, m_capacity);
622 }
623
624 //Allocate new CPU memory, replacing our current (null) pointer
625 else
626 {
627 AllocateCpuBuffer(size);
628
629 //If we already had GPU-side memory containing data, then the new CPU-side buffer is stale
630 //until we copy stuff over to it
631 if(m_gpuPhysMem != nullptr)
632 m_cpuPhysMemIsStale = true;
633 }
634 }
635
636 //We're expecting to use data on the GPU, so prepare to do stuff with it
637 if(m_gpuAccessHint != HINT_NEVER)
638 {
639 //If GPU access is unlikely, we probably want to just use pinned memory.
640 //If available, mark buffers as the same, and free any existing GPU buffer we might have
641 //Always use pinned memory if the platform has unified memory
642 if( ((m_gpuAccessHint == HINT_UNLIKELY) && (m_cpuMemoryType == MEM_TYPE_CPU_DMA_CAPABLE)) || g_vulkanDeviceHasUnifiedMemory )
644
645 //Nope, we need to allocate dedicated GPU memory
646 else
647 {
648 //If we have an existing buffer with valid content, save it and copy content over
649 if( (m_gpuPhysMem != nullptr) && !m_gpuPhysMemIsStale && (m_size != 0))
650 {
651 auto pOld = std::move(m_gpuPhysMem);
652 //auto type = m_gpuMemoryType;
653 auto bOld = std::move(m_gpuBuffer);
654
655 //Allocation successful!
656 if(AllocateGpuBuffer(size))
657 {
658 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
659
660 //Make the transfer request
661 g_vkTransferCommandBuffer->begin({});
662 vk::BufferCopy region(0, 0, m_size * sizeof(T));
663 g_vkTransferCommandBuffer->copyBuffer(**bOld, **m_gpuBuffer, {region});
665
666 //Submit the request and block until it completes
668
669 //make sure buffer is freed before underlying physical memory (pOld) goes out of scope
670 bOld = nullptr;
671 }
672
673 //Allocation failed!
674 else
675 {
676 //Revert to the old buffer. We're now in a consistent state again
677 m_gpuPhysMem = std::move(pOld);
678 m_gpuBuffer = std::move(bOld);
679
680 //Make sure we have a CPU side buffer that's DMA capable
681 if(m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE)
682 {
683 SetCpuAccessHint(HINT_LIKELY);
684 SetGpuAccessHint(HINT_LIKELY);
685 AllocateCpuBuffer(size);
686 }
687
688 //Free the GPU buffer, moving its contents to the CPU
690 }
691 }
692
693 //Nope, just allocate a new buffer
694 else
695 {
696 //Allocation successful? We now have the buffer
697 if(AllocateGpuBuffer(size))
698 {
699 //If we already had CPU-side memory containing data, then the new GPU-side buffer is stale
700 //until we copy stuff over to it.
701 //Special case: if m_size is 0 (newly allocated buffer) we're not stale yet
702 if( (m_cpuPhysMem != nullptr) && (m_size != 0) )
703 m_gpuPhysMemIsStale = true;
704 }
705
706 //Allocation failed? No change, we already had the CPU buffer and don't have to touch anything
707 //But did the CPU buffer exist? if not, allocate *something*
708 else if(m_cpuPhysMem == nullptr)
709 {
710 SetCpuAccessHint(HINT_LIKELY);
711 SetGpuAccessHint(HINT_LIKELY);
712 AllocateCpuBuffer(size);
713 }
714 }
715 }
716 }
717
718 //Existing GPU buffer we never expect to use again - needs to be freed
719 else if(m_gpuPhysMem != nullptr)
721
722 //We are never going to use the buffer on the GPU, but don't have any existing GPU memory
723 //so no action required
724 else
725 {
726 }
727
728 //Update our capacity
729 m_capacity = size;
730
731 //If we have a pinned buffer and nothing on the other side, there's a single shared physical memory region
733 ( (m_cpuMemoryType == MEM_TYPE_CPU_DMA_CAPABLE) && (m_gpuMemoryType == MEM_TYPE_NULL) ) ||
734 ( (m_cpuMemoryType == MEM_TYPE_NULL) && (m_gpuMemoryType == MEM_TYPE_GPU_DMA_CAPABLE) );
735 }
736
738 // CPU-side STL-esque container API
739
740 //PrepareForCpuAccess() *must* be called prior to calling any of these methods.
741public:
742
743 const T& operator[](size_t i) const
744 { return m_cpuPtr[i]; }
745
746 T& operator[](size_t i)
747 { return m_cpuPtr[i]; }
748
// Appends an element to the end of the container, growing storage if needed.
// NOTE(review): the extraction dropped a statement after the assignment (presumably
// MarkModifiedFromCpu(), matching the other CPU-side mutators) -- confirm against upstream.
752 void push_back(const T& value)
753 {
754 size_t cursize = m_size;
755 resize(m_size + 1);
756 m_cpuPtr[cursize] = value;
757
759 }
760
760
764 void pop_back()
765 {
766 if(!empty())
767 resize(m_size - 1);
768 }
769
775 void push_front(const T& value)
776 {
777 size_t cursize = m_size;
778 resize(m_size + 1);
779
781
782 //non-trivially-copyable types have to be copied one at a time
783 if(!std::is_trivially_copyable<T>::value)
784 {
785 for(size_t i=0; i<cursize; i++)
786 m_cpuPtr[i+1] = std::move(m_cpuPtr[i]);
787 }
788
789 //Trivially copyable types can be done more efficiently in a block
790 else
791 memmove(m_cpuPtr+1, m_cpuPtr, sizeof(T) * (cursize));
792
793 //Insert the new first element
794 m_cpuPtr[0] = value;
795
797 }
798
// Removes the first item in the container, shifting remaining content left (O(n)).
// NOTE(review): the extraction dropped the `void pop_front()` signature line, a
// PrepareForCpuAccess()-style call before the shift, and a trailing statement
// (presumably MarkModifiedFromCpu()) -- confirm against upstream. Also note: calling
// this on an EMPTY container underflows m_size-1 in the shift below -- verify callers.
805 {
806 //No need to move data if popping last element
807 if(m_size == 1)
808 {
809 clear();
810 return;
811 }
812
813 //Don't touch GPU side buffer
814
816
817 //non-trivially-copyable types have to be copied one at a time
818 if(!std::is_trivially_copyable<T>::value)
819 {
// Forward walk is correct for a LEFT shift (reads are always ahead of writes)
820 for(size_t i=0; i<m_size-1; i++)
821 m_cpuPtr[i] = std::move(m_cpuPtr[i+1]);
822 }
823
824 //Trivially copyable types can be done more efficiently in a block
825 else
826 memmove(m_cpuPtr, m_cpuPtr+1, sizeof(T) * (m_size-1));
827
828 resize(m_size - 1);
829
831 }
832
// Iterator access. NOTE(review): the extraction dropped both signature lines
// (presumably `AcceleratorBufferIterator<T> begin()` / `end()`); the bodies construct
// iterators at index 0 and m_size respectively.
834 { return AcceleratorBufferIterator<T>(*this, 0); }
835
837 { return AcceleratorBufferIterator<T>(*this, m_size); }
838
840 // Hints about near-future usage patterns
841
842public:
843
850 void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately = false)
851 {
852 m_cpuAccessHint = hint;
853
854 if(reallocateImmediately && (m_size != 0))
855 Reallocate(m_size);
856 }
857
864 void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately = false)
865 {
866 //Only trivially copyable datatypes are allowed on the GPU
867 if(!std::is_trivially_copyable<T>::value)
868 hint = HINT_NEVER;
869
870 m_gpuAccessHint = hint;
871
872 if(reallocateImmediately && (m_size != 0))
873 Reallocate(m_size);
874 }
875
877 // Cache invalidation
878
// NOTE(review): the extraction dropped both signature lines and one statement per body.
// First body (sets the GPU copy stale) is presumably MarkModifiedFromCpu(), second
// (sets the CPU copy stale) MarkModifiedFromGpu(); each likely also cleared its own
// side's stale flag on the dropped line -- confirm against upstream.
885 {
887 m_gpuPhysMemIsStale = true;
888 }
889
896 {
898 m_cpuPhysMemIsStale = true;
899 }
900
902 // Preparation for access
903
// Ensures the CPU-side copy of the data exists and is up to date before CPU reads/writes.
// NOTE(review): the extraction dropped the signature line (presumably
// `void PrepareForCpuAccess()`) and the staleness check guarding the CopyToCpu() call.
910 {
911 //Early out if no content
912 if(m_size == 0)
913 return;
914
915 //If there's no buffer at all on the CPU, allocate one
916 if(!HasCpuBuffer() && (m_gpuMemoryType != MEM_TYPE_GPU_DMA_CAPABLE))
917 AllocateCpuBuffer(m_capacity);
918
920 CopyToCpu();
921 }
922
// Ensures a GPU-reachable buffer exists and (unless outputOnly) is up to date, blocking on
// any required transfer.
// NOTE(review): the extraction dropped the early-out condition line after the first comment.
// @param outputOnly  If true, skip copying stale data up (buffer will be fully overwritten by the GPU)
931 void PrepareForGpuAccess(bool outputOnly = false)
932 {
933 //Early out if no content or if unified memory
935 return;
936
937 //If our current hint has no GPU access at all, update to say "unlikely" and reallocate
938 if(m_gpuAccessHint == HINT_NEVER)
939 SetGpuAccessHint(HINT_UNLIKELY, true);
940
941 //If we don't have a buffer, allocate one unless our CPU buffer is pinned and GPU-readable
942 if(!HasGpuBuffer() && (m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE) )
943 {
944 if(!AllocateGpuBuffer(m_capacity))
945 return;
946 }
947
948 //Make sure the GPU-side buffer is up to date
949 if(m_gpuPhysMemIsStale && !outputOnly)
950 CopyToGpu();
951 }
952
// Non-blocking variant of PrepareForGpuAccess: any required transfer is recorded into the
// caller's command buffer instead of being submitted and waited on here.
// NOTE(review): the extraction dropped the early-out condition line after the first comment.
// @param outputOnly  If true, skip copying stale data up (buffer will be fully overwritten by the GPU)
// @param cmdBuf      Command buffer (in the recording state) to append the transfer to
961 void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer& cmdBuf)
962 {
963 //Early out if no content or if unified memory
965 return;
966
967 //If our current hint has no GPU access at all, update to say "unlikely" and reallocate
968 if(m_gpuAccessHint == HINT_NEVER)
969 SetGpuAccessHint(HINT_UNLIKELY, true);
970
971 //If we don't have a buffer, allocate one unless our CPU buffer is pinned and GPU-readable
972 if(!HasGpuBuffer() && (m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE) )
973 {
974 if(!AllocateGpuBuffer(m_capacity))
975 return;
976 }
977
978 //Make sure the GPU-side buffer is up to date
979 if(m_gpuPhysMemIsStale && !outputOnly)
980 CopyToGpuNonblocking(cmdBuf);
981 }
982
983protected:
984
986 // Copying of buffer content
987
// Copies buffer contents from GPU to CPU, blocking until the transfer completes.
// NOTE(review): the extraction dropped the signature line (presumably `void CopyToCpu()`)
// and the command-buffer end()/submit lines -- restore from upstream.
992 {
// Only trivially copyable types can be block-copied between CPU and GPU
993 assert(std::is_trivially_copyable<T>::value);
994
995 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
996
997 //Make the transfer request
998 g_vkTransferCommandBuffer->begin({});
999 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1000 g_vkTransferCommandBuffer->copyBuffer(**m_gpuBuffer, **m_cpuBuffer, {region});
1002
1003 //Submit the request and block until it completes
1005
1006 m_cpuPhysMemIsStale = false;
1007 }
1008
// Copies buffer contents from CPU to GPU, blocking until the transfer completes.
// NOTE(review): the extraction dropped the signature line (presumably `void CopyToGpu()`)
// and the command-buffer end()/submit lines -- restore from upstream.
1013 {
// Only trivially copyable types can be block-copied between CPU and GPU
1014 assert(std::is_trivially_copyable<T>::value);
1015
1016 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
1017
1018 //Make the transfer request
1019 g_vkTransferCommandBuffer->begin({});
1020 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1021 g_vkTransferCommandBuffer->copyBuffer(**m_cpuBuffer, **m_gpuBuffer, {region});
1023
1024 //Submit the request and block until it completes
1026
1027 m_gpuPhysMemIsStale = false;
1028 }
1029
1030
1036 void CopyToGpuNonblocking(vk::raii::CommandBuffer& cmdBuf)
1037 {
1038 assert(std::is_trivially_copyable<T>::value);
1039
1040 //Make the transfer request
1041 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1042 cmdBuf.copyBuffer(**m_cpuBuffer, **m_gpuBuffer, {region});
1043
1044 //Add the barrier
1045 cmdBuf.pipelineBarrier(
1046 vk::PipelineStageFlagBits::eTransfer,
1047 vk::PipelineStageFlagBits::eComputeShader,
1048 {},
1049 vk::MemoryBarrier(
1050 vk::AccessFlagBits::eTransferWrite,
1051 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1052 {},
1053 {});
1054
1055 m_gpuPhysMemIsStale = false;
1056 }
1057public:
1061 static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer& cmdBuf)
1062 {
1063 cmdBuf.pipelineBarrier(
1064 vk::PipelineStageFlagBits::eTransfer,
1065 vk::PipelineStageFlagBits::eComputeShader,
1066 {},
1067 vk::MemoryBarrier(
1068 vk::AccessFlagBits::eTransferWrite,
1069 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1070 {},
1071 {});
1072 }
1073
1074
1075protected:
1076
1078 // Cleanup
1079
// Frees the CPU-side buffer, first flushing any un-pushed content to the GPU so it isn't lost.
// NOTE(review): the extraction dropped the signature line (presumably `void FreeCpuBuffer()`)
// and the FreeCpuPointer(...) call after the "Free the buffer and unmap" comment.
1084 {
1085 //Early out if buffer is already null
1086 if(m_cpuPtr == nullptr)
1087 return;
1088
1089 //We have a buffer on the GPU.
1090 //If it's stale, need to push our updated content there before freeing the CPU-side copy
1091 if( (m_gpuMemoryType != MEM_TYPE_NULL) && m_gpuPhysMemIsStale && !empty())
1092 CopyToGpu();
1093
1094 //Free the Vulkan buffer object
1095 m_cpuBuffer = nullptr;
1096
1097 //Free the buffer and unmap any memory
1099
1100 //Mark CPU-side buffer as empty
1101 m_cpuPtr = nullptr;
1102 m_cpuPhysMem = nullptr;
1103 m_cpuMemoryType = MEM_TYPE_NULL;
1104 m_buffersAreSame = false;
1105
1106 //If we have no GPU-side buffer either, we're empty
1107 if(m_gpuMemoryType == MEM_TYPE_NULL)
1108 {
1109 m_size = 0;
1110 m_capacity = 0;
1111 }
1112 }
1113
1114public:
1121 void FreeGpuBuffer(bool dataLossOK = false)
1122 {
1123 //Early out if buffer is already null
1124 if(m_gpuPhysMem == nullptr)
1125 return;
1126
1127 //If we do NOT have a CPU-side buffer, we're deleting all of our data! Warn for now
1128 if( (m_cpuMemoryType == MEM_TYPE_NULL) && m_gpuPhysMemIsStale && !empty() && !dataLossOK)
1129 {
1130 LogWarning("Freeing a GPU buffer without any CPU backing, may cause data loss\n");
1131 }
1132
1133 //If we have a CPU-side buffer, and it's stale, move our about-to-be-deleted content over before we free it
1134 if( (m_cpuMemoryType != MEM_TYPE_NULL) && m_cpuPhysMemIsStale && !empty() )
1135 CopyToCpu();
1136
1137 m_gpuBuffer = nullptr;
1138 m_gpuPhysMem = nullptr;
1139 m_gpuMemoryType = MEM_TYPE_NULL;
1140 }
1141
1142protected:
1143
1145 // Allocation
1146
// Allocates CPU-side storage for `size` elements, choosing the memory type from the access
// hints: pinned Vulkan memory if any GPU use is expected, aligned host memory for frequent
// CPU use, otherwise (POSIX) an mmap'd unlinked temp file that can page out to disk.
// NOTE(review): the extraction dropped one mmap argument line (the file descriptor,
// presumably m_tempFileHandle) -- restore from upstream before compiling.
1150 __attribute__((noinline))
1151 void AllocateCpuBuffer(size_t size)
1152 {
1153 if(size == 0)
1154 LogFatal("AllocateCpuBuffer with size zero (invalid)\n");
1155
1156 //If any GPU access is expected, use pinned memory so we don't have to move things around
1157 if(m_gpuAccessHint != HINT_NEVER)
1158 {
1159 //Make a Vulkan buffer first
1160 vk::BufferCreateInfo bufinfo(
1161 {},
1162 size * sizeof(T),
1163 vk::BufferUsageFlagBits::eTransferSrc |
1164 vk::BufferUsageFlagBits::eTransferDst |
1165 vk::BufferUsageFlagBits::eStorageBuffer);
1166 m_cpuBuffer = std::make_unique<vk::raii::Buffer>(*g_vkComputeDevice, bufinfo);
1167
1168 //Figure out actual memory requirements of the buffer
1169 //(may be rounded up from what we asked for)
1170 auto req = m_cpuBuffer->getMemoryRequirements();
1171
1172 //Allocate the physical memory to back the buffer
1173 vk::MemoryAllocateInfo info(req.size, g_vkPinnedMemoryType);
1174 m_cpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1175
1176 //Map it and bind to the buffer
1177 m_cpuPtr = reinterpret_cast<T*>(m_cpuPhysMem->mapMemory(0, req.size));
1178 m_cpuBuffer->bindMemory(**m_cpuPhysMem, 0);
1179
1180 //We now have pinned memory
1181 m_cpuMemoryType = MEM_TYPE_CPU_DMA_CAPABLE;
1182
1183 if(g_hasDebugUtils)
1184 UpdateCpuNames();
1185 }
1186
1187 //If frequent CPU access is expected, use normal host memory
1188 else if(m_cpuAccessHint == HINT_LIKELY)
1189 {
1190 m_cpuBuffer = nullptr;
1191 m_cpuMemoryType = MEM_TYPE_CPU_ONLY;
1192 m_cpuPtr = m_cpuAllocator.allocate(size);
1193 }
1194
1195 //If infrequent CPU access is expected, use a memory mapped temporary file so it can be paged out to disk
1196 else
1197 {
1198 #ifdef _WIN32
1199
1200 //On Windows, use normal memory for now
1201 //until we figure out how to do this there
1202 m_cpuBuffer = nullptr;
1203 m_cpuMemoryType = MEM_TYPE_CPU_ONLY;
1204 m_cpuPtr = m_cpuAllocator.allocate(size);
1205
1206 #else
1207
1208 m_cpuBuffer = nullptr;
1209 m_cpuMemoryType = MEM_TYPE_CPU_PAGED;
1210
1211 //Make the temp file
1212 char fname[] = "/tmp/glscopeclient-tmpXXXXXX";
1213 m_tempFileHandle = mkstemp(fname);
1214 if(m_tempFileHandle < 0)
1215 {
1216 LogError("Failed to create temporary file %s\n", fname);
1217 abort();
1218 }
1219
1220 //Resize it to our desired file size
1221 size_t bytesize = size * sizeof(T);
1222 if(0 != ftruncate(m_tempFileHandle, bytesize))
1223 {
1224 LogError("Failed to resize temporary file %s\n", fname);
1225 abort();
1226 }
1227
1228 //Map it
1229 m_cpuPtr = reinterpret_cast<T*>(mmap(
1230 nullptr,
1231 bytesize,
1232 PROT_READ | PROT_WRITE,
1233 MAP_SHARED/* | MAP_UNINITIALIZED*/,
1235 0));
1236 if(m_cpuPtr == MAP_FAILED)
1237 {
1238 LogError("Failed to map temporary file %s\n", fname);
1239 perror("mmap failed: ");
1240 abort();
1241 }
1242 m_cpuMemoryType = MEM_TYPE_CPU_PAGED;
1243
1244 //Delete it (file will be removed by the OS after our active handle is closed)
1245 if(0 != unlink(fname))
1246 LogWarning("Failed to unlink temporary file %s, file will remain after application terminates\n", fname);
1247
1248 #endif
1249 }
1250
1251 //Memory has been allocated. Call constructors iff type is not trivially copyable
1252 //(This is not exactly 1:1 with having a constructor, but hopefully good enough?)
1253 if(!std::is_trivially_copyable<T>::value)
1254 {
1255 for(size_t i=0; i<size; i++)
1256 new(m_cpuPtr +i) T;
1257 }
1258 }
1259
1267 __attribute__((noinline))
1268 void FreeCpuPointer(T* ptr, MemoryType type, size_t size)
1269 {
1270 //Call destructors iff type is not trivially copyable
1271 if(!std::is_trivially_copyable<T>::value)
1272 {
1273 for(size_t i=0; i<size; i++)
1274 ptr[i].~T();
1275 }
1276
1277 switch(type)
1278 {
1279 case MEM_TYPE_NULL:
1280 //legal no-op
1281 break;
1282
1283 case MEM_TYPE_CPU_DMA_CAPABLE:
1284 LogFatal("FreeCpuPointer for MEM_TYPE_CPU_DMA_CAPABLE requires the vk::raii::DeviceMemory\n");
1285 break;
1286
1287 case MEM_TYPE_CPU_PAGED:
1288 #ifndef _WIN32
1289 munmap(ptr, size * sizeof(T));
1290 close(m_tempFileHandle);
1291 m_tempFileHandle = -1;
1292 #endif
1293 break;
1294
1295 case MEM_TYPE_CPU_ONLY:
1296 m_cpuAllocator.deallocate(ptr, size);
1297 break;
1298
1299 default:
1300 LogFatal("FreeCpuPointer: invalid type %x\n", type);
1301 }
1302 }
1303
1311 __attribute__((noinline))
1312 void FreeCpuPointer(T* ptr, std::unique_ptr<vk::raii::DeviceMemory>& buf, MemoryType type, size_t size)
1313 {
1314 switch(type)
1315 {
1316 case MEM_TYPE_CPU_DMA_CAPABLE:
1317 buf->unmapMemory();
1318 break;
1319
1320 default:
1321 FreeCpuPointer(ptr, type, size);
1322 }
1323 }
1324
// Allocates device-local GPU memory for `size` elements, retrying under memory pressure.
// Returns true on success; on total failure resets GPU state and returns false so callers
// can fall back to CPU-side pinned memory.
// NOTE(review): the extraction dropped the statement(s) inside the retry loop that call the
// memory-pressure machinery (presumably `if(!OnMemoryPressure(...)) break;` plus a log line)
// -- restore from upstream before compiling.
1330 __attribute__((noinline))
1331 bool AllocateGpuBuffer(size_t size)
1332 {
// Only trivially copyable types may live on the GPU
1333 assert(std::is_trivially_copyable<T>::value);
1334
1335 //Make a Vulkan buffer first
1336 vk::BufferCreateInfo bufinfo(
1337 {},
1338 size * sizeof(T),
1339 vk::BufferUsageFlagBits::eTransferSrc |
1340 vk::BufferUsageFlagBits::eTransferDst |
1341 vk::BufferUsageFlagBits::eStorageBuffer);
1342 m_gpuBuffer = std::make_unique<vk::raii::Buffer>(*g_vkComputeDevice, bufinfo);
1343
1344 //Figure out actual memory requirements of the buffer
1345 //(may be rounded up from what we asked for)
1346 auto req = m_gpuBuffer->getMemoryRequirements();
1347
1348 //Try to allocate the memory
1349 vk::MemoryAllocateInfo info(req.size, g_vkLocalMemoryType);
1350 try
1351 {
1352 //For now, always use local memory
1353 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1354 }
1355
1356 //Fallback path in case of low memory
1357 catch(vk::OutOfDeviceMemoryError& ex)
1358 {
1359 bool ok = false;
1360 while(!ok)
1361 {
1362 //Attempt to free memory and stop if we couldn't free more
1364 break;
1365
1367 try
1368 {
1369 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1370 ok = true;
1371 }
1372 catch(vk::OutOfDeviceMemoryError& ex2)
1373 {
1374 LogDebug("Allocation failed again\n");
1375 }
1376 }
1377
1378 //Retry one more time.
1379 //If we OOM simultaneously in two threads, it's possible to have the second OnMemoryPressure call
1380 //return false because the first one already freed all it could. But we might have enough free to continue.
1381 if(!ok)
1382 {
1383 LogDebug("Final retry\n");
1384 try
1385 {
1386 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1387 ok = true;
1388 }
1389 catch(vk::OutOfDeviceMemoryError& ex2)
1390 {
1391 LogDebug("Allocation failed again\n");
1392 }
1393 }
1394
1395 //If we get here, we couldn't allocate no matter what
1396 //Fall back to a CPU-side allocation
1397 if(!ok)
1398 {
1399 LogError(
1400 "Failed to allocate %s of GPU memory despite our best efforts to reclaim space, falling back to CPU-side pinned allocation\n",
1401 Unit(Unit::UNIT_BYTES).PrettyPrint(req.size, 4).c_str());
1402 m_gpuMemoryType = MEM_TYPE_NULL;
1403 m_gpuPhysMem = nullptr;
1404 m_gpuBuffer = nullptr;
1405 return false;
1406 }
1407 }
1408 m_gpuMemoryType = MEM_TYPE_GPU_ONLY;
1409
// Bind the freshly allocated memory to the buffer object
1410 m_gpuBuffer->bindMemory(**m_gpuPhysMem, 0);
1411
1412 if(g_hasDebugUtils)
1413 UpdateGpuNames();
1414
1415 return true;
1416 }
1417
1418protected:
1419
// Friendly name of the buffer, used for log messages and Vulkan debug object naming
1421 std::string m_name;
1422
1426 __attribute__((noinline))
1427 void UpdateGpuNames()
1428 {
1429 std::string sname = m_name;
1430 if(sname.empty())
1431 sname = "unnamed";
1432 std::string prefix = std::string("AcceleratorBuffer.") + sname + ".";
1433
1434 std::string gpuBufName = prefix + "m_gpuBuffer";
1435 std::string gpuPhysName = prefix + "m_gpuPhysMem";
1436
1437 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1438 vk::DebugUtilsObjectNameInfoEXT(
1439 vk::ObjectType::eBuffer,
1440 reinterpret_cast<uint64_t>(static_cast<VkBuffer>(**m_gpuBuffer)),
1441 gpuBufName.c_str()));
1442
1443 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1444 vk::DebugUtilsObjectNameInfoEXT(
1445 vk::ObjectType::eDeviceMemory,
1446 reinterpret_cast<uint64_t>(static_cast<VkDeviceMemory>(**m_gpuPhysMem)),
1447 gpuPhysName.c_str()));
1448 }
1449
1453 __attribute__((noinline))
1454 void UpdateCpuNames()
1455 {
1456 std::string sname = m_name;
1457 if(sname.empty())
1458 sname = "unnamed";
1459 std::string prefix = std::string("AcceleratorBuffer.") + sname + ".";
1460
1461 std::string cpuBufName = prefix + "m_cpuBuffer";
1462 std::string cpuPhysName = prefix + "m_cpuPhysMem";
1463
1464 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1465 vk::DebugUtilsObjectNameInfoEXT(
1466 vk::ObjectType::eBuffer,
1467 reinterpret_cast<uint64_t>(static_cast<VkBuffer>(**m_cpuBuffer)),
1468 cpuBufName.c_str()));
1469
1470 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1471 vk::DebugUtilsObjectNameInfoEXT(
1472 vk::ObjectType::eDeviceMemory,
1473 reinterpret_cast<uint64_t>(static_cast<VkDeviceMemory>(**m_cpuPhysMem)),
1474 cpuPhysName.c_str()));
1475 }
1476
1477public:
1478
1487 void SetName(std::string name)
1488 {
1489 m_name = name;
1490 if(g_hasDebugUtils)
1491 {
1492 if(m_gpuBuffer != nullptr)
1493 UpdateGpuNames();
1494 if(m_cpuBuffer != nullptr)
1495 UpdateCpuNames();
1496 }
1497 }
1498
1499};
1500
1501extern std::set<MemoryPressureHandler> g_memoryPressureHandlers;
1502
1503#endif
bool(* MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Memory pressure handler type, called when free memory reaches a warning level or a Vulkan allocation fails.
Definition: AcceleratorBuffer.h:93
MemoryPressureLevel
Levels of memory pressure.
Definition: AcceleratorBuffer.h:67
@ Hard
A memory allocation has failed and we need to free memory immediately to continue execution.
@ Soft
Free memory has reached a warning threshold.
bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Called when we run low on memory.
Definition: scopehal.cpp:1038
MemoryPressureType
Types of memory pressure.
Definition: AcceleratorBuffer.h:84
@ Host
Pinned CPU-side memory.
@ Device
GPU-side memory.
std::set< MemoryPressureHandler > g_memoryPressureHandlers
List of handlers for low memory registered by various subsystems.
Definition: scopehal.cpp:147
Declaration of AlignedAllocator.
Declaration of QueueManager and QueueHandle.
Definition: AcceleratorBuffer.h:99
A buffer of memory which may be used by GPU acceleration.
Definition: AcceleratorBuffer.h:158
void CopyToGpu()
Copy the buffer contents from CPU to GPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:1012
std::unique_ptr< vk::raii::Buffer > m_gpuBuffer
Buffer object for GPU-side memory.
Definition: AcceleratorBuffer.h:270
bool empty() const
Returns true if the container is empty.
Definition: AcceleratorBuffer.h:391
T * m_cpuPtr
CPU-side mapped pointer.
Definition: AcceleratorBuffer.h:258
void pop_front()
Removes the first item in the container.
Definition: AcceleratorBuffer.h:804
__attribute__((noinline)) void CopyFrom(const AcceleratorBuffer< T > &rhs)
Copies our content from another AcceleratorBuffer.
Definition: AcceleratorBuffer.h:502
void push_back(const T &value)
Adds a new element to the end of the container, allocating space if needed.
Definition: AcceleratorBuffer.h:752
bool IsCpuBufferStale() const
Returns true if the CPU-side buffer is stale.
Definition: AcceleratorBuffer.h:397
vk::DescriptorBufferInfo GetBufferInfo()
Returns a vk::DescriptorBufferInfo suitable for binding this object to.
Definition: AcceleratorBuffer.h:446
std::unique_ptr< vk::raii::DeviceMemory > m_cpuPhysMem
CPU-side physical memory.
Definition: AcceleratorBuffer.h:261
void MarkModifiedFromGpu()
Marks the GPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:895
size_t m_size
Size of the memory actually being used.
Definition: AcceleratorBuffer.h:296
bool IsReachableFromCpu(MemoryType mt)
Returns true if the given buffer type can be reached from the CPU.
Definition: AcceleratorBuffer.h:227
bool IsSingleSharedBuffer() const
Returns true if the object contains only a single buffer.
Definition: AcceleratorBuffer.h:421
bool HasCpuBuffer() const
Returns true if there is currently a CPU-side buffer.
Definition: AcceleratorBuffer.h:409
void MarkModifiedFromCpu()
Marks the CPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:884
std::unique_ptr< vk::raii::DeviceMemory > m_gpuPhysMem
GPU-side physical memory.
Definition: AcceleratorBuffer.h:264
void PrepareForCpuAccess()
Prepares the buffer to be accessed from the CPU.
Definition: AcceleratorBuffer.h:909
size_t size() const
Returns the actual size of the container (may be smaller than what was allocated)
Definition: AcceleratorBuffer.h:357
void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the GPU in the future.
Definition: AcceleratorBuffer.h:864
static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer &cmdBuf)
Adds a memory barrier for transferring data from host to device.
Definition: AcceleratorBuffer.h:1061
bool m_gpuPhysMemIsStale
True if m_gpuPhysMem contains stale data (m_cpuPtr has been modified and they point to different memory).
Definition: AcceleratorBuffer.h:279
bool IsReachableFromGpu(MemoryType mt)
Returns true if the given buffer type can be reached from the GPU.
Definition: AcceleratorBuffer.h:233
vk::Buffer GetBuffer()
Returns the preferred buffer for GPU-side access.
Definition: AcceleratorBuffer.h:429
void pop_back()
Removes the last item in the container.
Definition: AcceleratorBuffer.h:764
size_t GetCpuMemoryBytes() const
Returns the total reserved CPU memory, in bytes.
Definition: AcceleratorBuffer.h:369
void resize(size_t size)
Change the usable size of the container.
Definition: AcceleratorBuffer.h:457
size_t capacity() const
Returns the allocated size of the container.
Definition: AcceleratorBuffer.h:363
bool m_buffersAreSame
True if we have only one piece of physical memory accessible from both sides.
Definition: AcceleratorBuffer.h:273
void reserve(size_t size)
Reallocates buffers so that at least size elements of storage are available.
Definition: AcceleratorBuffer.h:484
UsageHint m_gpuAccessHint
Hint about how likely future GPU access is.
Definition: AcceleratorBuffer.h:313
MemoryType m_cpuMemoryType
Type of the CPU-side buffer.
Definition: AcceleratorBuffer.h:249
bool IsFastFromGpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the GPU.
Definition: AcceleratorBuffer.h:245
void PrepareForGpuAccess(bool outputOnly=false)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:931
std::unique_ptr< vk::raii::Buffer > m_cpuBuffer
Buffer object for CPU-side memory.
Definition: AcceleratorBuffer.h:267
MemoryType m_gpuMemoryType
Type of the GPU-side buffer.
Definition: AcceleratorBuffer.h:252
__attribute__((noinline)) void UpdateGpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1426
void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the CPU in the future.
Definition: AcceleratorBuffer.h:850
void FreeCpuBuffer()
Free the CPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1083
void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer &cmdBuf)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:961
__attribute__((noinline)) void UpdateCpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1453
bool HasGpuBuffer() const
Returns true if there is currently a GPU-side buffer.
Definition: AcceleratorBuffer.h:415
void push_front(const T &value)
Inserts a new item at the beginning of the container. This is inefficient due to copying.
Definition: AcceleratorBuffer.h:775
void CopyToGpuNonblocking(vk::raii::CommandBuffer &cmdBuf)
Copy the buffer contents from CPU to GPU without blocking on the CPU.
Definition: AcceleratorBuffer.h:1036
size_t GetGpuMemoryBytes() const
Returns the total reserved GPU memory, in bytes.
Definition: AcceleratorBuffer.h:380
int m_tempFileHandle
File handle used for MEM_TYPE_CPU_PAGED.
Definition: AcceleratorBuffer.h:283
void shrink_to_fit()
Frees unused memory so that m_size == m_capacity.
Definition: AcceleratorBuffer.h:493
size_t m_capacity
Size of the allocated memory space (may be larger than m_size)
Definition: AcceleratorBuffer.h:293
void FreeGpuBuffer(bool dataLossOK=false)
Free the GPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1121
bool IsFastFromCpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the CPU.
Definition: AcceleratorBuffer.h:239
void CopyToCpu()
Copy the buffer contents from GPU to CPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:991
__attribute__((noinline)) void FreeCpuPointer(T *ptr
Frees a CPU-side buffer.
AcceleratorBuffer(const std::string &name="")
Creates a new AcceleratorBuffer with no content.
Definition: AcceleratorBuffer.h:322
MemoryAttributes
Attributes that a memory buffer can have.
Definition: AcceleratorBuffer.h:175
bool m_cpuPhysMemIsStale
True if m_cpuPtr contains stale data (m_gpuPhysMem has been modified and they point to different memory).
Definition: AcceleratorBuffer.h:276
__attribute__((noinline)) void AllocateCpuBuffer(size_t size)
Allocates a buffer for CPU access.
Definition: AcceleratorBuffer.h:1150
__attribute__((noinline)) void Reallocate(size_t size)
Reallocates the buffer so that it contains exactly size elements.
Definition: AcceleratorBuffer.h:548
UsageHint m_cpuAccessHint
Hint about how likely future CPU access is.
Definition: AcceleratorBuffer.h:310
MemoryType
Types of memory buffer.
Definition: AcceleratorBuffer.h:193
T * GetCpuPointer()
Gets a pointer to the CPU-side buffer.
Definition: AcceleratorBuffer.h:440
void SetName(std::string name)
Sets the debug name for this buffer.
Definition: AcceleratorBuffer.h:1487
void clear()
Resize the container to be empty (but don't free memory)
Definition: AcceleratorBuffer.h:478
bool IsGpuBufferStale() const
Returns true if the GPU-side buffer is stale.
Definition: AcceleratorBuffer.h:403
void deallocate(T *const p, const size_t unused) const
Free a block of memory.
Definition: AlignedAllocator.h:194
T * allocate(size_t n) const
Allocate a block of memory.
Definition: AlignedAllocator.h:159
A unit of measurement, plus conversion to pretty-printed output.
Definition: Unit.h:57
uint32_t g_vkPinnedMemoryType
Vulkan memory type for CPU-based memory that is also GPU-readable.
Definition: VulkanInit.cpp:118
std::mutex g_vkTransferMutex
Mutex for interlocking access to g_vkTransferCommandBuffer and g_vkTransferCommandPool.
Definition: VulkanInit.cpp:112
bool g_hasDebugUtils
Indicates whether the VK_EXT_debug_utils extension is available.
Definition: VulkanInit.cpp:195
std::shared_ptr< vk::raii::Device > g_vkComputeDevice
The Vulkan device selected for compute operations (may or may not be same device as rendering)
Definition: VulkanInit.cpp:71
std::unique_ptr< vk::raii::CommandBuffer > g_vkTransferCommandBuffer
Command buffer for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:89
std::shared_ptr< QueueHandle > g_vkTransferQueue
Queue for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:98
uint32_t g_vkLocalMemoryType
Vulkan memory type for GPU-based memory (generally not CPU-readable, except on unified memory systems).
Definition: VulkanInit.cpp:124
bool g_vulkanDeviceHasUnifiedMemory
Indicates whether the Vulkan device is unified memory.
Definition: VulkanInit.cpp:220