ngscopeclient v0.1
AcceleratorBuffer.h
Go to the documentation of this file.
1/***********************************************************************************************************************
2* *
3* libscopehal *
4* *
5* Copyright (c) 2012-2025 Andrew D. Zonenberg and contributors *
6* All rights reserved. *
7* *
8* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the *
9* following conditions are met: *
10* *
11* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the *
12* following disclaimer. *
13* *
14* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the *
15* following disclaimer in the documentation and/or other materials provided with the distribution. *
16* *
17* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products *
18* derived from this software without specific prior written permission. *
19* *
20* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
21* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL *
22* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
23* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *
24* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
25* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *
26* POSSIBILITY OF SUCH DAMAGE. *
27* *
28***********************************************************************************************************************/
29
35#ifndef AcceleratorBuffer_h
36#define AcceleratorBuffer_h
37
38#include "AlignedAllocator.h"
39#include "QueueManager.h"
40
41#ifdef _WIN32
42#undef MemoryBarrier
43#endif
44
45#ifndef _WIN32
46#include <sys/mman.h>
47#include <unistd.h>
48#endif
49
50#include <type_traits>
51
52extern uint32_t g_vkPinnedMemoryType;
53extern uint32_t g_vkLocalMemoryType;
54extern std::shared_ptr<vk::raii::Device> g_vkComputeDevice;
55extern std::unique_ptr<vk::raii::CommandBuffer> g_vkTransferCommandBuffer;
56extern std::shared_ptr<QueueHandle> g_vkTransferQueue;
57extern std::mutex g_vkTransferMutex;
58
59extern bool g_hasDebugUtils;
61
62template<class T>
64
67{
69 Hard,
70
79 Soft
80};
81
84{
86 Host,
87
89 Device
90};
91
93typedef bool (*MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize);
94
95bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize);
96
97template<class T>
99{
100public:
101 using value_type = T;
102 using iterator_category = std::forward_iterator_tag;
103 using difference_type = std::ptrdiff_t;
104 using pointer = T*;
105 using reference = T&;
106
108 : m_index(i)
109 , m_buf(buf)
110 {}
111
//Dereferences the iterator, returning a reference to the element at the current index of the parent buffer.
//The parent buffer's CPU-side data must be valid (see PrepareForCpuAccess on AcceleratorBuffer).
112 T& operator*()
113 { return m_buf[m_index]; }
114
//Returns the element index this iterator currently points to within the parent buffer
115 size_t GetIndex() const
116 { return m_index; }
117
118 bool operator!=(AcceleratorBufferIterator<T>& it)
119 {
120 //TODO: should we check m_buf equality too?
121 //Will slow things down, but be more semantically correct. Does anything care?
122 return (m_index != it.m_index);
123 }
124
125 AcceleratorBufferIterator<T>& operator++()
126 {
127 m_index ++;
128 return *this;
129 }
130
131protected:
132 size_t m_index;
134};
135
136template<class T>
137std::ptrdiff_t operator-(const AcceleratorBufferIterator<T>& a, const AcceleratorBufferIterator<T>& b)
138{ return a.GetIndex() - b.GetIndex(); }
139
156template<class T>
158{
159protected:
160
162 // Allocator for CPU-only memory
163
164 AlignedAllocator<T, 32> m_cpuAllocator;
165
166public:
167
169 // Buffer types
170
175 {
176 //Location of the memory
177 MEM_ATTRIB_CPU_SIDE = 0x1,
178 MEM_ATTRIB_GPU_SIDE = 0x2,
179
180 //Reachability
181 MEM_ATTRIB_CPU_REACHABLE = 0x4,
182 MEM_ATTRIB_GPU_REACHABLE = 0x8,
183
184 //Speed
185 MEM_ATTRIB_CPU_FAST = 0x10,
186 MEM_ATTRIB_GPU_FAST = 0x20
187 };
188
193 {
194 //Pointer is invalid
195 MEM_TYPE_NULL = 0,
196
197 //Memory is located on the CPU but backed by a file and may get paged out
198 MEM_TYPE_CPU_PAGED =
199 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE,
200
201 //Memory is located on the CPU but not pinned, or otherwise accessible to the GPU
202 MEM_TYPE_CPU_ONLY =
203 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST,
204
205 //Memory is located on the CPU, but can be accessed by the GPU.
206 //Fast to access from the CPU, but accesses from the GPU require PCIe DMA and is slow
207 //(unless platform uses unified memory, in which case g_vulkanDeviceHasUnifiedMemory will be true)
208 MEM_TYPE_CPU_DMA_CAPABLE =
209 MEM_ATTRIB_CPU_SIDE | MEM_ATTRIB_CPU_REACHABLE | MEM_ATTRIB_CPU_FAST | MEM_ATTRIB_GPU_REACHABLE,
210
211 //Memory is located on the GPU and cannot be directly accessed by the CPU
212 MEM_TYPE_GPU_ONLY =
213 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST,
214
215 //Memory is located on the GPU, but can be accessed by the CPU.
216 //Fast to access from the GPU, but accesses from the CPU require PCIe DMA and is slow
217 //(should not be used if platform uses unified memory, in which case g_vulkanDeviceHasUnifiedMemory will be true)
218 MEM_TYPE_GPU_DMA_CAPABLE =
219 MEM_ATTRIB_GPU_SIDE | MEM_ATTRIB_GPU_REACHABLE | MEM_ATTRIB_GPU_FAST | MEM_ATTRIB_CPU_REACHABLE
220 };
221
222protected:
223
228 { return (mt & MEM_ATTRIB_CPU_REACHABLE) != 0; }
229
234 { return (mt & MEM_ATTRIB_GPU_REACHABLE) != 0; }
235
240 { return (mt & MEM_ATTRIB_CPU_FAST) != 0; }
241
246 { return (mt & MEM_ATTRIB_GPU_FAST) != 0; }
247
250
253
255 // The actual memory buffers
256
259
261 std::unique_ptr<vk::raii::DeviceMemory> m_cpuPhysMem;
262
264 std::unique_ptr<vk::raii::DeviceMemory> m_gpuPhysMem;
265
267 std::unique_ptr<vk::raii::Buffer> m_cpuBuffer;
268
270 std::unique_ptr<vk::raii::Buffer> m_gpuBuffer;
271
274
277
280
282#ifndef _WIN32
284#endif
285
287 // Iteration
288
290 // Sizes of buffers
291
294
296 size_t m_size;
297
299 // Hint configuration
300public:
//Expected access frequency from one side (CPU or GPU); used when (re)allocating
//to choose between paged, pinned, and device-local memory
301 enum UsageHint
302 {
//No access expected from this side
303 HINT_NEVER,
//Access possible but expected to be infrequent
304 HINT_UNLIKELY,
//Access expected to be frequent
305 HINT_LIKELY
306 };
307
308protected:
311
314
316 // Construction / destruction
317public:
318
322 AcceleratorBuffer(const std::string& name = "")
323 : m_cpuMemoryType(MEM_TYPE_NULL)
324 , m_gpuMemoryType(MEM_TYPE_NULL)
325 , m_cpuPtr(nullptr)
326 , m_gpuPhysMem(nullptr)
327 , m_buffersAreSame(false)
328 , m_cpuPhysMemIsStale(false)
329 , m_gpuPhysMemIsStale(false)
330 #ifndef _WIN32
332 #endif
333 , m_capacity(0)
334 , m_size(0)
335 , m_cpuAccessHint(HINT_LIKELY) //default access hint: CPU-side pinned memory
336 , m_gpuAccessHint(HINT_UNLIKELY)
337 , m_name(name)
338 {
339 //non-trivially-copyable types can't be copied to GPU except on unified memory platforms
340 if(!std::is_trivially_copyable<T>::value && !g_vulkanDeviceHasUnifiedMemory)
341 m_gpuAccessHint = HINT_NEVER;
342 }
343
345 {
347 FreeGpuBuffer(true);
348 }
349
351 // General accessors
352public:
353
//Returns the number of elements currently in the container (not the allocated capacity)
357 size_t size() const
358 { return m_size; }
359
//Returns the number of elements of storage currently allocated (always >= size())
363 size_t capacity() const
364 { return m_capacity; }
365
369 size_t GetCpuMemoryBytes() const
370 {
371 if(m_cpuMemoryType == MEM_TYPE_NULL)
372 return 0;
373 else
374 return m_capacity * sizeof(T);
375 }
376
380 size_t GetGpuMemoryBytes() const
381 {
382 if(m_gpuMemoryType == MEM_TYPE_NULL)
383 return 0;
384 else
385 return m_capacity * sizeof(T);
386 }
387
//Returns true if the container holds no elements
391 bool empty() const
392 { return (m_size == 0); }
393
//Returns true if the CPU-side copy of the data is stale
//(i.e. the authoritative content is currently GPU-side and has not been copied back)
397 bool IsCpuBufferStale() const
398 { return m_cpuPhysMemIsStale; }
399
//Returns true if the GPU-side copy of the data is stale
//(i.e. the authoritative content is currently CPU-side and has not been pushed to the GPU)
403 bool IsGpuBufferStale() const
404 { return m_gpuPhysMemIsStale; }
405
//Returns true if a CPU-side buffer is currently allocated (mapped pointer is non-null)
409 bool HasCpuBuffer() const
410 { return (m_cpuPtr != nullptr); }
411
//Returns true if a GPU-side physical memory allocation currently exists
415 bool HasGpuBuffer() const
416 { return (m_gpuPhysMem != nullptr); }
417
422 { return m_buffersAreSame; }
423
429 vk::Buffer GetBuffer()
430 {
431 if(m_gpuBuffer != nullptr)
432 return **m_gpuBuffer;
433 else
434 return **m_cpuBuffer;
435 }
436
441 { return m_cpuPtr; }
442
446 vk::DescriptorBufferInfo GetBufferInfo()
447 {
448 return vk::DescriptorBufferInfo(
449 GetBuffer(),
450 0,
451 m_size * sizeof(T));
452 }
453
457 void resize(size_t size)
458 {
459 //Need to grow?
460 if(size > m_capacity)
461 {
462 //Default to doubling in size each time to avoid excessive copying.
463 if(m_capacity == 0)
464 reserve(size);
465 else if(size > m_capacity*2)
466 reserve(size);
467 else
468 reserve(m_capacity * 2);
469 }
470
471 //Update our size
472 m_size = size;
473 }
474
//Removes all elements (size becomes zero). Does not free or shrink the underlying buffers.
478 void clear()
479 { resize(0); }
480
484 void reserve(size_t size)
485 {
486 if(size > m_capacity)
487 Reallocate(size);
488 }
489
494 {
495 if(m_size != m_capacity)
496 Reallocate(m_size);
497 }
498
502 __attribute__((noinline))
503 void CopyFrom(const AcceleratorBuffer<T>& rhs)
504 {
505 //Copy placement hints from the other instance, then resize to match
508 resize(rhs.m_size);
509
510 //Valid data CPU side? Copy it to here
511 if(rhs.HasCpuBuffer() && !rhs.m_cpuPhysMemIsStale)
512 {
513 //non-trivially-copyable types have to be copied one at a time
514 if(!std::is_trivially_copyable<T>::value)
515 {
516 for(size_t i=0; i<m_size; i++)
517 m_cpuPtr[i] = rhs.m_cpuPtr[i];
518 }
519
520 //Trivially copyable types can be done more efficiently in a block
521 else
522 memcpy(m_cpuPtr, rhs.m_cpuPtr, m_size * sizeof(T));
523 }
525
526 //Valid data GPU side? Copy it to here
527 if(rhs.HasGpuBuffer() && !rhs.m_gpuPhysMemIsStale)
528 {
529 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
530
531 //Make the transfer request
532 g_vkTransferCommandBuffer->begin({});
533 vk::BufferCopy region(0, 0, m_size * sizeof(T));
534 g_vkTransferCommandBuffer->copyBuffer(**rhs.m_gpuBuffer, **m_gpuBuffer, {region});
536
537 //Submit the request and block until it completes
539 }
541 }
542
543protected:
544
548 __attribute__((noinline))
549 void Reallocate(size_t size)
550 {
551 if(size == 0)
552 return;
553
554 /*
555 If we are a bool[] or similar one-byte type, we are likely going to be accessed from the GPU via a uint32
556 descriptor for at least some shaders (such as rendering).
557
558 Round our actual allocated size to the next multiple of 4 bytes. The padding values are unimportant as the
559 bytes are never written, and the data read from the high bytes in the uint32 is discarded by the GPU.
560 We just need to ensure the memory is allocated so the 32-bit read is legal to perform.
561 */
562 if( (sizeof(T) == 1) && (m_gpuAccessHint != HINT_NEVER) )
563 {
564 if(size & 3)
565 size = (size | 3) + 1;
566 }
567
568 //If we do not anticipate using the data on the CPU, we shouldn't waste RAM.
569 //Allocate a GPU-local buffer, copy data to it, then free the CPU-side buffer
570 //Don't do this if the platform has unified memory
571 if( (m_cpuAccessHint == HINT_NEVER) && !g_vulkanDeviceHasUnifiedMemory)
572 {
575 }
576
577 else
578 {
579 //Resize CPU memory
580 //TODO: optimization, when expanding a MEM_TYPE_CPU_PAGED we can just enlarge the file
581 //and not have to make a new temp file and copy the content
582 if(m_cpuPtr != nullptr)
583 {
584 //Save the old pointer
585 auto pOld = m_cpuPtr;
586 auto pOldPin = std::move(m_cpuPhysMem);
587 auto type = m_cpuMemoryType;
588
589 //Allocate the new buffer
590 AllocateCpuBuffer(size);
591
592 //If CPU-side data is valid, copy existing data over.
593 //New pointer is still valid in this case.
595 {
596 //non-trivially-copyable types have to be copied one at a time
597 if(!std::is_trivially_copyable<T>::value)
598 {
599 for(size_t i=0; i<m_size; i++)
600 m_cpuPtr[i] = std::move(pOld[i]);
601 }
602
603 //Trivially copyable types can be done more efficiently in a block
604 //gcc warns about this even though we only call this code if the type is trivially copyable,
605 //so disable the warning.
606 else
607 {
608 #pragma GCC diagnostic push
609 #pragma GCC diagnostic ignored "-Wclass-memaccess"
610
611 memcpy(m_cpuPtr, pOld, m_size * sizeof(T));
612
613 #pragma GCC diagnostic pop
614 }
615 }
616
617 //If CPU-side data is stale, just allocate the new buffer but leave it as stale
618 //(don't do a potentially unnecessary copy from the GPU)
619
620 //Now we're done with the old pointer so get rid of it
621 FreeCpuPointer(pOld, pOldPin, type, m_capacity);
622 }
623
624 //Allocate new CPU memory, replacing our current (null) pointer
625 else
626 {
627 AllocateCpuBuffer(size);
628
629 //If we already had GPU-side memory containing data, then the new CPU-side buffer is stale
630 //until we copy stuff over to it
631 if(m_gpuPhysMem != nullptr)
632 m_cpuPhysMemIsStale = true;
633 }
634 }
635
636 //We're expecting to use data on the GPU, so prepare to do stuff with it
637 if(m_gpuAccessHint != HINT_NEVER)
638 {
639 //If GPU access is unlikely, we probably want to just use pinned memory.
640 //If available, mark buffers as the same, and free any existing GPU buffer we might have
641 //Always use pinned memory if the platform has unified memory
642 if( ((m_gpuAccessHint == HINT_UNLIKELY) && (m_cpuMemoryType == MEM_TYPE_CPU_DMA_CAPABLE)) || g_vulkanDeviceHasUnifiedMemory )
644
645 //Nope, we need to allocate dedicated GPU memory
646 else
647 {
648 //If we have an existing buffer with valid content, save it and copy content over
649 if( (m_gpuPhysMem != nullptr) && !m_gpuPhysMemIsStale && (m_size != 0))
650 {
651 auto pOld = std::move(m_gpuPhysMem);
652 //auto type = m_gpuMemoryType;
653 auto bOld = std::move(m_gpuBuffer);
654
655 //Allocation successful!
656 if(AllocateGpuBuffer(size))
657 {
658 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
659
660 //Make the transfer request
661 g_vkTransferCommandBuffer->begin({});
662 vk::BufferCopy region(0, 0, m_size * sizeof(T));
663 g_vkTransferCommandBuffer->copyBuffer(**bOld, **m_gpuBuffer, {region});
665
666 //Submit the request and block until it completes
668
669 //make sure buffer is freed before underlying physical memory (pOld) goes out of scope
670 bOld = nullptr;
671 }
672
673 //Allocation failed!
674 else
675 {
676 //Revert to the old buffer. We're now in a consistent state again
677 m_gpuPhysMem = std::move(pOld);
678 m_gpuBuffer = std::move(bOld);
679
680 //Make sure we have a CPU side buffer that's DMA capable
681 if(m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE)
682 {
683 SetCpuAccessHint(HINT_LIKELY);
684 SetGpuAccessHint(HINT_LIKELY);
685 AllocateCpuBuffer(size);
686 }
687
688 //Free the GPU buffer, moving its contents to the CPU
690 }
691 }
692
693 //Nope, just allocate a new buffer
694 else
695 {
696 //Allocation successful? We now have the buffer
697 if(AllocateGpuBuffer(size))
698 {
699 //If we already had CPU-side memory containing data, then the new GPU-side buffer is stale
700 //until we copy stuff over to it.
701 //Special case: if m_size is 0 (newly allocated buffer) we're not stale yet
702 if( (m_cpuPhysMem != nullptr) && (m_size != 0) )
703 m_gpuPhysMemIsStale = true;
704 }
705
706 //Allocation failed? No change, we already had the CPU buffer and don't have to touch anything
707 //But did the CPU buffer exist? if not, allocate *something*
708 else if(m_cpuPhysMem == nullptr)
709 {
710 SetCpuAccessHint(HINT_LIKELY);
711 SetGpuAccessHint(HINT_LIKELY);
712 AllocateCpuBuffer(size);
713 }
714 }
715 }
716 }
717
718 //Existing GPU buffer we never expect to use again - needs to be freed
719 else if(m_gpuPhysMem != nullptr)
721
722 //We are never going to use the buffer on the GPU, but don't have any existing GPU memory
723 //so no action required
724 else
725 {
726 }
727
728 //Update our capacity
729 m_capacity = size;
730
731 //If we have a pinned buffer and nothing on the other side, there's a single shared physical memory region
733 ( (m_cpuMemoryType == MEM_TYPE_CPU_DMA_CAPABLE) && (m_gpuMemoryType == MEM_TYPE_NULL) ) ||
734 ( (m_cpuMemoryType == MEM_TYPE_NULL) && (m_gpuMemoryType == MEM_TYPE_GPU_DMA_CAPABLE) );
735 }
736
738 // CPU-side STL-esque container API
739
740 //PrepareForCpuAccess() *must* be called prior to calling any of these methods.
741public:
742
//Read-only element access. CPU-side data must be valid (call PrepareForCpuAccess first).
743 const T& operator[](size_t i) const
744 { return m_cpuPtr[i]; }
745
//Mutable element access. CPU-side data must be valid (call PrepareForCpuAccess first).
//Does not perform any modification tracking; caller is responsible for that when writing.
746 T& operator[](size_t i)
747 { return m_cpuPtr[i]; }
748
752 void push_back(const T& value)
753 {
754 size_t cursize = m_size;
755 resize(m_size + 1);
756 m_cpuPtr[cursize] = value;
757
759 }
760
764 void push_back_nomarkmod(const T& value)
765 {
766 size_t cursize = m_size;
767 resize(m_size + 1);
768 m_cpuPtr[cursize] = value;
769 }
770
774 void pop_back()
775 {
776 if(!empty())
777 resize(m_size - 1);
778 }
779
785 void push_front(const T& value)
786 {
787 size_t cursize = m_size;
788 resize(m_size + 1);
789
791
792 //non-trivially-copyable types have to be copied one at a time
793 if(!std::is_trivially_copyable<T>::value)
794 {
795 for(size_t i=0; i<cursize; i++)
796 m_cpuPtr[i+1] = std::move(m_cpuPtr[i]);
797 }
798
799 //Trivially copyable types can be done more efficiently in a block
800 else
801 memmove(m_cpuPtr+1, m_cpuPtr, sizeof(T) * (cursize));
802
803 //Insert the new first element
804 m_cpuPtr[0] = value;
805
807 }
808
815 {
816 //No need to move data if popping last element
817 if(m_size == 1)
818 {
819 clear();
820 return;
821 }
822
823 //Don't touch GPU side buffer
824
826
827 //non-trivially-copyable types have to be copied one at a time
828 if(!std::is_trivially_copyable<T>::value)
829 {
830 for(size_t i=0; i<m_size-1; i++)
831 m_cpuPtr[i] = std::move(m_cpuPtr[i+1]);
832 }
833
834 //Trivially copyable types can be done more efficiently in a block
835 else
836 memmove(m_cpuPtr, m_cpuPtr+1, sizeof(T) * (m_size-1));
837
838 resize(m_size - 1);
839
841 }
842
844 { return AcceleratorBufferIterator<T>(*this, 0); }
845
847 { return AcceleratorBufferIterator<T>(*this, m_size); }
848
850 // Hints about near-future usage patterns
851
852public:
853
860 void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately = false)
861 {
862 m_cpuAccessHint = hint;
863
864 if(reallocateImmediately && (m_size != 0))
865 Reallocate(m_size);
866 }
867
874 void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately = false)
875 {
876 //Only trivially copyable datatypes are allowed on the GPU
877 if(!std::is_trivially_copyable<T>::value)
878 hint = HINT_NEVER;
879
880 m_gpuAccessHint = hint;
881
882 if(reallocateImmediately && (m_size != 0))
883 Reallocate(m_size);
884 }
885
887 // Cache invalidation
888
895 {
897 m_gpuPhysMemIsStale = true;
898 }
899
906 {
908 m_cpuPhysMemIsStale = true;
909 }
910
912 // Preparation for access
913
920 {
921 //Early out if no content
922 if(m_size == 0)
923 return;
924
925 //If there's no buffer at all on the CPU, allocate one
926 if(!HasCpuBuffer() && (m_gpuMemoryType != MEM_TYPE_GPU_DMA_CAPABLE))
927 AllocateCpuBuffer(m_capacity);
928
930 CopyToCpu();
931 }
932
941 void PrepareForGpuAccess(bool outputOnly = false)
942 {
943 //Early out if no content or if unified memory
945 return;
946
947 //If our current hint has no GPU access at all, update to say "unlikely" and reallocate
948 if(m_gpuAccessHint == HINT_NEVER)
949 SetGpuAccessHint(HINT_UNLIKELY, true);
950
951 //If we don't have a buffer, allocate one unless our CPU buffer is pinned and GPU-readable
952 if(!HasGpuBuffer() && (m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE) )
953 {
954 if(!AllocateGpuBuffer(m_capacity))
955 return;
956 }
957
958 //Make sure the GPU-side buffer is up to date
959 if(m_gpuPhysMemIsStale && !outputOnly)
960 CopyToGpu();
961 }
962
971 void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer& cmdBuf)
972 {
973 //Early out if no content or if unified memory
975 return;
976
977 //If our current hint has no GPU access at all, update to say "unlikely" and reallocate
978 if(m_gpuAccessHint == HINT_NEVER)
979 SetGpuAccessHint(HINT_UNLIKELY, true);
980
981 //If we don't have a buffer, allocate one unless our CPU buffer is pinned and GPU-readable
982 if(!HasGpuBuffer() && (m_cpuMemoryType != MEM_TYPE_CPU_DMA_CAPABLE) )
983 {
984 if(!AllocateGpuBuffer(m_capacity))
985 return;
986 }
987
988 //Make sure the GPU-side buffer is up to date
989 if(m_gpuPhysMemIsStale && !outputOnly)
990 CopyToGpuNonblocking(cmdBuf);
991 }
992
993protected:
994
996 // Copying of buffer content
997
1002 {
1003 assert(std::is_trivially_copyable<T>::value);
1004
1005 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
1006
1007 //Make the transfer request
1008 g_vkTransferCommandBuffer->begin({});
1009 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1010 g_vkTransferCommandBuffer->copyBuffer(**m_gpuBuffer, **m_cpuBuffer, {region});
1012
1013 //Submit the request and block until it completes
1015
1016 m_cpuPhysMemIsStale = false;
1017 }
1018
1023 {
1024 assert(std::is_trivially_copyable<T>::value);
1025
1026 std::lock_guard<std::mutex> lock(g_vkTransferMutex);
1027
1028 //Make the transfer request
1029 g_vkTransferCommandBuffer->begin({});
1030 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1031 g_vkTransferCommandBuffer->copyBuffer(**m_cpuBuffer, **m_gpuBuffer, {region});
1033
1034 //Submit the request and block until it completes
1036
1037 m_gpuPhysMemIsStale = false;
1038 }
1039
1040
//Records a CPU-to-GPU copy of the buffer contents into the supplied command buffer
//without blocking; the copy actually executes when cmdBuf is later submitted.
//Only legal for trivially copyable T (asserted below).
//@param cmdBuf Command buffer (must already be in the recording state) to append the transfer to
1046 void CopyToGpuNonblocking(vk::raii::CommandBuffer& cmdBuf)
1047 {
1048 assert(std::is_trivially_copyable<T>::value);
1049
1050 //Make the transfer request
1051 vk::BufferCopy region(0, 0, m_size * sizeof(T));
1052 cmdBuf.copyBuffer(**m_cpuBuffer, **m_gpuBuffer, {region});
1053
//Barrier so that compute-shader reads/writes recorded later in this command buffer
//observe the results of the transfer
1054 //Add the barrier
1055 cmdBuf.pipelineBarrier(
1056 vk::PipelineStageFlagBits::eTransfer,
1057 vk::PipelineStageFlagBits::eComputeShader,
1058 {},
1059 vk::MemoryBarrier(
1060 vk::AccessFlagBits::eTransferWrite,
1061 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1062 {},
1063 {});
1064
//Mark the GPU copy as current now; NOTE(review): contents only become valid once cmdBuf
//actually executes, so callers must submit cmdBuf before any other GPU work reads the buffer
1065 m_gpuPhysMemIsStale = false;
1066 }
1067public:
//Records a memory barrier into cmdBuf making host-to-device transfer writes visible to
//subsequent compute-shader reads and writes in the same command buffer.
//@param cmdBuf Command buffer (in the recording state) to append the barrier to
1071 static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer& cmdBuf)
1072 {
1073 cmdBuf.pipelineBarrier(
1074 vk::PipelineStageFlagBits::eTransfer,
1075 vk::PipelineStageFlagBits::eComputeShader,
1076 {},
1077 vk::MemoryBarrier(
1078 vk::AccessFlagBits::eTransferWrite,
1079 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite),
1080 {},
1081 {});
1082 }
1083
1084
1085protected:
1086
1088 // Cleanup
1089
1094 {
1095 //Early out if buffer is already null
1096 if(m_cpuPtr == nullptr)
1097 return;
1098
1099 //We have a buffer on the GPU.
1100 //If it's stale, need to push our updated content there before freeing the CPU-side copy
1101 if( (m_gpuMemoryType != MEM_TYPE_NULL) && m_gpuPhysMemIsStale && !empty())
1102 CopyToGpu();
1103
1104 //Free the Vulkan buffer object
1105 m_cpuBuffer = nullptr;
1106
1107 //Free the buffer and unmap any memory
1109
1110 //Mark CPU-side buffer as empty
1111 m_cpuPtr = nullptr;
1112 m_cpuPhysMem = nullptr;
1113 m_cpuMemoryType = MEM_TYPE_NULL;
1114 m_buffersAreSame = false;
1115
1116 //If we have no GPU-side buffer either, we're empty
1117 if(m_gpuMemoryType == MEM_TYPE_NULL)
1118 {
1119 m_size = 0;
1120 m_capacity = 0;
1121 }
1122 }
1123
1124public:
//Frees the GPU-side copy of the buffer, first copying content back to the CPU side
//if the CPU copy is stale, so no data is lost when a CPU buffer exists.
//@param dataLossOK Suppress the warning emitted when freeing GPU data with no CPU backing copy
1131 void FreeGpuBuffer(bool dataLossOK = false)
1132 {
1133 //Early out if buffer is already null
1134 if(m_gpuPhysMem == nullptr)
1135 return;
1136
//NOTE(review): this warning fires when the GPU copy is *stale* and there is no CPU buffer;
//in that state it is unclear where valid data lives — verify whether the intended condition
//is !m_gpuPhysMemIsStale (i.e. GPU holds the only valid copy)
1137 //If we do NOT have a CPU-side buffer, we're deleting all of our data! Warn for now
1138 if( (m_cpuMemoryType == MEM_TYPE_NULL) && m_gpuPhysMemIsStale && !empty() && !dataLossOK)
1139 {
1140 LogWarning("Freeing a GPU buffer without any CPU backing, may cause data loss\n");
1141 }
1142
1143 //If we have a CPU-side buffer, and it's stale, move our about-to-be-deleted content over before we free it
1144 if( (m_cpuMemoryType != MEM_TYPE_NULL) && m_cpuPhysMemIsStale && !empty() )
1145 CopyToCpu();
1146
//Release the buffer object before the device memory backing it
1147 m_gpuBuffer = nullptr;
1148 m_gpuPhysMem = nullptr;
1149 m_gpuMemoryType = MEM_TYPE_NULL;
1150 }
1151
1152protected:
1153
1155 // Allocation
1156
1160 __attribute__((noinline))
1161 void AllocateCpuBuffer(size_t size)
1162 {
1163 if(size == 0)
1164 LogFatal("AllocateCpuBuffer with size zero (invalid)\n");
1165
1166 //If any GPU access is expected, use pinned memory so we don't have to move things around
1167 if(m_gpuAccessHint != HINT_NEVER)
1168 {
1169 //Make a Vulkan buffer first
1170 vk::BufferCreateInfo bufinfo(
1171 {},
1172 size * sizeof(T),
1173 vk::BufferUsageFlagBits::eTransferSrc |
1174 vk::BufferUsageFlagBits::eTransferDst |
1175 vk::BufferUsageFlagBits::eStorageBuffer);
1176 m_cpuBuffer = std::make_unique<vk::raii::Buffer>(*g_vkComputeDevice, bufinfo);
1177
1178 //Figure out actual memory requirements of the buffer
1179 //(may be rounded up from what we asked for)
1180 auto req = m_cpuBuffer->getMemoryRequirements();
1181
1182 //Allocate the physical memory to back the buffer
1183 vk::MemoryAllocateInfo info(req.size, g_vkPinnedMemoryType);
1184 m_cpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1185
1186 //Map it and bind to the buffer
1187 m_cpuPtr = reinterpret_cast<T*>(m_cpuPhysMem->mapMemory(0, req.size));
1188 m_cpuBuffer->bindMemory(**m_cpuPhysMem, 0);
1189
1190 //We now have pinned memory
1191 m_cpuMemoryType = MEM_TYPE_CPU_DMA_CAPABLE;
1192
1193 if(g_hasDebugUtils)
1194 UpdateCpuNames();
1195 }
1196
1197 //If frequent CPU access is expected, use normal host memory
1198 else if(m_cpuAccessHint == HINT_LIKELY)
1199 {
1200 m_cpuBuffer = nullptr;
1201 m_cpuMemoryType = MEM_TYPE_CPU_ONLY;
1202 m_cpuPtr = m_cpuAllocator.allocate(size);
1203 }
1204
1205 //If infrequent CPU access is expected, use a memory mapped temporary file so it can be paged out to disk
1206 else
1207 {
1208 #ifdef _WIN32
1209
1210 //On Windows, use normal memory for now
1211 //until we figure out how to do this there
1212 m_cpuBuffer = nullptr;
1213 m_cpuMemoryType = MEM_TYPE_CPU_ONLY;
1214 m_cpuPtr = m_cpuAllocator.allocate(size);
1215
1216 #else
1217
1218 m_cpuBuffer = nullptr;
1219 m_cpuMemoryType = MEM_TYPE_CPU_PAGED;
1220
1221 //Make the temp file
1222 char fname[] = "/tmp/glscopeclient-tmpXXXXXX";
1223 m_tempFileHandle = mkstemp(fname);
1224 if(m_tempFileHandle < 0)
1225 {
1226 LogError("Failed to create temporary file %s\n", fname);
1227 abort();
1228 }
1229
1230 //Resize it to our desired file size
1231 size_t bytesize = size * sizeof(T);
1232 if(0 != ftruncate(m_tempFileHandle, bytesize))
1233 {
1234 LogError("Failed to resize temporary file %s\n", fname);
1235 abort();
1236 }
1237
1238 //Map it
1239 m_cpuPtr = reinterpret_cast<T*>(mmap(
1240 nullptr,
1241 bytesize,
1242 PROT_READ | PROT_WRITE,
1243 MAP_SHARED/* | MAP_UNINITIALIZED*/,
1245 0));
1246 if(m_cpuPtr == MAP_FAILED)
1247 {
1248 LogError("Failed to map temporary file %s\n", fname);
1249 perror("mmap failed: ");
1250 abort();
1251 }
1252 m_cpuMemoryType = MEM_TYPE_CPU_PAGED;
1253
1254 //Delete it (file will be removed by the OS after our active handle is closed)
1255 if(0 != unlink(fname))
1256 LogWarning("Failed to unlink temporary file %s, file will remain after application terminates\n", fname);
1257
1258 #endif
1259 }
1260
1261 //Memory has been allocated. Call constructors iff type is not trivially copyable
1262 //(This is not exactly 1:1 with having a constructor, but hopefully good enough?)
1263 if(!std::is_trivially_copyable<T>::value)
1264 {
1265 for(size_t i=0; i<size; i++)
1266 new(m_cpuPtr +i) T;
1267 }
1268 }
1269
//Frees a CPU-side allocation, running element destructors first for
//non-trivially-copyable types. The free path is selected by the memory type:
//paged memory is unmapped and its temp file handle closed, plain CPU memory goes
//back to the aligned allocator, and DMA-capable (pinned) memory must instead be
//freed via the overload that takes the owning vk::raii::DeviceMemory.
//@param ptr  Pointer to free
//@param type Memory type the pointer was allocated as
//@param size Number of elements in the allocation
1277 __attribute__((noinline))
1278 void FreeCpuPointer(T* ptr, MemoryType type, size_t size)
1279 {
1280 //Call destructors iff type is not trivially copyable
1281 if(!std::is_trivially_copyable<T>::value)
1282 {
1283 for(size_t i=0; i<size; i++)
1284 ptr[i].~T();
1285 }
1286
1287 switch(type)
1288 {
1289 case MEM_TYPE_NULL:
1290 //legal no-op
1291 break;
1292
//Pinned memory is owned by a vk::raii::DeviceMemory; this overload cannot release it
1293 case MEM_TYPE_CPU_DMA_CAPABLE:
1294 LogFatal("FreeCpuPointer for MEM_TYPE_CPU_DMA_CAPABLE requires the vk::raii::DeviceMemory\n");
1295 break;
1296
//Paged memory is an mmap'd temp file: unmap, then close (the file was unlinked at creation)
1297 case MEM_TYPE_CPU_PAGED:
1298 #ifndef _WIN32
1299 munmap(ptr, size * sizeof(T));
1300 close(m_tempFileHandle);
1301 m_tempFileHandle = -1;
1302 #endif
1303 break;
1304
1305 case MEM_TYPE_CPU_ONLY:
1306 m_cpuAllocator.deallocate(ptr, size);
1307 break;
1308
1309 default:
1310 LogFatal("FreeCpuPointer: invalid type %x\n", type);
1311 }
1312 }
1313
1321 __attribute__((noinline))
1322 void FreeCpuPointer(T* ptr, std::unique_ptr<vk::raii::DeviceMemory>& buf, MemoryType type, size_t size)
1323 {
1324 switch(type)
1325 {
1326 case MEM_TYPE_CPU_DMA_CAPABLE:
1327 buf->unmapMemory();
1328 break;
1329
1330 default:
1331 FreeCpuPointer(ptr, type, size);
1332 }
1333 }
1334
1340 __attribute__((noinline))
1341 bool AllocateGpuBuffer(size_t size)
1342 {
1343 assert(std::is_trivially_copyable<T>::value);
1344
1345 //Make a Vulkan buffer first
1346 vk::BufferCreateInfo bufinfo(
1347 {},
1348 size * sizeof(T),
1349 vk::BufferUsageFlagBits::eTransferSrc |
1350 vk::BufferUsageFlagBits::eTransferDst |
1351 vk::BufferUsageFlagBits::eStorageBuffer);
1352 m_gpuBuffer = std::make_unique<vk::raii::Buffer>(*g_vkComputeDevice, bufinfo);
1353
1354 //Figure out actual memory requirements of the buffer
1355 //(may be rounded up from what we asked for)
1356 auto req = m_gpuBuffer->getMemoryRequirements();
1357
1358 //Try to allocate the memory
1359 vk::MemoryAllocateInfo info(req.size, g_vkLocalMemoryType);
1360 try
1361 {
1362 //For now, always use local memory
1363 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1364 }
1365
1366 //Fallback path in case of low memory
1367 catch(vk::OutOfDeviceMemoryError& ex)
1368 {
1369 bool ok = false;
1370 while(!ok)
1371 {
1372 //Attempt to free memory and stop if we couldn't free more
1374 break;
1375
1377 try
1378 {
1379 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1380 ok = true;
1381 }
1382 catch(vk::OutOfDeviceMemoryError& ex2)
1383 {
1384 LogDebug("Allocation failed again\n");
1385 }
1386 }
1387
1388 //Retry one more time.
1389 //If we OOM simultaneously in two threads, it's possible to have the second OnMemoryPressure call
1390 //return false because the first one already freed all it could. But we might have enough free to continue.
1391 if(!ok)
1392 {
1393 LogDebug("Final retry\n");
1394 try
1395 {
1396 m_gpuPhysMem = std::make_unique<vk::raii::DeviceMemory>(*g_vkComputeDevice, info);
1397 ok = true;
1398 }
1399 catch(vk::OutOfDeviceMemoryError& ex2)
1400 {
1401 LogDebug("Allocation failed again\n");
1402 }
1403 }
1404
1405 //If we get here, we couldn't allocate no matter what
1406 //Fall back to a CPU-side allocation
1407 if(!ok)
1408 {
1409 LogError(
1410 "Failed to allocate %s of GPU memory despite our best efforts to reclaim space, falling back to CPU-side pinned allocation\n",
1411 Unit(Unit::UNIT_BYTES).PrettyPrint(req.size, 4).c_str());
1412 m_gpuMemoryType = MEM_TYPE_NULL;
1413 m_gpuPhysMem = nullptr;
1414 m_gpuBuffer = nullptr;
1415 return false;
1416 }
1417 }
1418 m_gpuMemoryType = MEM_TYPE_GPU_ONLY;
1419
1420 m_gpuBuffer->bindMemory(**m_gpuPhysMem, 0);
1421
1422 if(g_hasDebugUtils)
1423 UpdateGpuNames();
1424
1425 return true;
1426 }
1427
1428protected:
1429
1431 std::string m_name;
1432
1436 __attribute__((noinline))
1437 void UpdateGpuNames()
1438 {
1439 std::string sname = m_name;
1440 if(sname.empty())
1441 sname = "unnamed";
1442 std::string prefix = std::string("AcceleratorBuffer.") + sname + ".";
1443
1444 std::string gpuBufName = prefix + "m_gpuBuffer";
1445 std::string gpuPhysName = prefix + "m_gpuPhysMem";
1446
1447 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1448 vk::DebugUtilsObjectNameInfoEXT(
1449 vk::ObjectType::eBuffer,
1450 reinterpret_cast<uint64_t>(static_cast<VkBuffer>(**m_gpuBuffer)),
1451 gpuBufName.c_str()));
1452
1453 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1454 vk::DebugUtilsObjectNameInfoEXT(
1455 vk::ObjectType::eDeviceMemory,
1456 reinterpret_cast<uint64_t>(static_cast<VkDeviceMemory>(**m_gpuPhysMem)),
1457 gpuPhysName.c_str()));
1458 }
1459
1463 __attribute__((noinline))
1464 void UpdateCpuNames()
1465 {
1466 std::string sname = m_name;
1467 if(sname.empty())
1468 sname = "unnamed";
1469 std::string prefix = std::string("AcceleratorBuffer.") + sname + ".";
1470
1471 std::string cpuBufName = prefix + "m_cpuBuffer";
1472 std::string cpuPhysName = prefix + "m_cpuPhysMem";
1473
1474 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1475 vk::DebugUtilsObjectNameInfoEXT(
1476 vk::ObjectType::eBuffer,
1477 reinterpret_cast<uint64_t>(static_cast<VkBuffer>(**m_cpuBuffer)),
1478 cpuBufName.c_str()));
1479
1480 g_vkComputeDevice->setDebugUtilsObjectNameEXT(
1481 vk::DebugUtilsObjectNameInfoEXT(
1482 vk::ObjectType::eDeviceMemory,
1483 reinterpret_cast<uint64_t>(static_cast<VkDeviceMemory>(**m_cpuPhysMem)),
1484 cpuPhysName.c_str()));
1485 }
1486
1487public:
1488
1497 void SetName(std::string name)
1498 {
1499 m_name = name;
1500 if(g_hasDebugUtils)
1501 {
1502 if(m_gpuBuffer != nullptr)
1503 UpdateGpuNames();
1504 if(m_cpuBuffer != nullptr)
1505 UpdateCpuNames();
1506 }
1507 }
1508
1509};
1510
1511extern std::set<MemoryPressureHandler> g_memoryPressureHandlers;
1512
1513#endif
bool(* MemoryPressureHandler)(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Memory pressure handler type, called when free memory reaches a warning level or a Vulkan allocation fails, so subsystems can release memory.
Definition: AcceleratorBuffer.h:93
MemoryPressureLevel
Levels of memory pressure.
Definition: AcceleratorBuffer.h:67
@ Hard
A memory allocation has failed and we need to free memory immediately to continue execution.
@ Soft
Free memory has reached a warning threshold.
bool OnMemoryPressure(MemoryPressureLevel level, MemoryPressureType type, size_t requestedSize)
Called when we run low on memory.
Definition: scopehal.cpp:1038
MemoryPressureType
Types of memory pressure.
Definition: AcceleratorBuffer.h:84
@ Host
Pinned CPU-side memory.
@ Device
GPU-side memory.
std::set< MemoryPressureHandler > g_memoryPressureHandlers
List of handlers for low memory registered by various subsystems.
Definition: scopehal.cpp:147
Declaration of AlignedAllocator.
Declaration of QueueManager and QueueHandle.
Definition: AcceleratorBuffer.h:99
A buffer of memory which may be used by GPU acceleration.
Definition: AcceleratorBuffer.h:158
void CopyToGpu()
Copy the buffer contents from CPU to GPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:1022
std::unique_ptr< vk::raii::Buffer > m_gpuBuffer
Buffer object for GPU-side memory.
Definition: AcceleratorBuffer.h:270
bool empty() const
Returns true if the container is empty.
Definition: AcceleratorBuffer.h:391
T * m_cpuPtr
CPU-side mapped pointer.
Definition: AcceleratorBuffer.h:258
void pop_front()
Removes the first item in the container.
Definition: AcceleratorBuffer.h:814
__attribute__((noinline)) void CopyFrom(const AcceleratorBuffer< T > &rhs)
Copies our content from another AcceleratorBuffer.
Definition: AcceleratorBuffer.h:502
void push_back(const T &value)
Adds a new element to the end of the container, allocating space if needed.
Definition: AcceleratorBuffer.h:752
bool IsCpuBufferStale() const
Returns true if the CPU-side buffer is stale.
Definition: AcceleratorBuffer.h:397
void push_back_nomarkmod(const T &value)
Adds a new element to the end of the container, allocating space if needed but without calling MarkMo...
Definition: AcceleratorBuffer.h:764
vk::DescriptorBufferInfo GetBufferInfo()
Returns a vk::DescriptorBufferInfo suitable for binding this object to.
Definition: AcceleratorBuffer.h:446
std::unique_ptr< vk::raii::DeviceMemory > m_cpuPhysMem
CPU-side physical memory.
Definition: AcceleratorBuffer.h:261
void MarkModifiedFromGpu()
Marks the GPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:905
size_t m_size
Size of the memory actually being used.
Definition: AcceleratorBuffer.h:296
bool IsReachableFromCpu(MemoryType mt)
Returns true if the given buffer type can be reached from the CPU.
Definition: AcceleratorBuffer.h:227
bool IsSingleSharedBuffer() const
Returns true if the object contains only a single buffer.
Definition: AcceleratorBuffer.h:421
bool HasCpuBuffer() const
Returns true if there is currently a CPU-side buffer.
Definition: AcceleratorBuffer.h:409
void MarkModifiedFromCpu()
Marks the CPU-side copy of the buffer as modified.
Definition: AcceleratorBuffer.h:894
std::unique_ptr< vk::raii::DeviceMemory > m_gpuPhysMem
GPU-side physical memory.
Definition: AcceleratorBuffer.h:264
void PrepareForCpuAccess()
Prepares the buffer to be accessed from the CPU.
Definition: AcceleratorBuffer.h:919
size_t size() const
Returns the actual size of the container (may be smaller than what was allocated)
Definition: AcceleratorBuffer.h:357
void SetGpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the GPU in the future.
Definition: AcceleratorBuffer.h:874
static void HostToDeviceTransferMemoryBarrier(vk::raii::CommandBuffer &cmdBuf)
Adds a memory barrier for transferring data from host to device.
Definition: AcceleratorBuffer.h:1071
bool m_gpuPhysMemIsStale
True if m_gpuPhysMem contains stale data (m_cpuPtr has been modified and they point to different memo...
Definition: AcceleratorBuffer.h:279
bool IsReachableFromGpu(MemoryType mt)
Returns true if the given buffer type can be reached from the GPU.
Definition: AcceleratorBuffer.h:233
vk::Buffer GetBuffer()
Returns the preferred buffer for GPU-side access.
Definition: AcceleratorBuffer.h:429
void pop_back()
Removes the last item in the container.
Definition: AcceleratorBuffer.h:774
size_t GetCpuMemoryBytes() const
Returns the total reserved CPU memory, in bytes.
Definition: AcceleratorBuffer.h:369
void resize(size_t size)
Change the usable size of the container.
Definition: AcceleratorBuffer.h:457
size_t capacity() const
Returns the allocated size of the container.
Definition: AcceleratorBuffer.h:363
bool m_buffersAreSame
True if we have only one piece of physical memory accessible from both sides.
Definition: AcceleratorBuffer.h:273
void reserve(size_t size)
Reallocates buffers so that at least size elements of storage are available.
Definition: AcceleratorBuffer.h:484
UsageHint m_gpuAccessHint
Hint about how likely future GPU access is.
Definition: AcceleratorBuffer.h:313
MemoryType m_cpuMemoryType
Type of the CPU-side buffer.
Definition: AcceleratorBuffer.h:249
bool IsFastFromGpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the GPU.
Definition: AcceleratorBuffer.h:245
void PrepareForGpuAccess(bool outputOnly=false)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:941
std::unique_ptr< vk::raii::Buffer > m_cpuBuffer
Buffer object for CPU-side memory.
Definition: AcceleratorBuffer.h:267
MemoryType m_gpuMemoryType
Type of the GPU-side buffer.
Definition: AcceleratorBuffer.h:252
__attribute__((noinline)) void UpdateGpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1436
void SetCpuAccessHint(UsageHint hint, bool reallocateImmediately=false)
Sets a hint to the buffer on how often we expect to use it on the CPU in the future.
Definition: AcceleratorBuffer.h:860
void FreeCpuBuffer()
Free the CPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1093
void PrepareForGpuAccessNonblocking(bool outputOnly, vk::raii::CommandBuffer &cmdBuf)
Prepares the buffer to be accessed from the GPU.
Definition: AcceleratorBuffer.h:971
__attribute__((noinline)) void UpdateCpuNames()
Pushes our friendly name to the underlying Vulkan objects.
Definition: AcceleratorBuffer.h:1463
bool HasGpuBuffer() const
Returns true if there is currently a GPU-side buffer.
Definition: AcceleratorBuffer.h:415
void push_front(const T &value)
Inserts a new item at the beginning of the container. This is inefficient due to copying.
Definition: AcceleratorBuffer.h:785
void CopyToGpuNonblocking(vk::raii::CommandBuffer &cmdBuf)
Copy the buffer contents from CPU to GPU without blocking on the CPU.
Definition: AcceleratorBuffer.h:1046
size_t GetGpuMemoryBytes() const
Returns the total reserved GPU memory, in bytes.
Definition: AcceleratorBuffer.h:380
int m_tempFileHandle
File handle used for MEM_TYPE_CPU_PAGED.
Definition: AcceleratorBuffer.h:283
void shrink_to_fit()
Frees unused memory so that m_size == m_capacity.
Definition: AcceleratorBuffer.h:493
size_t m_capacity
Size of the allocated memory space (may be larger than m_size)
Definition: AcceleratorBuffer.h:293
void FreeGpuBuffer(bool dataLossOK=false)
Free the GPU-side buffer and underlying physical memory.
Definition: AcceleratorBuffer.h:1131
bool IsFastFromCpu(MemoryType mt)
Returns true if the given buffer type is fast to access from the CPU.
Definition: AcceleratorBuffer.h:239
void CopyToCpu()
Copy the buffer contents from GPU to CPU and blocks until the transfer completes.
Definition: AcceleratorBuffer.h:1001
__attribute__((noinline)) void FreeCpuPointer(T *ptr
Frees a CPU-side buffer.
AcceleratorBuffer(const std::string &name="")
Creates a new AcceleratorBuffer with no content.
Definition: AcceleratorBuffer.h:322
MemoryAttributes
Attributes that a memory buffer can have.
Definition: AcceleratorBuffer.h:175
bool m_cpuPhysMemIsStale
True if m_cpuPtr contains stale data (m_gpuPhysMem has been modified and they point to different memo...
Definition: AcceleratorBuffer.h:276
__attribute__((noinline)) void AllocateCpuBuffer(size_t size)
Allocates a buffer for CPU access.
Definition: AcceleratorBuffer.h:1160
__attribute__((noinline)) void Reallocate(size_t size)
Reallocates the buffer so that it contains exactly size elements.
Definition: AcceleratorBuffer.h:548
UsageHint m_cpuAccessHint
Hint about how likely future CPU access is.
Definition: AcceleratorBuffer.h:310
MemoryType
Types of memory buffer.
Definition: AcceleratorBuffer.h:193
T * GetCpuPointer()
Gets a pointer to the CPU-side buffer.
Definition: AcceleratorBuffer.h:440
void SetName(std::string name)
Sets the debug name for this buffer.
Definition: AcceleratorBuffer.h:1497
void clear()
Resize the container to be empty (but don't free memory)
Definition: AcceleratorBuffer.h:478
bool IsGpuBufferStale() const
Returns true if the GPU-side buffer is stale.
Definition: AcceleratorBuffer.h:403
void deallocate(T *const p, const size_t unused) const
Free a block of memory.
Definition: AlignedAllocator.h:194
T * allocate(size_t n) const
Allocate a block of memory.
Definition: AlignedAllocator.h:159
A unit of measurement, plus conversion to pretty-printed output.
Definition: Unit.h:57
uint32_t g_vkPinnedMemoryType
Vulkan memory type for CPU-based memory that is also GPU-readable.
Definition: VulkanInit.cpp:118
std::mutex g_vkTransferMutex
Mutex for interlocking access to g_vkTransferCommandBuffer and g_vkTransferCommandPool.
Definition: VulkanInit.cpp:112
bool g_hasDebugUtils
Indicates whether the VK_EXT_debug_utils extension is available.
Definition: VulkanInit.cpp:195
std::shared_ptr< vk::raii::Device > g_vkComputeDevice
The Vulkan device selected for compute operations (may or may not be same device as rendering)
Definition: VulkanInit.cpp:71
std::unique_ptr< vk::raii::CommandBuffer > g_vkTransferCommandBuffer
Command buffer for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:89
std::shared_ptr< QueueHandle > g_vkTransferQueue
Queue for AcceleratorBuffer transfers.
Definition: VulkanInit.cpp:98
uint32_t g_vkLocalMemoryType
Vulkan memory type for GPU-based memory (generally not CPU-readable, except on unified memory systems...
Definition: VulkanInit.cpp:124
bool g_vulkanDeviceHasUnifiedMemory
Indicates whether the Vulkan device is unified memory.
Definition: VulkanInit.cpp:220