Program Listing for File xallocator.cpp
↰ Return to documentation for file (core/lib/utils/blockAllocator/xallocator.cpp
)
//==================================================================================
// BSD 2-Clause License
//
// Copyright (c) 2014-2022, NJIT, Duality Technologies Inc. and other contributors
//
// All rights reserved.
//
// Author TPOC: contact@openfhe.org
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//==================================================================================
/*
See http://www.codeproject.com/Articles/1089905/A-Custom-STL-std-allocator-Replacement-Improves-Performance-
*/
#include <cassert>
#include <cstring> // for memcpy consider changing to copy
#include <iostream>
#include <mutex>
#include <thread>
#include "utils/blockAllocator/blockAllocator.h"
#include "utils/blockAllocator/xallocator.h"
#include "utils/debug.h"
#ifndef CHAR_BIT
#define CHAR_BIT 8
#endif
// static CRITICAL_SECTION _criticalSection;
static std::mutex xalloc_mutex;
static bool _xallocInitialized = false;
// Define STATIC_POOLS to switch from heap blocks mode to static pools mode
// #define STATIC_POOLS
#ifdef STATIC_POOLS
// Update this section as necessary if you want to use static memory pools.
// See also xalloc_init() and xalloc_destroy() for additional updates required.
#define MAX_ALLOCATORS 14
// #define MAX_BLOCKS 2048
#define MAX_BLOCKS 18948
// Create static storage for each static allocator instance
char* _allocator8[sizeof(AllocatorPool<char[8], MAX_BLOCKS>)];
char* _allocator16[sizeof(AllocatorPool<char[16], MAX_BLOCKS>)];
char* _allocator32[sizeof(AllocatorPool<char[32], MAX_BLOCKS>)];
char* _allocator64[sizeof(AllocatorPool<char[64], MAX_BLOCKS>)];
char* _allocator128[sizeof(AllocatorPool<char[128], MAX_BLOCKS>)];
char* _allocator256[sizeof(AllocatorPool<char[256], MAX_BLOCKS>)];
char* _allocator396[sizeof(AllocatorPool<char[396], MAX_BLOCKS>)];
char* _allocator512[sizeof(AllocatorPool<char[512], MAX_BLOCKS>)];
char* _allocator768[sizeof(AllocatorPool<char[768], MAX_BLOCKS>)];
char* _allocator1024[sizeof(AllocatorPool<char[1024], MAX_BLOCKS>)];
char* _allocator2048[sizeof(AllocatorPool<char[2048], MAX_BLOCKS>)];
char* _allocator4096[sizeof(AllocatorPool<char[4096], MAX_BLOCKS>)];
char* _allocator8192[sizeof(AllocatorPool<char[8192], MAX_BLOCKS>)];
char* _allocator16384[sizeof(AllocatorPool<char[16384], MAX_BLOCKS>)];
// Array of pointers to all allocator instances
static Allocator* _allocators[MAX_ALLOCATORS];
#else
#define MAX_ALLOCATORS 15
static Allocator* _allocators[MAX_ALLOCATORS];
#endif // STATIC_POOLS
static XallocInitDestroy xallocInitDestroy;
// For C++ applications, must define AUTOMATIC_XALLOCATOR_INIT_DESTROY to
// correctly ensure allocators are initialized before any static user C++
// construtor/destructor executes which might call into the xallocator API.
// This feature costs 1-byte of RAM per C++ translation unit. This feature
// can be disabled only under the following circumstances:
//
// 1) The xallocator is only used within C files.
// 2) STATIC_POOLS is undefined and the application never exits main (e.g.
// an embedded system).
//
// In either of the two cases above, call xalloc_init() in main at startup,
// and xalloc_destroy() before main exits. In all other situations
// XallocInitDestroy must be used to call xalloc_init() and xalloc_destroy().
#ifdef AUTOMATIC_XALLOCATOR_INIT_DESTROY
usint XallocInitDestroy::refCount = 0;
XallocInitDestroy::XallocInitDestroy() {
// Track how many static instances of XallocInitDestroy are created
if (refCount++ == 0)
xalloc_init();
}
XallocInitDestroy::~XallocInitDestroy() {
// Last static instance to have destructor called?
if (--refCount == 0)
xalloc_destroy();
}
#endif // AUTOMATIC_XALLOCATOR_INIT_DESTROY
template <class T>
T nexthigher(T k) {
k--;
for (size_t i = 1; i < sizeof(T) * CHAR_BIT; i <<= 1)
k |= (k >> i);
return k + 1;
}
static void lock_init() {
_xallocInitialized = true;
}
static void lock_destroy() {
#if 0
// DeleteCriticalSection(&_criticalSection);
irc = pthread_mutex_destroy(&xalloc_mutex);
#endif
_xallocInitialized = false;
}
static inline void lock_get() {
if (_xallocInitialized == false)
return;
#if 0
// Acquire the mutex to access the shared resource
pthread_mutex_lock(&xalloc_mutex);
// EnterCriticalSection(&_criticalSection);
#endif
}
static inline void lock_release() {
if (_xallocInitialized == false)
return;
#if 0
// Release the mutex and release the access to shared resource
pthread_mutex_unlock(&xalloc_mutex);
// LeaveCriticalSection(&_criticalSection);
#endif
}
static inline void* set_block_allocator(void* block, Allocator* allocator) {
// Cast the raw block memory to a Allocator pointer
Allocator** pAllocatorInBlock = static_cast<Allocator**>(block);
// Write the size into the memory block
*pAllocatorInBlock = allocator;
// Advance the pointer past the Allocator* block size and return a pointer to
// the client's memory region
return ++pAllocatorInBlock;
}
static inline Allocator* get_block_allocator(void* block) {
// Cast the client memory to a Allocator pointer
Allocator** pAllocatorInBlock = static_cast<Allocator**>(block);
// Back up one Allocator* position to get the stored allocator instance
pAllocatorInBlock--;
// Return the allocator instance stored within the memory block
return *pAllocatorInBlock;
}
static inline void* get_block_ptr(void* block) {
// Cast the client memory to a Allocator* pointer
Allocator** pAllocatorInBlock = static_cast<Allocator**>(block);
// Back up one Allocator* position and return the original raw memory block
// pointer
return --pAllocatorInBlock;
}
static inline Allocator* find_allocator(size_t size) {
OPENFHE_DEBUG_FLAG(false);
for (usint i = 0; i < MAX_ALLOCATORS; i++) {
OPENFHE_DEBUG("allocator " << i << " " << _allocators[i]);
if (_allocators[i] == 0)
break;
if (_allocators[i]->GetBlockSize() == size)
return _allocators[i];
}
return nullptr;
}
static inline void insert_allocator(Allocator* allocator) {
for (usint i = 0; i < MAX_ALLOCATORS; i++) {
if (_allocators[i] == 0) {
_allocators[i] = allocator;
return;
}
}
assert(0);
}
extern "C" void xalloc_init() {
lock_init();
#ifdef STATIC_POOLS
// For STATIC_POOLS mode, the allocators must be initialized before any other
// static user class constructor is run. Therefore, use placement new to
// initialize each allocator into the previously reserved static memory
// locations.
new (&_allocator8) AllocatorPool<char[8], MAX_BLOCKS>();
new (&_allocator16) AllocatorPool<char[16], MAX_BLOCKS>();
new (&_allocator32) AllocatorPool<char[32], MAX_BLOCKS>();
new (&_allocator64) AllocatorPool<char[64], MAX_BLOCKS>();
new (&_allocator128) AllocatorPool<char[128], MAX_BLOCKS>();
new (&_allocator256) AllocatorPool<char[256], MAX_BLOCKS>();
new (&_allocator396) AllocatorPool<char[396], MAX_BLOCKS>();
new (&_allocator512) AllocatorPool<char[512], MAX_BLOCKS>();
new (&_allocator768) AllocatorPool<char[768], MAX_BLOCKS>();
new (&_allocator1024) AllocatorPool<char[1024], MAX_BLOCKS>();
new (&_allocator2048) AllocatorPool<char[2048], MAX_BLOCKS>();
new (&_allocator4096) AllocatorPool<char[4096], MAX_BLOCKS>();
new (&_allocator8192) AllocatorPool<char[8192], MAX_BLOCKS>();
new (&_allocator16384) AllocatorPool<char[16384], MAX_BLOCKS>();
// Populate allocator array with all instances
_allocators[0] = reinterpret_cast<Allocator*>(&_allocator8);
_allocators[1] = reinterpret_cast<Allocator*>(&_allocator16);
_allocators[2] = reinterpret_cast<Allocator*>(&_allocator32);
_allocators[3] = reinterpret_cast<Allocator*>(&_allocator64);
_allocators[4] = reinterpret_cast<Allocator*>(&_allocator128);
_allocators[5] = reinterpret_cast<Allocator*>(&_allocator256);
_allocators[6] = reinterpret_cast<Allocator*>(&_allocator396);
_allocators[7] = reinterpret_cast<Allocator*>(&_allocator512);
_allocators[8] = reinterpret_cast<Allocator*>(&_allocator768);
_allocators[9] = reinterpret_cast<Allocator*>(&_allocator1024);
_allocators[10] = reinterpret_cast<Allocator*>(&_allocator2048);
_allocators[11] = reinterpret_cast<Allocator*>(&_allocator4096);
_allocators[12] = reinterpret_cast<Allocator*>(&_allocator8192);
_allocators[13] = reinterpret_cast<Allocator*>(&_allocator16384);
#endif
}
extern "C" void xalloc_destroy() {
lock_get();
std::unique_lock<std::mutex> lock(xalloc_mutex);
{
#ifdef STATIC_POOLS
for (usint i = 0; i < MAX_ALLOCATORS; i++) {
_allocators[i]->~Allocator();
_allocators[i] = 0;
}
#else
for (usint i = 0; i < MAX_ALLOCATORS; i++) {
if (_allocators[i] == 0)
break;
delete _allocators[i];
_allocators[i] = 0;
}
#endif
}
lock_release();
lock_destroy();
}
extern "C" Allocator* xallocator_get_allocator(size_t size) {
// Based on the size, find the next higher powers of two value.
// Add sizeof(Allocator*) to the requested block size to hold the size
// within the block memory region. Most blocks are powers of two,
// however some common allocator block sizes can be explicitly defined
// to minimize wasted storage. This offers application specific tuning.
OPENFHE_DEBUG_FLAG(false);
size_t blockSize = size + sizeof(Allocator*);
if (blockSize > 256 && blockSize <= 396)
blockSize = 396;
else if (blockSize > 512 && blockSize <= 768)
blockSize = 768;
else
blockSize = nexthigher<size_t>(blockSize);
OPENFHE_DEBUG("finding allocator for blockSize " << blockSize);
Allocator* allocator = find_allocator(blockSize);
#ifdef STATIC_POOLS
if (allocator == nullptr) {
std::cerr << "static pool allocator for blockSize " << blockSize << " not found! " << std::endl;
}
assert(allocator != nullptr);
#else
// If there is not an allocator already created to handle this block size
if (allocator == nullptr) {
// Create a new allocator to handle blocks of the size required
allocator = new Allocator(blockSize, 0, 0, "xallocator");
// Insert allocator into array
insert_allocator(allocator);
}
#endif
return allocator;
}
extern "C" void* xmalloc(size_t size) {
OPENFHE_DEBUG_FLAG(false);
Allocator* allocator;
void* blockMemoryPtr;
lock_get();
std::unique_lock<std::mutex> lock(xalloc_mutex);
{
// Allocate a raw memory block
allocator = xallocator_get_allocator(size);
blockMemoryPtr = allocator->Allocate(sizeof(Allocator*) + size);
OPENFHE_DEBUG("xmalloc " << size);
}
lock_release();
// Set the block Allocator* within the raw memory block region
void* clientsMemoryPtr = set_block_allocator(blockMemoryPtr, allocator);
return clientsMemoryPtr;
}
extern "C" void xfree(void* ptr) {
OPENFHE_DEBUG_FLAG(false);
OPENFHE_DEBUG("xfree ");
if (ptr == 0)
return;
// Extract the original allocator instance from the caller's block pointer
Allocator* allocator = get_block_allocator(ptr);
// Convert the client pointer into the original raw block pointer
void* blockPtr = get_block_ptr(ptr);
lock_get();
std::unique_lock<std::mutex> lock(xalloc_mutex);
{
// Deallocate the block
allocator->Deallocate(blockPtr);
}
lock_release();
}
extern "C" void* xrealloc(void* oldMem, size_t size) {
if (oldMem == 0)
return xmalloc(size);
if (size == 0) {
xfree(oldMem);
return 0;
}
else {
// Create a new memory block
void* newMem = xmalloc(size);
if (newMem != 0) {
// Get the original allocator instance from the old memory block
Allocator* oldAllocator = get_block_allocator(oldMem);
size_t oldSize = oldAllocator->GetBlockSize() - sizeof(Allocator*);
// Copy the bytes from the old memory block into the new (as much as will
// fit)
std::memcpy(newMem, oldMem, (oldSize < size) ? oldSize : size);
// Free the old memory block
xfree(oldMem);
// Return the client pointer to the new memory block
return newMem;
}
return 0;
}
}
extern "C" void xalloc_stats() {
lock_get();
std::unique_lock<std::mutex> lock(xalloc_mutex);
{
#ifdef STATIC_POOLS
std::cout << " Static Pools " << std::endl;
#endif
for (usint i = 0; i < MAX_ALLOCATORS; i++) {
if (_allocators[i] == 0)
break;
if (_allocators[i]->GetName() != nullptr)
std::cout << _allocators[i]->GetName();
std::cout << " Block Size: " << _allocators[i]->GetBlockSize();
std::cout << " Block Count: " << _allocators[i]->GetBlockCount();
std::cout << " Blocks In Use: " << _allocators[i]->GetBlocksInUse();
std::cout << std::endl;
}
}
lock_release();
}