Prerequisite description

Note contrast G2.9 # 207 and G4.9 # 97, and the new version, using the :: operator new for memory applications, means that we can be overloaded After doing some statistical observation

Implementation of G2.9 chunk_alloc

Note: start_free = (char *)malloc(bytes_to_get);

template <bool threads, int inst>
char *default_alloc_template<threads, inst>::chunk_alloc(size_t size, int &nobjs)
{
    char *result;
    size_t total_bytes = size * nobjs;
    size_t bytes_left = end_free - start_free;

    if (bytes_left > total_bytes)
    {
        result = start_free;
        start_free += total_bytes;
        return result;
    }
    else if (bytes_left >= size)
    {
        nobjs = bytes_left / size;
        total_bytes = size * nobjs;
        result = start_free;
        start_free += total_bytes;
        return result;
    }
    else
    {
        size_t bytes_to_get = 2 * total_bytes + ROUND_UP(heap_size >> 4);
        if (bytes_left > 0)
        {
            obj **my_free_list = free_list + FREELIST_INDEX(bytes_left);
            ((obj*)start_free)->free_list_link = *my_free_list;
            *my_free_list = (obj*)start_free;
        }
        start_free = (char *)malloc(bytes_to_get);
        if (0 == start_free)
        {
            int i;
            obj **my_free_list;
            obj *p;

            for (i=size+__ALIGN; i<=__MAX_BYTES; i+=__ALIGN)
            {
                my_free_list = free_list + FREELIST_INDEX(i);
                p = *my_free_list;
                if (0 != p)
                {
                    *my_free_list = p->free_list_link;
                    start_free = (char*)p;
                    end_free   = start_free + i;

                    return chunk_alloc(size, nobjs);
                }
            }
        }

        heap_size += bytes_to_get;
        end_free = start_free + bytes_to_get;
        return chunk_alloc(size, nobjs);
    }
}
Implementation of G4.9 chunk_alloc

Note: _S_start_free = static_cast<char*>(::operator new(__bytes_to_get));

char *__pool_alloc_base::_M_allocate_chunk(size_t __n, int& __nobjs)
{
    char* __result;
    size_t __total_bytes = __n * __nobjs;
    size_t __bytes_left = _S_end_free - _S_start_free;
    
    if (__bytes_left >= __total_bytes)
    {
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return __result ;
    }
    else if (__bytes_left >= __n)
    {
        __nobjs = (int)(__bytes_left / __n);
        __total_bytes = __n * __nobjs;
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return __result;
    }
    else
    {
        // Try to make use of the left-over piece.
        if (__bytes_left > 0)
        {
            _Obj* volatile* __free_list = _M_get_free_list(__bytes_left);
            ((_Obj*)(void*)_S_start_free)->_M_free_list_link = *__free_list;
            *__free_list = (_Obj*)(void*)_S_start_free;
        }
        
        size_t __bytes_to_get = (2 * __total_bytes
                                 + _M_round_up(_S_heap_size >> 4));
        __try
        {
            _S_start_free = static_cast<char*>(::operator new(__bytes_to_get));
        }
        __catch(const std::bad_alloc&)
        {
            // Try to make do with what we have.  That can't hurt.  We
            // do not try smaller requests, since that tends to result
            // in disaster on multi-process machines.
            size_t __i = __n;
            for (; __i <= (size_t) _S_max_bytes; __i += (size_t) _S_align)
            {
                _Obj* volatile* __free_list = _M_get_free_list(__i);
                _Obj* __p = *__free_list;
                if (__p != 0)
                {
                    *__free_list = __p->_M_free_list_link;
                    _S_start_free = (char*)__p;
                    _S_end_free = _S_start_free + __i;
                    return _M_allocate_chunk(__n, __nobjs);
                    // Any leftover piece will eventually make it to the
                    // right free list.
                }
            }
            // What we have wasn't enough.  Rethrow.
            _S_start_free = _S_end_free = 0;   // We have no chunk.
            __throw_exception_again;
        }
        _S_heap_size += __bytes_to_get;
        _S_end_free = _S_start_free + __bytes_to_get;
        return _M_allocate_chunk(__n, __nobjs);
    }
}

Observe G2.9 : start_free = (char *)malloc(bytes_to_get); and G4.9 : _S_start_free = static_cast<char*>(::operator new(__bytes_to_get));

G4.9 memory allocation uses ::operator new , which means that we can observe and count memory applications for overloading

G4.9 pool allocator to observe

Design an operator new/delete that can accumulate statistics on allocation and release
除非用户直接使用 malloc/free, 否则都避不开它们,这样就可以累计总量

Base code

#include <iostream>
#include <string>
#include <list>
#include <vector>
#include<cstdlib>

using std::cout;
using std::endl;
using std::string;
using std::list;
using std::vector;

static long long countNew = 0;
static long long countDel = 0;
static long long countArrayNew = 0;
static long long countArrayDel = 0;
static long long timesNew = 0;

void *operator new(size_t size)
{
    cout << "operator new, \t" << size << "\n";

    countNew += size;
    timesNew ++;

    return malloc(size);
}

void *operator new[](size_t size)
{
    cout << "operator new[], \t" << size << "\n";

    countArrayNew += size;

    return malloc(size);
}

// 一下 (1)(2)可以并存并由(2)抓住流程(但对这的测试无用)
// 当只存在 (1) 时,任无法扎住流程
// 在 class members 中二者只能选一 (任一均可)
// (1)
void operator delete(void *ptr, size_t size)
{
    cout << "operator delete(ptr, size), \t" << ptr << "\n";
    countDel += size;
    free(ptr);
}

// (2)
void operator delete(void *ptr)
{
    cout << "operator delete(ptr), \t" << ptr << "\n";
    free(ptr);
}

// (1)
void operator delete[](void *ptr, size_t size)
{
    cout << "operator delete[](ptr, size), \t" << ptr << "\n";
    countArrayDel += size;
    free(ptr);
}

// (2)
void operator delete[](void *ptr)
{
    cout << "operator delete[](ptr), \t" << ptr << "\n";
    free(ptr);
}

func1

void func1()
{
    cout << "::countNew= " << ::countNew << endl;
    cout << "::countDel= " << ::countDel << endl;
    cout << "::timesNew= " << ::timesNew << endl;

    string *p = new string("My name is Ace");

    delete p;

    cout << "::countNew= " << ::countNew << endl;
    cout << "::countDel= " << ::countDel << endl;
    cout << "::timesNew= " << ::timesNew << endl;
}

Output:

::countNew= 0
::countDel= 0
::timesNew= 0
operator new,   4                    // string size
operator new,   27                   // string(REP) + extra 
operator delete(ptr),   0xc92380
operator delete(ptr),   0xc956b0
::countNew= 31                       // 4 + 27
::countDel= 0                        // 本测试永远观察不到想要的结果,因为进不去 operator delete(pre, size) 版本
::timesNew= 2                       

func2

void func2()
{
    string *p = new string[3];
    
    delete[] p;
    
    cout << "::countNew= " << ::countNew << endl;
    cout << "::countArrayNew= " << ::countArrayNew<< endl;
    cout << "::timesNew= " << ::timesNew << endl;
}

Output:

operator new[],         16                     // 其中包含 arraySize filed: 4 bytes. 所以 16 - 4 = 12 ==> 4 * 3, 也就是三个 string 每个占 4 bytes
operator new,   13                             // Nil string
operator new,   13                             // Nil string
operator new,   13                             // Nil string
operator delete(ptr),   0x835bc0
operator delete(ptr),   0x835ba8
operator delete(ptr),   0x832398
operator delete[](ptr),         0x832380
::countNew= 39                                 // 13 + 13 + 13
::countArrayNew= 16                            
::timesNew= 3

func3

void func3()
{
    vector<int> vec(10);    // vector object 本身不是 dynamic allocated  
    
    vec.push_back(1);
    vec.push_back(1);
    vec.push_back(1);
    
    cout << "::countNew= " << ::countNew << endl;
    cout << "::timesNew= " << ::timesNew << endl;
}

Output:

operator new,   40                             //  10 ints
operator new,   80
operator delete(ptr),   0x772380
::countNew= 120                                // 80 + 40
::timesNew= 2
operator delete(ptr),   0x775ba8

func4

void func4()
{
    list<int> lst;     // list object 本身不是 dynamic allocated  
    lst.push_back(1);  
    lst.push_back(1);
    lst.push_back(1);
    
    cout << "::countNew= " << ::countNew << endl;
    cout << "::timesNew= " << ::timesNew << endl;
} 

Output

operator new,   12    // 每个 node 是 12 bytes
operator new,   12
operator new,   12
::countNew= 36
::timesNew= 3
operator delete(ptr),   0x712380
operator delete(ptr),   0x712398
operator delete(ptr),   0x715ba8

Focus

func5

void func6()
{
    list<double> lst;
    for (int i=0; i< 1000000; ++i)
        lst.push_back(i);
    cout << "::countNew= " << ::countNew << endl;
    cout << "::timesNew= " << ::timesNew << endl;
}

Output:

::countNew= 16000000    // 注意, node 都帶 cookie !!!
::timesNew= 1000000     

func6

template <typename T>
using listPoll = list<T, __gnu_cxx::__pool_alloc<T>>;

void func5()
{
    //list<double, __gnu_cxx::__pool_alloc<double>> lst;
    //上一行改用 C++/11 alias template 來寫 :
    listPoll<double> lst;

    for (int i=0; i< 1000000; ++i)
        lst.push_back(i);
    cout << "::countNew= " << ::countNew << endl;      //16752832 (注意, node 都不帶 cookie), 由于添加了额外的内存管理,因此申请了多于 func6 的申请量
    cout << "::timesNew= " << ::timesNew << endl;
}

Output:

::countNew= 16752832  // 注意, node 都不帶 cookie). 由于添加了额外的内存管理,因此产生了多于 func6 的申请量
::timesNew= 122       // 内存的申请次数明显降低 !!!  

G2.9 std::alloc ported to C

std::alloc is composed of static data member and static member functions. There is only one class as a whole and no branch, so it is easy to port to C. Note:

chunck_alloc(size_t size, int &nobjs);

When pass by reference is moved to C, it may need to be changed to pass by pointer

Porting steps
  • Remove template
  • Move union obj to _default_alloc_template externally independent
  • Move all static data to global
  • Rename __malloc_alloc to malloc_alloc, and rename __default_alloc to alloc
  • Move all static functions of malloc_alloc to global
  • Move all static functions of alloc to global
  • Change .cpp to .c, change the above pass by reference to pass by pointer, and then change:
union obj {
    union obj *free_list_link;
};

==>

typedef union __obj {
    union __obj *free_list_link;
}obj;

或

typedef struct __obj {
    struct __obj *free_list_link;
}obj;

alloc.h

// author : Hou Jie (侯捷)
// date : 2015/11/11 
// compiler : DevC++ 5.61 (MinGW with GNU 4.9.2)
//
// 說明:這是侯捷 E-learning video "C++內存管理" 的實例程式.
//
// filename : allocc.h
// 取材自 SGI STL 2.91 <stl_alloc.h>, 移植至 C language.


#include <stdlib.h>  //for malloc(),realloc()
#include <stddef.h>  //for size_t
#include <memory.h>  //for memcpy()

//#define __THROW_BAD_ALLOC   cerr << "out of memory" << endl; exit(1)
#define __THROW_BAD_ALLOC   exit(1)

//----------------------------------------------
// 第1級配置器。
//----------------------------------------------

void (*oom_handler)() = 0;

void* oom_malloc(size_t n)
{
  void (*my_malloc_handler)();
  void* result;

  for (;;) {    //不斷嘗試釋放、配置、再釋放、再配置…
    my_malloc_handler = oom_handler;
    if (0 == my_malloc_handler) { __THROW_BAD_ALLOC; }
    (*my_malloc_handler)();    //呼叫處理常式,企圖釋放記憶體
    result = malloc(n);        //再次嘗試配置記憶體
    if (result) return(result);
  }
}

void* oom_realloc(void *p, size_t n)
{
  void (*my_malloc_handler)();
  void* result;

  for (;;) {    //不斷嘗試釋放、配置、再釋放、再配置…
    my_malloc_handler = oom_handler;
    if (0 == my_malloc_handler) { __THROW_BAD_ALLOC; }
    (*my_malloc_handler)();    //呼叫處理常式,企圖釋放記憶體。
    result = realloc(p, n);    //再次嘗試配置記憶體。
    if (result) return(result);
  }
}

void* malloc_allocate(size_t n)
{
  void *result = malloc(n);   //直接使用 malloc()
  if (0 == result) result = oom_malloc(n);
  return result;
}

void malloc_deallocate(void* p, size_t n)
{
  free(p);  //直接使用 free()
}

void* malloc_reallocate(void *p, size_t old_sz, size_t new_sz)
{
  void* result = realloc(p, new_sz); //直接使用 realloc()
  if (0 == result) result = oom_realloc(p, new_sz);
  return result;
}

void (*set_malloc_handler(void (*f)()))()
{ //類似 C++ 的 set_new_handler().
  void (*old)() = oom_handler;
  oom_handler = f;
  return(old);
}

//----------------------------------------------
//第二級配置器
//----------------------------------------------

enum {__ALIGN = 8};                        //小區塊的上調邊界
enum {__MAX_BYTES = 128};                  //小區塊的上限
enum {__NFREELISTS = __MAX_BYTES/__ALIGN}; //free-lists 個數

// union obj {                   //G291[o],CB5[x],VC6[x]
//   union obj* free_list_link;  //這麼寫在 VC6 和 CB5 中也可以,
// };                            //但以後就得使用 "union obj" 而不能只寫 "obj"
typedef struct __obj {
  struct __obj* free_list_link;
} obj;

char*   start_free = 0;
char*   end_free = 0;
size_t  heap_size = 0;
obj* free_list[__NFREELISTS]
     = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

size_t ROUND_UP(size_t bytes) {
    return (((bytes) + __ALIGN-1) & ~(__ALIGN - 1));
}
size_t FREELIST_INDEX(size_t bytes) {
    return (((bytes) + __ALIGN-1)/__ALIGN - 1);
}

//----------------------------------------------
// We allocate memory in large chunks in order to
// avoid fragmentingthe malloc heap too much.
// We assume that size is properly aligned.
//
// Allocates a chunk for nobjs of size "size".
// nobjs may be reduced if it is inconvenient to
// allocate the requested number.
//----------------------------------------------
//char* chunk_alloc(size_t size, int& nobjs)  //G291[o],VC6[x],CB5[x]
char* chunk_alloc(size_t size, int* nobjs)
{
  char* result;
  size_t total_bytes = size * (*nobjs);   //原 nobjs 改為 (*nobjs)
  size_t bytes_left = end_free - start_free;

  if (bytes_left >= total_bytes) {
      result = start_free;
      start_free += total_bytes;
      return(result);
  } else if (bytes_left >= size) {
      *nobjs = bytes_left / size;         //原 nobjs 改為 (*nobjs)
      total_bytes = size * (*nobjs);      //原 nobjs 改為 (*nobjs)
      result = start_free;
      start_free += total_bytes;
      return(result);
  } else {
      size_t bytes_to_get =
                 2 * total_bytes + ROUND_UP(heap_size >> 4);
      // Try to make use of the left-over piece.
      if (bytes_left > 0) {
          obj* volatile *my_free_list =
                 free_list + FREELIST_INDEX(bytes_left);

          ((obj*)start_free)->free_list_link = *my_free_list;
          *my_free_list = (obj*)start_free;
      }
      start_free = (char*)malloc(bytes_to_get);
      if (0 == start_free) {
          int i;
          obj* volatile *my_free_list, *p;

          //Try to make do with what we have. That can't
          //hurt. We do not try smaller requests, since that tends
          //to result in disaster on multi-process machines.
          for (i = size; i <= __MAX_BYTES; i += __ALIGN) {
              my_free_list = free_list + FREELIST_INDEX(i);
              p = *my_free_list;
              if (0 != p) {
                  *my_free_list = p -> free_list_link;
                  start_free = (char*)p;
                  end_free = start_free + i;
                  return(chunk_alloc(size, nobjs));
                  //Any leftover piece will eventually make it to the
                  //right free list.
              }
          }
          end_free = 0;       //In case of exception.
          start_free = (char*)malloc_allocate(bytes_to_get);
          //This should either throw an exception or
          //remedy the situation. Thus we assume it
          //succeeded.
      }
      heap_size += bytes_to_get;
      end_free = start_free + bytes_to_get;
      return(chunk_alloc(size, nobjs));
  }
}
//----------------------------------------------
// Returns an object of size n, and optionally adds
// to size n free list. We assume that n is properly aligned.
// We hold the allocation lock.
//----------------------------------------------
void* refill(size_t n)
{
    int nobjs = 20;
    char* chunk = chunk_alloc(n,&nobjs);
    obj* volatile *my_free_list;   //obj** my_free_list;
    obj* result;
    obj* current_obj;
    obj* next_obj;
    int i;

    if (1 == nobjs) return(chunk);
    my_free_list = free_list + FREELIST_INDEX(n);

    //Build free list in chunk
    result = (obj*)chunk;
    *my_free_list = next_obj = (obj*)(chunk + n);
    for (i=1;  ; ++i) {
      current_obj = next_obj;
      next_obj = (obj*)((char*)next_obj + n);
      if (nobjs-1 == i) {
          current_obj->free_list_link = 0;
          break;
      } else {
          current_obj->free_list_link = next_obj;
      }
    }
    return(result);
}
//----------------------------------------------
//
//----------------------------------------------
void* allocate(size_t n)  //n must be > 0
{
  obj* volatile *my_free_list;    //obj** my_free_list;
  obj* result;

  if (n > (size_t)__MAX_BYTES) {
      return(malloc_allocate(n));
  }

  my_free_list = free_list + FREELIST_INDEX(n);
  result = *my_free_list;
  if (result == 0) {
      void* r = refill(ROUND_UP(n));
      return r;
  }

  *my_free_list = result->free_list_link;
  return (result);
}
//----------------------------------------------
//
//----------------------------------------------
void deallocate(void *p, size_t n)  //p may not be 0
{
  obj* q = (obj*)p;
  obj* volatile *my_free_list;   //obj** my_free_list;

  if (n > (size_t) __MAX_BYTES) {
      malloc_deallocate(p, n);
      return;
  }
  my_free_list = free_list + FREELIST_INDEX(n);
  q->free_list_link = *my_free_list;
  *my_free_list = q;
}
//----------------------------------------------
//
//----------------------------------------------
void* reallocate(void *p, size_t old_sz, size_t new_sz)
{
  void * result;
  size_t copy_sz;

  if (old_sz > (size_t) __MAX_BYTES && new_sz > (size_t) __MAX_BYTES) {
      return(realloc(p, new_sz));
  }
  if (ROUND_UP(old_sz) == ROUND_UP(new_sz)) return(p);
  result = allocate(new_sz);
  copy_sz = new_sz > old_sz? old_sz : new_sz;
  memcpy(result, p, copy_sz);
  deallocate(p, old_sz);
  return(result);
}
//----------------------------------------------

TianSong
737 声望139 粉丝

阿里山神木的种子在3000年前已经埋下,今天不过是看到当年注定的结果,为了未来的自己,今天就埋下一颗好种子吧