1

容器 - 结构与分类

1_meitu_1.jpg

容器 array

TR1 实现:[没有 ctor, 没有 dtor]

// Fixed-size array as sketched from the TR1 implementation: a plain aggregate
// with no user-declared constructor or destructor.
template<typename _Tp, std::size_t _Nm>
struct array
{
    using value_type = _Tp;
    using pointer    = _Tp*;
    using iterator   = value_type*;   // the iterator is simply a native pointer

    // Zero-sized arrays must be supported, so the storage always holds at
    // least one element.
    value_type _M_instance[_Nm ? _Nm : 1];

    // Pointer to the first element of the storage.
    iterator begin()
    {
        return _M_instance;
    }

    // Pointer _Nm elements past the start of the storage.
    iterator end()
    {
        return _M_instance + _Nm;
    }

    // ...
};
// Demonstrates that array's iterator behaves like a raw pointer: it can be
// advanced with += and dereferenced with *.
void func()
{
    // Value-initialize the elements so that reading one below is well-defined.
    array<int, 10> myArray = {};
    auto ite = myArray.begin();

    // Equivalent spelling: array<int, 10>::iterator ite = ...

    ite += 3;        // plain pointer arithmetic
    cout << *ite;    // stream insertion (the original used >>, which does not compile)
}

2_meitu_2.jpg


G4.9 实现

文件:array

// Excerpt of the G4.9 array: an aggregate whose storage type is supplied by
// __array_traits and whose iterators are raw pointers.
template<typename _Tp, std::size_t _Nm>
struct array
{
  typedef _Tp                                       value_type;
  typedef value_type*                               pointer;
  typedef const value_type*                         const_pointer;
  typedef value_type&                               reference;
  typedef const value_type&                         const_reference;
  typedef value_type*                               iterator;
  typedef const value_type*                         const_iterator;
  typedef std::size_t                               size_type;
  typedef std::ptrdiff_t                            difference_type;
  typedef std::reverse_iterator<iterator>           reverse_iterator;
  typedef std::reverse_iterator<const_iterator>     const_reverse_iterator;

  // Support for zero-sized arrays mandatory.
  typedef _GLIBCXX_STD_C::__array_traits<_Tp, _Nm> _AT_Type;
  typename _AT_Type::_Type                         _M_elems;  // note: the storage type comes from the traits class

  // No explicit construct/copy/destroy for aggregate type.

  // Iterator to the first element (data() is part of the elided members).
  iterator
  begin() noexcept
  { return iterator(data()); }

  // Iterator _Nm elements past data().
  iterator
  end() noexcept
  { return iterator(data() + _Nm); }

  // Number of elements; always the template argument _Nm.
  constexpr size_type
  size() const noexcept { return _Nm; }

  // True when size() is zero.
  constexpr bool
  empty() const noexcept { return size() == 0; }

  // Element access without bounds checking.
  reference
  operator[](size_type __n) noexcept
  { return _AT_Type::_S_ref(_M_elems, __n); }

  // Element access with bounds checking: reports an out-of-range error
  // through std::__throw_out_of_range_fmt when __n >= _Nm.
  reference
  at(size_type __n)
  {
    if (__n >= _Nm)
      std::__throw_out_of_range_fmt(__N("array::at: __n (which is %zu) "
                        ">= _Nm (which is %zu)"),
                    __n, _Nm);
    return _AT_Type::_S_ref(_M_elems, __n);
  }

  // ...
};
// Traits helper that names the storage type used by array<_Tp, _Nm> and
// provides element access into that storage.
template<typename _Tp, std::size_t _Nm>
struct __array_traits
{
  // Built-in array of _Nm elements.
  // Example: `typedef int T[100]; T _M_elems;` declares `int _M_elems[100];`
  typedef _Tp _Type[_Nm];

  // Returns a mutable reference to element __n of __t. No bounds check is
  // performed; the const_cast removes the constness added by the parameter.
  static constexpr _Tp&
  _S_ref(const _Type& __t, std::size_t __n) noexcept
  { return const_cast<_Tp&>(__t[__n]); }

  // ...
};

3_meitu_3.jpg

容器 hashtable

4_meitu_4.jpg

Separate Chaining.虽然list是线性搜索时间,如果list够小,搜索速度仍然非常快。

上图当前篮子的数量为53([0-52],一般为vector),元素个数为6(元素代码hashcode[53, 55, 2, 108, 59, 63],一般挂接在单向或双向链表)。当再安插 48 个元素,使元素数量达到54个,超过当前的篮子数量 (buckets vector)大小53, 于是 rehash(元素数量大于等于篮子数量时都会发生)。于是可知篮子的数量永远大于元素数量。

hashcode(元素代码),由对应的 hashfunction计算得出(使用者提供),越乱越好,尽量使不同元素得出的元素代码不同。

元素放在哪个篮子之下由 hashcode 取余 篮子大小 得出。

6_meitu_6.jpg

unordered 容器

Before C++11:

  • hash_set
  • hash_multiset
  • hash_map
  • hash_multimap

Since C++11:

  • unordered_set
  • unordered_multiset
  • unordered_map
  • unordered_multimap

文件:unordered_set.h && unordered_map.h

  // Forward declarations of the four unordered containers. Each takes a
  // hasher (_Hash), an equality predicate (_Pred) and an allocator (_Alloc),
  // all of which have defaults.

  // Set of _Value.
  template<class _Value,
       class _Hash = hash<_Value>,
       class _Pred = std::equal_to<_Value>,
       class _Alloc = std::allocator<_Value> >
    class unordered_set;

  // Multiset counterpart of unordered_set.
  template<class _Value,
       class _Hash = hash<_Value>,
       class _Pred = std::equal_to<_Value>,
       class _Alloc = std::allocator<_Value> >
    class unordered_multiset;

  // Map from _Key to _Tp; the element type seen by the allocator is
  // std::pair<const _Key, _Tp>.
  template<class _Key, class _Tp,
       class _Hash = hash<_Key>,
       class _Pred = std::equal_to<_Key>,
       class _Alloc = std::allocator<std::pair<const _Key, _Tp> > >
    class unordered_map;

  // Multimap counterpart of unordered_map.
  template<class _Key, class _Tp,
       class _Hash = hash<_Key>,
       class _Pred = std::equal_to<_Key>,
       class _Alloc = std::allocator<std::pair<const _Key, _Tp> > >
    class unordered_multimap;

关于 hash function

文件:Test.cpp

#include <iostream>
#include <string>

using namespace std;

// Prints the standard hash code of one value of each of several built-in
// and library types.
int main()
{
    // Keep the typed pointer so the allocation can be released before exit
    // (the original leaked it).
    int *value = new int(100);
    void *pi = static_cast<void*>(value);

    cout << hash<int>()(123) << endl;
    cout << hash<long>()(123L) << endl;
    cout << hash<string>()(string("Ace")) << endl;
    // Pointer specialization: hashes the pointer value, not the characters.
    cout << hash<const char*>()("Ace") << endl;
    cout << hash<char>()('A') << endl;
    cout << hash<float>()(3.1415926f) << endl;
    cout << hash<double>()(3.1415926) << endl;
    cout << hash<void*>()(pi) << endl;

    delete value;

    return 0;
}

输出:

123
123
1765813650
4210761
65
1630017722
2019885673
16284376

hash function 的目的,就是根据元素值算出一个 hash code(一个可进行 modulus 运算的值,即 hashcode % buckets vector size),使得元素经 hash code 映射之后能够[够杂乱够随机]地被置于 hashtable 内。越是随机,越不容易发生碰撞。


标准库中的部分 hash-function, 文件:functional_hash.h

// Common base of the hash specializations: it only publishes the result and
// argument types as nested type names.
template<typename _Result, typename _Arg>
struct __hash_base
{
  using result_type   = _Result;
  using argument_type = _Arg;
};

// .....

/// Primary class template hash.
// Declared here without a definition; in this excerpt the usable hashers
// are the specializations (pointers, integer types, float, double, ...).
template<typename _Tp>
struct hash;

// .....

/// Partial specialization for pointer types.
template<typename _Tp>
struct hash<_Tp*> : public __hash_base<size_t, _Tp*>
{
  // The hash code is the pointer value itself, reinterpreted as size_t;
  // the pointee is never read.
  size_t operator()(_Tp* __p) const noexcept
  {
    return reinterpret_cast<size_t>(__p);
  }
};

// Explicit specializations for integer types.
// The macro below generates one full specialization hash<_Tp> whose
// operator() simply converts the value to size_t with static_cast.
// It is expanded once per type listed below and then undefined, so the
// helper name does not stay defined after this section.
#define _Cxx_hashtable_define_trivial_hash(_Tp)     \
template<>                        \
struct hash<_Tp> : public __hash_base<size_t, _Tp>  \
{                                                   \
  size_t                                            \
  operator()(_Tp __val) const noexcept              \
  { return static_cast<size_t>(__val); }            \
};

/// Explicit specialization for bool.
_Cxx_hashtable_define_trivial_hash(bool)

/// Explicit specialization for char.
_Cxx_hashtable_define_trivial_hash(char)

/// Explicit specialization for signed char.
_Cxx_hashtable_define_trivial_hash(signed char)

/// Explicit specialization for unsigned char.
_Cxx_hashtable_define_trivial_hash(unsigned char)

/// Explicit specialization for wchar_t.
_Cxx_hashtable_define_trivial_hash(wchar_t)

/// Explicit specialization for char16_t.
_Cxx_hashtable_define_trivial_hash(char16_t)

/// Explicit specialization for char32_t.
_Cxx_hashtable_define_trivial_hash(char32_t)

/// Explicit specialization for short.
_Cxx_hashtable_define_trivial_hash(short)

/// Explicit specialization for int.
_Cxx_hashtable_define_trivial_hash(int)

/// Explicit specialization for long.
_Cxx_hashtable_define_trivial_hash(long)

/// Explicit specialization for long long.
_Cxx_hashtable_define_trivial_hash(long long)

/// Explicit specialization for unsigned short.
_Cxx_hashtable_define_trivial_hash(unsigned short)

/// Explicit specialization for unsigned int.
_Cxx_hashtable_define_trivial_hash(unsigned int)

/// Explicit specialization for unsigned long.
_Cxx_hashtable_define_trivial_hash(unsigned long)

/// Explicit specialization for unsigned long long.
_Cxx_hashtable_define_trivial_hash(unsigned long long)
#undef _Cxx_hashtable_define_trivial_hash

//......

// Helper with static functions that hash raw bytes by forwarding to the
// _Hash_bytes primitive.
struct _Hash_impl
{
// Hashes __clength bytes starting at __ptr; __seed defaults to 0xc70f6907UL.
static size_t
hash(const void* __ptr, size_t __clength,
 size_t __seed = static_cast<size_t>(0xc70f6907UL))
{ return _Hash_bytes(__ptr, __clength, __seed); }  // note: no definition of _Hash_bytes was found, only its declaration in hash_bytes.h

// Hashes the sizeof(__val) bytes of __val using the default seed.
template<typename _Tp>
  static size_t
  hash(const _Tp& __val)
  { return hash(&__val, sizeof(__val)); }

// Hashes the bytes of __val using the existing hash code __hash as the seed.
template<typename _Tp>
  static size_t
  __hash_combine(const _Tp& __val, size_t __hash)
  { return hash(&__val, sizeof(__val), __hash); }
};

// A hash function similar to FNV-1a (see PR59406 for how it differs).
// Same three entry points as _Hash_impl, but forwarding to _Fnv_hash_bytes.
struct _Fnv_hash_impl
{
// Hashes __clength bytes starting at __ptr; __seed defaults to 2166136261UL.
static size_t
hash(const void* __ptr, size_t __clength,
 size_t __seed = static_cast<size_t>(2166136261UL))
{ return _Fnv_hash_bytes(__ptr, __clength, __seed); }

// Hashes the sizeof(__val) bytes of __val using the default seed.
template<typename _Tp>
  static size_t
  hash(const _Tp& __val)
  { return hash(&__val, sizeof(__val)); }

// Hashes the bytes of __val using the existing hash code __hash as the seed.
template<typename _Tp>
  static size_t
  __hash_combine(const _Tp& __val, size_t __hash)
  { return hash(&__val, sizeof(__val), __hash); }
};

/// Specialization for float.
template<>
struct hash<float> : public __hash_base<size_t, float>
{
  // Hashes the bytes of __val through _Hash_impl, except that any value
  // comparing equal to zero yields 0.
  size_t operator()(float __val) const noexcept
  {
    // 0 and -0 both hash to zero.
    if (__val == 0.0f)
      return 0;

    return std::_Hash_impl::hash(__val);
  }
};

/// Specialization for double.
template<>
struct hash<double> : public __hash_base<size_t, double>
{
  // Hashes the bytes of __val through _Hash_impl, except that any value
  // comparing equal to zero yields 0.
  size_t operator()(double __val) const noexcept
  {
    // 0 and -0 both hash to zero.
    if (__val == 0.0)
      return 0;

    return std::_Hash_impl::hash(__val);
  }
};

/// Specialization for long double.
// Unlike the float and double specializations, operator() is only declared
// here; its definition is not part of this excerpt.
template<>
struct hash<long double>
: public __hash_base<size_t, long double>
{
  _GLIBCXX_PURE size_t
  operator()(long double __val) const noexcept;
};

// @} group hashes

// Hint about performance of hash functor. If not fast the hash-based
// containers will cache the hash code.
// Default behavior is to consider that hashers are fast unless specified
// otherwise.
// Primary template: derives from std::true_type for every _Hash.
template<typename _Hash>
struct __is_fast_hash : public std::true_type
{ };

// hash<long double> is flagged as not fast (derives from std::false_type).
template<>
struct __is_fast_hash<hash<long double>> : public std::false_type
{ };

文件:hash_bytes.h [未找到定义]

// Hash function implementation for the nontrivial specialization.
// All of them are based on a primitive that hashes a pointer to a
// byte array. The actual hash algorithm is not guaranteed to stay
// the same from release to release -- it may be updated or tuned to
// improve hash quality or speed.
// NOTE(review): declaration only. Presumably __ptr/__len delimit the byte
// array and __seed is the starting hash state -- confirm against the
// definition, which is not shown here.
size_t
_Hash_bytes(const void* __ptr, size_t __len, size_t __seed);

// A similar hash primitive, using the FNV hash algorithm. This
// algorithm is guaranteed to stay the same from release to release.
// (although it might not produce the same values on different
// machines.)
// NOTE(review): declaration only. The parameters presumably have the same
// meaning as for _Hash_bytes -- confirm against the definition.
size_t
_Fnv_hash_bytes(const void* __ptr, size_t __len, size_t __seed);

string 的 hash_function 实现,文件:basic_string.h

// Hash specialization for string: hashes the character buffer of the string
// (data pointer plus length) through _Hash_impl.
template<>
struct hash<string>
: public __hash_base<size_t, string>
{
  size_t operator()(const string& __s) const noexcept
  {
    return std::_Hash_impl::hash(__s.data(), __s.size());
  }
};

一个万用的 Hash Function

自定义 hash_function 的三种形式

// Sample record type used to illustrate the three ways of supplying a
// custom hash function. All members are public.
struct Customer
{
    string fname;
    string lname;
    int no;
};
  • 形式一:定义函数对象
class CustomerHash
{
public:
    std::size_t operator() (const CustomerHash &c) const
    {
        retrun ...
    }
};

unordered_set<Customer, CustomerHash> cusset;  // 使用时!!
  • 形式二:普通函数
// Form 2: an ordinary function that maps a Customer to its hash code.
// The actual computation is elided in this sketch.
std::size_t customer_hash_func(const Customer &c)
{
    return ...;
}

// Usage: the hasher type is a function-pointer type, and the function itself
// is passed to the constructor together with the first argument, 20.
unordered_set<Customer, size_t(*)(const Customer&)> custset(20, customer_hash_func);
  • 形式三: hash 的特化 (注意:必须在 std 空间内)
// Form 3: an explicit specialization of hash for Customer. It must be
// written inside namespace std. The actual computation is elided here.
namespace std
{
template <>
struct hash<Customer>
{
    // Declared const: containers invoke the hasher through a const object,
    // so a non-const call operator is not usable there.
    size_t operator()(const Customer &c) const
    {
        return ...;
    }
};
}

unordered_set<Customer> custset;  // usage: the default hasher hash<Customer> is picked up

万用的 Hash Function 的实现

实现思想

把一个复杂的结构分解为多个基本类型,然后将各个基本类型的 hashcode 进行组合。

初次尝试

// First attempt: hash every member on its own with the standard hashers and
// add the three results together.
class CustomerHash
{
public:
    std::size_t operator() (const Customer &c) const
    {
        const std::size_t firstName = std::hash<std::string>()(c.fname);
        const std::size_t lastName  = std::hash<std::string>()(c.lname);
        const std::size_t number    = std::hash<long>()(c.no);

        return firstName + lastName + number;
    }
};

总结:不理想。会造成 hashcode 较多的碰撞,即同一篮子里含有较多的元素,降低了查找速度。

最终版本

#include <iostream>
#include <unordered_set>

using namespace std;

//-----------------------------------------------

// from boost (functional/hash)
// from boost (functional/hash)
// Folds the hash code of `val` into `seed`, which is updated in place.
// The hasher is spelled std::hash explicitly so that this template does not
// depend on a file-scope using-directive to resolve the name.
template<typename T>
inline void hash_combine(std::size_t &seed, const T &val)
{
    const std::size_t valueHash = std::hash<T>()(val);

    // 0x9e3779b9 is derived from the golden ratio; no need to dig into it here.
    // The shifted copies of the old seed are mixed in as well.
    seed ^= valueHash
            + 0x9e3779b9
            + (seed << 6)
            + (seed >> 2);
}

// auxiliary generic functions to create a hash value using a seed
// Overload for a single value: folds `val` into `seed` with hash_combine.
template<typename T>
inline void hash_val(size_t &seed, const T &val)
{
    hash_combine(seed, val);
}

// Overload for a seed plus two or more values: folds the first value into
// `seed`, then calls hash_val again with the remaining values.
template<typename T, typename... Types>
inline void hash_val(size_t &seed, const T &val, const Types&... args)
{
    hash_combine(seed, val);  // take the values one by one, updating seed (passed by reference)
    hash_val(seed, args...);
}

// auxiliary generic functions
// Entry point: starts from a seed of 0, folds every argument into it via the
// seed-taking hash_val overloads, and returns the resulting seed.
template<typename... Types>
inline size_t hash_val(const Types&... args)
{
    size_t seed = 0;

    hash_val(seed, args...);

    return seed;
}

//---------------------------

// Record type hashed by the examples below; all members are public.
struct Customer
{
    // Stores copies of the given first name, last name and number.
    Customer(string f, string l, long n)
        : fname(f)
        , lname(l)
        , no(n)
    {
    }

    string fname;
    string lname;
    long no;
};

// 1. Function object: combines the three members with hash_val.
struct CustomerHash
{
    size_t operator()(const Customer &c) const
    {
        const size_t code = hash_val(c.fname, c.lname, c.no);
        return code;
    }
};

// 2. Ordinary function: combines the three members with hash_val.
size_t customer_hash_func(const Customer &c)
{
    const size_t code = hash_val(c.fname, c.lname, c.no);
    return code;
}

// 3. Specialization of hash for Customer, written inside namespace std.
namespace std
{

template<>
struct hash<Customer>
{
    // Combines the three members with hash_val.
    size_t operator()(const Customer &c) const
    {
        const size_t code = hash_val(c.fname, c.lname, c.no);
        return code;
    }
};

}

int main()
{
    cout << CustomerHash()(Customer("Ace", "Hou", 1)) << endl;
    cout << customer_hash_func(Customer("Ace", "Hou", 1)) << endl;
    cout << hash<Customer>()(Customer("Ace", "Hou", 1)) << endl;

    return 0;
}

输出:

2329839052
2329839052
2329839052

hash table 观察

#include <iostream>
#include <unordered_set>

using namespace std;

//-----------------------------------------------

// from boost (functional/hash)
// Folds the hash code of `val` into `seed`, which is updated in place
// (formula taken from boost functional/hash).
// The hasher is spelled std::hash explicitly so that this template does not
// depend on a file-scope using-directive to resolve the name.
template<typename T>
inline void hash_combine(std::size_t &seed, const T &val)
{
    const std::size_t valueHash = std::hash<T>()(val);

    // 0x9e3779b9 is derived from the golden ratio; no need to dig into it here.
    // The shifted copies of the old seed are mixed in as well.
    seed ^= valueHash
            + 0x9e3779b9
            + (seed << 6)
            + (seed >> 2);
}

// auxiliary generic functions to create a hash value using a seed
// Overload for a single value: folds `val` into `seed` with hash_combine.
template<typename T>
inline void hash_val(size_t &seed, const T &val)
{
    hash_combine(seed, val);
}

// Overload for a seed plus two or more values: folds the first value into
// `seed`, then calls hash_val again with the remaining values.
template<typename T, typename... Types>
inline void hash_val(size_t &seed, const T &val, const Types&... args)
{
    hash_combine(seed, val);
    hash_val(seed, args...);
}


// Entry point: starts from a seed of 0, folds every argument into it via the
// seed-taking hash_val overloads, and returns the resulting seed.
template<typename... Types>
inline size_t hash_val(const Types&... args)
{
    size_t seed = 0;

    hash_val(seed, args...);

    return seed;
}

//---------------------------

// Customer record usable as an unordered_set key: equality is defined here,
// while hashing is supplied separately by a hasher such as CustomerHash.
class Customer
{
public:
    // Stores copies of the given first name, last name and number.
    Customer(std::string f, std::string l, long n) : fname(f), lname(l), no(n)
    { }

    // Two customers are equal when all three members are equal.
    // rhs is taken by const reference; the original took it by value and so
    // copied both strings on every comparison.
    bool operator== (const Customer &rhs) const
    {
        return (fname == rhs.fname) && (lname == rhs.lname) && (no == rhs.no);
    }


    std::string fname;
    std::string lname;
    long no;
};

// Hasher for Customer: combines the three members with hash_val.
struct CustomerHash
{
    size_t operator()(const Customer &c) const
    {
        const size_t code = hash_val(c.fname, c.lname, c.no);
        return code;
    }
};

size_t func1()
{
    unordered_set<Customer, CustomerHash> set;

    set.insert(Customer("Ace", "Hou", 1L));
    set.insert(Customer("Sabri", "Hou", 2L));
    set.insert(Customer("Stacy", "Chen", 3L));
    set.insert(Customer("Mike", "Tseng", 4L));
    set.insert(Customer("Paili", "Chen", 5L));
    set.insert(Customer("Light", "Shiau", 6L));
    set.insert(Customer("Shally", "Hwung", 7L));

    cout << "set current bucket_count : " << set.bucket_count() << endl;

    return set.bucket_count();
}

void func2(const size_t mod)
{
    CustomerHash hh;

    cout << "bucket position of Ace = " << hh(Customer("Ace", "Hou", 1L)) % mod << endl;
    cout << "bucket position of Sabri = " << hh(Customer("Sabri", "Hou", 2L)) % mod << endl;
    cout << "bucket position of Stacy = " << hh(Customer("Stacy", "Chen", 3L)) % mod << endl;
    cout << "bucket position of Mike = " << hh(Customer("Mike", "Tseng", 4L)) % mod << endl;
    cout << "bucket position of Paili = " << hh(Customer("Paili", "Chen", 5L)) % mod << endl;
    cout << "bucket position of Light = " << hh(Customer("Light", "Shiau", 6L)) % mod << endl;
    cout << "bucket position of Shally = " << hh(Customer("Shally", "Hwung", 7L)) % mod << endl;
}


// Builds the set via func1, then passes the bucket count it returns to func2
// as the modulus.
int main()
{
    func2(func1());

    return 0;
}

输出:

set current bucket_count : 17
bucket position of Ace = 0
bucket position of Sabri = 11
bucket position of Stacy = 3
bucket position of Mike = 4
bucket position of Paili = 9
bucket position of Light = 10
bucket position of Shally = 15

TianSong
737 声望139 粉丝

阿里山神木的种子在3000年前已经埋下,今天不过是看到当年注定的结果,为了未来的自己,今天就埋下一颗好种子吧