容器 - 结构与分类
容器 array
TR1 实现:[没有 ctor, 没有 dtor]
// Sketch of the TR1 array: a plain struct wrapping a built-in array.
// It declares no constructor and no destructor.
template<typename _Tp, std::size_t _Nm>
struct array
{
    typedef _Tp         value_type;
    typedef _Tp*        pointer;
    typedef value_type* iterator;   // the iterator is simply a native pointer

    // Zero-sized arrays must be supported, so at least one slot is reserved.
    value_type _M_instance[_Nm ? _Nm : 1];

    // Pointer to the first element.
    iterator begin()
    {
        return _M_instance;         // array-to-pointer decay
    }

    // Pointer one past the last of the _Nm elements.
    iterator end()
    {
        return _M_instance + _Nm;
    }
    // ...
};
// Shows that array's iterator behaves like a raw pointer: it can be advanced
// with += and dereferenced directly.
void func()
{
    array<int, 10> myArray = {};   // zero-initialised so the read below is well-defined
    auto ite = myArray.begin();
    // array<int, 10>::iterator ite = ...
    ite += 3;
    cout << *ite;                  // stream insertion is <<, not >>
}
G4.9 实现
文件:array
// std::array as implemented in GCC 4.9 (excerpt). It is still an aggregate:
// no explicit constructor, copy operation or destructor is declared.
template<typename _Tp, std::size_t _Nm>
struct array
{
    typedef _Tp                                   value_type;
    typedef value_type*                           pointer;
    typedef const value_type*                     const_pointer;
    typedef value_type&                           reference;
    typedef const value_type&                     const_reference;
    typedef value_type*                           iterator;
    typedef const value_type*                     const_iterator;
    typedef std::size_t                           size_type;
    typedef std::ptrdiff_t                        difference_type;
    typedef std::reverse_iterator<iterator>       reverse_iterator;
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;

    // Support for zero-sized arrays mandatory.
    typedef _GLIBCXX_STD_C::__array_traits<_Tp, _Nm> _AT_Type;
    typename _AT_Type::_Type _M_elems;   // note: the storage type is supplied by __array_traits

    // No explicit construct/copy/destroy for aggregate type.

    // Iterator to the first element.
    iterator
    begin() noexcept
    { return iterator(data()); }

    // Iterator one past the last element.
    iterator
    end() noexcept
    { return iterator(data() + _Nm); }

    // Number of elements; always the template argument _Nm.
    constexpr size_type
    size() const noexcept { return _Nm; }

    constexpr bool
    empty() const noexcept { return size() == 0; }

    // Element access without bounds checking.
    reference
    operator[](size_type __n) noexcept
    { return _AT_Type::_S_ref(_M_elems, __n); }

    // Element access with bounds checking: reports an out-of-range error
    // through std::__throw_out_of_range_fmt when __n >= _Nm.
    reference
    at(size_type __n)
    {
        if (__n >= _Nm)
            std::__throw_out_of_range_fmt(__N("array::at: __n (which is %zu) "
                                              ">= _Nm (which is %zu)"),
                                          __n, _Nm);
        return _AT_Type::_S_ref(_M_elems, __n);
    }
    // ...
};   // closes the excerpt so the definitions that follow are not parsed as members
// Supplies the storage type used by array, plus an accessor that returns a
// mutable reference to element __n of that storage.
template<typename _Tp, std::size_t _Nm>
struct __array_traits
{
    // Example: typedef int T[100]; T _M_elems;  ==>  int _M_elems[100];
    typedef _Tp _Type[_Nm];

    // Returns element __n of __t. The const_cast strips the constness added
    // by the const _Type& parameter. No bounds check is performed.
    static constexpr _Tp&
    _S_ref(const _Type& __t, std::size_t __n) noexcept
    { return const_cast<_Tp&>(__t[__n]); }
    // ...
};   // a class definition must be terminated by ';'
容器 hashtable
Separate Chaining.虽然list是线性搜索时间,如果list够小,搜索速度仍然非常快。
上图当前篮子的数量为53([0-52],一般为vector),元素个数为6(元素代码hashcode[53, 55, 2, 108, 59, 63],一般挂接在单向或双向链表)。当再安插 48 个元素,使元素数量达到54个,超过当前的篮子数量 (buckets vector)大小53, 于是 rehash(元素数量大于等于篮子数量时都会发生)。于是可知篮子的数量永远大于元素数量。
hashcode(元素代码),由对应的 hashfunction计算得出(使用者提供),越乱越好,尽量使不同元素得出的元素代码不同。
元素放在哪个篮子之下由 hashcode 取余 篮子大小 得出。
unordered 容器
Before c++11:
- hash_set
- hash_multiset
- hash_map
- hash_multimap
Since C++11:
- unordered_set
- unordered_multiset
- unordered_map
- unordered_multimap
文件:unordered_set.h && unordered_map.h
// Declarations of the four unordered containers. The hasher, the key-equality
// predicate and the allocator all have default template arguments.

// Set of unique values.
template<class _Value,
         class _Hash = hash<_Value>,
         class _Pred = std::equal_to<_Value>,
         class _Alloc = std::allocator<_Value> >
class unordered_set;

// Set that allows duplicate values.
template<class _Value,
         class _Hash = hash<_Value>,
         class _Pred = std::equal_to<_Value>,
         class _Alloc = std::allocator<_Value> >
class unordered_multiset;   // the terminating ';' was missing

// Map with unique keys; elements are std::pair<const _Key, _Tp>.
template<class _Key, class _Tp,
         class _Hash = hash<_Key>,
         class _Pred = std::equal_to<_Key>,
         class _Alloc = std::allocator<std::pair<const _Key, _Tp> > >
class unordered_map;

// Map that allows duplicate keys.
template<class _Key, class _Tp,
         class _Hash = hash<_Key>,
         class _Pred = std::equal_to<_Key>,
         class _Alloc = std::allocator<std::pair<const _Key, _Tp> > >
class unordered_multimap;   // the terminating ';' was missing
关于 hash function
文件:Test.cpp
#include <iostream>
#include <string>
using namespace std;
// Prints the hash codes that the standard hash<> functor yields for several
// built-in types, a string, a C-string pointer and a raw pointer.
int main()
{
    // A local object replaces the original `new int(100)`, which was never
    // freed. Only the address is hashed, so any valid object will do.
    int value = 100;
    void *pi = static_cast<void*>(&value);

    cout << hash<int>()(123) << '\n';
    cout << hash<long>()(123L) << '\n';
    cout << hash<string>()(string("Ace")) << '\n';
    // Uses the pointer specialization: the address is hashed, not the characters.
    cout << hash<const char*>()("Ace") << '\n';
    cout << hash<char>()('A') << '\n';
    cout << hash<float>()(3.1415926f) << '\n';
    cout << hash<double>()(3.1415926) << '\n';
    cout << hash<void*>()(pi) << '\n';
    return 0;
}
输出:
123
123
1765813650
4210761
65
1630017722
2019885673
16284376
hash function 的目的,就是根据元素值算出一个 hash code(一个可进行 modulus 运算的值,即 hashcode % buckets vector size),使得元素经 hash code 映射之后能够[够混乱、够随机]地被置于 hashtable 内。越是随机,越不容易发生碰撞。
标准库中的部分 hash function,文件:functional_hash.h
// Common base of the hash<> specializations: publishes the functor's result
// type and argument type as nested type names.
template<typename _Result, typename _Arg>
struct __hash_base
{
    using result_type   = _Result;
    using argument_type = _Arg;
};
// .....
/// Primary class template hash.
template<typename _Tp>
struct hash;
// .....
/// Partial specializations for pointer types.
// The hash code of a pointer is its value reinterpreted as a size_t.
template<typename _Tp>
struct hash<_Tp*> : public __hash_base<size_t, _Tp*>
{
    size_t
    operator()(_Tp* __p) const noexcept
    {
        const size_t __address = reinterpret_cast<size_t>(__p);
        return __address;
    }
};
// Explicit specializations for integer types.
// The helper macro below stamps out one hash<_Tp> specialization per listed
// type; each one returns the value itself converted to size_t via static_cast.
// The macro is #undef'd after the last use so it does not leak further.
#define _Cxx_hashtable_define_trivial_hash(_Tp) \
template<> \
struct hash<_Tp> : public __hash_base<size_t, _Tp> \
{ \
size_t \
operator()(_Tp __val) const noexcept \
{ return static_cast<size_t>(__val); } \
};
/// Explicit specialization for bool.
_Cxx_hashtable_define_trivial_hash(bool)
/// Explicit specialization for char.
_Cxx_hashtable_define_trivial_hash(char)
/// Explicit specialization for signed char.
_Cxx_hashtable_define_trivial_hash(signed char)
/// Explicit specialization for unsigned char.
_Cxx_hashtable_define_trivial_hash(unsigned char)
/// Explicit specialization for wchar_t.
_Cxx_hashtable_define_trivial_hash(wchar_t)
/// Explicit specialization for char16_t.
_Cxx_hashtable_define_trivial_hash(char16_t)
/// Explicit specialization for char32_t.
_Cxx_hashtable_define_trivial_hash(char32_t)
/// Explicit specialization for short.
_Cxx_hashtable_define_trivial_hash(short)
/// Explicit specialization for int.
_Cxx_hashtable_define_trivial_hash(int)
/// Explicit specialization for long.
_Cxx_hashtable_define_trivial_hash(long)
/// Explicit specialization for long long.
_Cxx_hashtable_define_trivial_hash(long long)
/// Explicit specialization for unsigned short.
_Cxx_hashtable_define_trivial_hash(unsigned short)
/// Explicit specialization for unsigned int.
_Cxx_hashtable_define_trivial_hash(unsigned int)
/// Explicit specialization for unsigned long.
_Cxx_hashtable_define_trivial_hash(unsigned long)
/// Explicit specialization for unsigned long long.
_Cxx_hashtable_define_trivial_hash(unsigned long long)
#undef _Cxx_hashtable_define_trivial_hash
//......
// Byte-oriented hashing helpers that forward to _Hash_bytes.
struct _Hash_impl
{
    // Hashes __clength bytes starting at __ptr by forwarding to _Hash_bytes.
    // Note: only a declaration of _Hash_bytes (in hash_bytes.h) was found,
    // not its definition.
    static size_t
    hash(const void* __ptr, size_t __clength,
         size_t __seed = static_cast<size_t>(0xc70f6907UL))
    {
        const size_t __code = _Hash_bytes(__ptr, __clength, __seed);
        return __code;
    }

    // Hashes the bytes of __val (sizeof(__val) of them) with the default seed.
    template<typename _Tp>
    static size_t
    hash(const _Tp& __val)
    {
        const size_t __width = sizeof(__val);
        return hash(&__val, __width);
    }

    // Hashes the bytes of __val, using the earlier code __hash as the seed.
    template<typename _Tp>
    static size_t
    __hash_combine(const _Tp& __val, size_t __hash)
    {
        const size_t __width = sizeof(__val);
        return hash(&__val, __width, __hash);
    }
};
// A hash function similar to FNV-1a (see PR59406 for how it differs).
// Same three helpers as _Hash_impl, but forwarding to _Fnv_hash_bytes.
struct _Fnv_hash_impl
{
    // Hashes __clength bytes starting at __ptr by forwarding to _Fnv_hash_bytes.
    static size_t
    hash(const void* __ptr, size_t __clength,
         size_t __seed = static_cast<size_t>(2166136261UL))
    {
        const size_t __code = _Fnv_hash_bytes(__ptr, __clength, __seed);
        return __code;
    }

    // Hashes the bytes of __val (sizeof(__val) of them) with the default seed.
    template<typename _Tp>
    static size_t
    hash(const _Tp& __val)
    {
        const size_t __width = sizeof(__val);
        return hash(&__val, __width);
    }

    // Hashes the bytes of __val, using the earlier code __hash as the seed.
    template<typename _Tp>
    static size_t
    __hash_combine(const _Tp& __val, size_t __hash)
    {
        const size_t __width = sizeof(__val);
        return hash(&__val, __width, __hash);
    }
};
/// Specialization for float.
template<>
struct hash<float> : public __hash_base<size_t, float>
{
    size_t
    operator()(float __val) const noexcept
    {
        // 0 and -0 compare equal, so both must hash to the same code: zero.
        if (__val == 0.0f)
            return 0;
        // Every other value is hashed through _Hash_impl.
        return std::_Hash_impl::hash(__val);
    }
};
/// Specialization for double.
template<>
struct hash<double> : public __hash_base<size_t, double>
{
    size_t
    operator()(double __val) const noexcept
    {
        // 0 and -0 compare equal, so both must hash to the same code: zero.
        if (__val == 0.0)
            return 0;
        // Every other value is hashed through _Hash_impl.
        return std::_Hash_impl::hash(__val);
    }
};
/// Specialization for long double.
// Unlike the float and double specializations, operator() is only declared
// here; this excerpt contains no inline body for it.
template<>
struct hash<long double>
: public __hash_base<size_t, long double>
{
_GLIBCXX_PURE size_t
operator()(long double __val) const noexcept;
};
// @} group hashes
// Hint about performance of hash functor. If not fast the hash-based
// containers will cache the hash code.
// Default behavior is to consider that hashers are fast unless specified
// otherwise.
// Primary template: every hasher type is reported as fast (true_type).
template<typename _Hash>
struct __is_fast_hash : public std::true_type
{ };
// hash<long double> is the one hasher flagged as not fast in this excerpt.
template<>
struct __is_fast_hash<hash<long double>> : public std::false_type
{ };
文件:hash_bytes.h [未找到定义]
// Hash function implementation for the nontrivial specialization.
// All of them are based on a primitive that hashes a pointer to a
// byte array. The actual hash algorithm is not guaranteed to stay
// the same from release to release -- it may be updated or tuned to
// improve hash quality or speed.
size_t
_Hash_bytes(const void* __ptr, size_t __len, size_t __seed);
// A similar hash primitive, using the FNV hash algorithm. This
// algorithm is guaranteed to stay the same from release to release.
// (although it might not produce the same values on different
// machines.)
size_t
_Fnv_hash_bytes(const void* __ptr, size_t __len, size_t __seed);
string 的 hash_function 实现,文件:basic_string.h
// hash specialization for string: hashes the string's character buffer
// (data() for length() bytes) through _Hash_impl.
template<>
struct hash<string>
: public __hash_base<size_t, string>
{
    size_t
    operator()(const string& __s) const noexcept
    {
        const char* const __chars = __s.data();
        return std::_Hash_impl::hash(__chars, __s.length());
    }
};
一个万用的 Hash Function
自定义 hash_function 的三种形式
// Sample record type used to illustrate the three ways of supplying a hash
// function for a user-defined type.
class Customer
{
public:
    std::string fname;   // first name
    std::string lname;   // last name
    long no;             // customer number; long to match std::hash<long>()(c.no)
                         // and the later Customer definition in these notes
};
- 形式一:定义函数对象
class CustomerHash
{
public:
std::size_t operator() (const CustomerHash &c) const
{
retrun ...
}
};
unordered_set<Customer, CustomerHash> cusset; // 使用时!!
- 形式二:普通函数
// Form 2: an ordinary function. The container's hasher type is then a
// function pointer, and the function itself is passed to the constructor
// after the initial bucket-count argument (20 here).
// The body is an elided placeholder in these notes.
std::size_t customer_hash_func(const Customer &c)
{
return ...;
}
unordered_set<Customer, size_t(*)(const Customer&)> custset(20, customer_hash_func); // usage
- 形式三: hash 的特化 (注意:必须在 std 空间内)
// Form 3: specialize std::hash for Customer (the specialization must live in
// namespace std). unordered_set<Customer> then picks it up as the default hasher.
namespace std
{
    template <>
    struct hash<Customer>
    {
        // const-qualified so the functor can be invoked on a const object,
        // as the standard Hash requirements expect.
        size_t operator()(const Customer &c) const
        {
            return ...;   // placeholder: the actual computation is elided in these notes
        }
    };
}
unordered_set<Customer> custset;   // usage
万用的 Hash Function 的实现
实现思想
把一个复杂的结构分解为多个基本类型,然后将各个基本类型的 hashcode 进行组合。
初次尝试
// First attempt: hash each field separately and simply add the three codes.
class CustomerHash
{
public:
    std::size_t operator() (const Customer &c) const
    {
        const std::size_t first_code = std::hash<std::string>()(c.fname);
        const std::size_t last_code  = std::hash<std::string>()(c.lname);
        const std::size_t no_code    = std::hash<long>()(c.no);
        return first_code + last_code + no_code;
    }
};
总结:不理想。会造成 hashcode 较多的碰撞,即同一篮子里含有较多的元素,降低了查找速度。
最终版本
#include <iostream>
#include <unordered_set>
using namespace std;
//-----------------------------------------------
// from boost (functional/hash)
// Mixes the hash code of val into seed (recipe taken from boost functional/hash).
template<typename T>
inline void hash_combine(size_t &seed, const T &val)
{
    // 0x9e3779b9 relates to the golden ratio; no need to dig into it here.
    const size_t mixed = std::hash<T>()(val)
                       + 0x9e3779b9
                       + (seed << 6)
                       + (seed >> 2);
    seed = seed ^ mixed;
}

// auxiliary generic functions to create a hash value using a seed

// Recursion terminator: one value left to fold into seed.
template<typename T>
inline void hash_val(size_t &seed, const T &val)
{
    hash_combine(seed, val);
}

// Folds the leading value into seed (seed is passed by reference, so every
// step updates it), then recurses on the remaining values.
template<typename T, typename... Types>
inline void hash_val(size_t &seed, const T &val, const Types&... args)
{
    hash_combine(seed, val);
    hash_val(seed, args...);
}

// auxiliary generic functions

// Entry point: starts from a zero seed, folds in every argument in order and
// returns the resulting hash code.
template<typename... Types>
inline size_t hash_val(const Types&... args)
{
    size_t result = 0;
    hash_val(result, args...);
    return result;
}
//---------------------------
// Sample record type hashed by the three forms below.
class Customer
{
public:
    std::string fname;
    std::string lname;
    long        no;

    // Stores copies of the three arguments in the matching members.
    Customer(std::string f, std::string l, long n)
        : fname(f),
          lname(l),
          no(n)
    {
    }
};
// 1. 函数对象
// Form 1: function object. Combines all three fields through hash_val.
class CustomerHash
{
public:
    size_t operator()(const Customer &c) const
    {
        const size_t code = hash_val(c.fname, c.lname, c.no);
        return code;
    }
};
// 2. 普通函数
// Form 2: ordinary function. Combines all three fields through hash_val.
size_t customer_hash_func(const Customer &c)
{
    const size_t code = hash_val(c.fname, c.lname, c.no);
    return code;
}
// 3. hash 特化
// Form 3: std::hash specialization. Combines all three fields through hash_val.
namespace std
{
    template<>
    struct hash<Customer>
    {
        size_t operator()(const Customer &c) const
        {
            const size_t code = hash_val(c.fname, c.lname, c.no);
            return code;
        }
    };
}
int main()
{
cout << CustomerHash()(Customer("Ace", "Hou", 1)) << endl;
cout << customer_hash_func(Customer("Ace", "Hou", 1)) << endl;
cout << hash<Customer>()(Customer("Ace", "Hou", 1)) << endl;
return 0;
}
输出:
2329839052
2329839052
2329839052
hash table 观察
#include <iostream>
#include <unordered_set>
using namespace std;
//-----------------------------------------------
// from boost (functional/hash)
// Mixes the hash code of val into seed (recipe taken from boost functional/hash).
template<typename T>
inline void hash_combine(size_t &seed, const T &val)
{
    // 0x9e3779b9 relates to the golden ratio; no need to dig into it here.
    const size_t mixed = std::hash<T>()(val)
                       + 0x9e3779b9
                       + (seed << 6)
                       + (seed >> 2);
    seed = seed ^ mixed;
}

// auxiliary generic functions to create a hash value using a seed

// Recursion terminator: one value left to fold into seed.
template<typename T>
inline void hash_val(size_t &seed, const T &val)
{
    hash_combine(seed, val);
}

// Folds the leading value into seed, then recurses on the remaining values.
template<typename T, typename... Types>
inline void hash_val(size_t &seed, const T &val, const Types&... args)
{
    hash_combine(seed, val);
    hash_val(seed, args...);
}

// Entry point: starts from a zero seed, folds in every argument in order and
// returns the resulting hash code.
template<typename... Types>
inline size_t hash_val(const Types&... args)
{
    size_t result = 0;
    hash_val(result, args...);
    return result;
}
//---------------------------
// Sample record type stored in the unordered_set. It provides operator==
// because the container's default key-equality predicate compares elements
// with ==.
class Customer
{
public:
    // Stores copies of the three arguments in the matching members.
    Customer(std::string f, std::string l, long n) : fname(f), lname(l), no(n)
    { }

    // Two customers are equal when all three fields match. rhs is taken by
    // const reference so a comparison does not copy a whole Customer.
    bool operator== (const Customer &rhs) const
    {
        return (fname == rhs.fname) && (lname == rhs.lname) && (no == rhs.no);
    }

    std::string fname;
    std::string lname;
    long no;
};
// Hasher for Customer: combines all three fields through hash_val.
class CustomerHash
{
public:
    size_t operator()(const Customer &c) const
    {
        const size_t code = hash_val(c.fname, c.lname, c.no);
        return code;
    }
};
size_t func1()
{
unordered_set<Customer, CustomerHash> set;
set.insert(Customer("Ace", "Hou", 1L));
set.insert(Customer("Sabri", "Hou", 2L));
set.insert(Customer("Stacy", "Chen", 3L));
set.insert(Customer("Mike", "Tseng", 4L));
set.insert(Customer("Paili", "Chen", 5L));
set.insert(Customer("Light", "Shiau", 6L));
set.insert(Customer("Shally", "Hwung", 7L));
cout << "set current bucket_count : " << set.bucket_count() << endl;
return set.bucket_count();
}
void func2(const size_t mod)
{
CustomerHash hh;
cout << "bucket position of Ace = " << hh(Customer("Ace", "Hou", 1L)) % mod << endl;
cout << "bucket position of Sabri = " << hh(Customer("Sabri", "Hou", 2L)) % mod << endl;
cout << "bucket position of Stacy = " << hh(Customer("Stacy", "Chen", 3L)) % mod << endl;
cout << "bucket position of Mike = " << hh(Customer("Mike", "Tseng", 4L)) % mod << endl;
cout << "bucket position of Paili = " << hh(Customer("Paili", "Chen", 5L)) % mod << endl;
cout << "bucket position of Light = " << hh(Customer("Light", "Shiau", 6L)) % mod << endl;
cout << "bucket position of Shally = " << hh(Customer("Shally", "Hwung", 7L)) % mod << endl;
}
// Builds the set, then prints the bucket index of each customer using the
// bucket count that func1 reported.
int main()
{
    func2(func1());
    return 0;
}
输出:
set current bucket_count : 17
bucket position of Ace = 0
bucket position of Sabri = 11
bucket position of Stacy = 3
bucket position of Mike = 4
bucket position of Paili = 9
bucket position of Light = 10
bucket position of Shally = 15
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。