在上一篇博客中,我们简单地介绍了哈希表,以及解决哈希冲突的办法;今天我们介绍解决哈希冲突的另一种方法——开散列法。开散列法又叫链地址法,首先对关键码集合用散列函数计算散列地址,具有相同地址的关键码归于同一子集合,每一个子集合称为一个桶,各个桶中的元素通过一个单链表链接起来,各链表的头结点组成一个向量。说起来可能很复杂,其实很简单,看一个图就明白了:假设哈希函数为Hash(key)=key%10
以下是我实现的哈希桶:
// 获得一个适合的素数
const int _PrimeSize = 28;//素数表
static const unsigned long _PrimeList[_PrimeSize] =
{
53ul,97ul,193ul,389ul,769ul,1543ul,3079ul,6151ul,12289ul,24593ul,49157ul,98317ul,196613ul,393241ul,786433ul,1572869ul,3145739ul,6291469ul,12582917ul,25165843ul,50331653ul,100663319ul,201326611ul,402653189ul,805306457ul,1610612741ul,3221225473ul,4294967291ul
};
size_t GetNextPrime(size_t num)
{
for (size_t i = 0; i < _PrimeSize; i++)
{
if (_PrimeList[i]>num)
return _PrimeList[i];
}
return _PrimeList[_PrimeSize - 1];
}
template <class K>
class KeyToIntDef
{
public:
size_t operator()(const K& key)
{
return key;
}
};
// 将字符串转换成整型
static size_t BKDRHash(const char * str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313
unsigned int hash = 0;
while (*str)
{
hash = hash * seed + (*str++);
}
size_t ret = (hash & 0x7FFFFFFF);
return ret;
}
class StringToInt
{
public:
size_t operator()(const string& key)
{
return BKDRHash(key.c_str());
}
};
#include <iostream>
#include <vector>
#include <string>
using namespace std;
template <class K,class V>
struct HashNode//结点
{
pair<K,V> _kv;
HashNode<K,V>* _pNext;
HashNode(const pair<K,V>& kv)
: _kv(kv),_pNext(NULL)
{}
};
template <class K,class V,class KeyToInt>
class HashTable;//前置声明
//迭代器
template <class K,class KeyToInt = KeyToIntDef<K>>
struct HashTableIterator
{
typedef HashNode<K,V> Node;
typedef Node* PNode;
typedef HashTableIterator<K,V,KeyToInt> Self;
PNode _pCur;
HashTable<K,KeyToInt>* _ht;
public:
HashTableIterator()
: _pCur(NULL),_ht(NULL)
{}
HashTableIterator(const PNode pCur,HashTable<K,KeyToInt>* ht)
: _pCur(pCur),_ht(ht)
{}
HashTableIterator(Self& s)
: _pCur(s._pCur),_ht(s._ht)
{}
Self& operator++()
{
Next();
return *this;
}
Self operator++(int)
{
HashTableIterator temp(*this);
Next();
return temp;
}
pair<K,V>& operator*()
{
return _pCur->_kv;
}
pair<K,V>* operator->()
{
return &(operator*());
}
bool operator==(const Self& s)
{
return _pCur == s._pCur;
}
bool operator!=(const Self& s)
{
return _pCur != s._pCur;
}
private:
void Next()
{
if (_pCur->_pNext)
_pCur = _pCur->_pNext;
else
{//找下一个非空桶
size_t bucketNo = _ht->HashFunc(_pCur->_kv.first) + 1;
for (; bucketNo < _ht->_hashTable.capacity(); bucketNo++)
{
if (_ht->_hashTable[bucketNo])
{
_pCur = _ht->_hashTable[bucketNo];
return;
}
}
_pCur = NULL;//没有找到
}
return;
}
};
//哈希桶
template <class K,class KeyToInt = KeyToIntDef<K>>
class HashTable
{
typedef HashNode<K,V> Node;
typedef Node* PNode;
friend HashTableIterator<K,KeyToInt>;
public:
typedef HashTableIterator<K,KeyToInt> Iterator;
public:
HashTable(size_t capacity = 10)
{
capacity = GetNextPrimer(capacity);
_hashTable.resize(capacity);
_size = 0;
}
Iterator Begin()
{
for (size_t bucketNo = 0; bucketNo < _hashTable.capacity(); bucketNo++)
{
if (_hashTable[bucketNo])
return Iterator(_hashTable[bucketNo],this);
}
return Iterator(NULL,this);
}
Iterator End()
{
return Iterator(NULL,this);
}
//////////////////////////插入,查找及删除/////////////////////////////////
pair<Iterator,bool> InsertEqual(const pair<K,V>& kv)//允许重复
{
CheckCapacity();
size_t bucketNo = HashFunc(kv.first);
PNode pNewNode = new Node(kv);
pNewNode->_pNext = _hashTable[bucketNo];//头插
_hashTable[bucketNo] = pNewNode;
_size++;
return make_pair(Iterator(pNewNode,this),true);
}
pair<Iterator,bool> InsertUnique(const pair<K,V>& kv)//key值唯一
{
CheckCapacity();
size_t bucketNo = HashFunc(kv.first);
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
if (kv.first == pCur->_kv.first)
return make_pair(Iterator(pCur,false);
pCur = pCur->_pNext;
}
pCur = new Node(kv);
pCur->_pNext = _hashTable[bucketNo];
_hashTable[bucketNo] = pCur;
_size++;
return make_pair(Iterator(pCur,true);
}
Iterator Find(const K& key)
{
size_t bucketNo = HashFunc(key);
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
if (pCur->_kv.first == key)
return Iterator(pCur,this);
pCur = pCur->_pNext;
}
return Iterator(NULL,this);
}
Iterator Erase(Iterator pos)//key值唯一
{
if (pos._pCur == NULL)
return Iterator(NULL,this);
size_t key = pos._pCur->_kv.first;
size_t bucketNo = HashFunc(key);
PNode pCur = _hashTable[bucketNo];
PNode pPre = NULL;
while (pCur)
{
if (pCur->_kv.first == key)
{
if (_hashTable[bucketNo] == pCur)//pCur是首元素结点
_hashTable[bucketNo] = pCur->_pNext;
else
pPre->_pNext = pCur->_pNext;
delete pCur;
pCur = NULL;
_size--;
return Iterator(pPre,this);
}
else
{
pPre = pCur;
pCur = pCur->_pNext;
}
}
return Iterator(NULL,this);
}
size_t Erase(const K& key)//key值可以重复
{
size_t oldsize = _size;
size_t bucketNo = HashFunc(key);
PNode pCur = _hashTable[bucketNo];
PNode pPre = NULL;
while (pCur)
{
if (pCur->_kv.first == key)
{
if (pCur == _hashTable[bucketNo])
{
_hashTable[bucketNo] = pCur->_pNext;
delete pCur;
pCur = _hashTable[bucketNo];
}
else
{
pPre->_pNext = pCur->_pNext;
delete pCur;
pCur = pPre->_pNext;
}
_size--;
}
else
{
pPre = pCur;
pCur = pPre->_pNext;
}
}
if (oldsize == _size)
return 0;
else
return oldsize - _size;
}
///////////////////////////其它常用函数///////////////////////////////
size_t Count(const K key)//值为key的元素个数
{
size_t count = 0;
size_t bucketNo = HashFunc(key);
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
if (pCur->_kv.first == key)
count++;
pCur = pCur->_pNext;
}
return count;
}
size_t BucketCount()const//桶的个数
{
return _hashTable.capacity();
}
size_t BucketSize(size_t bucketNo)const//桶内元素个数
{
size_t count = 0;
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
count++;
pCur = pCur->_pNext;
}
return count;
}
V& FindORInsert(const K& key)//查找值为key,如果找到了,返回对应的value,
//如果没有找到插入该结点,返回缺省的value
{
Iterator ret = InsertUnique(make_pair(key,V())).first;
return (*ret).second;
}
bool Empty()const//是否为空
{
return _size == 0;
}
size_t Size()const//插入的总数
{
return _size;
}
void Clear()//清空
{
for (size_t bucketNo = 0; bucketNo < _hashTable.capacity(); bucketNo++)
{
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
_hashTable[bucketNo] = pCur->_pNext;
delete pCur;
pCur = _hashTable[bucketNo];
_size--;
}
}
}
~HashTable()
{
Clear();
}
private:
size_t HashFunc(const K& key)//哈希函数
{
return KeyToInt()(key) % _hashTable.capacity();
}
void CheckCapacity()//扩容
{
size_t capacity = _hashTable.capacity();
if (_size == capacity)
{
HashTable<K,KeyToInt> newHt(GetNextPrime(capacity));
for (size_t bucketNo = 0; bucketNo < capacity; bucketNo++)
{
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
newHt.InsertEqual(pCur->_kv);
pCur = pCur->_pNext;
}
}
_hashTable.swap(newHt._hashTable);
}
}
private:
vector<PNode> _hashTable;
size_t _size;
};
void test()
{
HashTable<int,int> ht;
ht.InsertEqual(make_pair(1,1));
ht.InsertEqual(make_pair(5,5));
ht.InsertEqual(make_pair(15,15));
ht.InsertEqual(make_pair(15,15));
ht.InsertEqual(make_pair(35,35));
ht.InsertEqual(make_pair(9,9));
HashTable<int,int>::Iterator it = ht.Begin();
if (!ht.Empty())
{
while (it != ht.End())
{
cout << it->first << " " ;
cout << (*it).second << endl;
it++;
}
cout << endl;
cout << ht.BucketSize(5) << endl;
cout << ht.BucketCount() << endl;
cout << ht.Count(15) << endl;
}
it = ht.Begin();
cout << ht.Erase(15) << endl;
HashTable<int,int>::Iterator ret = ht.Find(1);
ht.Erase(ret);
cout << ht.Size() << endl;
ht.Clear();
HashTable<string,string,StringToInt> ht1;
ht1.InsertUnique(make_pair("111","111"));
ht1.InsertUnique(make_pair("111","111"));
cout << ht1.FindORInsert("111") << endl;
}