数据结构之散列哈希表分离链接法，开放定值法，再散列

2022-07-29 13:26:22

针对于Hash表其实就是将键值经过一系列计算(Horner运算法则,不懂得请自行百度)，在规定大小（可进行再散列，这里只是做讲解）的散列表中进行存储，散列表的长度一般为素数（具体原因个人也没理解，但如果用非素数，那需要进行散列的key将集中在与之本身Mod值为0的数）。而对于进行通过Hash算法得到的值也许会有冲突，这种情况并且经常发生，所以下面讲解解决冲突的两种方法，对于哈希表本身的实现并不难，重点是解决冲突和装填因子的比例问题。

1、分离连接法： 简单点说，其实就是声明出一个链表数组，对于进行冲突的值（比如：我们假设数组长度为10，而进行Hash计算的公式为：X mod 10，所以29和39他俩都会存储在数组下标为9的位置上，这是就会有冲突，所以我们就用链表的方法来对他进行并列存储），下面是代码实现，看代码就可以理解了，这里只做思想讲解。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//定义存储值的类型
typedef char * ElemType;

//定义链表存储结构
typedef struct ListNode
{
	ElemType data;
	struct ListNode *next;
}ListNode, *Position;
typedef Position List;

//定义哈希表存储结构
typedef struct HashNode
{
	int size;
	List *theList;
}HashNode, *HashTable;

//获取下一个质数
int NextPrime(int size)
{
	while(true)
	{
		if(size % 2 == 0 || size % 3 == 0)
		{
			size++;
		}
		else
		{
			return size;
		}
	}
}

//计算哈希值
int Hash(const char * key, int hashSize)
{
	unsigned int hashVal = 0;
	while(*key != '\0')
	{
		hashVal = (hashVal << 5) + *key++;
	}
	return hashVal % hashSize;
}

//获取一个指定大小的哈希表
HashTable Initialization(int hashSize)
{
	HashTable table;
	table = (HashTable)malloc(sizeof(HashNode));
	table -> size = NextPrime(hashSize);
	table -> theList = (List *)malloc(sizeof(ListNode) * table -> size);
	for(int i = 0; i < table -> size; i++)
	{
		table -> theList[i] = (List)malloc(sizeof(ListNode));
		table -> theList[i] -> next = NULL;
	}
	return table;
}

//根据key值查找节点
Position Find(ElemType key, HashTable table)
{
	List l = table -> theList[Hash(key, table -> size)];
	Position p = l -> next;
	while(p != NULL && p -> data != key)
	{
		p = p -> next;
	}
	return p;
}

//添加新元素
void Add(ElemType key, HashTable *table)
{
	Position p = Find(key, (*table));
	if(p == NULL)
	{
		int index = Hash(key, (*table) -> size);
		Position newNode = (Position)malloc(sizeof(ListNode));
		newNode -> data = key;
		newNode -> next = (*table) -> theList[index] -> next;
		(*table) -> theList[index] -> next = newNode;
	}

}

int main(void)
{
	HashTable table = Initialization(20);
	char s[] = "hello";
	Add(s, &table);
	ElemType d = table -> theList[19] -> next -> data;
	printf("%s\n", d);
	Position p = Find(s, table);
	if(p)
	{
		printf("%s\n", "abc");
		printf("%s\n", p -> data);
	}
	return 0;
}

2.开放定值法：由于分离链表法要经常对结点进行分配空间，对于运行速度就会有所减慢，而开放定值法就很巧妙地解决了这种方式，用完全数组方式来实现，而所谓的线性探测法，平方探测法，双散列其实就是不同的计算方式。

1-线性探测法：当对一个key进行hash计算后得出一个数组的下标，如果该位置没有元素将正常进行存储，否则向下寻找，直到找到空位置才进行存储，查找同理。

2-平方探测法：其实就对存在冲突的元素的下一个位置如果也存在元素将进行2^2的计算，以此类推，知道找到空位置，否则直接进行存储。

3-双散列：就是当存在冲突元素，将对该元素的Hash值重新计算，R - (x mod R)（R : 比散列长度小的素数值来进行存储）。这里要着重注意冲突元素的下个一存储单元是否被占据，这里可以进行灵活变换。

4-再散列：这里顺便介绍下再散列，因为开放定值法很容易让装填因子达到界值，所以我们要用到再散列，其实就是开辟一个比原散列大2倍与之最近的一个素数值的散列，来重新进行分配，这种方式很可取，因为并不用频繁开辟新散列，但要考虑到数据量突然剧增，这样很容易让你的主存占满，导致分配不出来新空间。

下面就直接上代码了，只实现了平方探测法，其他实现的思想大同小异。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//定义存储单元的状态
typedef enum Kind
{
	Legitimate,
	Empty,
	Delte
}Kind;

//定义哈希表元素下标
typedef int Index;

//定义数据类型
typedef char * ElemType;

//定义存储单元的存储结构
typedef struct CellNode
{
	ElemType data;
	Kind statu;
}Position, Cell;

//定义哈希表的存储结构
typedef struct HashNode
{
	int size;
	Cell *cells;
}HashNode, *HashTable;

//获取下一个质数
int NextPrime(int hashSize)
{
	while(true)
	{
		if(hashSize % 2 == 0 || hashSize % 3 == 0)
		{
			hashSize++;
		}
		else
		{
			return hashSize;
		}
	}
}

//获取Hash值
int Hash(char * key, HashTable table)
{
    int HashVal = 0;
	while(*key != '\0')
	{
		HashVal = (HashVal << 5) + *key++;
	}
	return HashVal % table -> size;
}

//初始化哈希表
HashTable Initialization(int hashSize)
{
	HashTable table = (HashTable)malloc(sizeof(HashNode));
	table -> size = NextPrime(hashSize);
	table -> cells = (Cell *)malloc(sizeof(Cell) * table -> size);
	for(int i = 0; i < table -> size; i++)
	{
		table -> cells[i].statu = Empty;
	}
	return table;
}

//查找节点
Index Find(ElemType key, HashTable table)
{
	int collisionNum = 0;
	Index pointer = Hash(key, table);
	Position p = table -> cells[pointer];
	while(p.statu != Empty && p.data != key)
	{
		pointer += 2 * ++collisionNum - 1;
		if(pointer >= table -> size)
		{
			pointer -= table -> size;
		}
		p = table -> cells[pointer];
	}
	return pointer;
}

//添加元素
void Add(ElemType key, HashTable *table)
{
  	Index pointer = Find(key, (*table));
	if((*table) -> cells[pointer].statu != Legitimate)
	{
		(*table) -> cells[pointer].data = key;
		(*table) -> cells[pointer].statu = Legitimate;
	}
}

int main(void)
{
	HashTable table = Initialization(20);
	printf("%d\n", table -> size);
	char s[] = "my name is lambert!";
	int hash = Hash(s, table);
	printf("%d\n", hash);
	Add(s, &table);
	Index i = Find(s, table);
	printf("%d\n", i);
	printf("%s\n", table -> cells[i].data);
	return 0;
}