~rycwo/forge

ref: HEAD forge/src/hash_table.c -rw-r--r-- 4.9 KiB
d7ee94d6Ryan Chan Fix missing cd in .build.yml 14 days ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// This file is part of Forge, the foundation library for Forge tools.
//
// Copyright (C) 2021 Ryan Chan <rycwo@posteo.net>
// SPDX-License-Identifier: GPL-3.0-only
//
// This Source Code Form is subject to the terms of the GNU General Public
// License v3.0 only. You should have received a copy of the license along with
// this program. If not, see <https://www.gnu.org/licenses/>.

#include "forge/hash_table.h"

#include <assert.h>
#include <stdbool.h>
#include <string.h>

#include "forge/murmur_hash.h"

// Sentinel stored in `values` to mark an unoccupied slot. As a consequence,
// INT64_MIN itself can never be stored as a value in the table.
#define HASH_TABLE_EMPTY_ENTRY INT64_MIN

// Natural slot index for a hash key. Arguments are fully parenthesized so
// arbitrary expressions can be passed safely (CERT PRE01-C).
#define hash_table_pos(table, key) \
	((key) % (uint64_t)(table)->capacity)

// Current occupancy ratio; used to decide when the table must grow.
#define hash_table_load_factor(table) \
	((float)(table)->size / (float)(table)->capacity)

// Result of probing the table for a key.
struct hash_table_entry {
	int64_t index; // Slot index found, or -1 if the probe exhausted the table.
	bool empty;    // true: slot is vacant (key absent); false: slot holds the key.
};

// Linearly probe the table starting at `key`'s natural slot. Returns the
// first empty slot encountered (key absent, `empty` true) or the slot
// holding `key` (`empty` false), whichever comes first. If every slot is
// occupied by other keys, returns {-1, true}; callers prevent this case by
// growing the table before it fills up.
static struct hash_table_entry
probe_hash_table(struct fg_hash_table const* table, uint64_t key) {
	assert(table->capacity > 0);
	int64_t const start = hash_table_pos(table, key);
	for (int64_t step = 0; step < table->capacity; ++step) {
		int64_t const slot = (start + step) % table->capacity;
		if (table->values[slot] == HASH_TABLE_EMPTY_ENTRY)
			return (struct hash_table_entry){slot, true};
		if (table->keys[slot] == key)
			return (struct hash_table_entry){slot, false};
	}
	// Probed every slot without finding the key or a vacancy.
	return (struct hash_table_entry){-1, true};
}

static inline void
set_hash_table_entry(
		struct fg_hash_table* table,
		uint64_t key,
		int64_t val) {
	struct hash_table_entry const entry = probe_hash_table(table, key);
	assert(entry.empty);
	assert(entry.index >= 0);
	table->keys[entry.index] = key;
	table->values[entry.index] = val;
}

// Rehash the table into a fresh allocation of `capacity` slots. Every
// existing key/value pair is reinserted into the new buffers; the old
// buffers are released. No-op when the capacity is unchanged.
static void
resize_hash_table(struct fg_hash_table* table, int64_t capacity) {
	assert(capacity > 0);
	if (capacity == table->capacity)
		return;

	struct fg_hash_table orig = *table;
	// Allocate for the new capacity,
	// then rehash all the existing key/value pairs.
	fg_alloc_hash_table(table, &table->alloc, capacity);
	// int64_t index: `orig.capacity` is int64_t, so an `int` index would
	// overflow (UB) on tables larger than INT_MAX slots.
	for (int64_t i = 0; i < orig.capacity; ++i) {
		if (orig.values[i] != HASH_TABLE_EMPTY_ENTRY)
			set_hash_table_entry(table, orig.keys[i], orig.values[i]);
	}
	table->size = orig.size;
	fg_free_hash_table(&orig);
}

// Initialize `table` with `initial_capacity` slots, using `alloc` for the
// key and value buffers. All slots start empty and size is zero. The
// allocator is copied into the table for later resizing/freeing.
//
// NOTE(review): allocation results are not checked; whether `alloc->alloc`
// can return NULL depends on the fg_allocator contract — confirm it aborts
// (or never fails) on OOM.
void
fg_alloc_hash_table(
		struct fg_hash_table* table,
		struct fg_allocator const* alloc,
		int64_t initial_capacity) {
	assert(initial_capacity >= 0);
	table->keys = (uint64_t*)alloc->alloc(sizeof(uint64_t) * initial_capacity);
	table->values = (int64_t*)alloc->alloc(sizeof(int64_t) * initial_capacity);
	// Wipe table entries to empty. int64_t index: an `int` would overflow
	// (UB) if the capacity ever exceeds INT_MAX.
	for (int64_t i = 0; i < initial_capacity; ++i)
		table->values[i] = HASH_TABLE_EMPTY_ENTRY;
	table->alloc = *alloc;
	table->capacity = initial_capacity;
	table->size = 0;
}

// Release the table's buffers and reset it to an empty, zero-capacity
// state. The table may be re-initialized with fg_alloc_hash_table after.
void
fg_free_hash_table(struct fg_hash_table* table) {
	table->alloc.free(table->keys);
	table->alloc.free(table->values);
	// Null the dangling pointers so an accidental double free or
	// use-after-free fails deterministically instead of corrupting memory.
	table->keys = NULL;
	table->values = NULL;
	table->capacity = 0;
	table->size = 0;
}

int64_t
fg_lookup_hash_table_entry(
		struct fg_hash_table const* table,
		uint64_t key,
		int64_t default_val) {
	if (table->size <= 0)
		return default_val;
	struct hash_table_entry const entry = probe_hash_table(table, key);
	if (entry.index < 0 || entry.empty)
		return default_val;
	return table->values[entry.index];
}

// Insert a new key/value pair. The key must not already be present (the
// internal helper asserts this in debug builds). Backing storage is
// created lazily on first insert and grown whenever the load factor limit
// is crossed.
void
fg_insert_hash_table_entry(
		struct fg_hash_table* table,
		uint64_t key,
		int64_t val) {
	// Lazily create the backing storage on first use.
	if (table->capacity <= 0)
		resize_hash_table(table, FG_HASH_TABLE_INIT_CAPACITY);
	set_hash_table_entry(table, key, val);
	++table->size;
	// Grow once the table becomes too crowded for fast probing.
	bool const crowded =
			hash_table_load_factor(table) > FG_HASH_TABLE_MAX_LOAD_FACTOR;
	if (crowded)
		resize_hash_table(table, table->capacity * FG_HASH_TABLE_GROWTH_FACTOR);
}

// Remove `key` from the table using backward-shift deletion, so that later
// linear probes never terminate early at a spurious vacancy.
// Precondition (asserted): `key` is present in the table.
void
fg_delete_hash_table_entry(struct fg_hash_table* table, uint64_t key) {
	struct hash_table_entry entry = probe_hash_table(table, key);
	// Deleting a non-existent key is an error
	assert(!entry.empty);
	assert(entry.index >= 0);

	// From Wikipedia:
	// "For all records in a cluster, there must be no vacant slots between
	// their natural hash position and their current position (else lookups
	// will terminate before finding the record)."

	// Walk the cluster following the deleted slot, shifting an entry back
	// into the hole whenever the hole would otherwise sit between that
	// entry's natural slot and its current slot.
	int64_t j, k;
	int64_t hole = entry.index;
	int64_t const offset = hole + table->capacity;
	for (int64_t i = hole; i < offset; ++i) {
		// j: current probe slot, wrapping around the end of the array.
		j = i % table->capacity;
		// An empty slot terminates the cluster; nothing beyond it can be
		// affected by the hole.
		if (table->values[j] == HASH_TABLE_EMPTY_ENTRY)
			break;
		// k: the natural (unprobed) slot of the entry currently at j.
		k = hash_table_pos(table, table->keys[j]);
		// Make sure the hole is not between j and k, taking into account
		// that we may have looped around to the front of the array.
		if ((j > hole && (k <= hole || k > j))
				|| (j < hole && k <= hole && k > j)) {
			table->keys[hole] = table->keys[j];
			table->values[hole] = table->values[j];
			hole = j;
		}
	}

	// Vacate the final hole and shrink the logical size.
	table->values[hole] = HASH_TABLE_EMPTY_ENTRY;
	table->size--;
}

// Hash an int by feeding its raw object representation through
// MurmurHash64 with the given seed.
uint64_t
fg_hash_int(int val, uint64_t seed) {
	return fg_murmur_hash_64(&val, sizeof val, seed);
}

// Hash a NUL-terminated string (terminator excluded) through MurmurHash64
// with the given seed. `val` must not be NULL (asserted).
uint64_t
fg_hash_string(char const* val, uint64_t seed) {
	assert(val != NULL);
	size_t const len = strlen(val);
	return fg_murmur_hash_64(val, len, seed);
}