summaryrefslogtreecommitdiff
path: root/ewah/ewok.h
blob: e73252536702aaf9fed17757937fbaf4b4593f91 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/**
 * Copyright 2013, GitHub, Inc
 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
 *	David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
#ifndef __EWOK_BITMAP_H__
#define __EWOK_BITMAP_H__

/*
 * Allocator hooks. Each ewah_* allocator name maps to the matching
 * x* function (xmalloc, xrealloc, xcalloc) unless it was already
 * defined before this point, so a build can substitute its own
 * allocators by defining these macros first.
 *
 * The x* functions are not declared in this header; they must be
 * made visible by whatever includes it.
 */
#ifndef ewah_malloc
#	define ewah_malloc xmalloc
#endif
#ifndef ewah_realloc
#	define ewah_realloc xrealloc
#endif
#ifndef ewah_calloc
#	define ewah_calloc xcalloc
#endif

/* Forward declaration: this header only refers to strbuf by pointer. */
struct strbuf;
/*
 * Word type used for bitmap storage throughout this API.
 *
 * NOTE(review): uint64_t (and uint32_t, size_t below) are used without
 * this header including <stdint.h>/<stddef.h>; presumably the including
 * file is expected to provide them first -- confirm.
 */
typedef uint64_t eword_t;
/* Number of bits in one eword_t (assumes 8-bit bytes). */
#define BITS_IN_WORD (sizeof(eword_t) * 8)

/**
 * Count the bits set in a 64-bit word.
 *
 * __builtin_popcountll is deliberately avoided: the GCC implementation
 * is notoriously slow on all platforms.
 *
 * See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36041
 *
 * The count is built up by summing neighbouring bit fields of
 * increasing width (1 -> 2 -> 4 -> 8 bits); the final multiply adds all
 * eight per-byte counts into the top byte.
 */
static inline uint32_t ewah_bit_popcount64(uint64_t x)
{
	const uint64_t mask_pairs   = 0x5555555555555555ULL;
	const uint64_t mask_nibbles = 0x3333333333333333ULL;
	const uint64_t mask_bytes   = 0x0F0F0F0F0F0F0F0FULL;
	const uint64_t byte_ones    = 0x0101010101010101ULL;
	uint64_t sums = x;

	/* each 2-bit field holds the count of its two bits */
	sums = (sums & mask_pairs) + ((sums >> 1) & mask_pairs);
	/* each 4-bit field holds the count of its four bits */
	sums = (sums & mask_nibbles) + ((sums >> 2) & mask_nibbles);
	/* each byte holds the count of its eight bits */
	sums = (sums & mask_bytes) + ((sums >> 4) & mask_bytes);

	/* horizontal add of the eight bytes, result lands in bits 56..63 */
	return (uint32_t)((sums * byte_ones) >> 56);
}

/*
 * Count the trailing zero bits of a 64-bit value.
 *
 * __builtin_ctzll was not available until GCC 3.4.0, so the builtin is
 * only used when the compiler reports itself as GCC >= 3.4; everything
 * else gets the portable fallback below. Note that the predefined minor
 * version macro is spelled __GNUC_MINOR__ (with trailing underscores);
 * an undefined name would silently evaluate to 0 inside #if.
 *
 * The argument must be non-zero: the builtin's result is undefined for
 * 0, whereas the fallback returns 64 in that case.
 */
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3))
#define ewah_bit_ctz64(x) __builtin_ctzll(x)
#else
static inline int ewah_bit_ctz64(uint64_t x)
{
	int n = 0;
	/*
	 * Binary search for the lowest set bit: whenever the low half of
	 * the remaining window is all zero, discard it and account for
	 * its width in n.
	 */
	if ((x & 0xffffffff) == 0) { x >>= 32; n += 32; }
	if ((x &     0xffff) == 0) { x >>= 16; n += 16; }
	if ((x &       0xff) == 0) { x >>=  8; n +=  8; }
	if ((x &        0xf) == 0) { x >>=  4; n +=  4; }
	if ((x &        0x3) == 0) { x >>=  2; n +=  2; }
	if ((x &        0x1) == 0) { x >>=  1; n +=  1; }
	/* x is only 0 here if the input was 0; that yields 63 + 1 = 64 */
	return n + !x;
}
#endif

/**
 * An EWAH compressed bitmap, as allocated by ewah_new().
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names alone; this header does not define their semantics. Confirm
 * against the implementation before relying on them.
 */
struct ewah_bitmap {
	eword_t *buffer;	/* word storage for the bitmap */
	size_t buffer_size;	/* presumably: number of words in use */
	size_t alloc_size;	/* presumably: number of words allocated */
	size_t bit_size;	/* presumably: size of the bitmap in bits */
	eword_t *rlw;		/* presumably: current running-length word -- TODO confirm */
};

/**
 * Callback type accepted by ewah_each_bit() and bitmap_each_bit().
 *
 * `pos` is a bit position; the second argument is an untyped pointer,
 * presumably the payload/data pointer that the caller handed to the
 * each_bit function -- confirm in the implementation.
 */
typedef void (*ewah_callback)(size_t pos, void *);

/*
 * Pool-style allocation and release of ewah_bitmap objects.
 *
 * NOTE(review): the pooling behaviour is not visible in this header.
 * Presumably ewah_pool_free() takes back a bitmap obtained from
 * ewah_pool_new() -- confirm in the implementation, including whether
 * these may be mixed with ewah_new()/ewah_free().
 */
struct ewah_bitmap *ewah_pool_new(void);
void ewah_pool_free(struct ewah_bitmap *self);

/**
 * Allocate a new EWAH compressed bitmap.
 *
 * Return: pointer to the new bitmap. See ewah_free() for releasing it.
 */
struct ewah_bitmap *ewah_new(void);

/**
 * Clear all the bits in the bitmap `self`.
 *
 * The bitmap's memory is neither freed nor resized by this call.
 */
void ewah_clear(struct ewah_bitmap *self);

/**
 * Free all the memory of the bitmap `self`.
 */
void ewah_free(struct ewah_bitmap *self);

/**
 * Serialization of a bitmap.
 *
 * The variants differ in their destination, as their signatures show:
 * ewah_serialize_to() takes a writer callback `write_fun` together with
 * an untyped `out` pointer (presumably forwarded to the callback as its
 * first argument), ewah_serialize() and ewah_serialize_native() take a
 * file descriptor, and ewah_serialize_strbuf() takes a strbuf.
 *
 * NOTE(review): the output format, the difference between the plain and
 * "native" variants, and the meaning of the int return values are not
 * visible in this header -- confirm against the implementation.
 */
int ewah_serialize_to(struct ewah_bitmap *self,
		      int (*write_fun)(void *out, const void *buf, size_t len),
		      void *out);
int ewah_serialize(struct ewah_bitmap *self, int fd);
int ewah_serialize_native(struct ewah_bitmap *self, int fd);
int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *);

/**
 * Load a bitmap into `self`, either from the file descriptor `fd`
 * (ewah_deserialize) or from the `len` bytes of memory at `map`
 * (ewah_read_mmap).
 *
 * NOTE(review): the expected input format and the meaning of the int
 * return values are not visible in this header -- confirm against the
 * implementation.
 */
int ewah_deserialize(struct ewah_bitmap *self, int fd);
int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len);

/**
 * Compute a 32-bit checksum for the bitmap `self`.
 *
 * NOTE(review): the checksum algorithm and what exactly it covers are
 * not visible in this header -- confirm against the implementation.
 */
uint32_t ewah_checksum(struct ewah_bitmap *self);

/**
 * Logical not (bitwise negation), applied in-place to the bitmap `self`.
 *
 * This operation is linear time based on the size of the bitmap.
 */
void ewah_not(struct ewah_bitmap *self);

/**
 * Call the given callback with the position of every single bit
 * that has been set on the bitmap `self`.
 *
 * `payload` is an untyped pointer supplied by the caller; presumably it
 * is passed through as the callback's second argument.
 *
 * This is an efficient operation that does not fully decompress
 * the bitmap.
 */
void ewah_each_bit(struct ewah_bitmap *self, ewah_callback callback, void *payload);

/**
 * Set a given bit on the bitmap.
 *
 * The bit at position `i` will be set to true. Because of the
 * way that the bitmap is compressed, a set bit cannot be unset
 * later on.
 *
 * Furthermore, since the bitmap uses streaming compression, bits
 * can only be set incrementally, i.e. in increasing position order.
 *
 * E.g.
 *		ewah_set(bitmap, 1); // ok
 *		ewah_set(bitmap, 76); // ok
 *		ewah_set(bitmap, 77); // ok
 *		ewah_set(bitmap, 8712800127); // ok
 *		ewah_set(bitmap, 25); // failed, assert raised
 */
void ewah_set(struct ewah_bitmap *self, size_t i);

/**
 * Iteration state for walking an ewah_bitmap in uncompressed form;
 * set up with ewah_iterator_init() and advanced with
 * ewah_iterator_next().
 *
 * NOTE(review): this header does not define the meaning of the
 * individual fields. Presumably `buffer`/`buffer_size` refer to the
 * parent bitmap's word storage, `pointer` is the current index into
 * it, and the remaining fields track the run currently being decoded
 * -- confirm against the implementation.
 */
struct ewah_iterator {
	const eword_t *buffer;
	size_t buffer_size;

	size_t pointer;
	eword_t compressed, literals;
	eword_t rl, lw;
	int b;
};

/**
 * Initialize the iterator `it` to run through the bitmap `parent` in
 * uncompressed form.
 *
 * The iterator can be stack allocated. The underlying bitmap must not be freed
 * before the iteration is over.
 *
 * E.g.
 *
 *		struct ewah_bitmap *bitmap = ewah_new();
 *		struct ewah_iterator it;
 *
 *		ewah_iterator_init(&it, bitmap);
 */
void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent);

/**
 * Yield every single word in the bitmap in uncompressed form, one word
 * per call. Each yielded word is an eword_t (64 bits, as typedef'd in
 * this header) where each bit represents an actual bit from the bitmap.
 *
 * The word is stored through `next`; `it` must have been set up with
 * ewah_iterator_init().
 *
 * Return: true if a word was yielded, false if there are no words left
 */
int ewah_iterator_next(eword_t *next, struct ewah_iterator *it);

/**
 * Bitwise OR of two compressed bitmaps.
 *
 * NOTE(review): presumably `out` receives `ewah_i | ewah_j`; whether
 * `out` must be empty or may alias an input is not visible in this
 * header -- confirm against the implementation.
 */
void ewah_or(
	struct ewah_bitmap *ewah_i,
	struct ewah_bitmap *ewah_j,
	struct ewah_bitmap *out);

/**
 * Bitwise AND-NOT of two compressed bitmaps.
 *
 * NOTE(review): presumably `out` receives `ewah_i & ~ewah_j`; the
 * operand order and the requirements on `out` are not visible in this
 * header -- confirm against the implementation.
 */
void ewah_and_not(
	struct ewah_bitmap *ewah_i,
	struct ewah_bitmap *ewah_j,
	struct ewah_bitmap *out);

/**
 * Bitwise XOR of two compressed bitmaps.
 *
 * NOTE(review): presumably `out` receives `ewah_i ^ ewah_j`; the
 * requirements on `out` are not visible in this header -- confirm
 * against the implementation.
 */
void ewah_xor(
	struct ewah_bitmap *ewah_i,
	struct ewah_bitmap *ewah_j,
	struct ewah_bitmap *out);

/**
 * Bitwise AND of two compressed bitmaps.
 *
 * NOTE(review): presumably `out` receives `ewah_i & ewah_j`; the
 * requirements on `out` are not visible in this header -- confirm
 * against the implementation.
 */
void ewah_and(
	struct ewah_bitmap *ewah_i,
	struct ewah_bitmap *ewah_j,
	struct ewah_bitmap *out);

/**
 * Direct word access
 *
 * These operate on whole eword_t words rather than on individual bits.
 *
 * NOTE(review): semantics are inferred from the names and signatures
 * only. Presumably ewah_add_empty_words() appends `number` words whose
 * bits all equal `v`, ewah_add_dirty_words() appends `number` literal
 * words from `buffer` (bit-inverted when `negate` is non-zero), and
 * ewah_add() appends the single word `word`. The meaning of the size_t
 * return values is not visible here -- confirm against the
 * implementation.
 */
size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number);
void ewah_add_dirty_words(
	struct ewah_bitmap *self, const eword_t *buffer, size_t number, int negate);
size_t ewah_add(struct ewah_bitmap *self, eword_t word);


/**
 * Uncompressed, old-school bitmap that can be efficiently compressed
 * into an `ewah_bitmap` (see bitmap_to_ewah()).
 *
 * NOTE(review): presumably `words` is the array of storage words and
 * `word_alloc` is the number of words allocated in it -- confirm
 * against the implementation.
 */
struct bitmap {
	eword_t *words;
	size_t word_alloc;
};

/*
 * Basic operations on an uncompressed bitmap: allocation and release,
 * per-bit set/clear/get at position `pos`, reset, and comparisons
 * between two bitmaps.
 *
 * NOTE(review): none of these are documented in this header. In
 * particular the exact meaning of the int results (bitmap_get,
 * bitmap_equals, bitmap_is_subset), which argument of
 * bitmap_is_subset() is tested against which, and whether
 * bitmap_reset() releases memory must be confirmed against the
 * implementation.
 */
struct bitmap *bitmap_new(void);
void bitmap_set(struct bitmap *self, size_t pos);
void bitmap_clear(struct bitmap *self, size_t pos);
int bitmap_get(struct bitmap *self, size_t pos);
void bitmap_reset(struct bitmap *self);
void bitmap_free(struct bitmap *self);
int bitmap_equals(struct bitmap *self, struct bitmap *other);
int bitmap_is_subset(struct bitmap *self, struct bitmap *super);

/*
 * Conversions between the uncompressed `struct bitmap` and the
 * compressed `struct ewah_bitmap`, in either direction.
 *
 * NOTE(review): presumably each call returns a newly allocated object
 * that the caller owns, leaving the argument untouched -- confirm
 * against the implementation.
 */
struct ewah_bitmap * bitmap_to_ewah(struct bitmap *bitmap);
struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah);

/*
 * Set operations on an uncompressed bitmap. None of them returns a
 * value, and only `other` in bitmap_or() is const-qualified.
 *
 * NOTE(review): presumably the result is stored in `self`
 * (self &= ~other, self |= other) -- confirm against the
 * implementation.
 */
void bitmap_and_not(struct bitmap *self, struct bitmap *other);
void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other);
void bitmap_or(struct bitmap *self, const struct bitmap *other);

/*
 * bitmap_each_bit() takes the same callback type as ewah_each_bit(),
 * plus an untyped `data` pointer; bitmap_popcount() returns a size_t
 * count for the bitmap.
 *
 * NOTE(review): presumably the callback is invoked once per set bit
 * with `data` as its second argument, and the count is the number of
 * set bits -- confirm against the implementation.
 */
void bitmap_each_bit(struct bitmap *self, ewah_callback callback, void *data);
size_t bitmap_popcount(struct bitmap *self);

#endif