diff options
Diffstat (limited to 'ewah')
-rw-r--r-- | ewah/bitmap.c | 39 | ||||
-rw-r--r-- | ewah/ewah_bitmap.c | 249 | ||||
-rw-r--r-- | ewah/ewah_io.c | 116 | ||||
-rw-r--r-- | ewah/ewah_rlw.c | 8 | ||||
-rw-r--r-- | ewah/ewok.h | 37 | ||||
-rw-r--r-- | ewah/ewok_rlw.h | 5 |
6 files changed, 49 insertions, 405 deletions
diff --git a/ewah/bitmap.c b/ewah/bitmap.c index 756bdd050e..d8cec585af 100644 --- a/ewah/bitmap.c +++ b/ewah/bitmap.c @@ -22,21 +22,26 @@ #define EWAH_MASK(x) ((eword_t)1 << (x % BITS_IN_EWORD)) #define EWAH_BLOCK(x) (x / BITS_IN_EWORD) -struct bitmap *bitmap_new(void) +struct bitmap *bitmap_word_alloc(size_t word_alloc) { struct bitmap *bitmap = xmalloc(sizeof(struct bitmap)); - bitmap->words = xcalloc(32, sizeof(eword_t)); - bitmap->word_alloc = 32; + bitmap->words = xcalloc(word_alloc, sizeof(eword_t)); + bitmap->word_alloc = word_alloc; return bitmap; } +struct bitmap *bitmap_new(void) +{ + return bitmap_word_alloc(32); +} + void bitmap_set(struct bitmap *self, size_t pos) { size_t block = EWAH_BLOCK(pos); if (block >= self->word_alloc) { size_t old_size = self->word_alloc; - self->word_alloc = block * 2; + self->word_alloc = block ? block * 2 : 1; REALLOC_ARRAY(self->words, self->word_alloc); memset(self->words + old_size, 0x0, (self->word_alloc - old_size) * sizeof(eword_t)); @@ -45,7 +50,7 @@ void bitmap_set(struct bitmap *self, size_t pos) self->words[block] |= EWAH_MASK(pos); } -void bitmap_clear(struct bitmap *self, size_t pos) +void bitmap_unset(struct bitmap *self, size_t pos) { size_t block = EWAH_BLOCK(pos); @@ -137,30 +142,6 @@ void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other) self->words[i++] |= word; } -void bitmap_each_bit(struct bitmap *self, ewah_callback callback, void *data) -{ - size_t pos = 0, i; - - for (i = 0; i < self->word_alloc; ++i) { - eword_t word = self->words[i]; - uint32_t offset; - - if (word == (eword_t)~0) { - for (offset = 0; offset < BITS_IN_EWORD; ++offset) - callback(pos++, data); - } else { - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) - break; - - offset += ewah_bit_ctz64(word >> offset); - callback(pos + offset, data); - } - pos += BITS_IN_EWORD; - } - } -} - size_t bitmap_popcount(struct bitmap *self) { size_t i, count = 0; diff --git a/ewah/ewah_bitmap.c b/ewah/ewah_bitmap.c index b9fdda1d3d..d59b1afe3d 100644 --- a/ewah/ewah_bitmap.c +++ b/ewah/ewah_bitmap.c @@ -276,6 +276,18 @@ void ewah_each_bit(struct ewah_bitmap *self, void (*callback)(size_t, void*), vo } } +/** + * Clear all the bits in the bitmap. Does not free or resize + * memory. + */ +static void ewah_clear(struct ewah_bitmap *self) +{ + self->buffer_size = 1; + self->buffer[0] = 0; + self->bit_size = 0; + self->rlw = self->buffer; +} + struct ewah_bitmap *ewah_new(void) { struct ewah_bitmap *self; @@ -288,14 +300,6 @@ struct ewah_bitmap *ewah_new(void) return self; } -void ewah_clear(struct ewah_bitmap *self) -{ - self->buffer_size = 1; - self->buffer[0] = 0; - self->bit_size = 0; - self->rlw = self->buffer; -} - void ewah_free(struct ewah_bitmap *self) { if (!self) @@ -376,25 +380,6 @@ void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent) read_new_rlw(it); } -void ewah_not(struct ewah_bitmap *self) -{ - size_t pointer = 0; - - while (pointer < self->buffer_size) { - eword_t *word = &self->buffer[pointer]; - size_t literals, k; - - rlw_xor_run_bit(word); - ++pointer; - - literals = rlw_get_literal_words(word); - for (k = 0; k < literals; ++k) { - self->buffer[pointer] = ~self->buffer[pointer]; - ++pointer; - } - } -} - void ewah_xor( struct ewah_bitmap *ewah_i, struct ewah_bitmap *ewah_j, @@ -459,216 +444,6 @@ void ewah_xor( out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size); } -void ewah_and( - struct ewah_bitmap *ewah_i, - struct ewah_bitmap *ewah_j, - struct ewah_bitmap *out) -{ - struct rlw_iterator rlw_i; - struct rlw_iterator rlw_j; - size_t literals; - - rlwit_init(&rlw_i, ewah_i); - rlwit_init(&rlw_j, ewah_j); - - while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) { - while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) { - struct rlw_iterator *prey, *predator; - - if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) { - prey = &rlw_i; - predator = &rlw_j; - } else { - prey = &rlw_j; - predator = &rlw_i; - } - - if (predator->rlw.running_bit == 0) { - ewah_add_empty_words(out, 0, - predator->rlw.running_len); - rlwit_discard_first_words(prey, - predator->rlw.running_len); - rlwit_discard_first_words(predator, - predator->rlw.running_len); - } else { - size_t index = rlwit_discharge(prey, out, - predator->rlw.running_len, 0); - ewah_add_empty_words(out, 0, - predator->rlw.running_len - index); - rlwit_discard_first_words(predator, - predator->rlw.running_len); - } - } - - literals = min_size( - rlw_i.rlw.literal_words, - rlw_j.rlw.literal_words); - - if (literals) { - size_t k; - - for (k = 0; k < literals; ++k) { - ewah_add(out, - rlw_i.buffer[rlw_i.literal_word_start + k] & - rlw_j.buffer[rlw_j.literal_word_start + k] - ); - } - - rlwit_discard_first_words(&rlw_i, literals); - rlwit_discard_first_words(&rlw_j, literals); - } - } - - if (rlwit_word_size(&rlw_i) > 0) - rlwit_discharge_empty(&rlw_i, out); - else - rlwit_discharge_empty(&rlw_j, out); - - out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size); -} - -void ewah_and_not( - struct ewah_bitmap *ewah_i, - struct ewah_bitmap *ewah_j, - struct ewah_bitmap *out) -{ - struct rlw_iterator rlw_i; - struct rlw_iterator rlw_j; - size_t literals; - - rlwit_init(&rlw_i, ewah_i); - rlwit_init(&rlw_j, ewah_j); - - while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) { - while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) { - struct rlw_iterator *prey, *predator; - - if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) { - prey = &rlw_i; - predator = &rlw_j; - } else { - prey = &rlw_j; - predator = &rlw_i; - } - - if ((predator->rlw.running_bit && prey == &rlw_i) || - (!predator->rlw.running_bit && prey != &rlw_i)) { - ewah_add_empty_words(out, 0, - predator->rlw.running_len); - rlwit_discard_first_words(prey, - predator->rlw.running_len); - rlwit_discard_first_words(predator, - predator->rlw.running_len); - } else { - size_t index; - int negate_words; - - negate_words = (&rlw_i != prey); - index = rlwit_discharge(prey, out, - predator->rlw.running_len, negate_words); - ewah_add_empty_words(out, negate_words, - predator->rlw.running_len - index); - rlwit_discard_first_words(predator, - predator->rlw.running_len); - } - } - - literals = min_size( - rlw_i.rlw.literal_words, - rlw_j.rlw.literal_words); - - if (literals) { - size_t k; - - for (k = 0; k < literals; ++k) { - ewah_add(out, - rlw_i.buffer[rlw_i.literal_word_start + k] & - ~(rlw_j.buffer[rlw_j.literal_word_start + k]) - ); - } - - rlwit_discard_first_words(&rlw_i, literals); - rlwit_discard_first_words(&rlw_j, literals); - } - } - - if (rlwit_word_size(&rlw_i) > 0) - rlwit_discharge(&rlw_i, out, ~0, 0); - else - rlwit_discharge_empty(&rlw_j, out); - - out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size); -} - -void ewah_or( - struct ewah_bitmap *ewah_i, - struct ewah_bitmap *ewah_j, - struct ewah_bitmap *out) -{ - struct rlw_iterator rlw_i; - struct rlw_iterator rlw_j; - size_t literals; - - rlwit_init(&rlw_i, ewah_i); - rlwit_init(&rlw_j, ewah_j); - - while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) { - while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) { - struct rlw_iterator *prey, *predator; - - if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) { - prey = &rlw_i; - predator = &rlw_j; - } else { - prey = &rlw_j; - predator = &rlw_i; - } - - if (predator->rlw.running_bit) { - ewah_add_empty_words(out, 0, - predator->rlw.running_len); - rlwit_discard_first_words(prey, - predator->rlw.running_len); - rlwit_discard_first_words(predator, - predator->rlw.running_len); - } else { - size_t index = rlwit_discharge(prey, out, - predator->rlw.running_len, 0); - ewah_add_empty_words(out, 0, - predator->rlw.running_len - index); - rlwit_discard_first_words(predator, - predator->rlw.running_len); - } - } - - literals = min_size( - rlw_i.rlw.literal_words, - rlw_j.rlw.literal_words); - - if (literals) { - size_t k; - - for (k = 0; k < literals; ++k) { - ewah_add(out, - rlw_i.buffer[rlw_i.literal_word_start + k] | - rlw_j.buffer[rlw_j.literal_word_start + k] - ); - } - - rlwit_discard_first_words(&rlw_i, literals); - rlwit_discard_first_words(&rlw_j, literals); - } - } - - if (rlwit_word_size(&rlw_i) > 0) - rlwit_discharge(&rlw_i, out, ~0, 0); - else - rlwit_discharge(&rlw_j, out, ~0, 0); - - out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size); -} - - #define BITMAP_POOL_MAX 16 static struct ewah_bitmap *bitmap_pool[BITMAP_POOL_MAX]; static size_t bitmap_pool_size; diff --git a/ewah/ewah_io.c b/ewah/ewah_io.c index bed1994551..9035ee65ea 100644 --- a/ewah/ewah_io.c +++ b/ewah/ewah_io.c @@ -20,32 +20,6 @@ #include "ewok.h" #include "strbuf.h" -int ewah_serialize_native(struct ewah_bitmap *self, int fd) -{ - uint32_t write32; - size_t to_write = self->buffer_size * 8; - - /* 32 bit -- bit size for the map */ - write32 = (uint32_t)self->bit_size; - if (write(fd, &write32, 4) != 4) - return -1; - - /** 32 bit -- number of compressed 64-bit words */ - write32 = (uint32_t)self->buffer_size; - if (write(fd, &write32, 4) != 4) - return -1; - - if (write(fd, self->buffer, to_write) != to_write) - return -1; - - /** 32 bit -- position for the RLW */ - write32 = self->rlw - self->buffer; - if (write(fd, &write32, 4) != 4) - return -1; - - return (3 * 4) + to_write; -} - int ewah_serialize_to(struct ewah_bitmap *self, int (*write_fun)(void *, const void *, size_t), void *data) @@ -100,16 +74,6 @@ int ewah_serialize_to(struct ewah_bitmap *self, return (3 * 4) + (self->buffer_size * 8); } -static int write_helper(void *fd, const void *buf, size_t len) -{ - return write((intptr_t)fd, buf, len); -} - -int ewah_serialize(struct ewah_bitmap *self, int fd) -{ - return ewah_serialize_to(self, write_helper, (void *)(intptr_t)fd); -} - static int write_strbuf(void *user_data, const void *data, size_t len) { struct strbuf *sb = user_data; @@ -122,16 +86,23 @@ int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *sb) return ewah_serialize_to(self, write_strbuf, sb); } -int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len) +ssize_t ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len) { const uint8_t *ptr = map; + size_t data_len; size_t i; + if (len < sizeof(uint32_t)) + return error("corrupt ewah bitmap: eof before bit size"); self->bit_size = get_be32(ptr); ptr += sizeof(uint32_t); + len -= sizeof(uint32_t); + if (len < sizeof(uint32_t)) + return error("corrupt ewah bitmap: eof before length"); self->buffer_size = self->alloc_size = get_be32(ptr); ptr += sizeof(uint32_t); + len -= sizeof(uint32_t); REALLOC_ARRAY(self->buffer, self->alloc_size); @@ -141,68 +112,23 @@ int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len) * the endianness conversion in a separate pass to ensure * we're loading 8-byte aligned words. */ - memcpy(self->buffer, ptr, self->buffer_size * sizeof(eword_t)); - ptr += self->buffer_size * sizeof(eword_t); + data_len = st_mult(self->buffer_size, sizeof(eword_t)); + if (len < data_len) + return error("corrupt ewah bitmap: eof in data " + "(%"PRIuMAX" bytes short)", + (uintmax_t)(data_len - len)); + memcpy(self->buffer, ptr, data_len); + ptr += data_len; + len -= data_len; for (i = 0; i < self->buffer_size; ++i) self->buffer[i] = ntohll(self->buffer[i]); + if (len < sizeof(uint32_t)) + return error("corrupt ewah bitmap: eof before rlw"); self->rlw = self->buffer + get_be32(ptr); + ptr += sizeof(uint32_t); + len -= sizeof(uint32_t); - return (3 * 4) + (self->buffer_size * 8); -} - -int ewah_deserialize(struct ewah_bitmap *self, int fd) -{ - size_t i; - eword_t dump[2048]; - const size_t words_per_dump = sizeof(dump) / sizeof(eword_t); - uint32_t bitsize, word_count, rlw_pos; - - eword_t *buffer = NULL; - size_t words_left; - - ewah_clear(self); - - /* 32 bit -- bit size for the map */ - if (read(fd, &bitsize, 4) != 4) - return -1; - - self->bit_size = (size_t)ntohl(bitsize); - - /** 32 bit -- number of compressed 64-bit words */ - if (read(fd, &word_count, 4) != 4) - return -1; - - self->buffer_size = self->alloc_size = (size_t)ntohl(word_count); - REALLOC_ARRAY(self->buffer, self->alloc_size); - - /** 64 bit x N -- compressed words */ - buffer = self->buffer; - words_left = self->buffer_size; - - while (words_left >= words_per_dump) { - if (read(fd, dump, sizeof(dump)) != sizeof(dump)) - return -1; - - for (i = 0; i < words_per_dump; ++i, ++buffer) - *buffer = ntohll(dump[i]); - - words_left -= words_per_dump; - } - - if (words_left) { - if (read(fd, dump, words_left * 8) != words_left * 8) - return -1; - - for (i = 0; i < words_left; ++i, ++buffer) - *buffer = ntohll(dump[i]); - } - - /** 32 bit -- position for the RLW */ - if (read(fd, &rlw_pos, 4) != 4) - return -1; - - self->rlw = self->buffer + ntohl(rlw_pos); - return 0; + return ptr - (const uint8_t *)map; } diff --git a/ewah/ewah_rlw.c b/ewah/ewah_rlw.c index b9643b7d0f..5093d43e2f 100644 --- a/ewah/ewah_rlw.c +++ b/ewah/ewah_rlw.c @@ -104,11 +104,3 @@ size_t rlwit_discharge( return index; } - -void rlwit_discharge_empty(struct rlw_iterator *it, struct ewah_bitmap *out) -{ - while (rlwit_word_size(it) > 0) { - ewah_add_empty_words(out, 0, rlwit_word_size(it)); - rlwit_discard_first_words(it, rlwit_word_size(it)); - } -} diff --git a/ewah/ewok.h b/ewah/ewok.h index dc43d05b64..011852bef1 100644 --- a/ewah/ewok.h +++ b/ewah/ewok.h @@ -73,12 +73,6 @@ void ewah_pool_free(struct ewah_bitmap *self); struct ewah_bitmap *ewah_new(void); /** - * Clear all the bits in the bitmap. Does not free or resize - * memory. - */ -void ewah_clear(struct ewah_bitmap *self); - -/** * Free all the memory of the bitmap */ void ewah_free(struct ewah_bitmap *self); @@ -86,23 +80,13 @@ void ewah_free(struct ewah_bitmap *self); int ewah_serialize_to(struct ewah_bitmap *self, int (*write_fun)(void *out, const void *buf, size_t len), void *out); -int ewah_serialize(struct ewah_bitmap *self, int fd); -int ewah_serialize_native(struct ewah_bitmap *self, int fd); int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *); -int ewah_deserialize(struct ewah_bitmap *self, int fd); -int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len); +ssize_t ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len); uint32_t ewah_checksum(struct ewah_bitmap *self); /** - * Logical not (bitwise negation) in-place on the bitmap - * - * This operation is linear time based on the size of the bitmap. - */ -void ewah_not(struct ewah_bitmap *self); - -/** * Call the given callback with the position of every single bit * that has been set on the bitmap. * @@ -164,26 +148,11 @@ void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent); */ int ewah_iterator_next(eword_t *next, struct ewah_iterator *it); -void ewah_or( - struct ewah_bitmap *ewah_i, - struct ewah_bitmap *ewah_j, - struct ewah_bitmap *out); - -void ewah_and_not( - struct ewah_bitmap *ewah_i, - struct ewah_bitmap *ewah_j, - struct ewah_bitmap *out); - void ewah_xor( struct ewah_bitmap *ewah_i, struct ewah_bitmap *ewah_j, struct ewah_bitmap *out); -void ewah_and( - struct ewah_bitmap *ewah_i, - struct ewah_bitmap *ewah_j, - struct ewah_bitmap *out); - /** * Direct word access */ @@ -203,8 +172,9 @@ struct bitmap { }; struct bitmap *bitmap_new(void); +struct bitmap *bitmap_word_alloc(size_t word_alloc); void bitmap_set(struct bitmap *self, size_t pos); -void bitmap_clear(struct bitmap *self, size_t pos); +void bitmap_unset(struct bitmap *self, size_t pos); int bitmap_get(struct bitmap *self, size_t pos); void bitmap_reset(struct bitmap *self); void bitmap_free(struct bitmap *self); @@ -218,7 +188,6 @@ void bitmap_and_not(struct bitmap *self, struct bitmap *other); void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other); void bitmap_or(struct bitmap *self, const struct bitmap *other); -void bitmap_each_bit(struct bitmap *self, ewah_callback callback, void *data); size_t bitmap_popcount(struct bitmap *self); #endif diff --git a/ewah/ewok_rlw.h b/ewah/ewok_rlw.h index bb3c6ff7e0..bafa24f4c3 100644 --- a/ewah/ewok_rlw.h +++ b/ewah/ewok_rlw.h @@ -19,6 +19,8 @@ #ifndef __EWOK_RLW_H__ #define __EWOK_RLW_H__ +#include "ewok.h" + #define RLW_RUNNING_BITS (sizeof(eword_t) * 4) #define RLW_LITERAL_BITS (sizeof(eword_t) * 8 - 1 - RLW_RUNNING_BITS) @@ -29,7 +31,7 @@ #define RLW_RUNNING_LEN_PLUS_BIT (((eword_t)1 << (RLW_RUNNING_BITS + 1)) - 1) -static int rlw_get_run_bit(const eword_t *word) +static inline int rlw_get_run_bit(const eword_t *word) { return *word & (eword_t)1; } @@ -98,7 +100,6 @@ void rlwit_init(struct rlw_iterator *it, struct ewah_bitmap *bitmap); void rlwit_discard_first_words(struct rlw_iterator *it, size_t x); size_t rlwit_discharge( struct rlw_iterator *it, struct ewah_bitmap *out, size_t max, int negate); -void rlwit_discharge_empty(struct rlw_iterator *it, struct ewah_bitmap *out); static inline size_t rlwit_word_size(struct rlw_iterator *it) { |