author    Junio C Hamano <junkio@cox.net>    2006-04-27 12:48:26 -0700
committer Junio C Hamano <junkio@cox.net>    2006-04-27 12:48:26 -0700
commit    b8c59e6ad30697e7313b28ec0f7c7c9f1e4c7b5e (patch)
tree      3ddf449c06c3da3ab102f7747e365b574e77db94 /gsimm.c
parent    Merge branch 'master' into next (diff)
Retire rabinpoly fingerprinting code
For now let's retire this and reintroduce it as part of the updated pack-objects series from Geert when it is ready.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Diffstat (limited to 'gsimm.c')
-rw-r--r--    gsimm.c    94
1 file changed, 0 insertions, 94 deletions
diff --git a/gsimm.c b/gsimm.c
deleted file mode 100644
index 7024bf8f58..0000000000
--- a/gsimm.c
+++ /dev/null
@@ -1,94 +0,0 @@
-#include "rabinpoly.h"
-#include "gsimm.h"
-
-/* Has to be power of two. Since the Rabin hash only has 63
- usable bits, the number of hashes is limited to 32.
- Lower powers of two could be used for speeding up processing
- of very large files. */
-#define NUM_HASHES_PER_CHAR 32
-
-/* Size of cache used to eliminate duplicate substrings.
- Make small enough to comfortably fit in L1 cache. */
-#define DUP_CACHE_SIZE 256
-
-/* For the final counting, do not count each bit individually, but
- group them. Must be power of two, at most NUM_HASHES_PER_CHAR.
- However, larger sizes result in higher cache usage. Use 8 bits
- per group for efficient processing of large files on fast machines
- with decent caches, or 4 bits for faster processing of small files
- and for machines with small caches. */
-#define GROUP_BITS 4
-#define GROUP_COUNTERS (1<<GROUP_BITS)
-
-static void freq_to_md(u_char *md, int *freq)
-{ int j, k;
-
- for (j = 0; j < MD_LENGTH; j++)
- { u_char ch = 0;
-
- for (k = 0; k < 8; k++) ch = 2*ch + (freq[8*j+k] > 0);
- md[j] = ch;
- }
- bzero (freq, sizeof(freq[0]) * MD_BITS);
-}
-
-void gb_simm_process(u_char *data, unsigned len, u_char *md)
-{ size_t j = 0;
- u_int32_t ofs;
- u_int32_t dup_cache[DUP_CACHE_SIZE];
- u_int32_t count [MD_BITS * (GROUP_COUNTERS/GROUP_BITS)];
- int freq[MD_BITS];
-
- bzero (freq, sizeof(freq[0]) * MD_BITS);
- bzero (dup_cache, DUP_CACHE_SIZE * sizeof (u_int32_t));
- bzero (count, (MD_BITS * (GROUP_COUNTERS/GROUP_BITS) * sizeof (u_int32_t)));
-
- /* Ignore incomplete substrings */
- while (j < len && j < RABIN_WINDOW_SIZE) rabin_slide8 (data[j++]);
-
- while (j < len)
- { u_int64_t hash;
- u_int32_t ofs, sum;
- u_char idx;
- int k;
-
- hash = rabin_slide8 (data[j++]);
-
- /* In order to update a much larger frequency table
- with only 32 bits of checksum, randomly select a
- part of the table to update. The selection should
- only depend on the content of the represented data,
- and be independent of the bits used for the update.
-
- Instead of updating 32 individual counters, process
- the checksum in MD_BITS / GROUP_BITS groups of
- GROUP_BITS bits, and count the frequency of each bit pattern.
- */
-
- idx = (hash >> 32);
- sum = (u_int32_t) hash;
- ofs = idx % (MD_BITS / NUM_HASHES_PER_CHAR) * NUM_HASHES_PER_CHAR;
- idx %= DUP_CACHE_SIZE;
- if (dup_cache[idx] != sum)
- { dup_cache[idx] = sum;
- for (k = 0; k < NUM_HASHES_PER_CHAR / GROUP_BITS; k++)
- { count[ofs * GROUP_COUNTERS / GROUP_BITS + (sum % GROUP_COUNTERS)]++;
- ofs += GROUP_BITS;
- sum >>= GROUP_BITS;
- } } }
-
- /* Distribute the occurrences of each bit group over the frequency table. */
- for (ofs = 0; ofs < MD_BITS; ofs += GROUP_BITS)
- { int j;
- for (j = 0; j < GROUP_COUNTERS; j++)
- { int k;
- for (k = 0; k < GROUP_BITS; k++)
- { freq[ofs + k] += ((1<<k) & j)
- ? count[ofs * GROUP_COUNTERS / GROUP_BITS + j]
- : -count[ofs * GROUP_COUNTERS / GROUP_BITS + j];
- } } }
-
- if (md)
- { rabin_reset();
- freq_to_md (md, freq);
-} }
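
The file removed above exposed a single entry point, gb_simm_process(data, len, md), which folds a buffer into an MD_LENGTH-byte similarity fingerprint whose bits record which Rabin-hash bit frequencies came out positive. As a rough picture of how such a fingerprint could be consumed, here is a minimal, hypothetical caller sketch; it is not part of the retired code. It assumes gsimm.h declares gb_simm_process(), MD_LENGTH and MD_BITS (all referenced in the deleted file), that u_char comes from <sys/types.h>, and that comparing two fingerprints by Hamming distance is a sensible similarity measure; md_distance() and the toy buffers are invented for illustration.

/* Hypothetical caller for the retired gb_simm_process() interface.
 * Not part of this commit; a sketch only. */
#include <stdio.h>
#include <sys/types.h>
#include "gsimm.h"

/* Count differing bits between two fingerprints; fewer differing
 * bits suggests more similar inputs (assumed comparison, not taken
 * from the deleted code). */
static unsigned md_distance(const u_char *a, const u_char *b)
{
	unsigned d = 0;
	int i, k;

	for (i = 0; i < MD_LENGTH; i++)
		for (k = 0; k < 8; k++)
			d += ((a[i] ^ b[i]) >> k) & 1;
	return d;
}

int main(void)
{
	u_char md1[MD_LENGTH], md2[MD_LENGTH];
	/* Toy inputs; anything shorter than RABIN_WINDOW_SIZE yields an
	 * all-zero fingerprint, so real callers pass whole blobs. */
	u_char buf1[] = "some blob contents ...";
	u_char buf2[] = "some blob contents, slightly edited ...";

	gb_simm_process(buf1, sizeof(buf1) - 1, md1);
	gb_simm_process(buf2, sizeof(buf2) - 1, md2);
	printf("differing fingerprint bits: %u of %d\n",
	       md_distance(md1, md2), MD_BITS);
	return 0;
}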