summaryrefslogtreecommitdiff
path: root/gsimm.h
diff options
context:
space:
mode:
Diffstat (limited to 'gsimm.h')
-rw-r--r--gsimm.h28
1 files changed, 28 insertions, 0 deletions
diff --git a/gsimm.h b/gsimm.h
new file mode 100644
index 0000000000..4b023b91a9
--- /dev/null
+++ b/gsimm.h
@@ -0,0 +1,28 @@
+#ifndef GSIMM_H
+#define GSIMM_H
+
+/* Length of file message digest (MD) in bytes. Longer MD's are
+ better, but increase processing time for diminishing returns.
+ Must be multiple of NUM_HASHES_PER_CHAR / 8, and at least 24
+ for good results
+*/
+#define MD_LENGTH 32
+#define MD_BITS (MD_LENGTH * 8)
+
+/* The MIN_FILE_SIZE indicates the absolute minimal file size that
+ can be processed. As indicated above, the first and last
+ RABIN_WINDOW_SIZE - 1 bytes are skipped.
+ In order to get at least an average of 12 samples
+ per bit in the final message digest, require at least 3 * MD_LENGTH
+ complete windows in the file. */
+#define MIN_FILE_SIZE (3 * MD_LENGTH + 2 * (RABIN_WINDOW_SIZE - 1))
+
+/* Limit matching algorithm to files less than 256 MB, so we can use
+ 32 bit integers everywhere without fear of overflow. For larger
+ files we should add logic to mmap the file by piece and accumulate
+ the frequency counts. */
+#define MAX_FILE_SIZE (256*1024*1024 - 1)
+
+void gb_simm_process(u_char *data, unsigned len, u_char *md);
+
+#endif