diff options
Diffstat (limited to 'xdiff/xprepare.c')
-rw-r--r-- | xdiff/xprepare.c | 143 |
1 files changed, 63 insertions, 80 deletions
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 1689085235..620fc9a657 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -26,6 +26,8 @@ #define XDL_KPDIS_RUN 4 #define XDL_MAX_EQLIMIT 1024 #define XDL_SIMSCAN_WINDOW 100 +#define XDL_GUESS_NLINES1 256 +#define XDL_GUESS_NLINES2 20 typedef struct s_xdlclass { @@ -64,8 +66,6 @@ static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2); static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { - long i; - cf->flags = flags; cf->hbits = xdl_hashbits((unsigned int) size); @@ -80,8 +80,7 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { xdl_cha_free(&cf->ncha); return -1; } - for (i = 0; i < cf->hsize; i++) - cf->rchash[i] = NULL; + memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *)); cf->count = 0; @@ -136,7 +135,7 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { unsigned int hbits; - long i, nrec, hsize, bsize; + long nrec, hsize, bsize; unsigned long hav; char const *blk, *cur, *top, *prev; xrecord_t *crec; @@ -146,96 +145,59 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, char *rchg; long *rindex; - if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) { - - return -1; - } - if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) { - - xdl_cha_free(&xdf->rcha); - return -1; - } - - hbits = xdl_hashbits((unsigned int) narec); - hsize = 1 << hbits; - if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) { - - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; + ha = NULL; + rindex = NULL; + rchg = NULL; + rhash = NULL; + recs = NULL; + + if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) + goto abort; + if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) + goto abort; + + if (xpp->flags & XDF_HISTOGRAM_DIFF) + hbits = hsize = 0; + else { + hbits = xdl_hashbits((unsigned int) narec); + hsize = 1 << hbits; + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); } - for (i = 0; i < hsize; i++) - rhash[i] = NULL; nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { - for (top = blk + bsize;;) { - if (cur >= top) { - if (!(cur = blk = xdl_mmfile_next(mf, &bsize))) - break; - top = blk + bsize; - } + for (top = blk + bsize; cur < top; ) { prev = cur; hav = xdl_hash_record(&cur, top, xpp->flags); if (nrec >= narec) { narec *= 2; - if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) + goto abort; recs = rrecs; } - if (!(crec = xdl_cha_alloc(&xdf->rcha))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(crec = xdl_cha_alloc(&xdf->rcha))) + goto abort; crec->ptr = prev; crec->size = (long) (cur - prev); crec->ha = hav; recs[nrec++] = crec; - if (xdl_classify_record(cf, rhash, hbits, crec) < 0) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && + xdl_classify_record(cf, rhash, hbits, crec) < 0) + goto abort; } } - if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) + goto abort; memset(rchg, 0, (nrec + 2) * sizeof(char)); - if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) { - - xdl_free(rchg); - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } - if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) { - - xdl_free(rindex); - xdl_free(rchg); - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) + goto abort; + if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) + goto abort; xdf->nrec = nrec; xdf->recs = recs; @@ -249,6 +211,15 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdf->dend = nrec - 1; return 0; + +abort: + xdl_free(ha); + xdl_free(rindex); + xdl_free(rchg); + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; } @@ -265,13 +236,23 @@ static void xdl_free_ctx(xdfile_t *xdf) { int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdfenv_t *xe) { - long enl1, enl2; + long enl1, enl2, sample; xdlclassifier_t cf; - enl1 = xdl_guess_lines(mf1) + 1; - enl2 = xdl_guess_lines(mf2) + 1; + /* + * For histogram diff, we can afford a smaller sample size and + * thus a poorer estimate of the number of lines, as the hash + * table (rhash) won't be filled up/grown. The number of lines + * (nrecs) will be updated correctly anyway by + * xdl_prepare_ctx(). + */ + sample = xpp->flags & XDF_HISTOGRAM_DIFF ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1; + + enl1 = xdl_guess_lines(mf1, sample) + 1; + enl2 = xdl_guess_lines(mf2, sample) + 1; - if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) { + if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && + xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) { return -1; } @@ -288,9 +269,11 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, return -1; } - xdl_free_classifier(&cf); + if (!(xpp->flags & XDF_HISTOGRAM_DIFF)) + xdl_free_classifier(&cf); if (!(xpp->flags & XDF_PATIENCE_DIFF) && + !(xpp->flags & XDF_HISTOGRAM_DIFF) && xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) { xdl_free_ctx(&xe->xdf2); |