summaryrefslogtreecommitdiff
path: root/xdiff/xutils.c
diff options
context:
space:
mode:
Diffstat (limited to 'xdiff/xutils.c')
-rw-r--r--xdiff/xutils.c154
1 files changed, 42 insertions, 112 deletions
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 62cb23dfd3..88e5995535 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -13,8 +13,8 @@
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
*
* Davide Libenzi <davidel@xmailserver.org>
*
@@ -156,6 +156,24 @@ int xdl_blankline(const char *line, long size, long flags)
return (i == size);
}
+/*
+ * Have we eaten everything on the line, except for an optional
+ * CR at the very end?
+ */
+static int ends_with_optional_cr(const char *l, long s, long i)
+{
+ int complete = s && l[s-1] == '\n';
+
+ if (complete)
+ s--;
+ if (s == i)
+ return 1;
+ /* do not ignore CR at the end of an incomplete line */
+ if (complete && s == i + 1 && l[i] == '\r')
+ return 1;
+ return 0;
+}
+
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
{
int i1, i2;
@@ -170,7 +188,8 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
/*
* -w matches everything that matches with -b, and -b in turn
- * matches everything that matches with --ignore-space-at-eol.
+ * matches everything that matches with --ignore-space-at-eol,
+ * which in turn matches everything that matches with --ignore-cr-at-eol.
*
* Each flavor of ignoring needs different logic to skip whitespaces
* while we have both sides to compare.
@@ -200,8 +219,18 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
return 0;
}
} else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) {
- while (i1 < s1 && i2 < s2 && l1[i1++] == l2[i2++])
- ; /* keep going */
+ while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
+ i1++;
+ i2++;
+ }
+ } else if (flags & XDF_IGNORE_CR_AT_EOL) {
+ /* Find the first difference and see how the line ends */
+ while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
+ i1++;
+ i2++;
+ }
+ return (ends_with_optional_cr(l1, s1, i1) &&
+ ends_with_optional_cr(l2, s2, i2));
}
/*
@@ -228,9 +257,16 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
char const *top, long flags) {
unsigned long ha = 5381;
char const *ptr = *data;
+ int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL;
for (; ptr < top && *ptr != '\n'; ptr++) {
- if (XDL_ISSPACE(*ptr)) {
+ if (cr_at_eol_only) {
+ /* do not ignore CR at the end of an incomplete line */
+ if (*ptr == '\r' &&
+ (ptr + 1 < top && ptr[1] == '\n'))
+ continue;
+ }
+ else if (XDL_ISSPACE(*ptr)) {
const char *ptr2 = ptr;
int at_eol;
while (ptr + 1 < top && XDL_ISSPACE(ptr[1])
@@ -262,110 +298,6 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
return ha;
}
-#ifdef XDL_FAST_HASH
-
-#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
-
-#define ONEBYTES REPEAT_BYTE(0x01)
-#define NEWLINEBYTES REPEAT_BYTE(0x0a)
-#define HIGHBITS REPEAT_BYTE(0x80)
-
-/* Return the high bit set in the first byte that is a zero */
-static inline unsigned long has_zero(unsigned long a)
-{
- return ((a - ONEBYTES) & ~a) & HIGHBITS;
-}
-
-static inline long count_masked_bytes(unsigned long mask)
-{
- if (sizeof(long) == 8) {
- /*
- * Jan Achrenius on G+: microoptimized version of
- * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
- * that works for the bytemasks without having to
- * mask them first.
- */
- /*
- * return mask * 0x0001020304050608 >> 56;
- *
- * Doing it like this avoids warnings on 32-bit machines.
- */
- long a = (REPEAT_BYTE(0x01) / 0xff + 1);
- return mask * a >> (sizeof(long) * 7);
- } else {
- /* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
- /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
- long a = (0x0ff0001 + mask) >> 23;
- /* Fix the 1 for 00 case */
- return a & mask;
- }
-}
-
-unsigned long xdl_hash_record(char const **data, char const *top, long flags)
-{
- unsigned long hash = 5381;
- unsigned long a = 0, mask = 0;
- char const *ptr = *data;
- char const *end = top - sizeof(unsigned long) + 1;
-
- if (flags & XDF_WHITESPACE_FLAGS)
- return xdl_hash_record_with_whitespace(data, top, flags);
-
- ptr -= sizeof(unsigned long);
- do {
- hash += hash << 5;
- hash ^= a;
- ptr += sizeof(unsigned long);
- if (ptr >= end)
- break;
- a = *(unsigned long *)ptr;
- /* Do we have any '\n' bytes in this word? */
- mask = has_zero(a ^ NEWLINEBYTES);
- } while (!mask);
-
- if (ptr >= end) {
- /*
- * There is only a partial word left at the end of the
- * buffer. Because we may work with a memory mapping,
- * we have to grab the rest byte by byte instead of
- * blindly reading it.
- *
- * To avoid problems with masking in a signed value,
- * we use an unsigned char here.
- */
- const char *p;
- for (p = top - 1; p >= ptr; p--)
- a = (a << 8) + *((const unsigned char *)p);
- mask = has_zero(a ^ NEWLINEBYTES);
- if (!mask)
- /*
- * No '\n' found in the partial word. Make a
- * mask that matches what we read.
- */
- mask = 1UL << (8 * (top - ptr) + 7);
- }
-
- /* The mask *below* the first high bit set */
- mask = (mask - 1) & ~mask;
- mask >>= 7;
- hash += hash << 5;
- hash ^= a & mask;
-
- /* Advance past the last (possibly partial) word */
- ptr += count_masked_bytes(mask);
-
- if (ptr < top) {
- assert(*ptr == '\n');
- ptr++;
- }
-
- *data = ptr;
-
- return hash;
-}
-
-#else /* XDL_FAST_HASH */
-
unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
unsigned long ha = 5381;
char const *ptr = *data;
@@ -382,8 +314,6 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
return ha;
}
-#endif /* XDL_FAST_HASH */
-
unsigned int xdl_hashbits(unsigned int size) {
unsigned int val = 1, bits = 0;