summary refs log tree commit diff
path: root/utf8.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2014-12-17 11:46:57 -0800
committerJunio C Hamano <gitster@pobox.com>2014-12-17 11:46:57 -0800
commit77933f4449b8d6aa7529d627f3c7b55336f491db (patch)
tree91ce62aba00c10c6f2f4346f404f64dec00b5c3a /utf8.c
parent8213d87a83867809f490e8d683ebbaa857547189 (diff)
parent8e36a6d5752ced382ecd46a5f2cd94276f79451c (diff)
Sync with v2.1.4
* maint-2.1:
  Git 2.1.4
  Git 2.0.5
  Git 1.9.5
  Git 1.8.5.6
  fsck: complain about NTFS ".git" aliases in trees
  read-cache: optionally disallow NTFS .git variants
  path: add is_ntfs_dotgit() helper
  fsck: complain about HFS+ ".git" aliases in trees
  read-cache: optionally disallow HFS+ .git variants
  utf8: add is_hfs_dotgit() helper
  fsck: notice .git case-insensitively
  t1450: refactor ".", "..", and ".git" fsck tests
  verify_dotfile(): reject .git case-insensitively
  read-tree: add tests for confusing paths like ".." and ".git"
  unpack-trees: propagate errors adding entries to the index
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c64
1 files changed, 64 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 4541777949..9a3f4ad232 100644
--- a/utf8.c
+++ b/utf8.c
@@ -561,3 +561,67 @@ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding)
 
 	return chrlen;
 }
+
+/*
+ * Pick the next char from the stream, folding as an HFS+ filename comparison
+ * would. Note that this is _not_ complete by any means. It's just enough
+ * to make is_hfs_dotgit() work, and should not be used otherwise.
+ */
+static ucs_char_t next_hfs_char(const char **in)
+{
+	while (1) {
+		ucs_char_t out = pick_one_utf8_char(in, NULL);
+		/*
+		 * check for malformed utf8. Technically this
+		 * gets converted to a percent-sequence, but
+		 * returning 0 is good enough for is_hfs_dotgit
+		 * to realize it cannot be .git
+		 */
+		if (!*in)
+			return 0;
+
+		/* these code points are ignored completely */
+		switch (out) {
+		case 0x200c: /* ZERO WIDTH NON-JOINER */
+		case 0x200d: /* ZERO WIDTH JOINER */
+		case 0x200e: /* LEFT-TO-RIGHT MARK */
+		case 0x200f: /* RIGHT-TO-LEFT MARK */
+		case 0x202a: /* LEFT-TO-RIGHT EMBEDDING */
+		case 0x202b: /* RIGHT-TO-LEFT EMBEDDING */
+		case 0x202c: /* POP DIRECTIONAL FORMATTING */
+		case 0x202d: /* LEFT-TO-RIGHT OVERRIDE */
+		case 0x202e: /* RIGHT-TO-LEFT OVERRIDE */
+		case 0x206a: /* INHIBIT SYMMETRIC SWAPPING */
+		case 0x206b: /* ACTIVATE SYMMETRIC SWAPPING */
+		case 0x206c: /* INHIBIT ARABIC FORM SHAPING */
+		case 0x206d: /* ACTIVATE ARABIC FORM SHAPING */
+		case 0x206e: /* NATIONAL DIGIT SHAPES */
+		case 0x206f: /* NOMINAL DIGIT SHAPES */
+		case 0xfeff: /* ZERO WIDTH NO-BREAK SPACE */
+			continue;
+		}
+
+		/*
+		 * there's a great deal of other case-folding that occurs,
+		 * but this is enough to catch anything that will convert
+		 * to ".git"
+		 */
+		return tolower(out);
+	}
+}
+
+int is_hfs_dotgit(const char *path)
+{
+	ucs_char_t c;
+
+	if (next_hfs_char(&path) != '.' ||
+	    next_hfs_char(&path) != 'g' ||
+	    next_hfs_char(&path) != 'i' ||
+	    next_hfs_char(&path) != 't')
+		return 0;
+	c = next_hfs_char(&path);
+	if (c && !is_dir_sep(c))
+		return 0;
+
+	return 1;
+}