summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c82
1 files changed, 75 insertions, 7 deletions
diff --git a/utf8.c b/utf8.c
index b30790d043..520fbb4994 100644
--- a/utf8.c
+++ b/utf8.c
@@ -239,13 +239,6 @@ int is_utf8(const char *text)
return 1;
}
-/*
- * Append n copies of the byte c to sb: make room first, fill the new
- * tail, then record the longer length via strbuf_setlen().
- */
-static void strbuf_addchars(struct strbuf *sb, int c, size_t n)
-{
-	char *tail;
-
-	strbuf_grow(sb, n);
-	/* take the tail pointer only after growing the buffer */
-	tail = sb->buf + sb->len;
-	memset(tail, c, n);
-	strbuf_setlen(sb, sb->len + n);
-}
-
static void strbuf_add_indented_text(struct strbuf *buf, const char *text,
int indent, int indent2)
{
@@ -382,6 +375,9 @@ void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width,
dst += n;
}
+ if (src >= end)
+ break;
+
old = src;
n = utf8_width((const char**)&src, NULL);
if (!src) /* broken utf-8, do nothing */
@@ -565,3 +561,75 @@ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding)
return chrlen;
}
+
+/*
+ * Tell whether cp is one of the code points that next_hfs_char() drops
+ * from the stream. Returns nonzero for an ignored code point.
+ */
+static int hfs_ignored_codepoint(ucs_char_t cp)
+{
+	/*
+	 * U+200C..U+200F: ZERO WIDTH NON-JOINER, ZERO WIDTH JOINER,
+	 * LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
+	 */
+	if (0x200c <= cp && cp <= 0x200f)
+		return 1;
+
+	/*
+	 * U+202A..U+202E: LEFT-TO-RIGHT / RIGHT-TO-LEFT EMBEDDING,
+	 * POP DIRECTIONAL FORMATTING, LEFT-TO-RIGHT / RIGHT-TO-LEFT OVERRIDE
+	 */
+	if (0x202a <= cp && cp <= 0x202e)
+		return 1;
+
+	/*
+	 * U+206A..U+206F: INHIBIT / ACTIVATE SYMMETRIC SWAPPING,
+	 * INHIBIT / ACTIVATE ARABIC FORM SHAPING,
+	 * NATIONAL / NOMINAL DIGIT SHAPES
+	 */
+	if (0x206a <= cp && cp <= 0x206f)
+		return 1;
+
+	/* U+FEFF: ZERO WIDTH NO-BREAK SPACE */
+	return cp == 0xfeff;
+}
+
+/*
+ * Fetch the next code point from the UTF-8 stream at *in, skipping the
+ * code points that HFS+ would ignore. *in is advanced by
+ * pick_one_utf8_char() for every code point consumed, skipped ones
+ * included.
+ *
+ * The ignore list is deliberately incomplete: it covers just enough for
+ * is_hfs_dotgit() and must not be relied upon for anything else.
+ */
+static ucs_char_t next_hfs_char(const char **in)
+{
+	ucs_char_t cp;
+
+	do {
+		cp = pick_one_utf8_char(in, NULL);
+
+		/*
+		 * A NULL *in means the input was malformed UTF-8.
+		 * Technically HFS+ turns that into a percent-sequence,
+		 * but handing back 0 is sufficient for is_hfs_dotgit()
+		 * to conclude the name cannot be .git.
+		 */
+		if (!*in)
+			return 0;
+	} while (hfs_ignored_codepoint(cp));
+
+	return cp;
+}
+
+/*
+ * Check whether the leading component of path reads as ".git" once the
+ * code points skipped by next_hfs_char() are removed and the letters
+ * are compared without regard to ASCII case.
+ *
+ * Returns 1 when the filtered stream is '.', 'g'/'G', 'i'/'I', 't'/'T'
+ * and the code point after that is either 0 or a directory separator;
+ * returns 0 otherwise.
+ */
+int is_hfs_dotgit(const char *path)
+{
+	/*
+	 * HFS+ does a great deal more case-folding than this, but
+	 * matching both ASCII cases of each letter is enough to catch
+	 * anything that will convert to ".git".
+	 */
+	static const char lower[] = "git";
+	static const char upper[] = "GIT";
+	ucs_char_t c;
+	int i;
+
+	if (next_hfs_char(&path) != '.')
+		return 0;
+
+	/* bail out at the first mismatch, before reading any further */
+	for (i = 0; i < 3; i++) {
+		c = next_hfs_char(&path);
+		if (c != lower[i] && c != upper[i])
+			return 0;
+	}
+
+	/* the name must stop here: a 0 code point or a directory separator */
+	c = next_hfs_char(&path);
+	if (!c || is_dir_sep(c))
+		return 1;
+
+	return 0;
+}