diff options
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 148 |
1 files changed, 117 insertions, 31 deletions
@@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "strbuf.h" #include "utf8.h" /* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */ @@ -162,7 +163,7 @@ static int git_wcwidth(ucs_char_t ch) * If the string was not a valid UTF-8, *start pointer is set to NULL * and the return value is undefined. */ -ucs_char_t pick_one_utf8_char(const char **start, size_t *remainder_p) +static ucs_char_t pick_one_utf8_char(const char **start, size_t *remainder_p) { unsigned char *s = (unsigned char *)*start; ucs_char_t ch; @@ -279,14 +280,41 @@ int is_utf8(const char *text) return 1; } -static void print_spaces(int count) +static void strbuf_addchars(struct strbuf *sb, int c, size_t n) { - static const char s[] = " "; - while (count >= sizeof(s)) { - fwrite(s, sizeof(s) - 1, 1, stdout); - count -= sizeof(s) - 1; + strbuf_grow(sb, n); + memset(sb->buf + sb->len, c, n); + strbuf_setlen(sb, sb->len + n); +} + +static void strbuf_add_indented_text(struct strbuf *buf, const char *text, + int indent, int indent2) +{ + if (indent < 0) + indent = 0; + while (*text) { + const char *eol = strchrnul(text, '\n'); + if (*eol == '\n') + eol++; + strbuf_addchars(buf, ' ', indent); + strbuf_add(buf, text, eol - text); + text = eol; + indent = indent2; } - fwrite(s, count, 1, stdout); +} + +static size_t display_mode_esc_sequence_len(const char *s) +{ + const char *p = s; + if (*p++ != '\033') + return 0; + if (*p++ != '[') + return 0; + while (isdigit(*p) || *p == ';') + p++; + if (*p++ != 'm') + return 0; + return p - s; } /* @@ -295,51 +323,96 @@ static void print_spaces(int count) * If indent is negative, assume that already -indent columns have been * consumed (and no extra indent is necessary for the first line). */ -int print_wrapped_text(const char *text, int indent, int indent2, int width) +void strbuf_add_wrapped_text(struct strbuf *buf, + const char *text, int indent1, int indent2, int width) { - int w = indent, assume_utf8 = is_utf8(text); - const char *bol = text, *space = NULL; + int indent, w, assume_utf8 = 1; + const char *bol, *space, *start = text; + size_t orig_len = buf->len; + + if (width <= 0) { + strbuf_add_indented_text(buf, text, indent1, indent2); + return; + } +retry: + bol = text; + w = indent = indent1; + space = NULL; if (indent < 0) { w = -indent; space = text; } for (;;) { - char c = *text; + char c; + size_t skip; + + while ((skip = display_mode_esc_sequence_len(text))) + text += skip; + + c = *text; if (!c || isspace(c)) { - if (w < width || !space) { + if (w <= width || !space) { const char *start = bol; + if (!c && text == start) + return; if (space) start = space; else - print_spaces(indent); - fwrite(start, text - start, 1, stdout); + strbuf_addchars(buf, ' ', indent); + strbuf_add(buf, start, text - start); if (!c) - return w; - else if (c == '\t') - w |= 0x07; + return; space = text; + if (c == '\t') + w |= 0x07; + else if (c == '\n') { + space++; + if (*space == '\n') { + strbuf_addch(buf, '\n'); + goto new_line; + } + else if (!isalnum(*space)) + goto new_line; + else + strbuf_addch(buf, ' '); + } w++; text++; } else { - putchar('\n'); +new_line: + strbuf_addch(buf, '\n'); text = bol = space + isspace(*space); space = NULL; w = indent = indent2; } continue; } - if (assume_utf8) + if (assume_utf8) { w += utf8_width(&text, NULL); - else { + if (!text) { + assume_utf8 = 0; + text = start; + strbuf_setlen(buf, orig_len); + goto retry; + } + } else { w++; text++; } } } +void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, + int indent, int indent2, int width) +{ + char *tmp = xstrndup(data, len); + strbuf_add_wrapped_text(buf, tmp, indent, indent2, width); + free(tmp); +} + int is_encoding_utf8(const char *name) { if (!name) @@ -349,29 +422,29 @@ int is_encoding_utf8(const char *name) return 0; } +int same_encoding(const char *src, const char *dst) +{ + if (is_encoding_utf8(src) && is_encoding_utf8(dst)) + return 1; + return !strcasecmp(src, dst); +} + /* * Given a buffer and its encoding, return it re-encoded * with iconv. If the conversion fails, returns NULL. */ #ifndef NO_ICONV -#ifdef OLD_ICONV +#if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6)) typedef const char * iconv_ibp; #else typedef char * iconv_ibp; #endif -char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) +char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv) { - iconv_t conv; - size_t insz, outsz, outalloc; + size_t outsz, outalloc; char *out, *outpos; iconv_ibp cp; - if (!in_encoding) - return NULL; - conv = iconv_open(out_encoding, in_encoding); - if (conv == (iconv_t) -1) - return NULL; - insz = strlen(in); outsz = insz; outalloc = outsz + 1; /* for terminating NUL */ out = xmalloc(outalloc); @@ -385,7 +458,6 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e size_t sofar; if (errno != E2BIG) { free(out); - iconv_close(conv); return NULL; } /* insz has remaining number of bytes. @@ -404,6 +476,20 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e break; } } + return out; +} + +char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) +{ + iconv_t conv; + char *out; + + if (!in_encoding) + return NULL; + conv = iconv_open(out_encoding, in_encoding); + if (conv == (iconv_t) -1) + return NULL; + out = reencode_string_iconv(in, strlen(in), conv); iconv_close(conv); return out; } |