diff options
Diffstat (limited to 'convert.c')
-rw-r--r-- | convert.c | 113 |
1 files changed, 112 insertions, 1 deletions
@@ -7,6 +7,7 @@ #include "sigchain.h" #include "pkt-line.h" #include "sub-process.h" +#include "utf8.h" /* * convert.c - convert a file when checking it out and checking it in. @@ -265,6 +266,78 @@ static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats, } +static const char *default_encoding = "UTF-8"; + +static int encode_to_git(const char *path, const char *src, size_t src_len, + struct strbuf *buf, const char *enc, int conv_flags) +{ + char *dst; + int dst_len; + int die_on_error = conv_flags & CONV_WRITE_OBJECT; + + /* + * No encoding is specified or there is nothing to encode. + * Tell the caller that the content was not modified. + */ + if (!enc || (src && !src_len)) + return 0; + + /* + * Looks like we got called from "would_convert_to_git()". + * This means Git wants to know if it would encode (= modify!) + * the content. Let's answer with "yes", since an encoding was + * specified. + */ + if (!buf && !src) + return 1; + + dst = reencode_string_len(src, src_len, default_encoding, enc, + &dst_len); + if (!dst) { + /* + * We could add the blob "as-is" to Git. However, on checkout + * we would try to reencode to the original encoding. This + * would fail and we would leave the user with a messed-up + * working tree. Let's try to avoid this by screaming loud. + */ + const char* msg = _("failed to encode '%s' from %s to %s"); + if (die_on_error) + die(msg, path, enc, default_encoding); + else { + error(msg, path, enc, default_encoding); + return 0; + } + } + + strbuf_attach(buf, dst, dst_len, dst_len + 1); + return 1; +} + +static int encode_to_worktree(const char *path, const char *src, size_t src_len, + struct strbuf *buf, const char *enc) +{ + char *dst; + int dst_len; + + /* + * No encoding is specified or there is nothing to encode. + * Tell the caller that the content was not modified. + */ + if (!enc || (src && !src_len)) + return 0; + + dst = reencode_string_len(src, src_len, enc, default_encoding, + &dst_len); + if (!dst) { + error("failed to encode '%s' from %s to %s", + path, default_encoding, enc); + return 0; + } + + strbuf_attach(buf, dst, dst_len, dst_len + 1); + return 1; +} + static int crlf_to_git(const struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *buf, @@ -978,6 +1051,24 @@ static int ident_to_worktree(const char *path, const char *src, size_t len, return 1; } +static const char *git_path_check_encoding(struct attr_check_item *check) +{ + const char *value = check->value; + + if (ATTR_UNSET(value) || !strlen(value)) + return NULL; + + if (ATTR_TRUE(value) || ATTR_FALSE(value)) { + die(_("true/false are no valid working-tree-encodings")); + } + + /* Don't encode to the default encoding */ + if (same_encoding(value, default_encoding)) + return NULL; + + return value; +} + static enum crlf_action git_path_check_crlf(struct attr_check_item *check) { const char *value = check->value; @@ -1033,6 +1124,7 @@ struct conv_attrs { enum crlf_action attr_action; /* What attr says */ enum crlf_action crlf_action; /* When no attr is set, use core.autocrlf */ int ident; + const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */ }; static void convert_attrs(struct conv_attrs *ca, const char *path) @@ -1041,7 +1133,8 @@ static void convert_attrs(struct conv_attrs *ca, const char *path) if (!check) { check = attr_check_initl("crlf", "ident", "filter", - "eol", "text", NULL); + "eol", "text", "working-tree-encoding", + NULL); user_convert_tail = &user_convert; git_config(read_convert_config, NULL); } @@ -1064,6 +1157,7 @@ static void convert_attrs(struct conv_attrs *ca, const char *path) else if (eol_attr == EOL_CRLF) ca->crlf_action = CRLF_TEXT_CRLF; } + ca->working_tree_encoding = git_path_check_encoding(ccheck + 5); } else { ca->drv = NULL; ca->crlf_action = CRLF_UNDEFINED; @@ -1144,6 +1238,13 @@ int convert_to_git(const struct index_state *istate, src = dst->buf; len = dst->len; } + + ret |= encode_to_git(path, src, len, dst, ca.working_tree_encoding, conv_flags); + if (ret && dst) { + src = dst->buf; + len = dst->len; + } + if (!(conv_flags & CONV_EOL_KEEP_CRLF)) { ret |= crlf_to_git(istate, path, src, len, dst, ca.crlf_action, conv_flags); if (ret && dst) { @@ -1167,6 +1268,7 @@ void convert_to_git_filter_fd(const struct index_state *istate, if (!apply_filter(path, NULL, 0, fd, dst, ca.drv, CAP_CLEAN, NULL)) die("%s: clean filter '%s' failed", path, ca.drv->name); + encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags); crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags); ident_to_git(path, dst->buf, dst->len, dst, ca.ident); } @@ -1198,6 +1300,12 @@ static int convert_to_working_tree_internal(const char *path, const char *src, } } + ret |= encode_to_worktree(path, src, len, dst, ca.working_tree_encoding); + if (ret) { + src = dst->buf; + len = dst->len; + } + ret_filter = apply_filter( path, src, len, -1, dst, ca.drv, CAP_SMUDGE, dco); if (!ret_filter && ca.drv && ca.drv->required) @@ -1664,6 +1772,9 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s if (ca.drv && (ca.drv->process || ca.drv->smudge || ca.drv->clean)) return NULL; + if (ca.working_tree_encoding) + return NULL; + if (ca.crlf_action == CRLF_AUTO || ca.crlf_action == CRLF_AUTO_CRLF) return NULL; |