summary refs log tree commit diff
path: root/convert.c
diff options
context:
space:
mode:
Diffstat (limited to 'convert.c')
-rw-r--r-- convert.c 61
1 files changed, 61 insertions, 0 deletions
diff --git a/convert.c b/convert.c
index 21d5cb60da..0e7930c154 100644
--- a/convert.c
+++ b/convert.c
@@ -266,6 +266,64 @@ static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
}
+/*
+ * Check `data` (`len` bytes, taken from `path`) for detectable BOM
+ * problems with respect to the working-tree encoding named by `enc`.
+ * Encoding names that do not start with "UTF" are not checked.
+ *
+ * Returns 0 when no problem is detected. When a problem is found, advice
+ * is printed first; then die() is called if `die_on_error` is set,
+ * otherwise the result of error() is returned.
+ */
+static int validate_encoding(const char *path, const char *enc,
+			     const char *data, size_t len, int die_on_error)
+{
+	/* We only check for "UTF" here as UTF?? can be an alias for UTF-?? */
+	if (!istarts_with(enc, "UTF"))
+		return 0;
+
+	/*
+	 * Check for detectable errors in UTF encodings
+	 */
+	if (has_prohibited_utf_bom(enc, data, len)) {
+		const char *error_msg = _(
+			"BOM is prohibited in '%s' if encoded as %s");
+		/*
+		 * This advice is shown for UTF-??BE and UTF-??LE encodings.
+		 * We cut off the last two characters of the encoding name
+		 * to generate the encoding name suitable for BOMs.
+		 */
+		const char *advise_msg = _(
+			"The file '%s' contains a byte order "
+			"mark (BOM). Please use UTF-%s as "
+			"working-tree-encoding.");
+		char *upper = xstrdup_toupper(enc);
+		/* Fall back to the whole name so advise() never sees NULL. */
+		const char *stripped = upper;
+
+		upper[strlen(upper) - 2] = '\0';
+		/*
+		 * Strip "UTF" first and the dash separately: a dashless
+		 * alias such as UTF16BE has no "UTF-" prefix, and a failed
+		 * prefix match must not feed a NULL string into the next
+		 * skip_prefix() call.
+		 */
+		if (skip_prefix(upper, "UTF", &stripped))
+			skip_prefix(stripped, "-", &stripped);
+		advise(advise_msg, path, stripped);
+		free(upper);
+		if (die_on_error)
+			die(error_msg, path, enc);
+		else
+			return error(error_msg, path, enc);
+	} else if (is_missing_required_utf_bom(enc, data, len)) {
+		const char *error_msg = _(
+			"BOM is required in '%s' if encoded as %s");
+		const char *advise_msg = _(
+			"The file '%s' is missing a byte order "
+			"mark (BOM). Please use UTF-%sBE or UTF-%sLE "
+			"(depending on the byte order) as "
+			"working-tree-encoding.");
+		char *upper = xstrdup_toupper(enc);
+		/* Fall back to the whole name so advise() never sees NULL. */
+		const char *stripped = upper;
+
+		/* Same two-step strip as above, so "UTF16" works like "UTF-16". */
+		if (skip_prefix(upper, "UTF", &stripped))
+			skip_prefix(stripped, "-", &stripped);
+		advise(advise_msg, path, stripped, stripped);
+		free(upper);
+		if (die_on_error)
+			die(error_msg, path, enc);
+		else
+			return error(error_msg, path, enc);
+	}
+
+	return 0;
+}
+
+
static const char *default_encoding = "UTF-8";
static int encode_to_git(const char *path, const char *src, size_t src_len,
@@ -291,6 +349,9 @@ static int encode_to_git(const char *path, const char *src, size_t src_len,
if (!buf && !src)
return 1;
+ if (validate_encoding(path, enc, src, src_len, die_on_error))
+ return 0;
+
dst = reencode_string_len(src, src_len, default_encoding, enc,
&dst_len);
if (!dst) {